-
Notifications
You must be signed in to change notification settings - Fork 4.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Promote Tier0 methods with loops to InstrumentedTier0 #81051
Conversation
Tagging subscribers to this area: @JulieLeeMSFT, @jakobbotsch, @kunalspathak Issue Details: #70941 regressed one specific scenario, namely: cold methods with loops will never benefit from PGO because their OSR'd version will not have any profile data, since Tier0 didn't collect anything. In #70941 I was hoping that such methods themselves would be promoted, but I forgot about truly cold methods. Since we can't have multiple OSR versions, we need to "self-promote" such methods to instrumentation if the JIT finds loops in them. Example: class Program : IDisposable
{
static void Main(string[] args) => ColdMethodWithHotLoop(new Program());
[MethodImpl(MethodImplOptions.NoInlining)]
static void ColdMethodWithHotLoop(IDisposable d)
{
for (int i = 0; i < 1000000; i++)
d?.Dispose();
}
public void Dispose() {}
} With DynamicPGO=1 and this PR we now see the following codegen for ColdMethodWithHotLoop: ; Assembly listing for method ConsoleApp1.Program:ColdMethodWithHotLoop(System.IDisposable)
; Emitting BLENDED_CODE for X64 CPU with AVX - Windows
; Tier-0 compilation
; MinOpts code
; instrumented for collecting profile data
; rbp based frame
; fully interruptible
; Final local variable assignments
;
; V00 arg0 [V00 ] ( 1, 1 ) ref -> [rbp+10H] do-not-enreg[] class-hnd
; V01 loc0 [V01 ] ( 1, 1 ) int -> [rbp-3CH] do-not-enreg[] must-init
; V02 OutArgs [V02 ] ( 1, 1 ) lclBlk (32) [rsp+00H] do-not-enreg[] "OutgoingArgSpace"
; V03 tmp1 [V03 ] ( 1, 1 ) ref -> [rbp-48H] do-not-enreg[] must-init "handle histogram profile tmp"
; V04 tmp2 [V04 ] ( 1, 1 ) int -> [rbp-50H] do-not-enreg[X] addr-exposed "patchpoint counter"
; V05 tmp3 [V05 ] ( 1, 1 ) ref -> [rbp-58H] do-not-enreg[] must-init "argument with side effect"
;
; Lcl frame size = 128
G_M15056_IG01: ;; offset=0000H
55 push rbp
4881EC80000000 sub rsp, 128
488DAC2480000000 lea rbp, [rsp+80H]
33C0 xor eax, eax
488945A8 mov qword ptr [rbp-58H], rax
C5D857E4 vxorps xmm4, xmm4
C5F97F65B0 vmovdqa xmmword ptr [rbp-50H], xmm4
488945C0 mov qword ptr [rbp-40H], rax
48894D10 mov gword ptr [rbp+10H], rcx
;; size=39 bbWeight=1 PerfScore 7.33
G_M15056_IG02: ;; offset=0027H
33C9 xor ecx, ecx
894DC4 mov dword ptr [rbp-3CH], ecx
C745B0E8030000 mov dword ptr [rbp-50H], 0x3E8
EB4F jmp SHORT G_M15056_IG05
;; size=14 bbWeight=1 PerfScore 4.25
G_M15056_IG03: ;; offset=0035H
48837D1000 cmp gword ptr [rbp+10H], 0
743A je SHORT G_M15056_IG04
FF0596C6FFFF inc dword ptr [(reloc 0x7ffbb329d968)]
488B4D10 mov rcx, gword ptr [rbp+10H]
48894DB8 mov gword ptr [rbp-48H], rcx
488B4DB8 mov rcx, gword ptr [rbp-48H]
48BA70D929B3FB7F0000 mov rdx, 0x7FFBB329D970
E813B1935E call CORINFO_HELP_CLASSPROFILE32
488B4DB8 mov rcx, gword ptr [rbp-48H]
48894DA8 mov gword ptr [rbp-58H], rcx
488B4DA8 mov rcx, gword ptr [rbp-58H]
49BBA00282B2FB7F0000 mov r11, 0x7FFBB28202A0 ; code for System.IDisposable:Dispose():this
41FF13 call [r11]System.IDisposable:Dispose():this
;; size=65 bbWeight=1 PerfScore 16.50
G_M15056_IG04: ;; offset=0076H
FF05ACC6FFFF inc dword ptr [(reloc 0x7ffbb329d9b8)]
8B45C4 mov eax, dword ptr [rbp-3CH]
FFC0 inc eax
8945C4 mov dword ptr [rbp-3CH], eax
;; size=14 bbWeight=1 PerfScore 5.25
G_M15056_IG05: ;; offset=0084H
8B4DB0 mov ecx, dword ptr [rbp-50H]
FFC9 dec ecx
894DB0 mov dword ptr [rbp-50H], ecx
837DB000 cmp dword ptr [rbp-50H], 0
7F0E jg SHORT G_M15056_IG07
;; size=14 bbWeight=1 PerfScore 5.25
G_M15056_IG06: ;; offset=0092H
488D4DB0 lea rcx, [rbp-50H]
BA11000000 mov edx, 17
E8509F935E call CORINFO_HELP_PATCHPOINT
;; size=14 bbWeight=0.01 PerfScore 0.02
G_M15056_IG07: ;; offset=00A0H
817DC440420F00 cmp dword ptr [rbp-3CH], 0xF4240
7C8C jl SHORT G_M15056_IG03
FF057DC6FFFF inc dword ptr [(reloc 0x7ffbb329d9bc)]
;; size=15 bbWeight=1 PerfScore 6.00
G_M15056_IG08: ;; offset=00AFH
4881C480000000 add rsp, 128
5D pop rbp
C3 ret
;; size=9 bbWeight=1 PerfScore 1.75
; Total bytes of code 184, prolog size 39, PerfScore 64.75, instruction count 44, allocated bytes for code 184 (MethodHash=baa9c52f) for method ConsoleApp1.Program:ColdMethodWithHotLoop(System.IDisposable)
; ============================================================
; Assembly listing for method ConsoleApp1.Program:ColdMethodWithHotLoop(System.IDisposable)
; Emitting BLENDED_CODE for X64 CPU with AVX - Windows
; Tier-1 compilation
; OSR variant for entry point 0x11
; optimized code
; optimized using profile data
; rsp based frame
; fully interruptible
; with Dynamic PGO: edge weights are invalid, and fgCalledCount is 9999
; 0 inlinees with PGO data; 1 single block inlinees; 0 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T00] ( 7, 4.02) ref -> rsi class-hnd single-def
; V01 loc0 [V01,T01] ( 7, 4 ) int -> rdi
; V02 OutArgs [V02 ] ( 1, 1 ) lclBlk (32) [rsp+00H] "OutgoingArgSpace"
;* V03 tmp1 [V03 ] ( 0, 0 ) ref -> zero-ref class-hnd exact "guarded devirt this exact temp"
;
; Lcl frame size = 40
G_M15056_IG01: ;; offset=0000H
4883EC38 sub rsp, 56
4889BC24B8000000 mov qword ptr [rsp+B8H], rdi
4889B424B0000000 mov qword ptr [rsp+B0H], rsi
488BB424D0000000 mov rsi, gword ptr [rsp+D0H]
8BBC2484000000 mov edi, dword ptr [rsp+84H]
;; size=35 bbWeight=1 PerfScore 6.25
G_M15056_IG02: ;; offset=0023H
81FF40420F00 cmp edi, 0xF4240
7D3E jge SHORT G_M15056_IG06
4885F6 test rsi, rsi
741B je SHORT G_M15056_IG04
48B928AFD1B2FB7F0000 mov rcx, 0x7FFBB2D1AF28 ; ConsoleApp1.Program
48390E cmp qword ptr [rsi], rcx
750C jne SHORT G_M15056_IG04
;; size=28 bbWeight=1 PerfScore 6.75
G_M15056_IG03: ;; offset=003FH
FFC7 inc edi
81FF40420F00 cmp edi, 0xF4240
7D20 jge SHORT G_M15056_IG06
EBF4 jmp SHORT G_M15056_IG03
;; size=12 bbWeight=0.99 PerfScore 3.46
G_M15056_IG04: ;; offset=004BH
4885F6 test rsi, rsi
740F je SHORT G_M15056_IG05
48B928AFD1B2FB7F0000 mov rcx, 0x7FFBB2D1AF28 ; ConsoleApp1.Program
48390E cmp qword ptr [rsi], rcx
7515 jne SHORT G_M15056_IG07
;; size=20 bbWeight=0.01 PerfScore 0.06
G_M15056_IG05: ;; offset=005FH
FFC7 inc edi
81FF40420F00 cmp edi, 0xF4240
7CE2 jl SHORT G_M15056_IG04
;; size=10 bbWeight=0.01 PerfScore 0.02
G_M15056_IG06: ;; offset=0069H
4881C4B0000000 add rsp, 176
5E pop rsi
5F pop rdi
5D pop rbp
C3 ret
;; size=11 bbWeight=0 PerfScore 0.00
G_M15056_IG07: ;; offset=0074H
488BCE mov rcx, rsi
49BBA80282B2FB7F0000 mov r11, 0x7FFBB28202A8 ; code for System.IDisposable:Dispose():this
41FF13 call [r11]System.IDisposable:Dispose():this
EBD9 jmp SHORT G_M15056_IG05
;; size=18 bbWeight=0 PerfScore 0.00
; Total bytes of code 134, prolog size 35, PerfScore 29.94, instruction count 33, allocated bytes for code 134 (MethodHash=baa9c52f) for method ConsoleApp1.Program:ColdMethodWithHotLoop(System.IDisposable)
; ============================================================
|
@AndyAyersMS PTAL I've also updated the doc and used a simpler example, because the previous one was no longer relevant and was unnecessarily complicated. |
/azp list |
This comment was marked as outdated.
This comment was marked as outdated.
/azp run runtime-coreclr pgo, runtime-coreclr libraries-pgo, runtime-coreclr pgostress |
Azure Pipelines successfully started running 3 pipeline(s). |
@AndyAyersMS ping |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems like perhaps you should also change the JIT GUID, though perhaps that would only impact SPMI replays of older JITs on newer collections.
…ethods-with-loops # Conflicts: # src/coreclr/inc/jiteeversionguid.h
#70941 regressed one specific scenario, namely: cold methods with loops will never benefit from PGO because their OSR'd version will not have any profile data, since Tier0 didn't collect anything. In #70941 I was hoping that such methods themselves would be promoted, but I forgot about truly cold methods. Since we can't have multiple OSR versions, we need to "self-promote" such methods to instrumentation if the JIT finds loops in them.
Example:
With
DynamicPGO=1
and this PR we now see the following codegen for ColdMethodWithHotLoop
: