-
Notifications
You must be signed in to change notification settings - Fork 4.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix regression test 46239 with Crossgen2 and improve runtime logging #51416
Changes from 1 commit
1ce126a
54389f7
1d21a98
9e4b351
fc6d7e1
84980df
ef85fac
783b02f
a2fff2b
464c9b1
46b95f0
11cc7df
531a460
c9297dd
d4da238
3df7a59
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
The regression test <code>src\tests\JIT\Regressions\JitBlue\Runtime_46239</code> exercises various interesting corner cases of type layout that weren't handled properly in Crossgen2 on x86 and ARM[32]. This change fixes the remaining deficiencies and it also adds provisions for better runtime logging upon type layout mismatches. Thanks Tomas
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -103,6 +103,7 @@ public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType defTyp | |
type.Context.Target.GetWellKnownTypeSize(type), | ||
type.Context.Target.GetWellKnownTypeAlignment(type), | ||
0, | ||
alignUpInstanceByteSize: true, | ||
out instanceByteSizeAndAlignment | ||
); | ||
|
||
|
@@ -304,17 +305,19 @@ protected virtual void FinalizeRuntimeSpecificStaticFieldLayout(TypeSystemContex | |
{ | ||
} | ||
|
||
protected static ComputedInstanceFieldLayout ComputeExplicitFieldLayout(MetadataType type, int numInstanceFields) | ||
protected virtual bool IsBlittableOrManagedSequential(TypeDesc type) => false; | ||
|
||
protected ComputedInstanceFieldLayout ComputeExplicitFieldLayout(MetadataType type, int numInstanceFields) | ||
{ | ||
// Instance slice size is the total size of instance not including the base type. | ||
// It is calculated as the field whose offset and size add to the greatest value. | ||
LayoutInt offsetBias = !type.IsValueType ? new LayoutInt(type.Context.Target.PointerSize) : LayoutInt.Zero; | ||
LayoutInt cumulativeInstanceFieldPos = | ||
type.HasBaseType && !type.IsValueType ? type.BaseType.InstanceByteCount : LayoutInt.Zero; | ||
LayoutInt instanceSize = cumulativeInstanceFieldPos; | ||
cumulativeInstanceFieldPos -= offsetBias; | ||
|
||
var layoutMetadata = type.GetClassLayout(); | ||
LayoutInt instanceSize = cumulativeInstanceFieldPos + new LayoutInt(layoutMetadata.Size); | ||
|
||
int packingSize = ComputePackingSize(type, layoutMetadata); | ||
LayoutInt largestAlignmentRequired = LayoutInt.One; | ||
|
@@ -349,6 +352,7 @@ protected static ComputedInstanceFieldLayout ComputeExplicitFieldLayout(Metadata | |
); | ||
if (needsToBeAligned) | ||
{ | ||
largestAlignmentRequired = LayoutInt.Max(largestAlignmentRequired, type.Context.Target.LayoutPointerSize); | ||
int offsetModulo = computedOffset.AsInt % type.Context.Target.PointerSize; | ||
if (offsetModulo != 0) | ||
{ | ||
|
@@ -365,7 +369,12 @@ protected static ComputedInstanceFieldLayout ComputeExplicitFieldLayout(Metadata | |
} | ||
|
||
SizeAndAlignment instanceByteSizeAndAlignment; | ||
var instanceSizeAndAlignment = ComputeInstanceSize(type, instanceSize, largestAlignmentRequired, layoutMetadata.Size, out instanceByteSizeAndAlignment); | ||
var instanceSizeAndAlignment = ComputeInstanceSize(type, | ||
instanceSize, | ||
largestAlignmentRequired, | ||
layoutMetadata.Size, | ||
alignUpInstanceByteSize: IsBlittableOrManagedSequential(type), | ||
out instanceByteSizeAndAlignment); | ||
|
||
ComputedInstanceFieldLayout computedLayout = new ComputedInstanceFieldLayout(); | ||
computedLayout.FieldAlignment = instanceSizeAndAlignment.Alignment; | ||
|
@@ -375,7 +384,6 @@ protected static ComputedInstanceFieldLayout ComputeExplicitFieldLayout(Metadata | |
computedLayout.Offsets = offsets; | ||
computedLayout.LayoutAbiStable = layoutAbiStable; | ||
|
||
|
||
ExplicitLayoutValidator.Validate(type, computedLayout); | ||
|
||
return computedLayout; | ||
|
@@ -422,7 +430,13 @@ protected ComputedInstanceFieldLayout ComputeSequentialFieldLayout(MetadataType | |
} | ||
|
||
SizeAndAlignment instanceByteSizeAndAlignment; | ||
var instanceSizeAndAlignment = ComputeInstanceSize(type, cumulativeInstanceFieldPos + offsetBias, largestAlignmentRequirement, layoutMetadata.Size, out instanceByteSizeAndAlignment); | ||
var instanceSizeAndAlignment = ComputeInstanceSize( | ||
type, | ||
cumulativeInstanceFieldPos + offsetBias, | ||
largestAlignmentRequirement, | ||
layoutMetadata.Size, | ||
alignUpInstanceByteSize: true, | ||
out instanceByteSizeAndAlignment); | ||
|
||
ComputedInstanceFieldLayout computedLayout = new ComputedInstanceFieldLayout(); | ||
computedLayout.FieldAlignment = instanceSizeAndAlignment.Alignment; | ||
|
@@ -704,7 +718,12 @@ protected ComputedInstanceFieldLayout ComputeAutoFieldLayout(MetadataType type, | |
} | ||
|
||
SizeAndAlignment instanceByteSizeAndAlignment; | ||
var instanceSizeAndAlignment = ComputeInstanceSize(type, cumulativeInstanceFieldPos + offsetBias, minAlign, 0/* specified field size unused */, out instanceByteSizeAndAlignment); | ||
var instanceSizeAndAlignment = ComputeInstanceSize(type, | ||
cumulativeInstanceFieldPos + offsetBias, | ||
minAlign, | ||
classLayoutSize: 0, | ||
alignUpInstanceByteSize: true, | ||
out instanceByteSizeAndAlignment); | ||
|
||
ComputedInstanceFieldLayout computedLayout = new ComputedInstanceFieldLayout(); | ||
computedLayout.FieldAlignment = instanceSizeAndAlignment.Alignment; | ||
|
@@ -829,7 +848,7 @@ private static int ComputePackingSize(MetadataType type, ClassLayoutMetadata lay | |
return layoutMetadata.PackingSize; | ||
} | ||
|
||
private static SizeAndAlignment ComputeInstanceSize(MetadataType type, LayoutInt instanceSize, LayoutInt alignment, int classLayoutSize, out SizeAndAlignment byteCount) | ||
private static SizeAndAlignment ComputeInstanceSize(MetadataType type, LayoutInt instanceSize, LayoutInt alignment, int classLayoutSize, bool alignUpInstanceByteSize, out SizeAndAlignment byteCount) | ||
{ | ||
SizeAndAlignment result; | ||
|
||
|
@@ -857,7 +876,9 @@ private static SizeAndAlignment ComputeInstanceSize(MetadataType type, LayoutInt | |
{ | ||
if (type.IsValueType) | ||
{ | ||
instanceSize = LayoutInt.AlignUp(instanceSize, alignment, target); | ||
instanceSize = LayoutInt.AlignUp(instanceSize, | ||
alignUpInstanceByteSize ? alignment : LayoutInt.Min(alignment, target.LayoutPointerSize), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I assume that the
The algorithm will compute the instanceSize of this struct as 12. Is that correct? It looks like a bug in the type loader. The instance size of this struct on ARM should be 16, so that the long field is aligned at 8 bytes, so that potential atomic 64-bit operations work fine on it. It would be better to fix the type loader instead of replicating the bug here. |
||
target); | ||
} | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I suspect the default here should be true. Blittable types have a requirement that they are handled in the same way on all runtimes as they describe native data structures, and by defaulting to false, this leads for a way for the algorithm to misbehave on Native AOT.
I'd prefer to see this either be default true, or make it an abstract method and force the NativeAOT compiler to deal with this when it gets there.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@MichalStrehovsky could you comment here on what you'd like to see.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe let's just move
IsManagedSequentialType
to here and then this doesn't even need to be virtual and can just do the same thing?We didn't have trouble with this in .NET Native (where this is missing) so I don't think it matters, but we ported over so many CoreCLR weirdness's to this algorithm over time that I no longer understand how layout is done and would prefer all bugs in it to be also surfaced in crossgen2 (and not have NativeAOT specific bugs that we need to troubleshoot there).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@MichalStrehovsky - Thanks for your feedback. I'm trying to modify the change based on your suggestion but I'm hitting a layering problem - IsBlittable is part of MarshalUtils and that's not in ILCompiler.TypeSystem.ReadyToRun and it seems to me that moving it over brings various bits of interop code into ILCompiler.TypeSystem.ReadyToRun which I suspect to be undesirable as my current understanding is that NativeAOT doesn't use exactly identical interop as Crossgen2 / CoreCLR. What do you think would be the ideal way to resolve this discrepancy?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
MarshalUtils are part of ILCompiler.TypeSystem on the NativeAOT side, so I'm not opposed to moving it (and things it depends on) into ILCompiler.TypeSystem.ReadyToRun as well. The split is rather arbitrary and interop is getting tangled into the type system the same way as is on CoreCLR. It was a nice dream to have a separation of concerns there.
https://github.com/dotnet/runtimelab/blob/c8e3158b0981419527b18f3c0259bcf02103e054/src/coreclr/tools/aot/ILCompiler.TypeSystem/ILCompiler.TypeSystem.csproj#L528-L530
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would there be a way to adjust the runtime side of the things to avoid this mess?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Well, for now I think I have at least managed to make something like an inventory of the various runtime behaviors by implementing them in Crossgen2. In theory we may be able to simplify some of the stuff, I however suspect that some of the subtle distinctions amount to tiny memory use optimizations on the runtime side i.e. something that's very tricky to undo as it may incur working set regressions in arbitrary scenarios.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I doubt that it would show up on the radar. The behavior of the field layout algorithm is completely accidental in number of cases, and in fact there are many known situations where it is inefficient for no good reason.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Examples:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You're probably the biggest expert in this field so I have no desire to doubt your assessment. My only point is that further independent development of runtime vs. Crossgen2 side of this equation is giving me goosebumps; my current expectation is that we'll ultimately go down the path you yourself suggested in the past - having a way to communicate the field layouts from the compiler to the runtime, that will give us room for experimentation with potential packing optimizations and rid us of the burdensome need to keep both codebases in 100% sync.