Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Avx10v1 to the runtime #99784

Merged
merged 10 commits into from
Mar 21, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/coreclr/inc/clrconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512F, W("EnableAVX512F
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512F_VL, W("EnableAVX512F_VL"), 1, "Allows AVX512F_VL+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512VBMI, W("EnableAVX512VBMI"), 1, "Allows AVX512VBMI+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512VBMI_VL, W("EnableAVX512VBMI_VL"), 1, "Allows AVX512VBMI_VL+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX10v1, W("EnableAVX10v1"), 1, "Allows AVX10v1+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNI, W("EnableAVXVNNI"), 1, "Allows AVXVNNI+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI1, W("EnableBMI1"), 1, "Allows BMI1+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI2, W("EnableBMI2"), 1, "Allows BMI2+ hardware intrinsics to be disabled")
Expand Down
238 changes: 174 additions & 64 deletions src/coreclr/inc/corinfoinstructionset.h

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions src/coreclr/inc/jiteeversionguid.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
#define GUID_DEFINED
#endif // !GUID_DEFINED

constexpr GUID JITEEVersionIdentifier = { /* 86eab154-5d93-4fad-bc07-e94fd9268b70 */
0x86eab154,
0x5d93,
0x4fad,
{0xbc, 0x07, 0xe9, 0x4f, 0xd9, 0x26, 0x8b, 0x70}
constexpr GUID JITEEVersionIdentifier = { /* 521f5004-fdc8-40e9-9568-5c379fa121f2 */
0x521f5004,
0xfdc8,
0x40e9,
{0x95, 0x68, 0x5c, 0x37, 0x9f, 0xa1, 0x21, 0xf2}
};

//////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/inc/readytoruninstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ enum ReadyToRunInstructionSet
READYTORUN_INSTRUCTION_VectorT512=41,
READYTORUN_INSTRUCTION_Rcpc2=42,
READYTORUN_INSTRUCTION_Sve=43,
READYTORUN_INSTRUCTION_Avx10v1=44,
READYTORUN_INSTRUCTION_Avx10v1_V256=45,
READYTORUN_INSTRUCTION_Avx10v1_V512=46,

};

Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/jitconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ CONFIG_INTEGER(EnableAVX512F, W("EnableAVX512F"), 1) /
CONFIG_INTEGER(EnableAVX512F_VL, W("EnableAVX512F_VL"), 1) // Allows AVX512F+ AVX512VL+ hardware intrinsics to be disabled
CONFIG_INTEGER(EnableAVX512VBMI, W("EnableAVX512VBMI"), 1) // Allows AVX512VBMI+ hardware intrinsics to be disabled
CONFIG_INTEGER(EnableAVX512VBMI_VL, W("EnableAVX512VBMI_VL"), 1) // Allows AVX512VBMI_VL+ hardware intrinsics to be disabled
CONFIG_INTEGER(EnableAVX10v1, W("EnableAVX10v1"), 1) // Allows AVX10v1+ hardware intrinsics to be disabled
CONFIG_INTEGER(EnableAVXVNNI, W("EnableAVXVNNI"), 1) // Allows AVXVNNI+ hardware intrinsics to be disabled
CONFIG_INTEGER(EnableBMI1, W("EnableBMI1"), 1) // Allows BMI1+ hardware intrinsics to be disabled
CONFIG_INTEGER(EnableBMI2, W("EnableBMI2"), 1) // Allows BMI2+ hardware intrinsics to be disabled
Expand Down
15 changes: 15 additions & 0 deletions src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ private static class XArchIntrinsicConstants
public const int VectorT128 = 0x4000000;
public const int VectorT256 = 0x8000000;
public const int VectorT512 = 0x10000000;
public const int Avx10v1 = 20000000;
public const int Avx10v1_v256 = 40000000;
public const int Avx10v1_v512 = 50000000;
Copy link
Member

@tannergooding tannergooding Mar 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These should be hex and the last one should be 8, not 5.

Suggested change
public const int Avx10v1 = 20000000;
public const int Avx10v1_v256 = 40000000;
public const int Avx10v1_v512 = 50000000;
public const int Avx10v1 = 0x20000000;
public const int Avx10v1_v256 = 0x40000000;
public const int Avx10v1_v512 = 0x80000000;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

public const int Avx10v1_v512 = 0x80000000;

This line will raise an error that 0x80000000 cannot be implicitly converted to int, although it can be fixed by explicitly defining as:
public const int Avx10v1_v512 = -2147483648;, I wonder if this is a proper fix (I suppose it should be good in terms of the number itself as it is just a flag bit), considering it has reached the max number of ISA this design can hold, and we are expecting some more ISAs coming in the future, e.g. higher version of Avx10 and APX, etc.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Either public const int Avx10v1_v512 = -2147483648; or public const int Avx10v1_v512 = unchecked((int)0x80000000)); should be fine

Changing it to uint or ulong to hold more bits might also be appropriate, it's probably a better question for @MichalStrehovsky on how he wants it handled long term for this part of the codebase.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changing it to uint or ulong to hold more bits might also be appropriate, it's probably a better question for @MichalStrehovsky on how he wants it handled long term for this part of the codebase.

I don't have an opinion; whatever works. At this pace we may run out of ulong bits too. I don't have a good understanding for why we need so many bits: the VL suffixes look redundant; the various VectorT are also redundant with other flags we set at the same time, etc.

I was involved in the design for this up to AVX2/BMI but haven't really though about it since. The old design doesn't scale if we use bits at this pace because we can't make SSE3/SSE41/etc. baseline yet (and free up bits that way), but we may already need new bits. It would be better for someone who actually understands this to design a shape that will work.

There is an advantage if this can fit in 32bits (run-time check when IsSupported is done dynamically at runtime use the same enum and 32bits are simpler), but we can also introduce yet another enum for those purposes that doesn't waste bits, or have two ints, or something like that. If we can reclaim those 7 bits, it might be enough breathing room to keep using this enum and 32bits.


public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags)
{
Expand Down Expand Up @@ -124,6 +127,12 @@ public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags)
builder.AddSupportedInstructionSet("avx512vbmi_vl");
if ((flags & Serialize) != 0)
builder.AddSupportedInstructionSet("serialize");
if ((flags & Avx10v1) != 0)
builder.AddSupportedInstructionSet("avx10v1");
if ((flags & Avx10v1_v256) != 0)
builder.AddSupportedInstructionSet("avx10v1_v256");
if ((flags & Avx10v1_v512) != 0)
builder.AddSupportedInstructionSet("avx10v1_v512");
}

public static int FromInstructionSet(InstructionSet instructionSet)
Expand Down Expand Up @@ -187,6 +196,12 @@ public static int FromInstructionSet(InstructionSet instructionSet)
InstructionSet.X64_AVX512VBMI_VL_X64 => Avx512Vbmi_vl,
InstructionSet.X64_X86Serialize => Serialize,
InstructionSet.X64_X86Serialize_X64 => Serialize,
InstructionSet.X64_AVX10v1 => Avx10v1,
InstructionSet.X64_AVX10v1_X64 => Avx10v1,
InstructionSet.X64_AVX10v1_V256 => Avx10v1_v256,
InstructionSet.X64_AVX10v1_V256_X64 => Avx10v1_v256,
InstructionSet.X64_AVX10v1_V512 => Avx10v1_v512,
InstructionSet.X64_AVX10v1_V512_X64 => Avx10v1_v512,

// Baseline ISAs - they're always available
InstructionSet.X64_SSE => 0,
Expand Down
24 changes: 24 additions & 0 deletions src/coreclr/tools/Common/InstructionSetHelpers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,30 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx512vbmi");
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx512vbmi_vl");
}

Debug.Assert(InstructionSet.X64_AVX10v1_V256 == InstructionSet.X86_AVX10v1_V256);
if (supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX10v1_V256))
{
Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX10v1));
Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX2));
Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_FMA));
}

Debug.Assert(InstructionSet.X64_AVX10v1_V512 == InstructionSet.X86_AVX10v1_V512);
if (supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
{
Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX10v1_V256));
Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512F));
Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512F_VL));
Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512BW));
Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512BW_VL));
Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512CD));
Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512CD_VL));
Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512DQ));
Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512DQ_VL));
Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512VBMI));
Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512VBMI_VL));
}
Copy link
Member

@tannergooding tannergooding Mar 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure we need these.

The main intent of these paths is to add optimisticInstructionSet that is ones where a cached CPUID check is beneficial for AOT code. I don't think that's worth doing for Avx10v1 here since it would only be applicable if Avx512F was there and that itself requires VL support.

The Debug.Assert are then there to setup the expectation that Avx512F was only supported when BW+CD+DQ+VL were also supported, even though they are technically supersets of Avx512F. We don't have a similar consideration here so no Debug.Assert is needed

}
else if (targetArchitecture == TargetArchitecture.ARM64)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ public enum ReadyToRunInstructionSet
VectorT512=41,
Rcpc2=42,
Sve=43,
Avx10v1=44,
tannergooding marked this conversation as resolved.
Show resolved Hide resolved
Avx10v1_V256=45,
Avx10v1_V512=46,

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,12 @@ public static class ReadyToRunInstructionSetHelper
case InstructionSet.X64_AVX512VBMI_X64: return ReadyToRunInstructionSet.Avx512Vbmi;
case InstructionSet.X64_AVX512VBMI_VL: return ReadyToRunInstructionSet.Avx512Vbmi_VL;
case InstructionSet.X64_AVX512VBMI_VL_X64: return ReadyToRunInstructionSet.Avx512Vbmi_VL;
case InstructionSet.X64_AVX10v1: return ReadyToRunInstructionSet.Avx10v1;
case InstructionSet.X64_AVX10v1_X64: return ReadyToRunInstructionSet.Avx10v1;
case InstructionSet.X64_AVX10v1_V256: return ReadyToRunInstructionSet.Avx10v1_V256;
case InstructionSet.X64_AVX10v1_V256_X64: return ReadyToRunInstructionSet.Avx10v1_V256;
case InstructionSet.X64_AVX10v1_V512: return ReadyToRunInstructionSet.Avx10v1_V512;
case InstructionSet.X64_AVX10v1_V512_X64: return ReadyToRunInstructionSet.Avx10v1_V512;
case InstructionSet.X64_VectorT128: return ReadyToRunInstructionSet.VectorT128;
case InstructionSet.X64_VectorT256: return ReadyToRunInstructionSet.VectorT256;
case InstructionSet.X64_VectorT512: return ReadyToRunInstructionSet.VectorT512;
Expand Down Expand Up @@ -191,6 +197,12 @@ public static class ReadyToRunInstructionSetHelper
case InstructionSet.X86_AVX512VBMI_X64: return null;
case InstructionSet.X86_AVX512VBMI_VL: return ReadyToRunInstructionSet.Avx512Vbmi_VL;
case InstructionSet.X86_AVX512VBMI_VL_X64: return null;
case InstructionSet.X86_AVX10v1: return ReadyToRunInstructionSet.Avx10v1;
case InstructionSet.X86_AVX10v1_X64: return null;
case InstructionSet.X86_AVX10v1_V256: return ReadyToRunInstructionSet.Avx10v1_V256;
case InstructionSet.X86_AVX10v1_V256_X64: return null;
case InstructionSet.X86_AVX10v1_V512: return ReadyToRunInstructionSet.Avx10v1_V512;
case InstructionSet.X86_AVX10v1_V512_X64: return null;
case InstructionSet.X86_VectorT128: return ReadyToRunInstructionSet.VectorT128;
case InstructionSet.X86_VectorT256: return ReadyToRunInstructionSet.VectorT256;
case InstructionSet.X86_VectorT512: return ReadyToRunInstructionSet.VectorT512;
Expand Down
Loading