Skip to content

Commit

Permalink
Arm64/Sve: Implement LoadVector*NonFaultingSignExtendTo* APIs (dotnet…
Browse files Browse the repository at this point in the history
…#102903)

* Add *NonFaultingSignExtendTo* APIs

* Map APIs to instructions

* add test coverage

* Rename some of the test names

* fix test cases

* fix the codegen

* ignore gpr

* fix test template

* Comment ConditionalSelect_FalseOp()

* review feedback
  • Loading branch information
kunalspathak committed May 31, 2024
1 parent 3750ac5 commit 214cbbf
Show file tree
Hide file tree
Showing 8 changed files with 694 additions and 36 deletions.
3 changes: 3 additions & 0 deletions src/coreclr/jit/emitarm64sve.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4383,6 +4383,9 @@ void emitter::emitInsSve_R_R_R(instruction ins,
case INS_sve_ld1w:
case INS_sve_ld1sw:
case INS_sve_ld1d:
case INS_sve_ldnf1sh:
case INS_sve_ldnf1sw:
case INS_sve_ldnf1sb:
return emitIns_R_R_R_I(ins, size, reg1, reg2, reg3, 0, opt);

default:
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
opt);
}
}
else if (targetReg == embMaskOp1Reg)
else if (emitter::isVectorRegister(embMaskOp1Reg) && (targetReg == embMaskOp1Reg))
{
// target != falseValue, but we do not want to overwrite target with `embMaskOp1Reg`.
// We will first do the predicate operation and then do conditionalSelect inactive
Expand Down
12 changes: 12 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,24 @@ HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt64,
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToUInt16, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToUInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorInt16NonFaultingSignExtendToInt32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorInt16NonFaultingSignExtendToInt64, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sh, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorInt16NonFaultingSignExtendToUInt32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sh, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorInt16NonFaultingSignExtendToUInt64, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sh, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorInt16SignExtendToInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorInt16SignExtendToInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorInt16SignExtendToUInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorInt16SignExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorInt32NonFaultingSignExtendToInt64, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorInt32NonFaultingSignExtendToUInt64, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sw, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorInt32SignExtendToInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorInt32SignExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sw, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorSByteNonFaultingSignExtendToInt16, -1, -1, false, {INS_invalid, INS_invalid, INS_sve_ldnf1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorSByteNonFaultingSignExtendToInt32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorSByteNonFaultingSignExtendToInt64, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorSByteNonFaultingSignExtendToUInt16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorSByteNonFaultingSignExtendToUInt32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorSByteNonFaultingSignExtendToUInt64, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sb, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendToInt16, -1, 2, false, {INS_invalid, INS_invalid, INS_sve_ld1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendToInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendToInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1890,6 +1890,108 @@ internal Arm64() { }
/// </summary>
public static unsafe (Vector<ulong>, Vector<ulong>, Vector<ulong>, Vector<ulong>) Load4xVectorAndUnzip(Vector<ulong> mask, ulong* address) { throw new PlatformNotSupportedException(); }

/// Load 16-bit data and sign-extend, non-faulting

/// <summary>
/// svint32_t svldnf1sh_s32(svbool_t pg, const int16_t *base)
/// LDNF1SH Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<int> LoadVectorInt16NonFaultingSignExtendToInt32(short* address) { throw new PlatformNotSupportedException(); }


/// Load 16-bit data and sign-extend, non-faulting

/// <summary>
/// svint64_t svldnf1sh_s64(svbool_t pg, const int16_t *base)
/// LDNF1SH Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<long> LoadVectorInt16NonFaultingSignExtendToInt64(short* address) { throw new PlatformNotSupportedException(); }


/// Load 16-bit data and sign-extend, non-faulting

/// <summary>
/// svuint32_t svldnf1sh_u32(svbool_t pg, const int16_t *base)
/// LDNF1SH Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<uint> LoadVectorInt16NonFaultingSignExtendToUInt32(short* address) { throw new PlatformNotSupportedException(); }


/// Load 16-bit data and sign-extend, non-faulting

/// <summary>
/// svuint64_t svldnf1sh_u64(svbool_t pg, const int16_t *base)
/// LDNF1SH Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<ulong> LoadVectorInt16NonFaultingSignExtendToUInt64(short* address) { throw new PlatformNotSupportedException(); }

/// Load 32-bit data and sign-extend, non-faulting

/// <summary>
/// svint64_t svldnf1sw_s64(svbool_t pg, const int32_t *base)
/// LDNF1SW Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<long> LoadVectorInt32NonFaultingSignExtendToInt64(int* address) { throw new PlatformNotSupportedException(); }

/// Load 32-bit data and sign-extend, non-faulting

/// <summary>
/// svuint64_t svldnf1sw_u64(svbool_t pg, const int32_t *base)
/// LDNF1SW Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<ulong> LoadVectorInt32NonFaultingSignExtendToUInt64(int* address) { throw new PlatformNotSupportedException(); }

/// Load 8-bit data and sign-extend, non-faulting

/// <summary>
/// svint16_t svldnf1sb_s16(svbool_t pg, const int8_t *base)
/// LDNF1SB Zresult.H, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<short> LoadVectorSByteNonFaultingSignExtendToInt16(sbyte* address) { throw new PlatformNotSupportedException(); }


/// Load 8-bit data and sign-extend, non-faulting

/// <summary>
/// svint32_t svldnf1sb_s32(svbool_t pg, const int8_t *base)
/// LDNF1SB Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<int> LoadVectorSByteNonFaultingSignExtendToInt32(sbyte* address) { throw new PlatformNotSupportedException(); }

/// Load 8-bit data and sign-extend, non-faulting

/// <summary>
/// svint64_t svldnf1sb_s64(svbool_t pg, const int8_t *base)
/// LDNF1SB Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<long> LoadVectorSByteNonFaultingSignExtendToInt64(sbyte* address) { throw new PlatformNotSupportedException(); }


/// Load 8-bit data and sign-extend, non-faulting

/// <summary>
/// svuint16_t svldnf1sb_u16(svbool_t pg, const int8_t *base)
/// LDNF1SB Zresult.H, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<ushort> LoadVectorSByteNonFaultingSignExtendToUInt16(sbyte* address) { throw new PlatformNotSupportedException(); }


/// Load 8-bit data and sign-extend, non-faulting

/// <summary>
/// svuint32_t svldnf1sb_u32(svbool_t pg, const int8_t *base)
/// LDNF1SB Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<uint> LoadVectorSByteNonFaultingSignExtendToUInt32(sbyte* address) { throw new PlatformNotSupportedException(); }


/// Load 8-bit data and sign-extend, non-faulting

/// <summary>
/// svuint64_t svldnf1sb_u64(svbool_t pg, const int8_t *base)
/// LDNF1SB Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<ulong> LoadVectorSByteNonFaultingSignExtendToUInt64(sbyte* address) { throw new PlatformNotSupportedException(); }

/// Max : Maximum

Expand Down
Loading

0 comments on commit 214cbbf

Please sign in to comment.