Skip to content

Commit

Permalink
Implement AVX2 MaskLoad and MaskStore (dotnet#19513)
Browse files Browse the repository at this point in the history
* Implement AVX2 MaskLoad and MaskStore

* Add test cases for AVX2 MaskLoad and MaskStore

* Fix AVX MaskStore tests

* template AVX MaskLoad tests
  • Loading branch information
FeiPengIntel authored and CarolEidt committed Aug 23, 2018
1 parent 1c45896 commit 8013a4b
Show file tree
Hide file tree
Showing 45 changed files with 8,470 additions and 230 deletions.
3 changes: 3 additions & 0 deletions src/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,8 @@ bool emitter::IsDstDstSrcAVXInstruction(instruction ins)
case INS_vinserti128:
case INS_vmaskmovps:
case INS_vmaskmovpd:
case INS_vpmaskmovd:
case INS_vpmaskmovq:
case INS_vpblendd:
case INS_vperm2i128:
case INS_vperm2f128:
Expand Down Expand Up @@ -586,6 +588,7 @@ bool TakesRexWPrefix(instruction ins, emitAttr attr)
case INS_vfnmsub132sd:
case INS_vfnmsub213sd:
case INS_vfnmsub231sd:
case INS_vpmaskmovq:
return true;
default:
break;
Expand Down
4 changes: 2 additions & 2 deletions src/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
}
else if (category == HW_Category_MemoryLoad)
{
- if (intrinsicId == NI_AVX_MaskLoad)
+ if (intrinsicId == NI_AVX_MaskLoad || intrinsicId == NI_AVX2_MaskLoad)
{
emit->emitIns_SIMD_R_R_AR(ins, simdSize, targetReg, op2Reg, op1Reg);
}
Expand Down Expand Up @@ -253,7 +253,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
}
else if (category == HW_Category_MemoryStore)
{
- if (intrinsicId == NI_AVX_MaskStore)
+ if (intrinsicId == NI_AVX_MaskStore || intrinsicId == NI_AVX2_MaskStore)
{
emit->emitIns_AR_R_R(ins, simdSize, op2Reg, op3Reg, op1Reg, 0);
}
Expand Down
2 changes: 2 additions & 0 deletions src/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,8 @@ HARDWARE_INTRINSIC(AVX2_HorizontalSubtract, "HorizontalS
HARDWARE_INTRINSIC(AVX2_HorizontalSubtractSaturate, "HorizontalSubtractSaturate", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_phsubsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2_InsertVector128, "InsertVector128", AVX2, -1, 32, 3, {INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX2_LoadAlignedVector256NonTemporal, "LoadAlignedVector256NonTemporal", AVX2, -1, 32, 1, {INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoContainment)
HARDWARE_INTRINSIC(AVX2_MaskLoad, "MaskLoad", AVX2, -1, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaskmovd, INS_vpmaskmovd, INS_vpmaskmovq, INS_vpmaskmovq, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoContainment|HW_Flag_UnfixedSIMDSize)
HARDWARE_INTRINSIC(AVX2_MaskStore, "MaskStore", AVX2, -1, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaskmovd, INS_vpmaskmovd, INS_vpmaskmovq, INS_vpmaskmovq, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_UnfixedSIMDSize|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(AVX2_Max, "Max", AVX2, -1, 32, 2, {INS_pmaxsb, INS_pmaxub, INS_pmaxsw, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX2_Min, "Min", AVX2, -1, 32, 2, {INS_pminsb, INS_pminub, INS_pminsw, INS_pminuw, INS_pminsd, INS_pminud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX2_MoveMask, "MoveMask", AVX2, -1, 32, 1, {INS_pmovmskb, INS_pmovmskb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
Expand Down
2 changes: 2 additions & 0 deletions src/jit/instrsxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,8 @@ INST3(vbroadcastf128,"broadcastf128",0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SS
INST3(vbroadcasti128,"broadcasti128",0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x5A)) // Broadcast packed integer values read from memory to entire ymm register
INST3(vmaskmovps, "maskmovps" ,0, IUM_WR, 0, 0, SSE38(0x2E), BAD_CODE, SSE38(0x2C)) // Conditional SIMD Packed Single-Precision Floating-Point Loads and Stores
INST3(vmaskmovpd, "maskmovpd" ,0, IUM_WR, 0, 0, SSE38(0x2F), BAD_CODE, SSE38(0x2D)) // Conditional SIMD Packed Double-Precision Floating-Point Loads and Stores
INST3(vpmaskmovd, "pmaskmovd" ,0, IUM_WR, 0, 0, SSE38(0x8E), BAD_CODE, SSE38(0x8C)) // Conditional SIMD Integer Packed Dword Loads and Stores
INST3(vpmaskmovq, "pmaskmovq" ,0, IUM_WR, 0, 0, SSE38(0x8E), BAD_CODE, SSE38(0x8C)) // Conditional SIMD Integer Packed Qword Loads and Stores

INST3(FIRST_FMA_INSTRUCTION, "FIRST_FMA_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
// enum name FP updmode rf wf MR MI RM
Expand Down
2 changes: 2 additions & 0 deletions tests/src/JIT/HardwareIntrinsics/X86/Avx/Avx_r.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@
<Compile Include="GetLowerHalf.UInt16.cs" />
<Compile Include="GetLowerHalf.UInt32.cs" />
<Compile Include="GetLowerHalf.UInt64.cs" />
<Compile Include="MaskLoad.Double.cs" />
<Compile Include="MaskLoad.Single.cs" />
<Compile Include="MaskStore.Double.cs" />
<Compile Include="MaskStore.Single.cs" />
<Compile Include="Max.Double.cs" />
Expand Down
2 changes: 2 additions & 0 deletions tests/src/JIT/HardwareIntrinsics/X86/Avx/Avx_ro.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@
<Compile Include="GetLowerHalf.UInt16.cs" />
<Compile Include="GetLowerHalf.UInt32.cs" />
<Compile Include="GetLowerHalf.UInt64.cs" />
<Compile Include="MaskLoad.Double.cs" />
<Compile Include="MaskLoad.Single.cs" />
<Compile Include="MaskStore.Double.cs" />
<Compile Include="MaskStore.Single.cs" />
<Compile Include="Max.Double.cs" />
Expand Down
Loading

0 comments on commit 8013a4b

Please sign in to comment.