Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for folding core SIMD operations that produce TYP_MASK on newer hardware #104875

Merged
merged 11 commits into from
Jul 15, 2024
8 changes: 4 additions & 4 deletions src/coreclr/jit/assertionprop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3004,10 +3004,10 @@ GenTree* Compiler::optVNBasedFoldConstExpr(BasicBlock* block, GenTree* parent, G
{
simdmask_t value = vnStore->ConstantValue<simdmask_t>(vnCns);

GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet());
memcpy(&vecCon->gtSimdVal, &value, sizeof(simdmask_t));
GenTreeMskCon* mskCon = gtNewMskConNode(tree->TypeGet());
memcpy(&mskCon->gtSimdMaskVal, &value, sizeof(simdmask_t));

conValTree = vecCon;
conValTree = mskCon;
break;
}
break;
Expand Down Expand Up @@ -3136,7 +3136,7 @@ bool Compiler::optIsProfitableToSubstitute(GenTree* dest, BasicBlock* destBlock,
}
#endif // FEATURE_HW_INTRINSICS
}
else if (!value->IsCnsFltOrDbl())
else if (!value->IsCnsFltOrDbl() && !value->IsCnsMsk())
{
return true;
}
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -782,6 +782,7 @@ class CodeGen final : public CodeGenInterface
void genSetRegToConst(regNumber targetReg, var_types targetType, GenTree* tree);
#if defined(FEATURE_SIMD)
void genSetRegToConst(regNumber targetReg, var_types targetType, simd_t* val);
void genSetRegToConst(regNumber targetReg, var_types targetType, simdmask_t* val);
#endif
void genCodeForTreeNode(GenTree* treeNode);
void genCodeForBinary(GenTreeOp* treeNode);
Expand Down
5 changes: 0 additions & 5 deletions src/coreclr/jit/codegenarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -304,11 +304,6 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
}
break;

case GT_CNS_VEC:
{
unreached();
}

default:
unreached();
}
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/codegenarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
case GT_CNS_INT:
case GT_CNS_DBL:
case GT_CNS_VEC:
case GT_CNS_MSK:
genSetRegToConst(targetReg, targetType, treeNode);
genProduceReg(treeNode);
break;
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8327,7 +8327,7 @@ void CodeGen::genCodeForReuseVal(GenTree* treeNode)
assert(treeNode->IsReuseRegVal());

// For now, this is only used for constant nodes.
assert(treeNode->OperIs(GT_CNS_INT, GT_CNS_DBL, GT_CNS_VEC));
assert(treeNode->OperIs(GT_CNS_INT, GT_CNS_DBL, GT_CNS_VEC, GT_CNS_MSK));
JITDUMP(" TreeNode is marked ReuseReg\n");

if (treeNode->IsIntegralConst(0) && GetEmitter()->emitCurIGnonEmpty())
Expand Down
53 changes: 48 additions & 5 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t
}
break;
}

case TYP_SIMD12:
{
simd12_t val12 = *(simd12_t*)val;
Expand Down Expand Up @@ -516,6 +517,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t
}
break;
}

case TYP_SIMD16:
{
simd16_t val16 = *(simd16_t*)val;
Expand Down Expand Up @@ -543,6 +545,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t
}
break;
}

case TYP_SIMD32:
{
simd32_t val32 = *(simd32_t*)val;
Expand Down Expand Up @@ -570,6 +573,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t
}
break;
}

case TYP_SIMD64:
{
simd64_t val64 = *(simd64_t*)val;
Expand All @@ -595,18 +599,49 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t
}
break;
}

default:
{
unreached();
}
}
}

//----------------------------------------------------------------------------------
// genSetRegToConst: generate code to set a target mask register to a given constant value
//
// Arguments:
//    targetReg  - register that receives the constant
//    targetType - type of the target; asserted to be a mask type
//    val        - pointer to the mask constant to load
//
// Notes:
//    An all-bits-set constant is produced with kxnorq and a zero constant with kxorq,
//    each using targetReg for all three operands, so neither needs a memory operand.
//    Every other value is emitted via emitSimdMaskConst and loaded from the returned
//    field handle.
//
void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simdmask_t* val)
{
    assert(varTypeIsMask(targetType));

    emitter* codeEmitter = GetEmitter();
    emitAttr loadSize    = emitTypeSize(targetType);

    if (val->IsAllBitsSet())
    {
        // Register-only form: no data section entry is required for this value.
        codeEmitter->emitIns_SIMD_R_R_R(INS_kxnorq, EA_8BYTE, targetReg, targetReg, targetReg, INS_OPTS_NONE);
        return;
    }

    if (val->IsZero())
    {
        // Register-only form: no data section entry is required for this value.
        codeEmitter->emitIns_SIMD_R_R_R(INS_kxorq, EA_8BYTE, targetReg, targetReg, targetReg, INS_OPTS_NONE);
        return;
    }

    // General case: emit the constant and load it through its field handle.
    CORINFO_FIELD_HANDLE maskCnsHnd = codeEmitter->emitSimdMaskConst(*val);
    codeEmitter->emitIns_R_C(ins_Load(targetType), loadSize, targetReg, maskCnsHnd, 0);
}
#endif // FEATURE_SIMD

/***********************************************************************************
*
* Generate code to set a register 'targetReg' of type 'targetType' to the constant
* specified by the constant (GT_CNS_INT, GT_CNS_DBL, or GT_CNS_VEC) in 'tree'. This
* specified by the constant (GT_CNS_INT, GT_CNS_DBL, GT_CNS_VEC, or GT_CNS_MSK) in 'tree'. This
* does not call genProduceReg() on the target register.
*/
void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTree* tree)
Expand Down Expand Up @@ -700,6 +735,17 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
break;
}

case GT_CNS_MSK:
{
#if defined(FEATURE_MASKED_HW_INTRINSICS)
GenTreeMskCon* mskCon = tree->AsMskCon();
genSetRegToConst(mskCon->GetRegNum(), targetType, &mskCon->gtSimdMaskVal);
#else
unreached();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At multiple places, for GT_CNS_MSK, we have:

#if defined(FEATURE_MASKED_HW_INTRINSICS)
   // logic
#else
   unreached();
#endif

I'm wondering if there is a single place where we can add this check (for example, during the creation of such a node) and then assume that we will never have GT_CNS_MSK when FEATURE_MASKED_HW_INTRINSICS is not enabled. I think this goes with my earlier comment about whether we can just have the definition of GT_CNS_MSK under #ifdef FEATURE_MASKED_HW_INTRINSICS.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that may be better overall. I'll investigate it in a follow-up PR as indicated above.

#endif
break;
}

default:
unreached();
}
Expand Down Expand Up @@ -1860,11 +1906,8 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
FALLTHROUGH;

case GT_CNS_DBL:
genSetRegToConst(targetReg, targetType, treeNode);
genProduceReg(treeNode);
break;

case GT_CNS_VEC:
case GT_CNS_MSK:
genSetRegToConst(targetReg, targetType, treeNode);
genProduceReg(treeNode);
break;
Expand Down
7 changes: 7 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -3055,6 +3055,8 @@ class Compiler

GenTreeVecCon* gtNewVconNode(var_types type, void* data);

GenTreeMskCon* gtNewMskConNode(var_types type);

GenTree* gtNewAllBitsSetConNode(var_types type);

GenTree* gtNewZeroConNode(var_types type);
Expand Down Expand Up @@ -3232,7 +3234,9 @@ class Compiler
CorInfoType simdBaseJitType,
unsigned simdSize);

#if defined(FEATURE_MASKED_HW_INTRINSICS)
GenTree* gtNewSimdCvtMaskToVectorNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize);
#endif // FEATURE_MASKED_HW_INTRINSICS

GenTree* gtNewSimdCvtNode(var_types type,
GenTree* op1,
Expand All @@ -3246,7 +3250,9 @@ class Compiler
CorInfoType simdSourceBaseJitType,
unsigned simdSize);

#if defined(FEATURE_MASKED_HW_INTRINSICS)
GenTree* gtNewSimdCvtVectorToMaskNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize);
#endif // FEATURE_MASKED_HW_INTRINSICS

GenTree* gtNewSimdCreateBroadcastNode(
var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize);
Expand Down Expand Up @@ -11675,6 +11681,7 @@ class GenTreeVisitor
case GT_CNS_DBL:
case GT_CNS_STR:
case GT_CNS_VEC:
case GT_CNS_MSK:
case GT_MEMORYBARRIER:
case GT_JMP:
case GT_JCC:
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4358,6 +4358,7 @@ void GenTree::VisitOperands(TVisitor visitor)
case GT_CNS_DBL:
case GT_CNS_STR:
case GT_CNS_VEC:
case GT_CNS_MSK:
case GT_MEMORYBARRIER:
case GT_JMP:
case GT_JCC:
Expand Down
Loading
Loading