Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[arm64] JIT: Enable CSE/hoisting for "arrayBase + elementOffset" #61293

Merged
merged 22 commits into from
Nov 10, 2021
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/coreclr/jit/codegenlinear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1574,6 +1574,15 @@ void CodeGen::genConsumeRegs(GenTree* tree)
{
genConsumeAddress(tree);
}
#ifdef TARGET_ARM64
else if (tree->OperIs(GT_BFIZ))
{
// Can be contained as part of LEA on ARM64
GenTreeCast* cast = tree->gtGetOp1()->AsCast();
assert(cast->isContained());
genConsumeAddress(cast->CastOp());
}
#endif
else if (tree->OperIsLocalRead())
{
// A contained lcl var must be living on stack and marked as reg optional, or not be a
Expand Down
20 changes: 18 additions & 2 deletions src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13481,8 +13481,24 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR
}
else // no scale
{
// Then load/store dataReg from/to [memBase + index]
emitIns_R_R_R(ins, attr, dataReg, memBase->GetRegNum(), index->GetRegNum());
if (index->OperIs(GT_BFIZ) && index->isContained())
{
// Then load/store dataReg from/to [memBase + index*scale with sign/zero extension]
GenTreeCast* cast = index->gtGetOp1()->AsCast();

// For now, this code only supports extensions from i32/u32
assert(cast->isContained() && varTypeIsInt(cast->CastFromType()));

const bool isZeroExtended = cast->IsUnsigned() || varTypeIsUnsigned(cast->CastToType());
EgorBo marked this conversation as resolved.
Show resolved Hide resolved
emitIns_R_R_R_Ext(ins, attr, dataReg, memBase->GetRegNum(), cast->CastOp()->GetRegNum(),
isZeroExtended ? INS_OPTS_UXTW : INS_OPTS_SXTW,
(int)index->gtGetOp2()->AsIntCon()->IconValue());
}
else
{
// Then load/store dataReg from/to [memBase + index]
emitIns_R_R_R(ins, attr, dataReg, memBase->GetRegNum(), index->GetRegNum());
}
}
}
}
Expand Down
22 changes: 22 additions & 0 deletions src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5033,6 +5033,28 @@ bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable, var_types ta
}
}

#ifdef TARGET_ARM64
// Check if we can "contain" LEA(BFIZ) in order to extend 32bit index to 64bit as part of load/store.
if (index != nullptr && index->OperIs(GT_BFIZ) && index->gtGetOp1()->OperIs(GT_CAST) &&
EgorBo marked this conversation as resolved.
Show resolved Hide resolved
index->gtGetOp2()->IsCnsIntOrI() && varTypeIsIntegral(targetType))
{
// BFIZ node is a binary op where op1 is GT_CAST and op2 is GT_CNS_INT
GenTreeCast* cast = index->gtGetOp1()->AsCast();
assert(cast->isContained());

const unsigned shiftBy = (unsigned)index->gtGetOp2()->AsIntCon()->IconValue();

// 'scale' and 'offset' have to be unset since we're going to use [base + index * SXTW/UXTW scale] form
// where there is no room for additional offsets/scales on ARM64. 'shiftBy' has to match target's width.
if (varTypeIsInt(cast->CastFromType()) && varTypeIsLong(cast->CastToType()) &&
(genTypeSize(targetType) == (1U << shiftBy)) && (scale == 1) && (offset == 0))
EgorBo marked this conversation as resolved.
Show resolved Hide resolved
{
// TODO: Make sure that genCreateAddrMode marks such BFIZ candidates as GTF_DONT_CSE for better CQ.
EgorBo marked this conversation as resolved.
Show resolved Hide resolved
MakeSrcContained(addrMode, index);
}
}
#endif

JITDUMP("New addressing mode node:\n ");
DISPNODE(addrMode);
JITDUMP("\n");
Expand Down
11 changes: 10 additions & 1 deletion src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -689,7 +689,16 @@ int LinearScan::BuildNode(GenTree* tree)
if (index != nullptr)
{
srcCount++;
BuildUse(index);
if (index->OperIs(GT_BFIZ) && index->isContained())
{
GenTreeCast* cast = index->gtGetOp1()->AsCast();
assert(cast->isContained() && (cns == 0));
BuildUse(cast->CastOp());
}
else
{
BuildUse(index);
}
}
assert(dstCount == 1);

Expand Down
18 changes: 15 additions & 3 deletions src/coreclr/jit/lsrabuild.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3021,10 +3021,22 @@ int LinearScan::BuildAddrUses(GenTree* addr, regMaskTP candidates)
BuildUse(addrMode->Base(), candidates);
srcCount++;
}
if ((addrMode->Index() != nullptr) && !addrMode->Index()->isContained())
if (addrMode->Index() != nullptr)
{
BuildUse(addrMode->Index(), candidates);
srcCount++;
if (!addrMode->Index()->isContained())
{
BuildUse(addrMode->Index(), candidates);
srcCount++;
}
#ifdef TARGET_ARM64
else if (addrMode->Index()->OperIs(GT_BFIZ))
{
GenTreeCast* cast = addrMode->Index()->gtGetOp1()->AsCast();
assert(cast->isContained());
BuildUse(cast->CastOp(), candidates);
srcCount++;
}
#endif
}
return srcCount;
}
Expand Down
75 changes: 49 additions & 26 deletions src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4909,7 +4909,7 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call,
GenTree* arg = fgMakeTmpArgNode(argEntry);

// Change the expression to "(tmp=val),tmp"
arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg);
arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg);

#endif // FEATURE_FIXED_OUT_ARGS

Expand Down Expand Up @@ -5465,11 +5465,21 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree)

GenTree* cns = gtNewIconNode(elemOffs, TYP_I_IMPL);

// We're going to create a platform-specific address:
EgorBo marked this conversation as resolved.
Show resolved Hide resolved
//
// XArch: "arrRef + (index + elemOffset)"
// ArmArch: "(arrRef + elemOffset) + index" // both ADD are BYREF here
//
// It'll allow us to hoist/CSE the invariant part which is "arrRef + elemOffset" on ArmArch.
// We don't really need it on XArch because of more powerful addressing modes such as [base + index*scale + offset]
EgorBo marked this conversation as resolved.
Show resolved Hide resolved
//
EgorBo marked this conversation as resolved.
Show resolved Hide resolved
#ifdef TARGET_ARMARCH
GenTree* basePlusOffset = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, cns);
addr = gtNewOperNode(GT_ADD, TYP_BYREF, basePlusOffset, addr);
#else
addr = gtNewOperNode(GT_ADD, TYP_I_IMPL, addr, cns);

/* Add the object ref to the element's offset */

addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr);
#endif

assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE) != 0) ||
(GenTree::s_gtNodeSizes[GT_IND] == TREE_NODE_SZ_SMALL));
Expand Down Expand Up @@ -5539,16 +5549,16 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree)
tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree);
}

JITDUMP("fgMorphArrayIndex (before remorph):\n");
DISPTREE(tree);
JITDUMP("fgMorphArrayIndex (before remorph):\n")
DISPTREE(tree)

// Currently we morph the tree to perform some folding operations prior
// to attaching fieldSeq info and labeling constant array index contributions
//
tree = fgMorphTree(tree);

JITDUMP("fgMorphArrayIndex (after remorph):\n");
DISPTREE(tree);
JITDUMP("fgMorphArrayIndex (after remorph):\n")
DISPTREE(tree)

// Ideally we just want to proceed to attaching fieldSeq info and labeling the
// constant array index contributions, but the morphing operation may have changed
Expand All @@ -5562,48 +5572,61 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree)

if (fgIsCommaThrow(tree))
{
if ((arrElem != indTree) || // A new tree node may have been created
(indTree->OperGet() != GT_IND)) // The GT_IND may have been changed to a GT_CNS_INT
if ((arrElem != indTree) || // A new tree node may have been created
(indTree->OperIs(GT_IND))) // The GT_IND may have been changed to a GT_CNS_INT
EgorBo marked this conversation as resolved.
Show resolved Hide resolved
{
return tree; // Just return the Comma-Throw, don't try to attach the fieldSeq info, etc..
}
}

assert(!fgGlobalMorph || (arrElem->gtDebugFlags & GTF_DEBUG_NODE_MORPHED));
DBEXEC(fgGlobalMorph && (arrElem == tree), tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED);

addr = arrElem->AsOp()->gtOp1;
DBEXEC(fgGlobalMorph && (arrElem == tree), tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED)

assert(addr->TypeGet() == TYP_BYREF);
addr = arrElem->gtGetOp1();

GenTree* cnsOff = nullptr;
if (addr->OperGet() == GT_ADD)
if (addr->OperIs(GT_ADD))
{
assert(addr->TypeGet() == TYP_BYREF);
assert(addr->AsOp()->gtOp1->TypeGet() == TYP_REF);

addr = addr->AsOp()->gtOp2;
#ifdef TARGET_ARMARCH
// On arm/arm64 addr tree is a bit different, see above ^
if (addr->gtGetOp1()->OperIs(GT_ADD) && addr->gtGetOp1()->gtGetOp2()->IsCnsIntOrI())
EgorBo marked this conversation as resolved.
Show resolved Hide resolved
{
assert(addr->gtGetOp1()->gtGetOp1()->TypeIs(TYP_REF));
cnsOff = addr->gtGetOp1()->gtGetOp2();
addr = addr->gtGetOp2();
// Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX
addr->LabelIndex(this);
}
else
{
assert(addr->gtGetOp2()->IsCnsIntOrI());
cnsOff = addr->gtGetOp2();
addr = nullptr;
}
#else
assert(addr->TypeIs(TYP_BYREF));
assert(addr->gtGetOp1()->TypeIs(TYP_REF));
addr = addr->gtGetOp2();

// Look for the constant [#FirstElem] node here, or as the RHS of an ADD.

if (addr->gtOper == GT_CNS_INT)
if (addr->IsCnsIntOrI())
{
cnsOff = addr;
addr = nullptr;
}
else
{
if ((addr->OperGet() == GT_ADD) && (addr->AsOp()->gtOp2->gtOper == GT_CNS_INT))
if ((addr->OperIs(GT_ADD)) && addr->gtGetOp2()->IsCnsIntOrI())
{
cnsOff = addr->AsOp()->gtOp2;
addr = addr->AsOp()->gtOp1;
cnsOff = addr->gtGetOp2();
addr = addr->gtGetOp1();
}

// Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX
addr->LabelIndex(this);
}
#endif
}
else if (addr->OperGet() == GT_CNS_INT)
else if (addr->IsCnsIntOrI())
{
cnsOff = addr;
}
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/vartype.h
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,12 @@ inline bool varTypeIsLong(T vt)
return (TypeGet(vt) >= TYP_LONG) && (TypeGet(vt) <= TYP_ULONG);
}

template <class T>
inline bool varTypeIsInt(T vt)
{
return (TypeGet(vt) >= TYP_INT) && (TypeGet(vt) <= TYP_UINT);
}

template <class T>
inline bool varTypeIsMultiReg(T vt)
{
Expand Down