Skip to content

Commit

Permalink
[RISCV] Add 16 bit GPR sub-register for Zhinx. (#107446)
Browse files Browse the repository at this point in the history
This patch adds a 16 bit register class for use with Zhinx
instructions. This makes them more similar to Zfh instructions and
allows us to only spill 16 bits.

I've added CodeGenOnly instructions for load/store using GPRF16 as that
gave better results than insert_subreg/extract_subreg. I'm using FSGNJ
for GPRF16 copy with Zhinx as that gave better results. Zhinxmin will
use ADDI+subreg operations.

Function arguments use this new GPRF16 register class for f16 arguments
with Zhinxmin, eliminating the need to use RISCVISD::FMV* nodes.

I plan to extend this idea to Zfinx next.
  • Loading branch information
topperc authored Sep 27, 2024
1 parent af3837c commit 8a7843c
Show file tree
Hide file tree
Showing 23 changed files with 964 additions and 282 deletions.
10 changes: 10 additions & 0 deletions llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,13 @@ struct RISCVOperand final : public MCParsedAsmOperand {
RISCVMCRegisterClasses[RISCV::GPRRegClassID].contains(Reg.RegNum);
}

/// Returns true when this operand is a register that is a member of the
/// GPRF16 register class.
bool isGPRF16() const {
  // Only register operands can belong to a register class.
  if (Kind != KindTy::Register)
    return false;

  const auto &GPRF16Class = RISCVMCRegisterClasses[RISCV::GPRF16RegClassID];
  return GPRF16Class.contains(Reg.RegNum);
}

// True when the operand is a GPR (per isGPR()) and its IsGPRAsFPR flag is set.
bool isGPRAsFPR() const { return isGPR() && Reg.IsGPRAsFPR; }
// True when the operand is in the GPRF16 class (per isGPRF16()) and its
// IsGPRAsFPR flag is set.
bool isGPRAsFPR16() const { return isGPRF16() && Reg.IsGPRAsFPR; }
// True when the operand is a GPR pair (per isGPRPair()) and its IsGPRAsFPR
// flag is set.
bool isGPRPairAsFPR() const { return isGPRPair() && Reg.IsGPRAsFPR; }

bool isGPRPair() const {
Expand Down Expand Up @@ -1342,6 +1348,10 @@ unsigned RISCVAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
Op.Reg.RegNum = convertFPR64ToFPR16(Reg);
return Match_Success;
}
if (Kind == MCK_GPRAsFPR16 && Op.isGPRAsFPR()) {
Op.Reg.RegNum = Reg - RISCV::X0 + RISCV::X0_H;
return Match_Success;
}

// There are some GPRF64AsFPR instructions that have no RV32 equivalent. We
// reject them at parsing thinking we should match as GPRPairAsFPR for RV32.
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,19 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint32_t RegNo,
return MCDisassembler::Success;
}

/// Decodes a register field into a GPRF16 operand and appends it to \p Inst.
///
/// \p RegNo is the raw register index taken from the encoding. Decoding fails
/// when the index is 32 or above, or 16 or above when the subtarget has
/// FeatureStdExtE. On success the operand added is X0_H offset by \p RegNo.
static DecodeStatus DecodeGPRF16RegisterClass(MCInst &Inst, uint32_t RegNo,
                                              uint64_t Address,
                                              const MCDisassembler *Decoder) {
  const bool HasRVE =
      Decoder->getSubtargetInfo().hasFeature(RISCV::FeatureStdExtE);

  // With the E extension only the first 16 register indices are accepted.
  const uint32_t NumValidRegs = HasRVE ? 16 : 32;
  if (RegNo >= NumValidRegs)
    return MCDisassembler::Fail;

  MCRegister HalfReg = RISCV::X0_H + RegNo;
  Inst.addOperand(MCOperand::createReg(HalfReg));
  return MCDisassembler::Success;
}

static DecodeStatus DecodeGPRX1X5RegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
Expand Down
55 changes: 53 additions & 2 deletions llvm/lib/Target/RISCV/RISCVCallingConv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,23 @@ ArrayRef<MCPhysReg> RISCV::getArgGPRs(const RISCVABI::ABI ABI) {
return ArrayRef(ArgIGPRs);
}

/// Returns the list of 16-bit GPR sub-registers (X*_H) that may carry
/// arguments for the given \p ABI.
static ArrayRef<MCPhysReg> getArgGPR16s(const RISCVABI::ABI ABI) {
  // Argument registers for the ILP32* and LP64* ABIs, other than the
  // ILP32E/LP64E variants handled below.
  static const MCPhysReg ArgIGPRs[] = {RISCV::X10_H, RISCV::X11_H, RISCV::X12_H,
                                       RISCV::X13_H, RISCV::X14_H, RISCV::X15_H,
                                       RISCV::X16_H, RISCV::X17_H};
  // Argument registers for the ILP32E/LP64E ABIs.
  static const MCPhysReg ArgEGPRs[] = {RISCV::X10_H, RISCV::X11_H,
                                       RISCV::X12_H, RISCV::X13_H,
                                       RISCV::X14_H, RISCV::X15_H};

  switch (ABI) {
  case RISCVABI::ABI_ILP32E:
  case RISCVABI::ABI_LP64E:
    return ArrayRef(ArgEGPRs);
  default:
    return ArrayRef(ArgIGPRs);
  }
}

static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
// The GPRs used for passing arguments in the FastCC, X5 and X6 might be used
// for save-restore libcall, so we don't use them.
Expand All @@ -157,6 +174,26 @@ static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
return ArrayRef(FastCCIGPRs);
}

/// Returns the list of 16-bit GPR sub-registers (X*_H) that FastCC may use to
/// pass arguments for the given \p ABI.
static ArrayRef<MCPhysReg> getFastCCArgGPRF16s(const RISCVABI::ABI ABI) {
  // X5 and X6 are left out because the save-restore libcall might use them.
  // X7 is left out because Zicfilp uses it as the label register.
  static const MCPhysReg FastCCIGPRs[] = {
      RISCV::X10_H, RISCV::X11_H, RISCV::X12_H, RISCV::X13_H,
      RISCV::X14_H, RISCV::X15_H, RISCV::X16_H, RISCV::X17_H,
      RISCV::X28_H, RISCV::X29_H, RISCV::X30_H, RISCV::X31_H};

  // FastCC argument registers when using ILP32E/LP64E.
  static const MCPhysReg FastCCEGPRs[] = {RISCV::X10_H, RISCV::X11_H,
                                          RISCV::X12_H, RISCV::X13_H,
                                          RISCV::X14_H, RISCV::X15_H};

  switch (ABI) {
  case RISCVABI::ABI_ILP32E:
  case RISCVABI::ABI_LP64E:
    return ArrayRef(FastCCEGPRs);
  default:
    return ArrayRef(FastCCIGPRs);
  }
}

// Pass a 2*XLEN argument that has been split into two XLEN values through
// registers or the stack as necessary.
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
Expand Down Expand Up @@ -320,6 +357,13 @@ bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT,
}
}

if ((ValVT == MVT::f16 && Subtarget.hasStdExtZhinxmin())) {
if (MCRegister Reg = State.AllocateReg(getArgGPR16s(ABI))) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
}

ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI);

// Zfinx/Zdinx use GPR without a bitcast when possible.
Expand Down Expand Up @@ -564,9 +608,16 @@ bool llvm::CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,

MVT XLenVT = Subtarget.getXLenVT();

// Check if there is an available GPRF16 before hitting the stack.
if ((LocVT == MVT::f16 && Subtarget.hasStdExtZhinxmin())) {
if (MCRegister Reg = State.AllocateReg(getFastCCArgGPRF16s(ABI))) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
}

// Check if there is an available GPR before hitting the stack.
if ((LocVT == MVT::f16 && Subtarget.hasStdExtZhinxmin()) ||
(LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
(LocVT == MVT::f64 && Subtarget.is64Bit() &&
Subtarget.hasStdExtZdinx())) {
if (MCRegister Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
Expand Down
9 changes: 7 additions & 2 deletions llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,19 @@ bool RISCVDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
continue;
LLVM_DEBUG(dbgs() << " Dead def operand #" << I << " in:\n ";
MI.print(dbgs()));
Register X0Reg;
const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI, MF);
if (!(RC && RC->contains(RISCV::X0))) {
if (RC && RC->contains(RISCV::X0)) {
X0Reg = RISCV::X0;
} else if (RC && RC->contains(RISCV::X0_H)) {
X0Reg = RISCV::X0_H;
} else {
LLVM_DEBUG(dbgs() << " Ignoring, register is not a GPR.\n");
continue;
}
assert(LIS.hasInterval(Reg));
LIS.removeInterval(Reg);
MO.setReg(RISCV::X0);
MO.setReg(X0Reg);
LLVM_DEBUG(dbgs() << " Replacing with zero register. New:\n ";
MI.print(dbgs()));
++NumDeadDefsReplaced;
Expand Down
21 changes: 21 additions & 0 deletions llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ class RISCVExpandPseudo : public MachineFunctionPass {
MachineBasicBlock::iterator &NextMBBI);
bool expandVMSET_VMCLR(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, unsigned Opcode);
bool expandMV_FPR16INX(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
bool expandRV32ZdinxStore(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
bool expandRV32ZdinxLoad(MachineBasicBlock &MBB,
Expand Down Expand Up @@ -104,6 +106,8 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
// expanded instructions for each pseudo is correct in the Size field of the
// tablegen definition for the pseudo.
switch (MBBI->getOpcode()) {
case RISCV::PseudoMV_FPR16INX:
return expandMV_FPR16INX(MBB, MBBI);
case RISCV::PseudoRV32ZdinxSD:
return expandRV32ZdinxStore(MBB, MBBI);
case RISCV::PseudoRV32ZdinxLD:
Expand Down Expand Up @@ -266,6 +270,23 @@ bool RISCVExpandPseudo::expandVMSET_VMCLR(MachineBasicBlock &MBB,
return true;
}

/// Expands PseudoMV_FPR16INX into `ADDI dst, src, 0`, where dst and src are
/// the GPR super-registers that contain the pseudo's 16-bit operands through
/// the sub_16 sub-register index. The pseudo is erased afterwards.
///
/// \returns true, since the block is always modified.
bool RISCVExpandPseudo::expandMV_FPR16INX(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI) {
  const TargetRegisterInfo *TRI = STI->getRegisterInfo();
  DebugLoc DL = MBBI->getDebugLoc();

  // Maps a 16-bit register to the GPR that contains it as sub_16.
  auto WidenToGPR = [TRI](Register HalfReg) -> Register {
    return TRI->getMatchingSuperReg(HalfReg, RISCV::sub_16,
                                    &RISCV::GPRRegClass);
  };

  Register DstReg = WidenToGPR(MBBI->getOperand(0).getReg());
  Register SrcReg = WidenToGPR(MBBI->getOperand(1).getReg());

  // Carry the kill flag of the pseudo's source operand over to the ADDI.
  const bool SrcIsKill = MBBI->getOperand(1).isKill();
  BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI), DstReg)
      .addReg(SrcReg, getKillRegState(SrcIsKill))
      .addImm(0);

  // The pseudo instruction is gone now.
  MBBI->eraseFromParent();
  return true;
}

// This function expands the PseudoRV32ZdinxSD for storing a double-precision
// floating-point value into memory by generating an equivalent instruction
// sequence for RV32.
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -928,7 +928,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
}

SDNode *Res;
if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
Res =
CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode();
} else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
Res = CurDAG->getMachineNode(
Opc, DL, VT, Imm,
CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
Expand Down
22 changes: 22 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
MemBytes = 1;
break;
case RISCV::LH:
case RISCV::LH_INX:
case RISCV::LHU:
case RISCV::FLH:
MemBytes = 2;
Expand Down Expand Up @@ -144,6 +145,7 @@ Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
MemBytes = 1;
break;
case RISCV::SH:
case RISCV::SH_INX:
case RISCV::FSH:
MemBytes = 2;
break;
Expand Down Expand Up @@ -462,6 +464,13 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}

if (RISCV::GPRF16RegClass.contains(DstReg, SrcReg)) {
BuildMI(MBB, MBBI, DL, get(RISCV::PseudoMV_FPR16INX), DstReg)
.addReg(SrcReg,
getKillRegState(KillSrc) | getRenamableRegState(RenamableSrc));
return;
}

if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) {
// Emit an ADDI for both parts of GPRPair.
BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
Expand Down Expand Up @@ -583,6 +592,9 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
RISCV::SW : RISCV::SD;
IsScalableVector = false;
} else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::SH_INX;
IsScalableVector = false;
} else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoRV32ZdinxSD;
IsScalableVector = false;
Expand Down Expand Up @@ -666,6 +678,9 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
RISCV::LW : RISCV::LD;
IsScalableVector = false;
} else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::LH_INX;
IsScalableVector = false;
} else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoRV32ZdinxLD;
IsScalableVector = false;
Expand Down Expand Up @@ -1538,6 +1553,9 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
}

switch (Opcode) {
case RISCV::PseudoMV_FPR16INX:
// MV is always compressible to either c.mv or c.li rd, 0.
return STI.hasStdExtCOrZca() ? 2 : 4;
case TargetOpcode::STACKMAP:
// The upper bound for a stackmap intrinsic is the full length of its shadow
return StackMapOpers(&MI).getNumPatchBytes();
Expand Down Expand Up @@ -2593,6 +2611,7 @@ bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
case RISCV::LB:
case RISCV::LBU:
case RISCV::LH:
case RISCV::LH_INX:
case RISCV::LHU:
case RISCV::LW:
case RISCV::LWU:
Expand All @@ -2602,6 +2621,7 @@ bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
case RISCV::FLD:
case RISCV::SB:
case RISCV::SH:
case RISCV::SH_INX:
case RISCV::SW:
case RISCV::SD:
case RISCV::FSH:
Expand Down Expand Up @@ -2665,9 +2685,11 @@ bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(
case RISCV::LBU:
case RISCV::SB:
case RISCV::LH:
case RISCV::LH_INX:
case RISCV::LHU:
case RISCV::FLH:
case RISCV::SH:
case RISCV::SH_INX:
case RISCV::FSH:
case RISCV::LW:
case RISCV::LWU:
Expand Down
12 changes: 6 additions & 6 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -520,8 +520,8 @@ class BranchCC_rri<bits<3> funct3, string opcodestr>
}

let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
class Load_ri<bits<3> funct3, string opcodestr>
: RVInstI<funct3, OPC_LOAD, (outs GPR:$rd), (ins GPRMem:$rs1, simm12:$imm12),
class Load_ri<bits<3> funct3, string opcodestr, DAGOperand rty = GPR>
: RVInstI<funct3, OPC_LOAD, (outs rty:$rd), (ins GPRMem:$rs1, simm12:$imm12),
opcodestr, "$rd, ${imm12}(${rs1})">;

class HLoad_r<bits<7> funct7, bits<5> funct5, string opcodestr>
Expand All @@ -535,9 +535,9 @@ class HLoad_r<bits<7> funct7, bits<5> funct5, string opcodestr>
// reflecting the order these fields are specified in the instruction
// encoding.
let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
class Store_rri<bits<3> funct3, string opcodestr>
class Store_rri<bits<3> funct3, string opcodestr, DAGOperand rty = GPR>
: RVInstS<funct3, OPC_STORE, (outs),
(ins GPR:$rs2, GPRMem:$rs1, simm12:$imm12),
(ins rty:$rs2, GPRMem:$rs1, simm12:$imm12),
opcodestr, "$rs2, ${imm12}(${rs1})">;

class HStore_rr<bits<7> funct7, string opcodestr>
Expand All @@ -549,8 +549,8 @@ class HStore_rr<bits<7> funct7, string opcodestr>
}

let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class ALU_ri<bits<3> funct3, string opcodestr>
: RVInstI<funct3, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1, simm12:$imm12),
class ALU_ri<bits<3> funct3, string opcodestr, DAGOperand rty = GPR>
: RVInstI<funct3, OPC_OP_IMM, (outs rty:$rd), (ins rty:$rs1, simm12:$imm12),
opcodestr, "$rd, $rs1, $imm12">,
Sched<[WriteIALU, ReadIALU]>;

Expand Down
25 changes: 21 additions & 4 deletions llvm/lib/Target/RISCV/RISCVInstrInfoZc.td
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,9 @@ class CLoadB_ri<bits<6> funct6, string OpcodeStr>
}

let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
class CLoadH_ri<bits<6> funct6, bit funct1, string OpcodeStr>
: RVInst16CLH<funct6, funct1, 0b00, (outs GPRC:$rd),
class CLoadH_ri<bits<6> funct6, bit funct1, string OpcodeStr,
DAGOperand rty = GPRC>
: RVInst16CLH<funct6, funct1, 0b00, (outs rty:$rd),
(ins GPRCMem:$rs1, uimm2_lsb0:$imm),
OpcodeStr, "$rd, ${imm}(${rs1})"> {
bits<2> imm;
Expand All @@ -132,9 +133,10 @@ class CStoreB_rri<bits<6> funct6, string OpcodeStr>
}

let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
class CStoreH_rri<bits<6> funct6, bit funct1, string OpcodeStr>
class CStoreH_rri<bits<6> funct6, bit funct1, string OpcodeStr,
DAGOperand rty = GPRC>
: RVInst16CSH<funct6, funct1, 0b00, (outs),
(ins GPRC:$rs2, GPRCMem:$rs1, uimm2_lsb0:$imm),
(ins rty:$rs2, GPRCMem:$rs1, uimm2_lsb0:$imm),
OpcodeStr, "$rs2, ${imm}(${rs1})"> {
bits<2> imm;

Expand Down Expand Up @@ -202,7 +204,15 @@ def C_SB : CStoreB_rri<0b100010, "c.sb">,
Sched<[WriteSTB, ReadStoreData, ReadMemBase]>;
def C_SH : CStoreH_rri<0b100011, 0b0, "c.sh">,
Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;

// Compressed versions of Zhinx load/store.
// These reuse the funct6/funct1 values and "c.lh"/"c.sh" mnemonics of the
// regular compressed halfword load/store, but take a GPRF16C register operand
// instead of the default GPRC. They are marked isCodeGenOnly.
let isCodeGenOnly = 1 in {
def C_LH_INX : CLoadH_ri<0b100001, 0b1, "c.lh", GPRF16C>,
Sched<[WriteLDH, ReadMemBase]>;
def C_SH_INX : CStoreH_rri<0b100011, 0b0, "c.sh", GPRF16C>,
Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
}
} // Predicates = [HasStdExtZcb]

// Zcmp
let DecoderNamespace = "RVZcmp", Predicates = [HasStdExtZcmp],
Expand Down Expand Up @@ -318,6 +328,13 @@ def : CompressPat<(SB GPRC:$rs2, GPRCMem:$rs1, uimm2:$imm),
(C_SB GPRC:$rs2, GPRCMem:$rs1, uimm2:$imm)>;
def : CompressPat<(SH GPRC:$rs2, GPRCMem:$rs1, uimm2_lsb0:$imm),
(C_SH GPRC:$rs2, GPRCMem:$rs1, uimm2_lsb0:$imm)>;

// Compression patterns mapping the GPRF16 halfword load/store (LH_INX/SH_INX)
// to their compressed forms (C_LH_INX/C_SH_INX). They are marked
// isCompressOnly; presumably so that uncompression of c.lh/c.sh keeps using
// the regular LH/SH patterns -- confirm against the CompressPat definition.
let isCompressOnly = true in {
def : CompressPat<(LH_INX GPRF16C:$rd, GPRCMem:$rs1, uimm2_lsb0:$imm),
(C_LH_INX GPRF16C:$rd, GPRCMem:$rs1, uimm2_lsb0:$imm)>;
def : CompressPat<(SH_INX GPRF16C:$rs2, GPRCMem:$rs1, uimm2_lsb0:$imm),
(C_SH_INX GPRF16C:$rs2, GPRCMem:$rs1, uimm2_lsb0:$imm)>;
}
}// Predicates = [HasStdExtZcb]


Expand Down
Loading

0 comments on commit 8a7843c

Please sign in to comment.