From c8ee27e1df5c70ec1284c3626064fc1f76d922a9 Mon Sep 17 00:00:00 2001 From: Hanbum Park Date: Sun, 21 Apr 2024 14:29:41 +0900 Subject: [PATCH] [AArch64] Replace AND with LSL#2 for LDR target (#34101) Currently, the process of replacing bitwise operations consisting of `LSR`/`LSL` with `And` is performed by `DAGCombiner`. However, in certain cases, the `AND` generated by this process can be removed. Consider the following case: ``` lsr x8, x8, #56 and x8, x8, #0xfc ldr w0, [x2, x8] ret ``` In this case, we can remove the `AND` by changing the target of `LDR` to `[X2, X8, LSL #2]` and changing the right-shift amount from 56 to 58. After the change: ``` lsr x8, x8, #58 ldr w0, [x2, x8, lsl #2] ret ``` This patch checks to see if the `SHIFTING` + `AND` operation on a load target can be optimized and optimizes it if it can. --- .../Target/AArch64/AArch64MIPeepholeOpt.cpp | 62 +++++++++++++++++++ llvm/test/CodeGen/AArch64/peephole-load.mir | 10 ++- 2 files changed, 66 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp index 22da7ddef98a2a..2dacc2a504c8e9 100644 --- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp +++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp @@ -128,6 +128,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass { bool visitINSviGPR(MachineInstr &MI, unsigned Opc); bool visitINSvi64lane(MachineInstr &MI); bool visitFMOVDr(MachineInstr &MI); + bool visitLOAD(MachineInstr &MI); bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { @@ -690,6 +691,64 @@ bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) { return true; } +bool AArch64MIPeepholeOpt::visitLOAD(MachineInstr &MI) { + Register LdOp2Reg = MI.getOperand(2).getReg(); + unsigned RegSize = TRI->getRegSizeInBits(LdOp2Reg, *MRI); + + // Consider: + // (ldr w, [x, (and x, (ubfm x, x, imms, immr), C1)]) + // If bitmask C1 of And is all the bits remaining after + // 
the bit-shift performed by the UBFM, minus the last 2 bits, try to optimize. + // Optimize to: + // (ldr w, [x, (ubfm x, x, imms, immr), lsl #2]) + { + if (!MI.getOperand(4).isImm() || MI.getOperand(4).getImm() != 0) + return false; + + MachineInstr *AndMI = MRI->getUniqueVRegDef(LdOp2Reg); + if (!AndMI || AndMI->getOpcode() != AArch64::ANDXri || + !AndMI->getOperand(2).isImm()) + return false; + + uint64_t AndMask = AArch64_AM::decodeLogicalImmediate( + AndMI->getOperand(2).getImm(), RegSize); + MachineInstr *ShtMI = MRI->getUniqueVRegDef(AndMI->getOperand(1).getReg()); + uint64_t Mask = 0; + if (!ShtMI || ShtMI->getOpcode() != AArch64::UBFMXri) + return false; + uint64_t imms = ShtMI->getOperand(2).getImm(); + uint64_t immr = ShtMI->getOperand(3).getImm(); + uint64_t new_imms = 0; + uint64_t new_immr = 0; + if (imms <= immr) { + if (immr != RegSize - 1) + return false; + Mask = ((uint64_t)1 << (RegSize - imms)) - 4; + new_imms = imms + 2; + new_immr = immr; + } else { + // We only need to handle the lsl #1 case. + if ((imms - immr != 1) || imms != RegSize - 1) + return false; + Mask = UINT64_MAX - 3; + new_imms = 1; + new_immr = imms; + } + + // Check whether this shift can be folded into the load's scaled-register addressing mode. 
+ if (AndMask == Mask) { + AndMI->eraseFromParent(); + ShtMI->getOperand(2).setImm(new_imms); + ShtMI->getOperand(3).setImm(new_immr); + MI.getOperand(2).setReg(ShtMI->getOperand(0).getReg()); + MI.getOperand(4).setImm(1); + return true; + } + } + + return false; +} + bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -771,6 +830,9 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) { case AArch64::FMOVDr: Changed |= visitFMOVDr(MI); break; + case AArch64::LDRWroX: + Changed |= visitLOAD(MI); + break; } } } diff --git a/llvm/test/CodeGen/AArch64/peephole-load.mir b/llvm/test/CodeGen/AArch64/peephole-load.mir index df1c9a3977e79f..8c9555a48997b9 100644 --- a/llvm/test/CodeGen/AArch64/peephole-load.mir +++ b/llvm/test/CodeGen/AArch64/peephole-load.mir @@ -14,9 +14,8 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64 = MADDXrrr [[COPY1]], [[COPY2]], $xzr - ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri killed [[MADDXrrr]], 56, 63 - ; CHECK-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri killed [[UBFMXri]], 8069 - ; CHECK-NEXT: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[COPY]], killed [[ANDXri]], 0, 0 + ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri killed [[MADDXrrr]], 58, 63 + ; CHECK-NEXT: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[COPY]], killed [[UBFMXri]], 0, 1 ; CHECK-NEXT: $w0 = COPY [[LDRWroX]] ; CHECK-NEXT: RET_ReallyLR implicit $w0 %2:gpr64common = COPY $x2 @@ -42,9 +41,8 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64 = MADDXrrr [[COPY1]], [[COPY2]], $xzr - ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri killed [[MADDXrrr]], 63, 62 - ; CHECK-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri killed [[UBFMXri]], 8125 - ; CHECK-NEXT: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[COPY]], 
killed [[ANDXri]], 0, 0 + ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri killed [[MADDXrrr]], 1, 63 + ; CHECK-NEXT: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[COPY]], killed [[UBFMXri]], 0, 1 ; CHECK-NEXT: $w0 = COPY [[LDRWroX]] ; CHECK-NEXT: RET_ReallyLR implicit $w0 %2:gpr64common = COPY $x2