From 2ca45150c7984eea123409e6a7d25b2c7606ef5c Mon Sep 17 00:00:00 2001 From: David Green Date: Sun, 28 Jan 2024 17:01:21 +0000 Subject: [PATCH] Revert "[AArch64] merge index address with large offset into base address" This reverts commit 32878c2065c8005b3ea30c79e16dfd7eed55d645 due to #79756 and #76202. (cherry picked from commit 915c3d9e5a2d1314afe64cd6116a3b6c9809ec90) --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 10 - llvm/lib/Target/AArch64/AArch64InstrInfo.h | 3 - .../AArch64/AArch64LoadStoreOptimizer.cpp | 229 ------------------ llvm/test/CodeGen/AArch64/arm64-addrmode.ll | 15 +- .../AArch64/large-offset-ldr-merge.mir | 5 +- 5 files changed, 12 insertions(+), 250 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 2e8d8c63d6bec2..13e9d9725cc2ed 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -4098,16 +4098,6 @@ AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) { return MI.getOperand(Idx); } -const MachineOperand & -AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) { - switch (MI.getOpcode()) { - default: - llvm_unreachable("Unexpected opcode"); - case AArch64::LDRBBroX: - return MI.getOperand(4); - } -} - static const TargetRegisterClass *getRegClass(const MachineInstr &MI, Register Reg) { if (MI.getParent() == nullptr) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index db24a19fe5f8e3..6526f6740747ab 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -111,9 +111,6 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { /// Returns the immediate offset operator of a load/store. static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI); - /// Returns the shift amount operator of a load/store. - static const MachineOperand &getLdStAmountOp(const MachineInstr &MI); - /// Returns whether the instruction is FP or NEON. static bool isFpOrNEON(const MachineInstr &MI); diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index e90b8a8ca7acee..926a89466255ca 100644 --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -62,8 +62,6 @@ STATISTIC(NumUnscaledPairCreated, "Number of load/store from unscaled generated"); STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted"); STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted"); -STATISTIC(NumConstOffsetFolded, - "Number of const offset of index address folded"); DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming", "Controls which pairs are considered for renaming"); @@ -77,11 +75,6 @@ static cl::opt LdStLimit("aarch64-load-store-scan-limit", static cl::opt UpdateLimit("aarch64-update-scan-limit", cl::init(100), cl::Hidden); -// The LdStConstLimit limits how far we search for const offset instructions -// when we form index address load/store instructions. -static cl::opt LdStConstLimit("aarch64-load-store-const-scan-limit", - cl::init(10), cl::Hidden); - // Enable register renaming to find additional store pairing opportunities. static cl::opt EnableRenaming("aarch64-load-store-renaming", cl::init(true), cl::Hidden); @@ -178,13 +171,6 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit); - // Scan the instruction list to find a register assigned with a const - // value that can be combined with the current instruction (a load or store) - // using base addressing with writeback. Scan forwards. - MachineBasicBlock::iterator - findMatchingConstOffsetBackward(MachineBasicBlock::iterator I, unsigned Limit, - unsigned &Offset); - // Scan the instruction list to find a base register update that can // be combined with the current instruction (a load or store) using // pre or post indexed addressing with writeback. Scan backwards. @@ -196,19 +182,11 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI, unsigned BaseReg, int Offset); - bool isMatchingMovConstInsn(MachineInstr &MemMI, MachineInstr &MI, - unsigned IndexReg, unsigned &Offset); - // Merge a pre- or post-index base register update into a ld/st instruction. MachineBasicBlock::iterator mergeUpdateInsn(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update, bool IsPreIdx); - MachineBasicBlock::iterator - mergeConstOffsetInsn(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Update, unsigned Offset, - int Scale); - // Find and merge zero store instructions. bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI); @@ -221,9 +199,6 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { // Find and merge a base register updates before or after a ld/st instruction. bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI); - // Find and merge a index ldr/st instructions into a base ld/st instruction. - bool tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI, int Scale); - bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt); bool runOnMachineFunction(MachineFunction &Fn) override; @@ -506,16 +481,6 @@ static unsigned getPreIndexedOpcode(unsigned Opc) { } } -static unsigned getBaseAddressOpcode(unsigned Opc) { - // TODO: Add more index address loads/stores. - switch (Opc) { - default: - llvm_unreachable("Opcode has no base address equivalent!"); - case AArch64::LDRBBroX: - return AArch64::LDRBBui; - } -} - static unsigned getPostIndexedOpcode(unsigned Opc) { switch (Opc) { default: @@ -757,20 +722,6 @@ static bool isMergeableLdStUpdate(MachineInstr &MI) { } } -// Make sure this is a reg+reg Ld/St -static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) { - unsigned Opc = MI.getOpcode(); - switch (Opc) { - default: - return false; - // Scaled instructions. - // TODO: Add more index address loads/stores. - case AArch64::LDRBBroX: - Scale = 1; - return true; - } -} - static bool isRewritableImplicitDef(unsigned Opc) { switch (Opc) { default: @@ -2097,63 +2048,6 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I, return NextI; } -MachineBasicBlock::iterator -AArch64LoadStoreOpt::mergeConstOffsetInsn(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Update, - unsigned Offset, int Scale) { - assert((Update->getOpcode() == AArch64::MOVKWi) && - "Unexpected const mov instruction to merge!"); - MachineBasicBlock::iterator E = I->getParent()->end(); - MachineBasicBlock::iterator NextI = next_nodbg(I, E); - MachineBasicBlock::iterator PrevI = prev_nodbg(Update, E); - MachineInstr &MemMI = *I; - unsigned Mask = (1 << 12) * Scale - 1; - unsigned Low = Offset & Mask; - unsigned High = Offset - Low; - Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg(); - Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg(); - MachineInstrBuilder AddMIB, MemMIB; - - // Add IndexReg, BaseReg, High (the BaseReg may be SP) - AddMIB = - BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(AArch64::ADDXri)) - .addDef(IndexReg) - .addUse(BaseReg) - .addImm(High >> 12) // shifted value - .addImm(12); // shift 12 - (void)AddMIB; - // Ld/St DestReg, IndexReg, Imm12 - unsigned NewOpc = getBaseAddressOpcode(I->getOpcode()); - MemMIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) - .add(getLdStRegOp(MemMI)) - .add(AArch64InstrInfo::getLdStOffsetOp(MemMI)) - .addImm(Low / Scale) - .setMemRefs(I->memoperands()) - .setMIFlags(I->mergeFlagsWith(*Update)); - (void)MemMIB; - - ++NumConstOffsetFolded; - LLVM_DEBUG(dbgs() << "Creating base address load/store.\n"); - LLVM_DEBUG(dbgs() << " Replacing instructions:\n "); - LLVM_DEBUG(PrevI->print(dbgs())); - LLVM_DEBUG(dbgs() << " "); - LLVM_DEBUG(Update->print(dbgs())); - LLVM_DEBUG(dbgs() << " "); - LLVM_DEBUG(I->print(dbgs())); - LLVM_DEBUG(dbgs() << " with instruction:\n "); - LLVM_DEBUG(((MachineInstr *)AddMIB)->print(dbgs())); - LLVM_DEBUG(dbgs() << " "); - LLVM_DEBUG(((MachineInstr *)MemMIB)->print(dbgs())); - LLVM_DEBUG(dbgs() << "\n"); - - // Erase the old instructions for the block. - I->eraseFromParent(); - PrevI->eraseFromParent(); - Update->eraseFromParent(); - - return NextI; -} - bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI, unsigned BaseReg, int Offset) { @@ -2201,31 +2095,6 @@ bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI, return false; } -bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI, - MachineInstr &MI, - unsigned IndexReg, - unsigned &Offset) { - // The update instruction source and destination register must be the - // same as the load/store index register. - if (MI.getOpcode() == AArch64::MOVKWi && - TRI->isSuperOrSubRegisterEq(IndexReg, MI.getOperand(1).getReg())) { - - // movz + movk hold a large offset of a Ld/St instruction. - MachineBasicBlock::iterator B = MI.getParent()->begin(); - MachineBasicBlock::iterator MBBI = &MI; - MBBI = prev_nodbg(MBBI, B); - MachineInstr &MovzMI = *MBBI; - if (MovzMI.getOpcode() == AArch64::MOVZWi) { - unsigned Low = MovzMI.getOperand(1).getImm(); - unsigned High = MI.getOperand(2).getImm() << MI.getOperand(3).getImm(); - Offset = High + Low; - // 12-bit optionally shifted immediates are legal for adds. - return Offset >> 24 == 0; - } - } - return false; -} - MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) { MachineBasicBlock::iterator E = I->getParent()->end(); @@ -2381,60 +2250,6 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( return E; } -MachineBasicBlock::iterator -AArch64LoadStoreOpt::findMatchingConstOffsetBackward( - MachineBasicBlock::iterator I, unsigned Limit, unsigned &Offset) { - MachineBasicBlock::iterator B = I->getParent()->begin(); - MachineBasicBlock::iterator E = I->getParent()->end(); - MachineInstr &MemMI = *I; - MachineBasicBlock::iterator MBBI = I; - - // If the load is the first instruction in the block, there's obviously - // not any matching load or store. - if (MBBI == B) - return E; - - // Make sure the IndexReg is killed and the shift amount is zero. - // TODO: Relex this restriction to extend, simplify processing now. - if (!AArch64InstrInfo::getLdStOffsetOp(MemMI).isKill() || - !AArch64InstrInfo::getLdStAmountOp(MemMI).isImm() || - (AArch64InstrInfo::getLdStAmountOp(MemMI).getImm() != 0)) - return E; - - Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg(); - - // Track which register units have been modified and used between the first - // insn (inclusive) and the second insn. - ModifiedRegUnits.clear(); - UsedRegUnits.clear(); - unsigned Count = 0; - do { - MBBI = prev_nodbg(MBBI, B); - MachineInstr &MI = *MBBI; - - // Don't count transient instructions towards the search limit since there - // may be different numbers of them if e.g. debug information is present. - if (!MI.isTransient()) - ++Count; - - // If we found a match, return it. - if (isMatchingMovConstInsn(*I, MI, IndexReg, Offset)) { - return MBBI; - } - - // Update the status of what the instruction clobbered and used. - LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); - - // Otherwise, if the index register is used or modified, we have no match, - // so return early. - if (!ModifiedRegUnits.available(IndexReg) || - !UsedRegUnits.available(IndexReg)) - return E; - - } while (MBBI != B && Count < Limit); - return E; -} - bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore( MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; @@ -2619,34 +2434,6 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate return false; } -bool AArch64LoadStoreOpt::tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI, - int Scale) { - MachineInstr &MI = *MBBI; - MachineBasicBlock::iterator E = MI.getParent()->end(); - MachineBasicBlock::iterator Update; - - // Don't know how to handle unscaled pre/post-index versions below, so bail. - if (TII->hasUnscaledLdStOffset(MI.getOpcode())) - return false; - - // Look back to try to find a const offset for index LdSt instruction. For - // example, - // mov x8, #LargeImm ; = a * (1<<12) + imm12 - // ldr x1, [x0, x8] - // merged into: - // add x8, x0, a * (1<<12) - // ldr x1, [x8, imm12] - unsigned Offset; - Update = findMatchingConstOffsetBackward(MBBI, LdStConstLimit, Offset); - if (Update != E && (Offset & (Scale - 1)) == 0) { - // Merge the imm12 into the ld/st. - MBBI = mergeConstOffsetInsn(MBBI, Update, Offset, Scale); - return true; - } - - return false; -} - bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt) { @@ -2725,22 +2512,6 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, ++MBBI; } - // 5) Find a register assigned with a const value that can be combined with - // into the load or store. e.g., - // mov x8, #LargeImm ; = a * (1<<12) + imm12 - // ldr x1, [x0, x8] - // ; becomes - // add x8, x0, a * (1<<12) - // ldr x1, [x8, imm12] - for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); - MBBI != E;) { - int Scale; - if (isMergeableIndexLdSt(*MBBI, Scale) && tryToMergeIndexLdSt(MBBI, Scale)) - Modified = true; - else - ++MBBI; - } - return Modified; } diff --git a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll index 2181eaaee7db68..d39029163a47aa 100644 --- a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll +++ b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll @@ -214,8 +214,9 @@ define void @t17(i64 %a) { define i8 @LdOffset_i8(ptr %a) { ; CHECK-LABEL: LdOffset_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288 -; CHECK-NEXT: ldrb w0, [x8, #3704] +; CHECK-NEXT: mov w8, #56952 // =0xde78 +; CHECK-NEXT: movk w8, #15, lsl #16 +; CHECK-NEXT: ldrb w0, [x0, x8] ; CHECK-NEXT: ret %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992 %val = load i8, ptr %arrayidx, align 1 @@ -226,8 +227,9 @@ define i8 @LdOffset_i8(ptr %a) { define i32 @LdOffset_i8_zext32(ptr %a) { ; CHECK-LABEL: LdOffset_i8_zext32: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288 -; CHECK-NEXT: ldrb w0, [x8, #3704] +; CHECK-NEXT: mov w8, #56952 // =0xde78 +; CHECK-NEXT: movk w8, #15, lsl #16 +; CHECK-NEXT: ldrb w0, [x0, x8] ; CHECK-NEXT: ret %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992 %val = load i8, ptr %arrayidx, align 1 @@ -253,8 +255,9 @@ define i32 @LdOffset_i8_sext32(ptr %a) { define i64 @LdOffset_i8_zext64(ptr %a) { ; CHECK-LABEL: LdOffset_i8_zext64: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288 -; CHECK-NEXT: ldrb w0, [x8, #3704] +; CHECK-NEXT: mov w8, #56952 // =0xde78 +; CHECK-NEXT: movk w8, #15, lsl #16 +; CHECK-NEXT: ldrb w0, [x0, x8] ; CHECK-NEXT: ret %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992 %val = load i8, ptr %arrayidx, align 1 diff --git a/llvm/test/CodeGen/AArch64/large-offset-ldr-merge.mir b/llvm/test/CodeGen/AArch64/large-offset-ldr-merge.mir index 15b6700398ea08..488f1ffdb52f3b 100755 --- a/llvm/test/CodeGen/AArch64/large-offset-ldr-merge.mir +++ b/llvm/test/CodeGen/AArch64/large-offset-ldr-merge.mir @@ -14,8 +14,9 @@ body: | ; CHECK-LABEL: name: LdOffset ; CHECK: liveins: $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $x8 = ADDXri $x0, 253, 12 - ; CHECK-NEXT: renamable $w0 = LDRBBui killed renamable $x8, 3704 + ; CHECK-NEXT: renamable $w8 = MOVZWi 56952, 0 + ; CHECK-NEXT: renamable $w8 = MOVKWi $w8, 15, 16, implicit-def $x8 + ; CHECK-NEXT: renamable $w0 = LDRBBroX killed renamable $x0, killed renamable $x8, 0, 0 ; CHECK-NEXT: RET undef $lr, implicit $w0 renamable $w8 = MOVZWi 56952, 0 renamable $w8 = MOVKWi $w8, 15, 16, implicit-def $x8