From a9fa7ec1dac44360f98458673299d1982a9cb7f2 Mon Sep 17 00:00:00 2001
From: hanbeom
Date: Tue, 23 Apr 2024 02:41:51 +0900
Subject: [PATCH] [AArch64] Prevent (shl (srl x, c1), c2) -> (and (shift x, c3)) when the shift feeds a load

Currently, `DAGCombiner` replaces shift pairs of the form
`(shl (srl x, c1), c2)` with an `AND` of a single shift.

However, in certain cases, such as `(shl (srl x, c1), 2)`, the transform
to `AND` is not needed when the result is used as a load address.

Consider the following case:
```
lsr x8, x8, #56
and x8, x8, #0xfc
ldr w0, [x2, x8]
ret
```

In this case, we can remove the `AND` by changing the `LDR` addressing
mode to `[X2, X8, LSL #2]` and increasing the right-shift amount from
56 to 58.

After the change:
```
lsr x8, x8, #58
ldr w0, [x2, x8, lsl #2]
ret
```

This patch checks whether a `(shl (srl x, c1), 2)` whose result feeds a
load address can be kept as-is instead of being transformed to `AND`.
---
 .../Target/AArch64/AArch64ISelLowering.cpp   | 17 ++++++++
 llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll | 43 ++++++++-----------
 2 files changed, 35 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f552f91929201c..5c486d598b81f7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16918,6 +16918,23 @@ bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask(
     return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
   }
 
+  // We do not need to fold when this shift is used in a specific load
+  // pattern: (ldr x, (add x, (shl (srl x, c1), 2)))
+  if (N->getOpcode() == ISD::SHL && N->hasOneUse()) {
+    if (auto C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+      unsigned ShlAmt = C2->getZExtValue();
+      auto ShouldADD = *N->use_begin();
+      if (ShlAmt <= 3 && ShouldADD && ShouldADD->getOpcode() == ISD::ADD) {
+        if (auto ShouldLOAD = dyn_cast<LoadSDNode>(*ShouldADD->use_begin())) {
+          unsigned ByteVT = ShouldLOAD->getMemoryVT().getSizeInBits() / 8;
+          if ((1 << ShlAmt) == ByteVT &&
+              isIndexedLoadLegal(ISD::PRE_INC, ShouldLOAD->getMemoryVT()))
+            return false;
+        }
+      }
+    }
+  }
+
   return true;
 }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll b/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll
index 55871c4682b993..9dfc8df703ce64 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll
@@ -6,9 +6,8 @@ define i16 @load16_shr63(i64 %a, i64 %b, ptr %table) {
 ; CHECK-LABEL: load16_shr63:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mul x8, x1, x0
-; CHECK-NEXT:    lsr x8, x8, #62
-; CHECK-NEXT:    and x8, x8, #0x2
-; CHECK-NEXT:    ldrh w0, [x2, x8]
+; CHECK-NEXT:    lsr x8, x8, #63
+; CHECK-NEXT:    ldrh w0, [x2, x8, lsl #1]
 ; CHECK-NEXT:    ret
 entry:
   %mul = mul i64 %b, %a
@@ -22,9 +21,8 @@ define i16 @load16_shr2(i64 %a, i64 %b, ptr %table) {
 ; CHECK-LABEL: load16_shr2:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mul x8, x1, x0
-; CHECK-NEXT:    lsr x8, x8, #1
-; CHECK-NEXT:    and x8, x8, #0x7ffffffffffffffe
-; CHECK-NEXT:    ldrh w0, [x2, x8]
+; CHECK-NEXT:    lsr x8, x8, #2
+; CHECK-NEXT:    ldrh w0, [x2, x8, lsl #1]
 ; CHECK-NEXT:    ret
 entry:
   %mul = mul i64 %b, %a
@@ -38,8 +36,8 @@ define i16 @load16_shr1(i64 %a, i64 %b, ptr %table) {
 ; CHECK-LABEL: load16_shr1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mul x8, x1, x0
-; CHECK-NEXT:    and x8, x8, #0xfffffffffffffffe
-; CHECK-NEXT:    ldrh w0, [x2, x8]
+; CHECK-NEXT:    lsr x8, x8, #1
+; CHECK-NEXT:    ldrh w0, [x2, x8, lsl #1]
 ; CHECK-NEXT:    ret
 entry:
   %mul = mul i64 %b, %a
@@ -53,9 +51,8 @@ define i32 @load32_shr63(i64 %a, i64 %b, ptr %table) {
 ; CHECK-LABEL: load32_shr63:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mul x8, x1, x0
-; CHECK-NEXT:    lsr x8, x8, #61
-; CHECK-NEXT:    and x8, x8, #0x4
-; CHECK-NEXT:    ldr w0, [x2, x8]
+; CHECK-NEXT:    lsr x8, x8, #63
+; CHECK-NEXT:    ldr w0, [x2, x8, lsl #2]
 ; CHECK-NEXT:    ret
 entry:
   %mul = mul i64 %b, %a
@@ -69,8 +66,8 @@ define i32 @load32_shr2(i64 %a, i64 %b, ptr %table) {
 ; CHECK-LABEL: load32_shr2:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mul x8, x1, x0
-; CHECK-NEXT:    and x8, x8, #0xfffffffffffffffc
-; CHECK-NEXT:    ldr w0, [x2, x8]
+; CHECK-NEXT:    lsr x8, x8, #2
+; CHECK-NEXT:    ldr w0, [x2, x8, lsl #2]
 ; CHECK-NEXT:    ret
 entry:
   %mul = mul i64 %b, %a
@@ -84,9 +81,8 @@ define i32 @load32_shr1(i64 %a, i64 %b, ptr %table) {
 ; CHECK-LABEL: load32_shr1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mul x8, x1, x0
-; CHECK-NEXT:    lsl x8, x8, #1
-; CHECK-NEXT:    and x8, x8, #0xfffffffffffffffc
-; CHECK-NEXT:    ldr w0, [x2, x8]
+; CHECK-NEXT:    lsr x8, x8, #1
+; CHECK-NEXT:    ldr w0, [x2, x8, lsl #2]
 ; CHECK-NEXT:    ret
 entry:
   %mul = mul i64 %b, %a
@@ -100,9 +96,8 @@ define i64 @load64_shr63(i64 %a, i64 %b, ptr %table) {
 ; CHECK-LABEL: load64_shr63:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mul x8, x1, x0
-; CHECK-NEXT:    lsr x8, x8, #60
-; CHECK-NEXT:    and x8, x8, #0x8
-; CHECK-NEXT:    ldr x0, [x2, x8]
+; CHECK-NEXT:    lsr x8, x8, #63
+; CHECK-NEXT:    ldr x0, [x2, x8, lsl #3]
 ; CHECK-NEXT:    ret
 entry:
   %mul = mul i64 %b, %a
@@ -116,9 +111,8 @@ define i64 @load64_shr2(i64 %a, i64 %b, ptr %table) {
 ; CHECK-LABEL: load64_shr2:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mul x8, x1, x0
-; CHECK-NEXT:    lsl x8, x8, #1
-; CHECK-NEXT:    and x8, x8, #0xfffffffffffffff8
-; CHECK-NEXT:    ldr x0, [x2, x8]
+; CHECK-NEXT:    lsr x8, x8, #2
+; CHECK-NEXT:    ldr x0, [x2, x8, lsl #3]
 ; CHECK-NEXT:    ret
 entry:
   %mul = mul i64 %b, %a
@@ -132,9 +126,8 @@ define i64 @load64_shr1(i64 %a, i64 %b, ptr %table) {
 ; CHECK-LABEL: load64_shr1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mul x8, x1, x0
-; CHECK-NEXT:    lsl x8, x8, #2
-; CHECK-NEXT:    and x8, x8, #0xfffffffffffffff8
-; CHECK-NEXT:    ldr x0, [x2, x8]
+; CHECK-NEXT:    lsr x8, x8, #1
+; CHECK-NEXT:    ldr x0, [x2, x8, lsl #3]
 ; CHECK-NEXT:    ret
 entry:
   %mul = mul i64 %b, %a
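
For readers who want to reproduce the motivating case, below is a minimal standalone IR sketch in the style of the tests above (the function name and the expected assembly are illustrative, not part of the patch). The `lshr` result is used only as a scaled index into an `i32` table, so after lowering ISel sees `(add base, (shl (srl x, 58), 2))` feeding the load; with the new check in `shouldFoldConstantShiftPairToMask`, the shift pair is kept and the `shl` folds into the `ldr` addressing mode.
```
; Illustrative example (not from the patch): with this change, the lowering
; is expected to be roughly
;   lsr x8, x0, #58
;   ldr w0, [x1, x8, lsl #2]
; instead of lsr #56 + and #0xfc + an unscaled ldr.
define i32 @load32_shr58_example(i64 %a, ptr %table) {
entry:
  %shr = lshr i64 %a, 58
  %arrayidx = getelementptr inbounds i32, ptr %table, i64 %shr
  %val = load i32, ptr %arrayidx, align 4
  ret i32 %val
}
```
Compiled with `llc` for an AArch64 triple, this should now produce the `lsr`/`ldr ..., lsl #2` pair rather than the `lsr`/`and`/`ldr` sequence.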