Skip to content

Commit

Permalink
[SimplifyCFG] Simplify nested branches (#97067)
Browse files Browse the repository at this point in the history
This patch folds the following pattern (I don't know what to call this):
```
bb0:
  br i1 %cond1, label %bb1, label %bb2
bb1:
  br i1 %cond2, label %bb3, label %bb4
bb2:
  br i1 %cond2, label %bb4, label %bb3
bb3:
  ...
bb4:
  ...
```
into
```
bb0:
  %cond = xor i1 %cond1, %cond2
  br i1 %cond, label %bb4, label %bb3
bb3:
  ...
bb4:
  ...
```

Alive2: https://alive2.llvm.org/ce/z/5iOJEL
Closes #97022.
Closes #83417.

I found this pattern in some Verilator-generated code, which is widely
used in RTL simulation. This fold reduces the number of branches and
improves CPU frontend performance. To my surprise, this pattern is also
common in C/C++ codebases.
Affected libraries/applications:
cmake/cvc5/freetype/git/gromacs/jq/linux/openblas/openmpi/openssl/php/postgres/ruby/sqlite/wireshark/z3/...
  • Loading branch information
dtcxzyw authored Jun 30, 2024
1 parent 3efac5c commit 4997af9
Show file tree
Hide file tree
Showing 3 changed files with 478 additions and 16 deletions.
93 changes: 93 additions & 0 deletions llvm/lib/Transforms/Utils/SimplifyCFG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7361,6 +7361,95 @@ static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) {
return PredPred;
}

/// Collapse two mirrored second-level conditional branches into a single
/// branch on the xor of both conditions.
///
/// Recognized shape:
///   bb0:
///     br i1 %cond1, label %bb1, label %bb2
///   bb1:
///     br i1 %cond2, label %bb3, label %bb4
///   bb2:
///     br i1 %cond2, label %bb4, label %bb3
///   bb3:
///     ...
///   bb4:
///     ...
/// Result:
///   bb0:
///     %cond = xor i1 %cond1, %cond2
///     br i1 %cond, label %bb4, label %bb3
///   bb3:
///     ...
///   bb4:
///     ...
/// NOTE: %cond2 always dominates the terminator of bb0.
///
/// \param BI  the branch terminating bb0; its successors 0 and 1 are bb1/bb2.
/// \param DTU optional dominator tree updater; when non-null it is informed
///            of the two removed and two inserted CFG edges.
/// \returns true if the branch was rewritten, false if the pattern did not
///          match (in which case nothing is modified).
static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU) {
  BasicBlock *Head = BI->getParent();
  BasicBlock *TrueMid = BI->getSuccessor(0);
  BasicBlock *FalseMid = BI->getSuccessor(1);

  // Yields the conditional branch of \p Mid when that block consists solely
  // of such a branch and neither target is \p Mid itself, the head block, or
  // a block that begins with a PHI node. Yields nullptr otherwise.
  auto GetForwardingBranch = [Head](BasicBlock *Mid) -> BranchInst * {
    if (Mid == Head)
      return nullptr;
    // The terminator has to be the only instruction in the block.
    if (&Mid->front() != Mid->getTerminator())
      return nullptr;
    auto *MidBI = dyn_cast<BranchInst>(Mid->getTerminator());
    if (!MidBI || !MidBI->isConditional())
      return nullptr;
    BasicBlock *OnTrue = MidBI->getSuccessor(0);
    BasicBlock *OnFalse = MidBI->getSuccessor(1);
    if (OnTrue == Mid || OnFalse == Mid || OnTrue == Head || OnFalse == Head)
      return nullptr;
    if (isa<PHINode>(OnTrue->front()) || isa<PHINode>(OnFalse->front()))
      return nullptr;
    return MidBI;
  };

  BranchInst *TrueMidBI = GetForwardingBranch(TrueMid);
  if (!TrueMidBI)
    return false;
  BranchInst *FalseMidBI = GetForwardingBranch(FalseMid);
  if (!FalseMidBI)
    return false;

  BasicBlock *BB3 = TrueMidBI->getSuccessor(0);
  BasicBlock *BB4 = TrueMidBI->getSuccessor(1);

  // Both inner branches must test the same value with swapped destinations.
  const bool SameCond =
      TrueMidBI->getCondition() == FalseMidBI->getCondition();
  const bool Mirrored = FalseMidBI->getSuccessor(1) == BB3 &&
                        FalseMidBI->getSuccessor(0) == BB4;
  if (!SameCond || !Mirrored)
    return false;

  // Branch on cond1 ^ cond2: when it is true control ends up in bb4,
  // otherwise in bb3.
  IRBuilder<> Builder(BI);
  auto *MergedCond =
      Builder.CreateXor(BI->getCondition(), TrueMidBI->getCondition());
  BI->setCondition(MergedCond);

  // Detach the head from each intermediate block before retargeting the
  // corresponding successor slot.
  TrueMid->removePredecessor(Head);
  BI->setSuccessor(0, BB4);
  FalseMid->removePredecessor(Head);
  BI->setSuccessor(1, BB3);

  if (DTU)
    DTU->applyUpdates({{DominatorTree::Delete, Head, TrueMid},
                       {DominatorTree::Insert, Head, BB4},
                       {DominatorTree::Delete, Head, FalseMid},
                       {DominatorTree::Insert, Head, BB3}});

  // Read the branch weights of one branch; a branch without weights is
  // treated as 1:1. Remembers whether any of the branches carried weights.
  bool AnyWeights = false;
  auto ReadWeights = [&AnyWeights](BranchInst *Br, uint64_t &TrueW,
                                   uint64_t &FalseW) {
    if (extractBranchWeights(*Br, TrueW, FalseW))
      AnyWeights = true;
    else
      TrueW = FalseW = 1;
  };

  uint64_t HeadT, HeadF;
  ReadWeights(BI, HeadT, HeadF);
  uint64_t TrueMidT, TrueMidF;
  ReadWeights(TrueMidBI, TrueMidT, TrueMidF);
  uint64_t FalseMidT, FalseMidF;
  ReadWeights(FalseMidBI, FalseMidT, FalseMidF);

  if (!AnyWeights)
    return true;

  // Slot 0 now leads to bb4 (reached via bb1-false or bb2-true) and slot 1
  // leads to bb3 (reached via bb1-true or bb2-false).
  uint64_t Merged[2] = {HeadT * TrueMidF + HeadF * FalseMidT,
                        HeadT * TrueMidT + HeadF * FalseMidF};
  FitWeights(Merged);
  setBranchWeights(BI, Merged[0], Merged[1], /*IsExpected=*/false);
  return true;
}

bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
assert(
!isa<ConstantInt>(BI->getCondition()) &&
Expand Down Expand Up @@ -7468,6 +7557,10 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
return requestResimplify();

// Look for nested conditional branches.
if (mergeNestedCondBranch(BI, DTU))
return requestResimplify();

return false;
}

Expand Down
50 changes: 34 additions & 16 deletions llvm/test/CodeGen/ARM/and-cmp0-sink.ll
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ exit:
}

; Test with a mask that can be encoded with T32 instruction set, but not with A32.
define i32 @f0(i1 %c0, i32 %v) {
define i32 @f0(i1 %c0, i32 %v, ptr %p) {
; V7M-LABEL: f0:
; V7M: @ %bb.0: @ %E
; V7M-NEXT: lsls r0, r0, #31
Expand All @@ -198,7 +198,9 @@ define i32 @f0(i1 %c0, i32 %v) {
; V7M-NEXT: bxeq lr
; V7M-NEXT: b .LBB1_3
; V7M-NEXT: .LBB1_2: @ %B
; V7M-NEXT: movs r0, #1
; V7M-NEXT: tst.w r1, #16843009
; V7M-NEXT: str r0, [r2]
; V7M-NEXT: itt ne
; V7M-NEXT: movne r0, #0
; V7M-NEXT: bxne lr
Expand All @@ -208,10 +210,10 @@ define i32 @f0(i1 %c0, i32 %v) {
;
; V7A-LABEL: f0:
; V7A: @ %bb.0: @ %E
; V7A-NEXT: movw r2, #257
; V7A-NEXT: movw r3, #257
; V7A-NEXT: tst r0, #1
; V7A-NEXT: movt r2, #257
; V7A-NEXT: and r1, r1, r2
; V7A-NEXT: movt r3, #257
; V7A-NEXT: and r1, r1, r3
; V7A-NEXT: beq .LBB1_3
; V7A-NEXT: @ %bb.1: @ %A
; V7A-NEXT: cmp r1, #0
Expand All @@ -221,8 +223,10 @@ define i32 @f0(i1 %c0, i32 %v) {
; V7A-NEXT: mov r0, #1
; V7A-NEXT: bx lr
; V7A-NEXT: .LBB1_3: @ %B
; V7A-NEXT: mov r0, #0
; V7A-NEXT: mov r0, #1
; V7A-NEXT: cmp r1, #0
; V7A-NEXT: str r0, [r2]
; V7A-NEXT: mov r0, #0
; V7A-NEXT: moveq r0, #1
; V7A-NEXT: bx lr
;
Expand All @@ -237,7 +241,9 @@ define i32 @f0(i1 %c0, i32 %v) {
; V7A-T-NEXT: bxeq lr
; V7A-T-NEXT: b .LBB1_3
; V7A-T-NEXT: .LBB1_2: @ %B
; V7A-T-NEXT: movs r0, #1
; V7A-T-NEXT: tst.w r1, #16843009
; V7A-T-NEXT: str r0, [r2]
; V7A-T-NEXT: itt ne
; V7A-T-NEXT: movne r0, #0
; V7A-T-NEXT: bxne lr
Expand All @@ -247,18 +253,20 @@ define i32 @f0(i1 %c0, i32 %v) {
;
; V6M-LABEL: f0:
; V6M: @ %bb.0: @ %E
; V6M-NEXT: ldr r2, .LCPI1_0
; V6M-NEXT: ands r2, r1
; V6M-NEXT: ldr r3, .LCPI1_0
; V6M-NEXT: ands r3, r1
; V6M-NEXT: lsls r0, r0, #31
; V6M-NEXT: beq .LBB1_3
; V6M-NEXT: @ %bb.1: @ %A
; V6M-NEXT: cmp r2, #0
; V6M-NEXT: cmp r3, #0
; V6M-NEXT: bne .LBB1_5
; V6M-NEXT: @ %bb.2:
; V6M-NEXT: movs r0, #0
; V6M-NEXT: bx lr
; V6M-NEXT: .LBB1_3: @ %B
; V6M-NEXT: cmp r2, #0
; V6M-NEXT: movs r0, #1
; V6M-NEXT: str r0, [r2]
; V6M-NEXT: cmp r3, #0
; V6M-NEXT: beq .LBB1_5
; V6M-NEXT: @ %bb.4:
; V6M-NEXT: movs r0, #0
Expand All @@ -280,6 +288,7 @@ A:

B:
%c2 = icmp eq i32 %a, 0
store i32 1, ptr %p, align 4
br i1 %c2, label %D, label %C

C:
Expand All @@ -294,7 +303,7 @@ X:
}

; Test with a mask that can be encoded both with T32 and A32 instruction sets.
define i32 @f1(i1 %c0, i32 %v) {
define i32 @f1(i1 %c0, i32 %v, ptr %p) {
; V7M-LABEL: f1:
; V7M: @ %bb.0: @ %E
; V7M-NEXT: lsls r0, r0, #31
Expand All @@ -306,7 +315,9 @@ define i32 @f1(i1 %c0, i32 %v) {
; V7M-NEXT: bxeq lr
; V7M-NEXT: b .LBB2_3
; V7M-NEXT: .LBB2_2: @ %B
; V7M-NEXT: movs r0, #1
; V7M-NEXT: tst.w r1, #100663296
; V7M-NEXT: str r0, [r2]
; V7M-NEXT: itt ne
; V7M-NEXT: movne r0, #0
; V7M-NEXT: bxne lr
Expand All @@ -326,8 +337,10 @@ define i32 @f1(i1 %c0, i32 %v) {
; V7A-NEXT: mov r0, #1
; V7A-NEXT: bx lr
; V7A-NEXT: .LBB2_3: @ %B
; V7A-NEXT: mov r0, #0
; V7A-NEXT: mov r0, #1
; V7A-NEXT: tst r1, #100663296
; V7A-NEXT: str r0, [r2]
; V7A-NEXT: mov r0, #0
; V7A-NEXT: moveq r0, #1
; V7A-NEXT: bx lr
;
Expand All @@ -342,7 +355,9 @@ define i32 @f1(i1 %c0, i32 %v) {
; V7A-T-NEXT: bxeq lr
; V7A-T-NEXT: b .LBB2_3
; V7A-T-NEXT: .LBB2_2: @ %B
; V7A-T-NEXT: movs r0, #1
; V7A-T-NEXT: tst.w r1, #100663296
; V7A-T-NEXT: str r0, [r2]
; V7A-T-NEXT: itt ne
; V7A-T-NEXT: movne r0, #0
; V7A-T-NEXT: bxne lr
Expand All @@ -352,19 +367,21 @@ define i32 @f1(i1 %c0, i32 %v) {
;
; V6M-LABEL: f1:
; V6M: @ %bb.0: @ %E
; V6M-NEXT: movs r2, #3
; V6M-NEXT: lsls r2, r2, #25
; V6M-NEXT: ands r2, r1
; V6M-NEXT: movs r3, #3
; V6M-NEXT: lsls r3, r3, #25
; V6M-NEXT: ands r3, r1
; V6M-NEXT: lsls r0, r0, #31
; V6M-NEXT: beq .LBB2_3
; V6M-NEXT: @ %bb.1: @ %A
; V6M-NEXT: cmp r2, #0
; V6M-NEXT: cmp r3, #0
; V6M-NEXT: bne .LBB2_5
; V6M-NEXT: @ %bb.2:
; V6M-NEXT: movs r0, #0
; V6M-NEXT: bx lr
; V6M-NEXT: .LBB2_3: @ %B
; V6M-NEXT: cmp r2, #0
; V6M-NEXT: movs r0, #1
; V6M-NEXT: str r0, [r2]
; V6M-NEXT: cmp r3, #0
; V6M-NEXT: beq .LBB2_5
; V6M-NEXT: @ %bb.4:
; V6M-NEXT: movs r0, #0
Expand All @@ -382,6 +399,7 @@ A:

B:
%c2 = icmp eq i32 %a, 0
store i32 1, ptr %p, align 4
br i1 %c2, label %D, label %C

C:
Expand Down
Loading

0 comments on commit 4997af9

Please sign in to comment.