Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PASS] O1 optimizations wrongly use -loop-simplify #11

Open
xiangzhai opened this issue Nov 1, 2017 · 1 comment
Open

[PASS] O1 optimizations wrongly use -loop-simplify #11

xiangzhai opened this issue Nov 1, 2017 · 1 comment

Comments

@xiangzhai
Copy link

Hi ScaffCC developers,

-loop-simplify produces the unroll-disabling metadata (the `!llvm.loop !2` attachment, whose node is `llvm.loop.unroll.disable`), for example:

$ cat cat_state.n044.ll
; ModuleID = 'cat_state.n045a.ll'
source_filename = "cat_state.n04_merged.scaffold"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; catN(bit, n): calls @llvm.H.i16 on bit[0]; then, when n > 1, calls
; @llvm.CNOT.i16.i16(bit[i], bit[i-1]) for i = 1 .. n-1.
;
; In this listing the loop has already been unrolled by a factor of two:
;   * %for.body performs two CNOT calls per trip (for i and i+1);
;   * %for.body.epil performs the one leftover call when the trip count
;     (n - 1) is odd.
; The back-edge branch of %for.body carries no !llvm.loop attachment.
; Function Attrs: noinline nounwind uwtable
define void @catN(i16* %bit, i32 %n) local_unnamed_addr #0 {
entry:
   %0 = load i16, i16* %bit, align 2
   tail call void @llvm.H.i16(i16 %0)
   ; Zero-trip guard: there is nothing to do in the loop unless n > 1.
   %cmp1 = icmp sgt i32 %n, 1
   br i1 %cmp1, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
   ; %1 = n - 1 is the trip count; %xtraiter is its remainder modulo 2.
   %1 = add i32 %n, -1
   %2 = add i32 %n, -2
   %xtraiter = and i32 %1, 1
   ; n - 2 <u 1 means the trip count is exactly 1: skip the unrolled body
   ; and let the epilogue run the single iteration.
   %3 = icmp ult i32 %2, 1
   br i1 %3, label %for.cond.for.end_crit_edge.unr-lcssa, label %for.body.lr.ph.new

for.body.lr.ph.new:                               ; preds = %for.body.lr.ph
   ; Number of iterations the two-way unrolled body has to cover.
   %unroll_iter = sub i32 %1, %xtraiter
   br label %for.body

for.body:                                         ; preds = %for.body, %for.body.lr.ph.new
   %inc3 = phi i32 [ 1, %for.body.lr.ph.new ], [ %inc.1, %for.body ]
   %niter = phi i32 [ %unroll_iter, %for.body.lr.ph.new ], [ %niter.nsub.1, %for.body ]
   ; First copy of the body: CNOT(bit[i], bit[i-1]) with i = %inc3.
   %idxprom = sext i32 %inc3 to i64
   %arrayidx1 = getelementptr inbounds i16, i16* %bit, i64 %idxprom
   %4 = load i16, i16* %arrayidx1, align 2
   %sub = add nsw i32 %inc3, -1
   %idxprom2 = sext i32 %sub to i64
   %arrayidx3 = getelementptr inbounds i16, i16* %bit, i64 %idxprom2
   %5 = load i16, i16* %arrayidx3, align 2
   tail call void @llvm.CNOT.i16.i16(i16 %4, i16 %5)
   %inc = add nsw i32 %inc3, 1
   %niter.nsub = sub i32 %niter, 1
   ; Second copy of the body: CNOT(bit[i+1], bit[i]).
   %idxprom.1 = sext i32 %inc to i64
   %arrayidx1.1 = getelementptr inbounds i16, i16* %bit, i64 %idxprom.1
   %6 = load i16, i16* %arrayidx1.1, align 2
   %idxprom2.1 = sext i32 %inc3 to i64
   %arrayidx3.1 = getelementptr inbounds i16, i16* %bit, i64 %idxprom2.1
   %7 = load i16, i16* %arrayidx3.1, align 2
   tail call void @llvm.CNOT.i16.i16(i16 %6, i16 %7)
   %inc.1 = add nsw i32 %inc, 1
   %niter.nsub.1 = sub i32 %niter.nsub, 1
   %niter.ncmp.1 = icmp ne i32 %niter.nsub.1, 0
   br i1 %niter.ncmp.1, label %for.body, label %for.cond.for.end_crit_edge.unr-lcssa

for.cond.for.end_crit_edge.unr-lcssa:             ; preds = %for.body, %for.body.lr.ph
   ; %inc3.unr is the index at which the epilogue iteration (if any) starts.
   %inc3.unr = phi i32 [ 1, %for.body.lr.ph ], [ %inc.1, %for.body ]
   %lcmp.mod = icmp ne i32 %xtraiter, 0
   br i1 %lcmp.mod, label %for.body.epil, label %for.end

for.body.epil:                                    ; preds = %for.cond.for.end_crit_edge.unr-lcssa
   ; Remainder iteration: one more CNOT(bit[i], bit[i-1]).
   %inc3.epil = phi i32 [ %inc3.unr, %for.cond.for.end_crit_edge.unr-lcssa ]
   %idxprom.epil = sext i32 %inc3.epil to i64
   %arrayidx1.epil = getelementptr inbounds i16, i16* %bit, i64 %idxprom.epil
   %8 = load i16, i16* %arrayidx1.epil, align 2
   %sub.epil = add nsw i32 %inc3.epil, -1
   %idxprom2.epil = sext i32 %sub.epil to i64
   %arrayidx3.epil = getelementptr inbounds i16, i16* %bit, i64 %idxprom2.epil
   %9 = load i16, i16* %arrayidx3.epil, align 2
   tail call void @llvm.CNOT.i16.i16(i16 %8, i16 %9)
   ; %inc.epil and %cmp.epil have no users in this listing.
   %inc.epil = add nsw i32 %inc3.epil, 1
   %cmp.epil = icmp slt i32 %inc.epil, %n
   br label %for.end

for.end:                                          ; preds = %for.body.epil, %for.cond.for.end_crit_edge.unr-lcssa, %entry
   ret void
}

; External declarations for the two callees used by every function in this
; listing; no bodies are provided here.
; NOTE(review): these are not standard LLVM intrinsics; presumably they are
; ScaffCC quantum-gate intrinsics (Hadamard and controlled-NOT) - confirm
; against the ScaffCC sources.
; Function Attrs: nounwind
declare void @llvm.H.i16(i16) #1

; Function Attrs: nounwind
declare void @llvm.CNOT.i16.i16(i16, i16) #1

; unCatN(bit, n): when n > 1, calls @llvm.CNOT.i16.i16(bit[i], bit[i-1]) for
; i = n-1 down to 1; afterwards (for every n) calls @llvm.H.i16 on bit[0].
;
; In this listing the first iteration (i = n-1) sits in its own block,
; %for.body.peel, ahead of the loop; %for.body handles i = n-2 down to 1.
; The back-edge branch of %for.body carries the `!llvm.loop !2` attachment,
; the only loop attachment in the module.
; Function Attrs: noinline nounwind uwtable
define void @unCatN(i16* %bit, i32 %n) local_unnamed_addr #0 {
entry:
   %storemerge1 = add nsw i32 %n, -1
   ; Zero-trip guard: skip straight to the final H call unless n > 1.
   %cmp2 = icmp sgt i32 %n, 1
   br i1 %cmp2, label %for.body.peel, label %for.end

for.body.peel:                                    ; preds = %entry
   ; Peeled first iteration: CNOT(bit[n-1], bit[n-2]).
   %idxprom.peel = sext i32 %storemerge1 to i64
   %arrayidx.peel = getelementptr inbounds i16, i16* %bit, i64 %idxprom.peel
   %0 = load i16, i16* %arrayidx.peel, align 2
   %sub1.peel = add nsw i32 %n, -2
   %idxprom2.peel = sext i32 %sub1.peel to i64
   %arrayidx3.peel = getelementptr inbounds i16, i16* %bit, i64 %idxprom2.peel
   %1 = load i16, i16* %arrayidx3.peel, align 2
   tail call void @llvm.CNOT.i16.i16(i16 %0, i16 %1)
   %storemerge.peel = add nsw i32 %storemerge1, -1
   ; Enter the loop only if iterations remain after the peeled one.
   %cmp.peel = icmp sgt i32 %storemerge1, 1
   br i1 %cmp.peel, label %for.body.lr.ph.peel.newph, label %for.end

for.body.lr.ph.peel.newph:                        ; preds = %for.body.peel
   br label %for.body

for.body:                                         ; preds = %for.body, %for.body.lr.ph.peel.newph
   ; %storemerge5 is the current index i; %storemerge.in3 is the index of
   ; the previous trip (i + 1), so %storemerge.in3 - 2 equals i - 1.
   %storemerge5 = phi i32 [ %storemerge.peel, %for.body.lr.ph.peel.newph ], [ %storemerge, %for.body ]
   %storemerge.in3 = phi i32 [ %storemerge1, %for.body.lr.ph.peel.newph ], [ %storemerge5, %for.body ]
   %idxprom = sext i32 %storemerge5 to i64
   %arrayidx = getelementptr inbounds i16, i16* %bit, i64 %idxprom
   %2 = load i16, i16* %arrayidx, align 2
   %sub1 = add nsw i32 %storemerge.in3, -2
   %idxprom2 = sext i32 %sub1 to i64
   %arrayidx3 = getelementptr inbounds i16, i16* %bit, i64 %idxprom2
   %3 = load i16, i16* %arrayidx3, align 2
   tail call void @llvm.CNOT.i16.i16(i16 %2, i16 %3)
   %storemerge = add nsw i32 %storemerge5, -1
   %cmp = icmp sgt i32 %storemerge5, 1
   ; This is the branch that carries the loop metadata the report is about.
   br i1 %cmp, label %for.body, label %for.end, !llvm.loop !2

for.end:                                          ; preds = %for.body, %for.body.peel, %entry
   ; Every incoming value of this phi is %bit, so the load below reads bit[0].
   %.lcssa = phi i16* [ %bit, %entry ], [ %bit, %for.body.peel ], [ %bit, %for.body ]
   %4 = load i16, i16* %.lcssa, align 2
   tail call void @llvm.H.i16(i16 %4)
   ret void
}

; main(): reserves a 4-element i16 array on the stack, passes a pointer to its
; first element to @catN_IP4_IPx_IPx_IPx_DPx_DPx_DPx_DPx, and returns 0.
; The i32 argument of that call is `undef`.
; Function Attrs: noinline nounwind uwtable
define i32 @main() local_unnamed_addr #0 {
entry:
   %bits = alloca [4 x i16], align 2
   ; Array-to-pointer decay: address of %bits[0].
   %arraydecay = getelementptr inbounds [4 x i16], [4 x i16]* %bits, i64 0, i64 0
   call void @catN_IP4_IPx_IPx_IPx_DPx_DPx_DPx_DPx(i16* %arraydecay, i32 undef)
   ret i32 0
}

; Straight-line, loop-free function: one @llvm.H.i16 call on bit[0] followed
; by three @llvm.CNOT.i16.i16 calls on (bit[1], bit[0]), (bit[2], bit[1]) and
; (bit[3], bit[2]). The %n parameter is never read.
; NOTE(review): this looks like @catN specialised for n = 4 (the "IP4" in the
; name) and fully unrolled - confirm against the pass that generated it.
define void @catN_IP4_IPx_IPx_IPx_DPx_DPx_DPx_DPx(i16* %bit, i32 %n) {
entry.:
   ; H(bit[0])
   %0 = load i16, i16* %bit, align 2
   tail call void @llvm.H.i16(i16 %0)
   ; CNOT(bit[1], bit[0])
   %arrayidx1. = getelementptr inbounds i16, i16* %bit, i64 1
   %1 = load i16, i16* %arrayidx1., align 2
   %2 = load i16, i16* %bit, align 2
   tail call void @llvm.CNOT.i16.i16(i16 %1, i16 %2)
   ; CNOT(bit[2], bit[1])
   %arrayidx1.1. = getelementptr inbounds i16, i16* %bit, i64 2
   %3 = load i16, i16* %arrayidx1.1., align 2
   %arrayidx3.1. = getelementptr inbounds i16, i16* %bit, i64 1
   %4 = load i16, i16* %arrayidx3.1., align 2
   tail call void @llvm.CNOT.i16.i16(i16 %3, i16 %4)
   ; CNOT(bit[3], bit[2])
   %arrayidx1.epil. = getelementptr inbounds i16, i16* %bit, i64 3
   %5 = load i16, i16* %arrayidx1.epil., align 2
   %arrayidx3.epil. = getelementptr inbounds i16, i16* %bit, i64 2
   %6 = load i16, i16* %arrayidx3.epil., align 2
   tail call void @llvm.CNOT.i16.i16(i16 %5, i16 %6)
   ret void
}

; Attribute group #0 is attached to @catN, @unCatN and @main; group #1 to the
; two `declare`d callees.
attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
; Producer string, kept on one line so that the string literal contains no
; embedded newlines.
!1 = !{!"clang version 6.0.0 (git@github.com:llvm-mirror/clang.git 0aed123216ad4a38a9c2b16f1783895fd5cb1a04) (git@github.com:llvm-mirror/llvm.git d209b37aec1e392dabbf9b5324ea4a60c36fbc55)"}
; !2 is the self-referential loop ID attached to the back-edge branch of
; %for.body in @unCatN; its single property, !3, disables unrolling of that
; loop.
!2 = distinct !{!2, !3}
!3 = !{!"llvm.loop.unroll.disable"}

As a result, the subsequent unrolling step fails to unroll the loop:

$(OPT) -S cat_state.n044.ll -mem2reg -loops -loop-simplify -loop-rotate 
-lcssa -loop-unroll -unroll-threshold=100000000 -sccp -simplifycfg -o 
cat_state.n045.ll

There are still for-loops in cat_state.n045.ll, although it is possible to work around this by using -internalize -globaldce to remove the dead functions unCatN and catN, which are the ones that still contain for-loops :)

Regards,
Leslie Zhai

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant