Skip to content

Commit

Permalink
Bad SLPVectorization shufflevector replacement, resulting in write to…
Browse files Browse the repository at this point in the history
… wrong memory location

We see that it might otherwise do:

  %10 = getelementptr {}**, <2 x {}***> %9, <2 x i32> <i32 10, i32 4>
  %11 = bitcast <2 x {}***> %10 to <2 x i64*>
...
  %27 = extractelement <2 x i64*> %11, i32 0
  %28 = bitcast i64* %27 to <2 x i64>*
  store <2 x i64> %22, <2 x i64>* %28, align 4, !tbaa !2

Which is an out-of-bounds store (the extractelement got offset 10
instead of offset 4 as intended). With the fix, we correctly generate
extractelement for i32 1 and generate correct code.

Differential Revision: https://reviews.llvm.org/D106613

(cherry picked from commit 980d2f6)
  • Loading branch information
vtjnash authored and vchuravy committed Oct 9, 2021
1 parent 94fdebc commit 37f68c7
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 6 deletions.
25 changes: 19 additions & 6 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5430,8 +5430,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// The pointer operand uses an in-tree scalar so we add the new BitCast
// to ExternalUses list to make sure that an extract will be generated
// in the future.
if (getTreeEntry(PO))
ExternalUses.emplace_back(PO, cast<User>(VecPtr), 0);
if (TreeEntry *Entry = getTreeEntry(PO)) {
// Find which lane we need to extract.
unsigned FoundLane = Entry->findLaneForValue(PO);
ExternalUses.emplace_back(PO, cast<User>(VecPtr), FoundLane);
}

NewLI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign());
} else {
Expand Down Expand Up @@ -5474,8 +5477,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// The pointer operand uses an in-tree scalar, so add the new BitCast to
// ExternalUses to make sure that an extract will be generated in the
// future.
if (getTreeEntry(ScalarPtr))
ExternalUses.push_back(ExternalUser(ScalarPtr, cast<User>(VecPtr), 0));
if (TreeEntry *Entry = getTreeEntry(ScalarPtr)) {
// Find which lane we need to extract.
unsigned FoundLane = Entry->findLaneForValue(ScalarPtr);
ExternalUses.push_back(
ExternalUser(ScalarPtr, cast<User>(VecPtr), FoundLane));
}

Value *V = propagateMetadata(ST, E->Scalars);

Expand Down Expand Up @@ -5577,8 +5584,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// The scalar argument uses an in-tree scalar so we add the new vectorized
// call to ExternalUses list to make sure that an extract will be
// generated in the future.
if (ScalarArg && getTreeEntry(ScalarArg))
ExternalUses.push_back(ExternalUser(ScalarArg, cast<User>(V), 0));
if (ScalarArg) {
if (TreeEntry *Entry = getTreeEntry(ScalarArg)) {
// Find which lane we need to extract.
unsigned FoundLane = Entry->findLaneForValue(ScalarArg);
ExternalUses.push_back(
ExternalUser(ScalarArg, cast<User>(V), FoundLane));
}
}

propagateIRFlags(V, E->Scalars, VL0);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
Expand Down
33 changes: 33 additions & 0 deletions llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,36 @@ entry:
ret void

}

define void @externally_used_ptrs() {
; CHECK-LABEL: @externally_used_ptrs(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** @a, align 8
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64*> poison, i64* [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64*> [[TMP1]], i64* [[TMP0]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x i64*> [[TMP2]], <2 x i64> <i64 56, i64 11>
; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint <2 x i64*> [[TMP3]] to <2 x i64>
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 12
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64*> [[TMP3]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>*
; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[TMP6]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i64> [[TMP4]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>*
; CHECK-NEXT: store <2 x i64> [[TMP8]], <2 x i64>* [[TMP9]], align 8
; CHECK-NEXT: ret void
;
entry:
%0 = load i64*, i64** @a, align 8
%add.ptr = getelementptr inbounds i64, i64* %0, i64 11
%1 = ptrtoint i64* %add.ptr to i64
%add.ptr1 = getelementptr inbounds i64, i64* %0, i64 56
%2 = ptrtoint i64* %add.ptr1 to i64
%arrayidx2 = getelementptr inbounds i64, i64* %0, i64 12
%3 = load i64, i64* %arrayidx2, align 8
%4 = load i64, i64* %add.ptr, align 8
%5 = add i64 %1, %3
%6 = add i64 %2, %4
store i64 %6, i64* %add.ptr, align 8
store i64 %5, i64* %arrayidx2, align 8
ret void
}

0 comments on commit 37f68c7

Please sign in to comment.