Skip to content

Commit

Permalink
[SPIR-V] Emit SPIR-V bitcasts between source/expected pointer type (#…
Browse files Browse the repository at this point in the history
…69621)

This patch introduces a new spv_ptrcast intrinsic for tracking expected
pointer types. The change fixes multiple OpenCL CTS regressions due the
switch to opaque pointers (e.g. basic/hiloeo).
  • Loading branch information
michalpaszkowski authored Jan 5, 2024
1 parent f686479 commit b4cfb50
Show file tree
Hide file tree
Showing 19 changed files with 264 additions and 66 deletions.
1 change: 1 addition & 0 deletions llvm/include/llvm/IR/IntrinsicsSPIRV.td
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ let TargetPrefix = "spv" in {
def int_spv_insertelt : Intrinsic<[llvm_any_ty], [llvm_any_ty, llvm_any_ty, llvm_anyint_ty]>;
def int_spv_const_composite : Intrinsic<[llvm_i32_ty], [llvm_vararg_ty]>;
def int_spv_bitcast : Intrinsic<[llvm_any_ty], [llvm_any_ty]>;
def int_spv_ptrcast : Intrinsic<[llvm_any_ty], [llvm_any_ty, llvm_metadata_ty, llvm_i32_ty], [ImmArg<ArgIndex<2>>]>;
def int_spv_switch : Intrinsic<[], [llvm_any_ty, llvm_vararg_ty]>;
def int_spv_cmpxchg : Intrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_vararg_ty]>;
def int_spv_unreachable : Intrinsic<[], []>;
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,8 @@ static SPIRVType *getArgSPIRVType(const Function &F, unsigned ArgIdx,

MDString *MDKernelArgType =
getKernelArgAttribute(F, ArgIdx, "kernel_arg_type");
if (!MDKernelArgType || (MDKernelArgType->getString().ends_with("*") &&
MDKernelArgType->getString().ends_with("_t")))
if (!MDKernelArgType || (!MDKernelArgType->getString().ends_with("*") &&
!MDKernelArgType->getString().ends_with("_t")))
return GR->getOrCreateSPIRVType(OriginalArgType, MIRBuilder, ArgAccessQual);

if (MDKernelArgType->getString().ends_with("*"))
Expand Down Expand Up @@ -438,7 +438,8 @@ bool SPIRVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
assert(Arg.Regs.size() == 1 && "Call arg has multiple VRegs");
ArgVRegs.push_back(Arg.Regs[0]);
SPIRVType *SPIRVTy = GR->getOrCreateSPIRVType(Arg.Ty, MIRBuilder);
GR->assignSPIRVTypeToVReg(SPIRVTy, Arg.Regs[0], MIRBuilder.getMF());
if (!GR->getSPIRVTypeForVReg(Arg.Regs[0]))
GR->assignSPIRVTypeToVReg(SPIRVTy, Arg.Regs[0], MIRBuilder.getMF());
}
if (auto Res = SPIRV::lowerBuiltin(
DemangledName, SPIRV::InstructionSet::OpenCL_std, MIRBuilder,
Expand Down
125 changes: 124 additions & 1 deletion llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ class SPIRVEmitIntrinsics
void processInstrAfterVisit(Instruction *I);
void insertAssignPtrTypeIntrs(Instruction *I);
void insertAssignTypeIntrs(Instruction *I);
void insertPtrCastInstr(Instruction *I);
void processGlobalValue(GlobalVariable &GV);

public:
Expand Down Expand Up @@ -255,7 +256,19 @@ Instruction *SPIRVEmitIntrinsics::visitGetElementPtrInst(GetElementPtrInst &I) {
}

Instruction *SPIRVEmitIntrinsics::visitBitCastInst(BitCastInst &I) {
SmallVector<Type *, 2> Types = {I.getType(), I.getOperand(0)->getType()};
Value *Source = I.getOperand(0);

// SPIR-V, contrary to LLVM 17+ IR, supports bitcasts between pointers of
// varying element types. In case of IR coming from older versions of LLVM
// such bitcasts do not provide sufficient information, should be just skipped
// here, and handled in insertPtrCastInstr.
if (I.getType()->isPointerTy()) {
I.replaceAllUsesWith(Source);
I.eraseFromParent();
return nullptr;
}

SmallVector<Type *, 2> Types = {I.getType(), Source->getType()};
SmallVector<Value *> Args(I.op_begin(), I.op_end());
auto *NewI = IRB->CreateIntrinsic(Intrinsic::spv_bitcast, {Types}, {Args});
std::string InstName = I.hasName() ? I.getName().str() : "";
Expand All @@ -265,6 +278,111 @@ Instruction *SPIRVEmitIntrinsics::visitBitCastInst(BitCastInst &I) {
return NewI;
}

void SPIRVEmitIntrinsics::insertPtrCastInstr(Instruction *I) {
Value *Pointer;
Type *ExpectedElementType;
unsigned OperandToReplace;
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
Pointer = SI->getPointerOperand();
ExpectedElementType = SI->getValueOperand()->getType();
OperandToReplace = 1;
} else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
Pointer = LI->getPointerOperand();
ExpectedElementType = LI->getType();
OperandToReplace = 0;
} else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
Pointer = GEPI->getPointerOperand();
ExpectedElementType = GEPI->getSourceElementType();
OperandToReplace = 0;
} else {
return;
}

// If Pointer is the result of nop BitCastInst (ptr -> ptr), use the source
// pointer instead. The BitCastInst should be later removed when visited.
while (BitCastInst *BC = dyn_cast<BitCastInst>(Pointer))
Pointer = BC->getOperand(0);

// Do not emit spv_ptrcast if Pointer is a GlobalValue of expected type.
GlobalValue *GV = dyn_cast<GlobalValue>(Pointer);
if (GV && GV->getValueType() == ExpectedElementType)
return;

// Do not emit spv_ptrcast if Pointer is a result of alloca with expected
// type.
AllocaInst *A = dyn_cast<AllocaInst>(Pointer);
if (A && A->getAllocatedType() == ExpectedElementType)
return;

if (dyn_cast<GetElementPtrInst>(Pointer))
return;

setInsertPointSkippingPhis(*IRB, I);
Constant *ExpectedElementTypeConst =
Constant::getNullValue(ExpectedElementType);
ConstantAsMetadata *CM =
ValueAsMetadata::getConstant(ExpectedElementTypeConst);
MDTuple *TyMD = MDNode::get(F->getContext(), CM);
MetadataAsValue *VMD = MetadataAsValue::get(F->getContext(), TyMD);
unsigned AddressSpace = Pointer->getType()->getPointerAddressSpace();
bool FirstPtrCastOrAssignPtrType = true;

// Do not emit new spv_ptrcast if equivalent one already exists or when
// spv_assign_ptr_type already targets this pointer with the same element
// type.
for (auto User : Pointer->users()) {
auto *II = dyn_cast<IntrinsicInst>(User);
if (!II ||
(II->getIntrinsicID() != Intrinsic::spv_assign_ptr_type &&
II->getIntrinsicID() != Intrinsic::spv_ptrcast) ||
II->getOperand(0) != Pointer)
continue;

// There is some spv_ptrcast/spv_assign_ptr_type already targeting this
// pointer.
FirstPtrCastOrAssignPtrType = false;
if (II->getOperand(1) != VMD ||
dyn_cast<ConstantInt>(II->getOperand(2))->getSExtValue() !=
AddressSpace)
continue;

// The spv_ptrcast/spv_assign_ptr_type targeting this pointer is of the same
// element type and address space.
if (II->getIntrinsicID() != Intrinsic::spv_ptrcast)
return;

// This must be a spv_ptrcast, do not emit new if this one has the same BB
// as I. Otherwise, search for other spv_ptrcast/spv_assign_ptr_type.
if (II->getParent() != I->getParent())
continue;

I->setOperand(OperandToReplace, II);
return;
}

// Do not emit spv_ptrcast if it would cast to the default pointer element
// type (i8) of the same address space.
if (ExpectedElementType->isIntegerTy(8))
return;

// If this would be the first spv_ptrcast and there is no spv_assign_ptr_type
// for this pointer before, do not emit spv_ptrcast but emit
// spv_assign_ptr_type instead.
if (FirstPtrCastOrAssignPtrType && isa<Instruction>(Pointer)) {
buildIntrWithMD(Intrinsic::spv_assign_ptr_type, {Pointer->getType()},
ExpectedElementTypeConst, Pointer,
{IRB->getInt32(AddressSpace)});
return;
} else {
SmallVector<Type *, 2> Types = {Pointer->getType(), Pointer->getType()};
SmallVector<Value *, 2> Args = {Pointer, VMD, IRB->getInt32(AddressSpace)};
auto *PtrCastI =
IRB->CreateIntrinsic(Intrinsic::spv_ptrcast, {Types}, Args);
I->setOperand(OperandToReplace, PtrCastI);
return;
}
}

Instruction *SPIRVEmitIntrinsics::visitInsertElementInst(InsertElementInst &I) {
SmallVector<Type *, 4> Types = {I.getType(), I.getOperand(0)->getType(),
I.getOperand(1)->getType(),
Expand Down Expand Up @@ -522,13 +640,18 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
for (auto &I : Worklist) {
insertAssignPtrTypeIntrs(I);
insertAssignTypeIntrs(I);
insertPtrCastInstr(I);
}

for (auto *I : Worklist) {
TrackConstants = true;
if (!I->getType()->isVoidTy() || isa<StoreInst>(I))
IRB->SetInsertPoint(I->getNextNode());
// Visitors return either the original/newly created instruction for further
// processing, nullptr otherwise.
I = visit(*I);
if (!I)
continue;
processInstrAfterVisit(I);
}
return true;
Expand Down
24 changes: 22 additions & 2 deletions llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,32 @@ static void insertBitcasts(MachineFunction &MF, SPIRVGlobalRegistry *GR,
SmallVector<MachineInstr *, 10> ToErase;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
if (!isSpvIntrinsic(MI, Intrinsic::spv_bitcast))
if (!isSpvIntrinsic(MI, Intrinsic::spv_bitcast) &&
!isSpvIntrinsic(MI, Intrinsic::spv_ptrcast))
continue;
assert(MI.getOperand(2).isReg());
MIB.setInsertPt(*MI.getParent(), MI);
MIB.buildBitcast(MI.getOperand(0).getReg(), MI.getOperand(2).getReg());
ToErase.push_back(&MI);
if (isSpvIntrinsic(MI, Intrinsic::spv_bitcast)) {
MIB.buildBitcast(MI.getOperand(0).getReg(), MI.getOperand(2).getReg());
continue;
}
Register Def = MI.getOperand(0).getReg();
Register Source = MI.getOperand(2).getReg();
SPIRVType *BaseTy = GR->getOrCreateSPIRVType(
getMDOperandAsType(MI.getOperand(3).getMetadata(), 0), MIB);
SPIRVType *AssignedPtrType = GR->getOrCreateSPIRVPointerType(
BaseTy, MI, *MF.getSubtarget<SPIRVSubtarget>().getInstrInfo(),
addressSpaceToStorageClass(MI.getOperand(4).getImm()));

// If the bitcast would be redundant, replace all uses with the source
// register.
if (GR->getSPIRVTypeForVReg(Source) == AssignedPtrType) {
MIB.getMRI()->replaceRegWith(Def, Source);
} else {
GR->assignSPIRVTypeToVReg(AssignedPtrType, Def, MF);
MIB.buildBitcast(Def, Source);
}
}
}
for (MachineInstr *MI : ToErase)
Expand Down
31 changes: 12 additions & 19 deletions llvm/test/CodeGen/SPIRV/AtomicCompareExchange.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,47 +10,40 @@
; CHECK-SPIRV-DAG: %[[#Struct:]] = OpTypeStruct %[[#Int]] %[[#Bool]]
; CHECK-SPIRV-DAG: %[[#UndefStruct:]] = OpUndef %[[#Struct]]

; CHECK-SPIRV: %[[#Pointer:]] = OpFunctionParameter %[[#]]
; CHECK-SPIRV: %[[#Value_ptr:]] = OpFunctionParameter %[[#]]
; CHECK-SPIRV: %[[#Comparator:]] = OpFunctionParameter %[[#]]

; CHECK-SPIRV: %[[#Value:]] = OpLoad %[[#Int]] %[[#Value_ptr]]
; CHECK-SPIRV: %[[#Res:]] = OpAtomicCompareExchange %[[#Int]] %[[#Pointer]] %[[#MemScope_Device]]
; CHECK-SPIRV-SAME: %[[#MemSemEqual_SeqCst]] %[[#MemSemUnequal_Acquire]] %[[#Value]] %[[#Comparator]]
; CHECK-SPIRV: %[[#Value:]] = OpLoad %[[#Int]] %[[#Value_ptr:]]
; CHECK-SPIRV: %[[#Res:]] = OpAtomicCompareExchange %[[#Int]] %[[#Pointer:]] %[[#MemScope_Device]]
; CHECK-SPIRV-SAME: %[[#MemSemEqual_SeqCst]] %[[#MemSemUnequal_Acquire]] %[[#Value]] %[[#Comparator:]]
; CHECK-SPIRV: %[[#Success:]] = OpIEqual %[[#]] %[[#Res]] %[[#Comparator]]
; CHECK-SPIRV: %[[#Composite_0:]] = OpCompositeInsert %[[#Struct]] %[[#Res]] %[[#UndefStruct]] 0
; CHECK-SPIRV: %[[#Composite_1:]] = OpCompositeInsert %[[#Struct]] %[[#Success]] %[[#Composite_0]] 1
; CHECK-SPIRV: %[[#]] = OpCompositeExtract %[[#Bool]] %[[#Composite_1]] 1

define dso_local spir_func void @test(i32* %ptr, i32* %value_ptr, i32 %comparator) local_unnamed_addr {
define dso_local spir_func void @test(ptr %ptr, ptr %value_ptr, i32 %comparator) local_unnamed_addr {
entry:
%0 = load i32, i32* %value_ptr, align 4
%1 = cmpxchg i32* %ptr, i32 %comparator, i32 %0 seq_cst acquire
%0 = load i32, ptr %value_ptr, align 4
%1 = cmpxchg ptr %ptr, i32 %comparator, i32 %0 seq_cst acquire
%2 = extractvalue { i32, i1 } %1, 1
br i1 %2, label %cmpxchg.continue, label %cmpxchg.store_expected

cmpxchg.store_expected: ; preds = %entry
%3 = extractvalue { i32, i1 } %1, 0
store i32 %3, i32* %value_ptr, align 4
store i32 %3, ptr %value_ptr, align 4
br label %cmpxchg.continue

cmpxchg.continue: ; preds = %cmpxchg.store_expected, %entry
ret void
}

; CHECK-SPIRV: %[[#Ptr:]] = OpFunctionParameter %[[#]]
; CHECK-SPIRV: %[[#Store_ptr:]] = OpFunctionParameter %[[#]]

; CHECK-SPIRV: %[[#Res_1:]] = OpAtomicCompareExchange %[[#Int]] %[[#Ptr]] %[[#MemScope_Device]]
; CHECK-SPIRV: %[[#Res_1:]] = OpAtomicCompareExchange %[[#Int]] %[[#Ptr:]] %[[#MemScope_Device]]
; CHECK-SPIRV-SAME: %[[#MemSemEqual_SeqCst]] %[[#MemSemUnequal_Acquire]] %[[#Constant_456]] %[[#Constant_128]]
; CHECK-SPIRV: %[[#Success_1:]] = OpIEqual %[[#]] %[[#Res_1]] %[[#Constant_128]]
; CHECK-SPIRV: %[[#Composite:]] = OpCompositeInsert %[[#Struct]] %[[#Res_1]] %[[#UndefStruct]] 0
; CHECK-SPIRV: %[[#Composite_1:]] = OpCompositeInsert %[[#Struct]] %[[#Success_1]] %[[#Composite]] 1
; CHECK-SPIRV: OpStore %[[#Store_ptr]] %[[#Composite_1]]
; CHECK-SPIRV: OpStore %[[#Store_ptr:]] %[[#Composite_1]]

define dso_local spir_func void @test2(i32* %ptr, {i32, i1}* %store_ptr) local_unnamed_addr {
define dso_local spir_func void @test2(ptr %ptr, ptr %store_ptr) local_unnamed_addr {
entry:
%0 = cmpxchg i32* %ptr, i32 128, i32 456 seq_cst acquire
store { i32, i1 } %0, { i32, i1 }* %store_ptr, align 4
%0 = cmpxchg ptr %ptr, i32 128, i32 456 seq_cst acquire
store { i32, i1 } %0, ptr %store_ptr, align 4
ret void
}
22 changes: 11 additions & 11 deletions llvm/test/CodeGen/SPIRV/function/alloca-load-store.ll
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s

target triple = "spirv32-unknown-unknown"

; CHECK-DAG: OpName %[[#BAR:]] "bar"
; CHECK-DAG: OpName %[[#FOO:]] "foo"
; CHECK-DAG: OpName %[[#GOO:]] "goo"

; CHECK-DAG: %[[#CHAR:]] = OpTypeInt 8
; CHECK-DAG: %[[#INT:]] = OpTypeInt 32
; CHECK-DAG: %[[#STACK_PTR:]] = OpTypePointer Function %[[#INT]]
; CHECK-DAG: %[[#GLOBAL_PTR:]] = OpTypePointer CrossWorkgroup %[[#CHAR]]
; CHECK-DAG: %[[#STACK_PTR_INT:]] = OpTypePointer Function %[[#INT]]
; CHECK-DAG: %[[#GLOBAL_PTR_INT:]] = OpTypePointer CrossWorkgroup %[[#INT]]
; CHECK-DAG: %[[#GLOBAL_PTR_CHAR:]] = OpTypePointer CrossWorkgroup %[[#CHAR]]
; CHECK-DAG: %[[#FN1:]] = OpTypeFunction %[[#INT]] %[[#INT]]
; CHECK-DAG: %[[#FN2:]] = OpTypeFunction %[[#INT]] %[[#INT]] %[[#GLOBAL_PTR]]
; CHECK-DAG: %[[#FN2:]] = OpTypeFunction %[[#INT]] %[[#INT]] %[[#GLOBAL_PTR_CHAR]]

define i32 @bar(i32 %a) {
%p = alloca i32
Expand All @@ -23,7 +22,7 @@ define i32 @bar(i32 %a) {
; CHECK: %[[#BAR]] = OpFunction %[[#INT]] None %[[#FN1]]
; CHECK: %[[#A:]] = OpFunctionParameter %[[#INT]]
; CHECK: OpLabel
; CHECK: %[[#P:]] = OpVariable %[[#STACK_PTR]] Function
; CHECK: %[[#P:]] = OpVariable %[[#STACK_PTR_INT]] Function
; CHECK: OpStore %[[#P]] %[[#A]]
; CHECK: %[[#B:]] = OpLoad %[[#INT]] %[[#P]]
; CHECK: OpReturnValue %[[#B]]
Expand All @@ -40,25 +39,26 @@ define i32 @foo(i32 %a) {
; CHECK: %[[#FOO]] = OpFunction %[[#INT]] None %[[#FN1]]
; CHECK: %[[#A:]] = OpFunctionParameter %[[#INT]]
; CHECK: OpLabel
; CHECK: %[[#P:]] = OpVariable %[[#STACK_PTR]] Function
; CHECK: %[[#P:]] = OpVariable %[[#STACK_PTR_INT]] Function
; CHECK: OpStore %[[#P]] %[[#A]] Volatile
; CHECK: %[[#B:]] = OpLoad %[[#INT]] %[[#P]] Volatile
; CHECK: OpReturnValue %[[#B]]
; CHECK: OpFunctionEnd


;; Test load and store in global address space.
define i32 @goo(i32 %a, i32 addrspace(1)* %p) {
define i32 @goo(i32 %a, ptr addrspace(1) %p) {
store i32 %a, i32 addrspace(1)* %p
%b = load i32, i32 addrspace(1)* %p
ret i32 %b
}

; CHECK: %[[#GOO]] = OpFunction %[[#INT]] None %[[#FN2]]
; CHECK: %[[#A:]] = OpFunctionParameter %[[#INT]]
; CHECK: %[[#P:]] = OpFunctionParameter %[[#GLOBAL_PTR]]
; CHECK: %[[#P:]] = OpFunctionParameter %[[#GLOBAL_PTR_CHAR]]
; CHECK: OpLabel
; CHECK: OpStore %[[#P]] %[[#A]]
; CHECK: %[[#B:]] = OpLoad %[[#INT]] %[[#P]]
; CHECK: %[[#C:]] = OpBitcast %[[#GLOBAL_PTR_INT]] %[[#P]]
; CHECK: OpStore %[[#C]] %[[#A]]
; CHECK: %[[#B:]] = OpLoad %[[#INT]] %[[#C]]
; CHECK: OpReturnValue %[[#B]]
; CHECK: OpFunctionEnd
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
; CHECK: %[[#]] = OpFunction %[[#]] None %[[#]]
; CHECK-NEXT: %[[#PTR:]] = OpFunctionParameter %[[#]]
; CHECK-NEXT: %[[#]] = OpLabel
; CHECK-NEXT: OpStore %[[#PTR]] %[[#UNDEF]] Aligned 4
; CHECK-NEXT: %[[#BC:]] = OpBitcast %[[#]] %[[#PTR]]
; CHECK-NEXT: OpStore %[[#BC]] %[[#UNDEF]] Aligned 4
; CHECK-NEXT: OpReturn
; CHECK-NEXT: OpFunctionEnd

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
; CHECK: %[[#]] = OpFunction %[[#]] None %[[#]]
; CHECK-NEXT: %[[#PTR:]] = OpFunctionParameter %[[#]]
; CHECK-NEXT: %[[#]] = OpLabel
; CHECK-NEXT: OpStore %[[#PTR]] %[[#UNDEF]] Aligned 4
; CHECK-NEXT: %[[#BC:]] = OpBitcast %[[#]] %[[#PTR]]
; CHECK-NEXT: OpStore %[[#BC]] %[[#UNDEF]] Aligned 4
; CHECK-NEXT: OpReturn
; CHECK-NEXT: OpFunctionEnd

Expand Down
4 changes: 1 addition & 3 deletions llvm/test/CodeGen/SPIRV/llvm-intrinsics/memset.ll
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,7 @@
; CHECK: %[[#VarNull:]] = OpVariable %[[#]] UniformConstant %[[#ConstNull]]

; CHECK-DAG: %[[#Int8PtrConst:]] = OpTypePointer UniformConstant %[[#Int8]]
; CHECK: %[[#Target:]] = OpBitcast %[[#Int8Ptr]] %[[#]]
; CHECK: %[[#Source:]] = OpBitcast %[[#Int8PtrConst]] %[[#VarNull]]
; CHECK: OpCopyMemorySized %[[#Target]] %[[#Source]] %[[#Const12]] Aligned 4
; CHECK: OpCopyMemorySized %[[#Target:]] %[[#Source:]] %[[#Const12]] Aligned 4

; CHECK: %[[#SourceComp:]] = OpBitcast %[[#Int8PtrConst]] %[[#VarComp]]
; CHECK: OpCopyMemorySized %[[#]] %[[#SourceComp]] %[[#Const4]] Aligned 4
Expand Down
Loading

0 comments on commit b4cfb50

Please sign in to comment.