-
Notifications
You must be signed in to change notification settings - Fork 11.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Clang] Introduce scoped variants of GNU atomic functions #72280
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-codegen Author: Joseph Huber (jhuber6) Changes — Summary: Previously these scopes were only exported via the … [text truncated in extraction]. The scope here can be one of the following:
Naming consistency was attempted, but it is difficult to capture the full … Patch is 73.26 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/72280.diff 14 Files Affected:
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index a9c4c67a60e8e8e..a41f2d66b37b69d 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -6498,7 +6498,7 @@ class AtomicExpr : public Expr {
return cast<Expr>(SubExprs[ORDER_FAIL]);
}
Expr *getVal2() const {
- if (Op == AO__atomic_exchange)
+ if (Op == AO__atomic_exchange || Op == AO__scoped_atomic_exchange)
return cast<Expr>(SubExprs[ORDER_FAIL]);
assert(NumSubExprs > VAL2);
return cast<Expr>(SubExprs[VAL2]);
@@ -6539,7 +6539,9 @@ class AtomicExpr : public Expr {
getOp() == AO__opencl_atomic_compare_exchange_weak ||
getOp() == AO__hip_atomic_compare_exchange_weak ||
getOp() == AO__atomic_compare_exchange ||
- getOp() == AO__atomic_compare_exchange_n;
+ getOp() == AO__atomic_compare_exchange_n ||
+ getOp() == AO__scoped_atomic_compare_exchange ||
+ getOp() == AO__scoped_atomic_compare_exchange_n;
}
bool isOpenCL() const {
@@ -6569,13 +6571,13 @@ class AtomicExpr : public Expr {
/// \return empty atomic scope model if the atomic op code does not have
/// scope operand.
static std::unique_ptr<AtomicScopeModel> getScopeModel(AtomicOp Op) {
- auto Kind =
- (Op >= AO__opencl_atomic_load && Op <= AO__opencl_atomic_fetch_max)
- ? AtomicScopeModelKind::OpenCL
- : (Op >= AO__hip_atomic_load && Op <= AO__hip_atomic_fetch_max)
- ? AtomicScopeModelKind::HIP
- : AtomicScopeModelKind::None;
- return AtomicScopeModel::create(Kind);
+ if (Op >= AO__opencl_atomic_load && Op <= AO__opencl_atomic_fetch_max)
+ return AtomicScopeModel::create(AtomicScopeModelKind::OpenCL);
+ else if (Op >= AO__hip_atomic_load && Op <= AO__hip_atomic_fetch_max)
+ return AtomicScopeModel::create(AtomicScopeModelKind::HIP);
+ else if (Op >= AO__scoped_atomic_load && Op <= AO__scoped_atomic_fetch_max)
+ return AtomicScopeModel::create(AtomicScopeModelKind::Generic);
+ return AtomicScopeModel::create(AtomicScopeModelKind::None);
}
/// Get atomic scope model.
diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
index ec39e926889b936..4dcbaf8a7beaa65 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -904,6 +904,32 @@ BUILTIN(__atomic_signal_fence, "vi", "n")
BUILTIN(__atomic_always_lock_free, "bzvCD*", "nE")
BUILTIN(__atomic_is_lock_free, "bzvCD*", "nE")
+// GNU atomic builtins with atomic scopes.
+ATOMIC_BUILTIN(__scoped_atomic_load, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_load_n, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_store, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_store_n, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_exchange, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_exchange_n, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_compare_exchange, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_compare_exchange_n, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_fetch_add, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_fetch_sub, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_fetch_and, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_fetch_or, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_fetch_xor, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_fetch_nand, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_add_fetch, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_sub_fetch, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_and_fetch, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_or_fetch, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_xor_fetch, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_max_fetch, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_min_fetch, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_nand_fetch, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_fetch_min, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_fetch_max, "v.", "t")
+
// OpenCL 2.0 atomic builtins.
ATOMIC_BUILTIN(__opencl_atomic_init, "v.", "t")
ATOMIC_BUILTIN(__opencl_atomic_load, "v.", "t")
diff --git a/clang/include/clang/Basic/SyncScope.h b/clang/include/clang/Basic/SyncScope.h
index 7919f64c6daf97b..bc7ec7b5cf777ef 100644
--- a/clang/include/clang/Basic/SyncScope.h
+++ b/clang/include/clang/Basic/SyncScope.h
@@ -40,6 +40,11 @@ namespace clang {
/// Update getAsString.
///
enum class SyncScope {
+ SystemScope,
+ DeviceScope,
+ WorkgroupScope,
+ WavefrontScope,
+ SingleScope,
HIPSingleThread,
HIPWavefront,
HIPWorkgroup,
@@ -54,6 +59,16 @@ enum class SyncScope {
inline llvm::StringRef getAsString(SyncScope S) {
switch (S) {
+ case SyncScope::SystemScope:
+ return "system_scope";
+ case SyncScope::DeviceScope:
+ return "device_scope";
+ case SyncScope::WorkgroupScope:
+ return "workgroup_scope";
+ case SyncScope::WavefrontScope:
+ return "wavefront_scope";
+ case SyncScope::SingleScope:
+ return "single_scope";
case SyncScope::HIPSingleThread:
return "hip_singlethread";
case SyncScope::HIPWavefront:
@@ -77,7 +92,7 @@ inline llvm::StringRef getAsString(SyncScope S) {
}
/// Defines the kind of atomic scope models.
-enum class AtomicScopeModelKind { None, OpenCL, HIP };
+enum class AtomicScopeModelKind { None, OpenCL, HIP, Generic };
/// Defines the interface for synch scope model.
class AtomicScopeModel {
@@ -205,6 +220,56 @@ class AtomicScopeHIPModel : public AtomicScopeModel {
}
};
+/// Defines the generic atomic scope model.
+class AtomicScopeGenericModel : public AtomicScopeModel {
+public:
+ /// The enum values match predefined built-in macros __ATOMIC_SCOPE_*.
+ enum ID {
+ System = 0,
+ Device = 1,
+ Workgroup = 2,
+ Wavefront = 3,
+ Single = 4,
+ Last = Single
+ };
+
+ AtomicScopeGenericModel() = default;
+
+ SyncScope map(unsigned S) const override {
+ switch (static_cast<ID>(S)) {
+ case Device:
+ return SyncScope::DeviceScope;
+ case System:
+ return SyncScope::SystemScope;
+ case Workgroup:
+ return SyncScope::WorkgroupScope;
+ case Wavefront:
+ return SyncScope::WavefrontScope;
+ case Single:
+ return SyncScope::SingleScope;
+ }
+ llvm_unreachable("Invalid language sync scope value");
+ }
+
+ bool isValid(unsigned S) const override {
+ return S >= static_cast<unsigned>(System) &&
+ S <= static_cast<unsigned>(Last);
+ }
+
+ ArrayRef<unsigned> getRuntimeValues() const override {
+ static_assert(Last == Single, "Does not include all sync scopes");
+ static const unsigned Scopes[] = {
+ static_cast<unsigned>(Device), static_cast<unsigned>(System),
+ static_cast<unsigned>(Workgroup), static_cast<unsigned>(Wavefront),
+ static_cast<unsigned>(Single)};
+ return llvm::ArrayRef(Scopes);
+ }
+
+ unsigned getFallBackValue() const override {
+ return static_cast<unsigned>(System);
+ }
+};
+
inline std::unique_ptr<AtomicScopeModel>
AtomicScopeModel::create(AtomicScopeModelKind K) {
switch (K) {
@@ -214,6 +279,8 @@ AtomicScopeModel::create(AtomicScopeModelKind K) {
return std::make_unique<AtomicScopeOpenCLModel>();
case AtomicScopeModelKind::HIP:
return std::make_unique<AtomicScopeHIPModel>();
+ case AtomicScopeModelKind::Generic:
+ return std::make_unique<AtomicScopeGenericModel>();
}
llvm_unreachable("Invalid atomic scope model kind");
}
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 55c6b732b7081f4..b125fc676da8419 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -4887,6 +4887,7 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
case AO__atomic_load_n:
return 2;
+ case AO__scoped_atomic_load_n:
case AO__opencl_atomic_load:
case AO__hip_atomic_load:
case AO__c11_atomic_store:
@@ -4921,6 +4922,26 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
case AO__atomic_fetch_max:
return 3;
+ case AO__scoped_atomic_load:
+ case AO__scoped_atomic_store:
+ case AO__scoped_atomic_store_n:
+ case AO__scoped_atomic_fetch_add:
+ case AO__scoped_atomic_fetch_sub:
+ case AO__scoped_atomic_fetch_and:
+ case AO__scoped_atomic_fetch_or:
+ case AO__scoped_atomic_fetch_xor:
+ case AO__scoped_atomic_fetch_nand:
+ case AO__scoped_atomic_add_fetch:
+ case AO__scoped_atomic_sub_fetch:
+ case AO__scoped_atomic_and_fetch:
+ case AO__scoped_atomic_or_fetch:
+ case AO__scoped_atomic_xor_fetch:
+ case AO__scoped_atomic_nand_fetch:
+ case AO__scoped_atomic_min_fetch:
+ case AO__scoped_atomic_max_fetch:
+ case AO__scoped_atomic_fetch_min:
+ case AO__scoped_atomic_fetch_max:
+ case AO__scoped_atomic_exchange_n:
case AO__hip_atomic_exchange:
case AO__hip_atomic_fetch_add:
case AO__hip_atomic_fetch_sub:
@@ -4942,6 +4963,7 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
case AO__atomic_exchange:
return 4;
+ case AO__scoped_atomic_exchange:
case AO__c11_atomic_compare_exchange_strong:
case AO__c11_atomic_compare_exchange_weak:
return 5;
@@ -4952,6 +4974,10 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
case AO__atomic_compare_exchange:
case AO__atomic_compare_exchange_n:
return 6;
+
+ case AO__scoped_atomic_compare_exchange:
+ case AO__scoped_atomic_compare_exchange_n:
+ return 7;
}
llvm_unreachable("unknown atomic op");
}
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index ab4a013de5f552c..c04cb313c3387a3 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -1841,6 +1841,7 @@ void StmtPrinter::VisitAtomicExpr(AtomicExpr *Node) {
PrintExpr(Node->getPtr());
if (Node->getOp() != AtomicExpr::AO__c11_atomic_load &&
Node->getOp() != AtomicExpr::AO__atomic_load_n &&
+ Node->getOp() != AtomicExpr::AO__scoped_atomic_load_n &&
Node->getOp() != AtomicExpr::AO__opencl_atomic_load &&
Node->getOp() != AtomicExpr::AO__hip_atomic_load) {
OS << ", ";
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index f7c597e181b0bd9..b7636522f2c8932 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -508,9 +508,11 @@ static llvm::Value *EmitPostAtomicMinMax(CGBuilderTy &Builder,
default:
llvm_unreachable("Unexpected min/max operation");
case AtomicExpr::AO__atomic_max_fetch:
+ case AtomicExpr::AO__scoped_atomic_max_fetch:
Pred = IsSigned ? llvm::CmpInst::ICMP_SGT : llvm::CmpInst::ICMP_UGT;
break;
case AtomicExpr::AO__atomic_min_fetch:
+ case AtomicExpr::AO__scoped_atomic_min_fetch:
Pred = IsSigned ? llvm::CmpInst::ICMP_SLT : llvm::CmpInst::ICMP_ULT;
break;
}
@@ -545,7 +547,9 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
FailureOrder, Size, Order, Scope);
return;
case AtomicExpr::AO__atomic_compare_exchange:
- case AtomicExpr::AO__atomic_compare_exchange_n: {
+ case AtomicExpr::AO__atomic_compare_exchange_n:
+ case AtomicExpr::AO__scoped_atomic_compare_exchange:
+ case AtomicExpr::AO__scoped_atomic_compare_exchange_n: {
if (llvm::ConstantInt *IsWeakC = dyn_cast<llvm::ConstantInt>(IsWeak)) {
emitAtomicCmpXchgFailureSet(CGF, E, IsWeakC->getZExtValue(), Dest, Ptr,
Val1, Val2, FailureOrder, Size, Order, Scope);
@@ -578,7 +582,9 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__opencl_atomic_load:
case AtomicExpr::AO__hip_atomic_load:
case AtomicExpr::AO__atomic_load_n:
- case AtomicExpr::AO__atomic_load: {
+ case AtomicExpr::AO__atomic_load:
+ case AtomicExpr::AO__scoped_atomic_load_n:
+ case AtomicExpr::AO__scoped_atomic_load: {
llvm::LoadInst *Load = CGF.Builder.CreateLoad(Ptr);
Load->setAtomic(Order, Scope);
Load->setVolatile(E->isVolatile());
@@ -590,7 +596,9 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__opencl_atomic_store:
case AtomicExpr::AO__hip_atomic_store:
case AtomicExpr::AO__atomic_store:
- case AtomicExpr::AO__atomic_store_n: {
+ case AtomicExpr::AO__atomic_store_n:
+ case AtomicExpr::AO__scoped_atomic_store:
+ case AtomicExpr::AO__scoped_atomic_store_n: {
llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
llvm::StoreInst *Store = CGF.Builder.CreateStore(LoadVal1, Ptr);
Store->setAtomic(Order, Scope);
@@ -603,10 +611,13 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__opencl_atomic_exchange:
case AtomicExpr::AO__atomic_exchange_n:
case AtomicExpr::AO__atomic_exchange:
+ case AtomicExpr::AO__scoped_atomic_exchange_n:
+ case AtomicExpr::AO__scoped_atomic_exchange:
Op = llvm::AtomicRMWInst::Xchg;
break;
case AtomicExpr::AO__atomic_add_fetch:
+ case AtomicExpr::AO__scoped_atomic_add_fetch:
PostOp = E->getValueType()->isFloatingType() ? llvm::Instruction::FAdd
: llvm::Instruction::Add;
[[fallthrough]];
@@ -614,11 +625,13 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__hip_atomic_fetch_add:
case AtomicExpr::AO__opencl_atomic_fetch_add:
case AtomicExpr::AO__atomic_fetch_add:
+ case AtomicExpr::AO__scoped_atomic_fetch_add:
Op = E->getValueType()->isFloatingType() ? llvm::AtomicRMWInst::FAdd
: llvm::AtomicRMWInst::Add;
break;
case AtomicExpr::AO__atomic_sub_fetch:
+ case AtomicExpr::AO__scoped_atomic_sub_fetch:
PostOp = E->getValueType()->isFloatingType() ? llvm::Instruction::FSub
: llvm::Instruction::Sub;
[[fallthrough]];
@@ -626,17 +639,20 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__hip_atomic_fetch_sub:
case AtomicExpr::AO__opencl_atomic_fetch_sub:
case AtomicExpr::AO__atomic_fetch_sub:
+ case AtomicExpr::AO__scoped_atomic_fetch_sub:
Op = E->getValueType()->isFloatingType() ? llvm::AtomicRMWInst::FSub
: llvm::AtomicRMWInst::Sub;
break;
case AtomicExpr::AO__atomic_min_fetch:
+ case AtomicExpr::AO__scoped_atomic_min_fetch:
PostOpMinMax = true;
[[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_min:
case AtomicExpr::AO__hip_atomic_fetch_min:
case AtomicExpr::AO__opencl_atomic_fetch_min:
case AtomicExpr::AO__atomic_fetch_min:
+ case AtomicExpr::AO__scoped_atomic_fetch_min:
Op = E->getValueType()->isFloatingType()
? llvm::AtomicRMWInst::FMin
: (E->getValueType()->isSignedIntegerType()
@@ -645,12 +661,14 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
break;
case AtomicExpr::AO__atomic_max_fetch:
+ case AtomicExpr::AO__scoped_atomic_max_fetch:
PostOpMinMax = true;
[[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_max:
case AtomicExpr::AO__hip_atomic_fetch_max:
case AtomicExpr::AO__opencl_atomic_fetch_max:
case AtomicExpr::AO__atomic_fetch_max:
+ case AtomicExpr::AO__scoped_atomic_fetch_max:
Op = E->getValueType()->isFloatingType()
? llvm::AtomicRMWInst::FMax
: (E->getValueType()->isSignedIntegerType()
@@ -659,40 +677,48 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
break;
case AtomicExpr::AO__atomic_and_fetch:
+ case AtomicExpr::AO__scoped_atomic_and_fetch:
PostOp = llvm::Instruction::And;
[[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_and:
case AtomicExpr::AO__hip_atomic_fetch_and:
case AtomicExpr::AO__opencl_atomic_fetch_and:
case AtomicExpr::AO__atomic_fetch_and:
+ case AtomicExpr::AO__scoped_atomic_fetch_and:
Op = llvm::AtomicRMWInst::And;
break;
case AtomicExpr::AO__atomic_or_fetch:
+ case AtomicExpr::AO__scoped_atomic_or_fetch:
PostOp = llvm::Instruction::Or;
[[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_or:
case AtomicExpr::AO__hip_atomic_fetch_or:
case AtomicExpr::AO__opencl_atomic_fetch_or:
case AtomicExpr::AO__atomic_fetch_or:
+ case AtomicExpr::AO__scoped_atomic_fetch_or:
Op = llvm::AtomicRMWInst::Or;
break;
case AtomicExpr::AO__atomic_xor_fetch:
+ case AtomicExpr::AO__scoped_atomic_xor_fetch:
PostOp = llvm::Instruction::Xor;
[[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_xor:
case AtomicExpr::AO__hip_atomic_fetch_xor:
case AtomicExpr::AO__opencl_atomic_fetch_xor:
case AtomicExpr::AO__atomic_fetch_xor:
+ case AtomicExpr::AO__scoped_atomic_fetch_xor:
Op = llvm::AtomicRMWInst::Xor;
break;
case AtomicExpr::AO__atomic_nand_fetch:
+ case AtomicExpr::AO__scoped_atomic_nand_fetch:
PostOp = llvm::Instruction::And; // the NOT is special cased below
[[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_nand:
case AtomicExpr::AO__atomic_fetch_nand:
+ case AtomicExpr::AO__scoped_atomic_fetch_nand:
Op = llvm::AtomicRMWInst::Nand;
break;
}
@@ -712,7 +738,8 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
else if (PostOp)
Result = CGF.Builder.CreateBinOp((llvm::Instruction::BinaryOps)PostOp, RMWI,
LoadVal1);
- if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch)
+ if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch ||
+ E->getOp() == AtomicExpr::AO__scoped_atomic_nand_fetch)
Result = CGF.Builder.CreateNot(Result);
CGF.Builder.CreateStore(Result, Dest);
}
@@ -865,17 +892,21 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__opencl_atomic_load:
case AtomicExpr::AO__hip_atomic_load:
case AtomicExpr::AO__atomic_load_n:
+ case AtomicExpr::AO__scoped_atomic_load_n:
break;
case AtomicExpr::AO__atomic_load:
+ case AtomicExpr::AO__scoped_atomic_load:
Dest = EmitPointerWithAlignment(E->getVal1());
break;
case AtomicExpr::AO__atomic_store:
+ case AtomicExpr::AO__scoped_atomic_store:
Val1 = EmitPointerWithAlignment(E->getVal1());
break;
case AtomicExpr::AO__atomic_exchange:
+ case AtomicExpr::AO__scoped_atomic_exchange:
Val1 = EmitPointerWithAlignment(E->getVal1());
Dest = EmitPointerWithAlignment(E->getVal2());
break;
@@ -888,14 +919,19 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
case AtomicExpr::AO__atomic_compare_exchange_n:
case AtomicExpr::AO__atomic_compare_exchange:
+ case AtomicExpr::AO__scoped_atomic_compare_exchange_n:
+ case AtomicExpr::AO__scoped_atomic_compare_exchange:
Val1 = EmitPointerWithAlignment(E->getVal1());
- if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange)
+ if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange ||
+ E->getOp() == AtomicExpr::AO__scoped_atomic_compare_exchange)
Val2 = EmitPointerWithAlignment(E->getVal2());
else
Val2 = EmitValToTemp(*this, E->getVal2());
OrderFail = EmitScalarExpr(E->getOrderFail());
if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange_n ||
- E->getOp() == AtomicExpr::AO__atomic_compare_exchange)
+ E->getOp() == AtomicExpr::AO__atomic_compare_exchange ||
+ E->getOp() == AtomicExpr::AO__scoped_atomic_compare_exchange_n ||
+ E->getOp() == AtomicExpr::AO__scoped_atomic_compare_exchange)
IsWeak = EmitScalarExpr(E->getWeak());
break;
@@ -929,6 +965,14 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__atomic_max_fetch:
case AtomicExpr::AO__atomic_min_fetch:
case AtomicExpr::AO__atomic_sub_fetch:
+ case AtomicExpr::AO__scoped_atomic_fetch_add:
+ case AtomicExpr::AO__scoped_atomic_fetch_max:
+ case AtomicExpr::AO__scoped_atomic_fetch_min:
+ case AtomicExpr::AO__scoped_atomic_fetch_sub:
+ case AtomicExpr::AO__scoped_atomic_add_fetch:
+ case AtomicExpr::AO__scoped_atomic_max_fetch:
+ case AtomicExpr::AO__scoped_atomic_min_fetch:
+ case AtomicExpr::AO__scoped_atomic_sub_fetch:
case AtomicExpr::AO__c11_atomic_fetch_max:
case AtomicExpr::AO__c11_atomic_fetch_min:
case AtomicExpr::AO__opencl_atomic_fetch_max:
@@ -946,6 +990,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__hip_atomic_exchange:
case AtomicExpr::AO__atomic_...
[truncated]
|
@llvm/pr-subscribers-backend-amdgpu Author: Joseph Huber (jhuber6) Changes — Summary: Previously these scopes were only exported via the … [text truncated in extraction]. The scope here can be one of the following:
Naming consistency was attempted, but it is difficult to capture the full … Patch is 73.26 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/72280.diff 14 Files Affected:
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index a9c4c67a60e8e8e..a41f2d66b37b69d 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -6498,7 +6498,7 @@ class AtomicExpr : public Expr {
return cast<Expr>(SubExprs[ORDER_FAIL]);
}
Expr *getVal2() const {
- if (Op == AO__atomic_exchange)
+ if (Op == AO__atomic_exchange || Op == AO__scoped_atomic_exchange)
return cast<Expr>(SubExprs[ORDER_FAIL]);
assert(NumSubExprs > VAL2);
return cast<Expr>(SubExprs[VAL2]);
@@ -6539,7 +6539,9 @@ class AtomicExpr : public Expr {
getOp() == AO__opencl_atomic_compare_exchange_weak ||
getOp() == AO__hip_atomic_compare_exchange_weak ||
getOp() == AO__atomic_compare_exchange ||
- getOp() == AO__atomic_compare_exchange_n;
+ getOp() == AO__atomic_compare_exchange_n ||
+ getOp() == AO__scoped_atomic_compare_exchange ||
+ getOp() == AO__scoped_atomic_compare_exchange_n;
}
bool isOpenCL() const {
@@ -6569,13 +6571,13 @@ class AtomicExpr : public Expr {
/// \return empty atomic scope model if the atomic op code does not have
/// scope operand.
static std::unique_ptr<AtomicScopeModel> getScopeModel(AtomicOp Op) {
- auto Kind =
- (Op >= AO__opencl_atomic_load && Op <= AO__opencl_atomic_fetch_max)
- ? AtomicScopeModelKind::OpenCL
- : (Op >= AO__hip_atomic_load && Op <= AO__hip_atomic_fetch_max)
- ? AtomicScopeModelKind::HIP
- : AtomicScopeModelKind::None;
- return AtomicScopeModel::create(Kind);
+ if (Op >= AO__opencl_atomic_load && Op <= AO__opencl_atomic_fetch_max)
+ return AtomicScopeModel::create(AtomicScopeModelKind::OpenCL);
+ else if (Op >= AO__hip_atomic_load && Op <= AO__hip_atomic_fetch_max)
+ return AtomicScopeModel::create(AtomicScopeModelKind::HIP);
+ else if (Op >= AO__scoped_atomic_load && Op <= AO__scoped_atomic_fetch_max)
+ return AtomicScopeModel::create(AtomicScopeModelKind::Generic);
+ return AtomicScopeModel::create(AtomicScopeModelKind::None);
}
/// Get atomic scope model.
diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
index ec39e926889b936..4dcbaf8a7beaa65 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -904,6 +904,32 @@ BUILTIN(__atomic_signal_fence, "vi", "n")
BUILTIN(__atomic_always_lock_free, "bzvCD*", "nE")
BUILTIN(__atomic_is_lock_free, "bzvCD*", "nE")
+// GNU atomic builtins with atomic scopes.
+ATOMIC_BUILTIN(__scoped_atomic_load, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_load_n, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_store, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_store_n, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_exchange, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_exchange_n, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_compare_exchange, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_compare_exchange_n, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_fetch_add, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_fetch_sub, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_fetch_and, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_fetch_or, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_fetch_xor, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_fetch_nand, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_add_fetch, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_sub_fetch, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_and_fetch, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_or_fetch, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_xor_fetch, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_max_fetch, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_min_fetch, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_nand_fetch, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_fetch_min, "v.", "t")
+ATOMIC_BUILTIN(__scoped_atomic_fetch_max, "v.", "t")
+
// OpenCL 2.0 atomic builtins.
ATOMIC_BUILTIN(__opencl_atomic_init, "v.", "t")
ATOMIC_BUILTIN(__opencl_atomic_load, "v.", "t")
diff --git a/clang/include/clang/Basic/SyncScope.h b/clang/include/clang/Basic/SyncScope.h
index 7919f64c6daf97b..bc7ec7b5cf777ef 100644
--- a/clang/include/clang/Basic/SyncScope.h
+++ b/clang/include/clang/Basic/SyncScope.h
@@ -40,6 +40,11 @@ namespace clang {
/// Update getAsString.
///
enum class SyncScope {
+ SystemScope,
+ DeviceScope,
+ WorkgroupScope,
+ WavefrontScope,
+ SingleScope,
HIPSingleThread,
HIPWavefront,
HIPWorkgroup,
@@ -54,6 +59,16 @@ enum class SyncScope {
inline llvm::StringRef getAsString(SyncScope S) {
switch (S) {
+ case SyncScope::SystemScope:
+ return "system_scope";
+ case SyncScope::DeviceScope:
+ return "device_scope";
+ case SyncScope::WorkgroupScope:
+ return "workgroup_scope";
+ case SyncScope::WavefrontScope:
+ return "wavefront_scope";
+ case SyncScope::SingleScope:
+ return "single_scope";
case SyncScope::HIPSingleThread:
return "hip_singlethread";
case SyncScope::HIPWavefront:
@@ -77,7 +92,7 @@ inline llvm::StringRef getAsString(SyncScope S) {
}
/// Defines the kind of atomic scope models.
-enum class AtomicScopeModelKind { None, OpenCL, HIP };
+enum class AtomicScopeModelKind { None, OpenCL, HIP, Generic };
/// Defines the interface for synch scope model.
class AtomicScopeModel {
@@ -205,6 +220,56 @@ class AtomicScopeHIPModel : public AtomicScopeModel {
}
};
+/// Defines the generic atomic scope model.
+class AtomicScopeGenericModel : public AtomicScopeModel {
+public:
+ /// The enum values match predefined built-in macros __ATOMIC_SCOPE_*.
+ enum ID {
+ System = 0,
+ Device = 1,
+ Workgroup = 2,
+ Wavefront = 3,
+ Single = 4,
+ Last = Single
+ };
+
+ AtomicScopeGenericModel() = default;
+
+ SyncScope map(unsigned S) const override {
+ switch (static_cast<ID>(S)) {
+ case Device:
+ return SyncScope::DeviceScope;
+ case System:
+ return SyncScope::SystemScope;
+ case Workgroup:
+ return SyncScope::WorkgroupScope;
+ case Wavefront:
+ return SyncScope::WavefrontScope;
+ case Single:
+ return SyncScope::SingleScope;
+ }
+ llvm_unreachable("Invalid language sync scope value");
+ }
+
+ bool isValid(unsigned S) const override {
+ return S >= static_cast<unsigned>(System) &&
+ S <= static_cast<unsigned>(Last);
+ }
+
+ ArrayRef<unsigned> getRuntimeValues() const override {
+ static_assert(Last == Single, "Does not include all sync scopes");
+ static const unsigned Scopes[] = {
+ static_cast<unsigned>(Device), static_cast<unsigned>(System),
+ static_cast<unsigned>(Workgroup), static_cast<unsigned>(Wavefront),
+ static_cast<unsigned>(Single)};
+ return llvm::ArrayRef(Scopes);
+ }
+
+ unsigned getFallBackValue() const override {
+ return static_cast<unsigned>(System);
+ }
+};
+
inline std::unique_ptr<AtomicScopeModel>
AtomicScopeModel::create(AtomicScopeModelKind K) {
switch (K) {
@@ -214,6 +279,8 @@ AtomicScopeModel::create(AtomicScopeModelKind K) {
return std::make_unique<AtomicScopeOpenCLModel>();
case AtomicScopeModelKind::HIP:
return std::make_unique<AtomicScopeHIPModel>();
+ case AtomicScopeModelKind::Generic:
+ return std::make_unique<AtomicScopeGenericModel>();
}
llvm_unreachable("Invalid atomic scope model kind");
}
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 55c6b732b7081f4..b125fc676da8419 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -4887,6 +4887,7 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
case AO__atomic_load_n:
return 2;
+ case AO__scoped_atomic_load_n:
case AO__opencl_atomic_load:
case AO__hip_atomic_load:
case AO__c11_atomic_store:
@@ -4921,6 +4922,26 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
case AO__atomic_fetch_max:
return 3;
+ case AO__scoped_atomic_load:
+ case AO__scoped_atomic_store:
+ case AO__scoped_atomic_store_n:
+ case AO__scoped_atomic_fetch_add:
+ case AO__scoped_atomic_fetch_sub:
+ case AO__scoped_atomic_fetch_and:
+ case AO__scoped_atomic_fetch_or:
+ case AO__scoped_atomic_fetch_xor:
+ case AO__scoped_atomic_fetch_nand:
+ case AO__scoped_atomic_add_fetch:
+ case AO__scoped_atomic_sub_fetch:
+ case AO__scoped_atomic_and_fetch:
+ case AO__scoped_atomic_or_fetch:
+ case AO__scoped_atomic_xor_fetch:
+ case AO__scoped_atomic_nand_fetch:
+ case AO__scoped_atomic_min_fetch:
+ case AO__scoped_atomic_max_fetch:
+ case AO__scoped_atomic_fetch_min:
+ case AO__scoped_atomic_fetch_max:
+ case AO__scoped_atomic_exchange_n:
case AO__hip_atomic_exchange:
case AO__hip_atomic_fetch_add:
case AO__hip_atomic_fetch_sub:
@@ -4942,6 +4963,7 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
case AO__atomic_exchange:
return 4;
+ case AO__scoped_atomic_exchange:
case AO__c11_atomic_compare_exchange_strong:
case AO__c11_atomic_compare_exchange_weak:
return 5;
@@ -4952,6 +4974,10 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
case AO__atomic_compare_exchange:
case AO__atomic_compare_exchange_n:
return 6;
+
+ case AO__scoped_atomic_compare_exchange:
+ case AO__scoped_atomic_compare_exchange_n:
+ return 7;
}
llvm_unreachable("unknown atomic op");
}
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index ab4a013de5f552c..c04cb313c3387a3 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -1841,6 +1841,7 @@ void StmtPrinter::VisitAtomicExpr(AtomicExpr *Node) {
PrintExpr(Node->getPtr());
if (Node->getOp() != AtomicExpr::AO__c11_atomic_load &&
Node->getOp() != AtomicExpr::AO__atomic_load_n &&
+ Node->getOp() != AtomicExpr::AO__scoped_atomic_load_n &&
Node->getOp() != AtomicExpr::AO__opencl_atomic_load &&
Node->getOp() != AtomicExpr::AO__hip_atomic_load) {
OS << ", ";
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index f7c597e181b0bd9..b7636522f2c8932 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -508,9 +508,11 @@ static llvm::Value *EmitPostAtomicMinMax(CGBuilderTy &Builder,
default:
llvm_unreachable("Unexpected min/max operation");
case AtomicExpr::AO__atomic_max_fetch:
+ case AtomicExpr::AO__scoped_atomic_max_fetch:
Pred = IsSigned ? llvm::CmpInst::ICMP_SGT : llvm::CmpInst::ICMP_UGT;
break;
case AtomicExpr::AO__atomic_min_fetch:
+ case AtomicExpr::AO__scoped_atomic_min_fetch:
Pred = IsSigned ? llvm::CmpInst::ICMP_SLT : llvm::CmpInst::ICMP_ULT;
break;
}
@@ -545,7 +547,9 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
FailureOrder, Size, Order, Scope);
return;
case AtomicExpr::AO__atomic_compare_exchange:
- case AtomicExpr::AO__atomic_compare_exchange_n: {
+ case AtomicExpr::AO__atomic_compare_exchange_n:
+ case AtomicExpr::AO__scoped_atomic_compare_exchange:
+ case AtomicExpr::AO__scoped_atomic_compare_exchange_n: {
if (llvm::ConstantInt *IsWeakC = dyn_cast<llvm::ConstantInt>(IsWeak)) {
emitAtomicCmpXchgFailureSet(CGF, E, IsWeakC->getZExtValue(), Dest, Ptr,
Val1, Val2, FailureOrder, Size, Order, Scope);
@@ -578,7 +582,9 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__opencl_atomic_load:
case AtomicExpr::AO__hip_atomic_load:
case AtomicExpr::AO__atomic_load_n:
- case AtomicExpr::AO__atomic_load: {
+ case AtomicExpr::AO__atomic_load:
+ case AtomicExpr::AO__scoped_atomic_load_n:
+ case AtomicExpr::AO__scoped_atomic_load: {
llvm::LoadInst *Load = CGF.Builder.CreateLoad(Ptr);
Load->setAtomic(Order, Scope);
Load->setVolatile(E->isVolatile());
@@ -590,7 +596,9 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__opencl_atomic_store:
case AtomicExpr::AO__hip_atomic_store:
case AtomicExpr::AO__atomic_store:
- case AtomicExpr::AO__atomic_store_n: {
+ case AtomicExpr::AO__atomic_store_n:
+ case AtomicExpr::AO__scoped_atomic_store:
+ case AtomicExpr::AO__scoped_atomic_store_n: {
llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
llvm::StoreInst *Store = CGF.Builder.CreateStore(LoadVal1, Ptr);
Store->setAtomic(Order, Scope);
@@ -603,10 +611,13 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__opencl_atomic_exchange:
case AtomicExpr::AO__atomic_exchange_n:
case AtomicExpr::AO__atomic_exchange:
+ case AtomicExpr::AO__scoped_atomic_exchange_n:
+ case AtomicExpr::AO__scoped_atomic_exchange:
Op = llvm::AtomicRMWInst::Xchg;
break;
case AtomicExpr::AO__atomic_add_fetch:
+ case AtomicExpr::AO__scoped_atomic_add_fetch:
PostOp = E->getValueType()->isFloatingType() ? llvm::Instruction::FAdd
: llvm::Instruction::Add;
[[fallthrough]];
@@ -614,11 +625,13 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__hip_atomic_fetch_add:
case AtomicExpr::AO__opencl_atomic_fetch_add:
case AtomicExpr::AO__atomic_fetch_add:
+ case AtomicExpr::AO__scoped_atomic_fetch_add:
Op = E->getValueType()->isFloatingType() ? llvm::AtomicRMWInst::FAdd
: llvm::AtomicRMWInst::Add;
break;
case AtomicExpr::AO__atomic_sub_fetch:
+ case AtomicExpr::AO__scoped_atomic_sub_fetch:
PostOp = E->getValueType()->isFloatingType() ? llvm::Instruction::FSub
: llvm::Instruction::Sub;
[[fallthrough]];
@@ -626,17 +639,20 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__hip_atomic_fetch_sub:
case AtomicExpr::AO__opencl_atomic_fetch_sub:
case AtomicExpr::AO__atomic_fetch_sub:
+ case AtomicExpr::AO__scoped_atomic_fetch_sub:
Op = E->getValueType()->isFloatingType() ? llvm::AtomicRMWInst::FSub
: llvm::AtomicRMWInst::Sub;
break;
case AtomicExpr::AO__atomic_min_fetch:
+ case AtomicExpr::AO__scoped_atomic_min_fetch:
PostOpMinMax = true;
[[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_min:
case AtomicExpr::AO__hip_atomic_fetch_min:
case AtomicExpr::AO__opencl_atomic_fetch_min:
case AtomicExpr::AO__atomic_fetch_min:
+ case AtomicExpr::AO__scoped_atomic_fetch_min:
Op = E->getValueType()->isFloatingType()
? llvm::AtomicRMWInst::FMin
: (E->getValueType()->isSignedIntegerType()
@@ -645,12 +661,14 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
break;
case AtomicExpr::AO__atomic_max_fetch:
+ case AtomicExpr::AO__scoped_atomic_max_fetch:
PostOpMinMax = true;
[[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_max:
case AtomicExpr::AO__hip_atomic_fetch_max:
case AtomicExpr::AO__opencl_atomic_fetch_max:
case AtomicExpr::AO__atomic_fetch_max:
+ case AtomicExpr::AO__scoped_atomic_fetch_max:
Op = E->getValueType()->isFloatingType()
? llvm::AtomicRMWInst::FMax
: (E->getValueType()->isSignedIntegerType()
@@ -659,40 +677,48 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
break;
case AtomicExpr::AO__atomic_and_fetch:
+ case AtomicExpr::AO__scoped_atomic_and_fetch:
PostOp = llvm::Instruction::And;
[[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_and:
case AtomicExpr::AO__hip_atomic_fetch_and:
case AtomicExpr::AO__opencl_atomic_fetch_and:
case AtomicExpr::AO__atomic_fetch_and:
+ case AtomicExpr::AO__scoped_atomic_fetch_and:
Op = llvm::AtomicRMWInst::And;
break;
case AtomicExpr::AO__atomic_or_fetch:
+ case AtomicExpr::AO__scoped_atomic_or_fetch:
PostOp = llvm::Instruction::Or;
[[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_or:
case AtomicExpr::AO__hip_atomic_fetch_or:
case AtomicExpr::AO__opencl_atomic_fetch_or:
case AtomicExpr::AO__atomic_fetch_or:
+ case AtomicExpr::AO__scoped_atomic_fetch_or:
Op = llvm::AtomicRMWInst::Or;
break;
case AtomicExpr::AO__atomic_xor_fetch:
+ case AtomicExpr::AO__scoped_atomic_xor_fetch:
PostOp = llvm::Instruction::Xor;
[[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_xor:
case AtomicExpr::AO__hip_atomic_fetch_xor:
case AtomicExpr::AO__opencl_atomic_fetch_xor:
case AtomicExpr::AO__atomic_fetch_xor:
+ case AtomicExpr::AO__scoped_atomic_fetch_xor:
Op = llvm::AtomicRMWInst::Xor;
break;
case AtomicExpr::AO__atomic_nand_fetch:
+ case AtomicExpr::AO__scoped_atomic_nand_fetch:
PostOp = llvm::Instruction::And; // the NOT is special cased below
[[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_nand:
case AtomicExpr::AO__atomic_fetch_nand:
+ case AtomicExpr::AO__scoped_atomic_fetch_nand:
Op = llvm::AtomicRMWInst::Nand;
break;
}
@@ -712,7 +738,8 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
else if (PostOp)
Result = CGF.Builder.CreateBinOp((llvm::Instruction::BinaryOps)PostOp, RMWI,
LoadVal1);
- if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch)
+ if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch ||
+ E->getOp() == AtomicExpr::AO__scoped_atomic_nand_fetch)
Result = CGF.Builder.CreateNot(Result);
CGF.Builder.CreateStore(Result, Dest);
}
@@ -865,17 +892,21 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__opencl_atomic_load:
case AtomicExpr::AO__hip_atomic_load:
case AtomicExpr::AO__atomic_load_n:
+ case AtomicExpr::AO__scoped_atomic_load_n:
break;
case AtomicExpr::AO__atomic_load:
+ case AtomicExpr::AO__scoped_atomic_load:
Dest = EmitPointerWithAlignment(E->getVal1());
break;
case AtomicExpr::AO__atomic_store:
+ case AtomicExpr::AO__scoped_atomic_store:
Val1 = EmitPointerWithAlignment(E->getVal1());
break;
case AtomicExpr::AO__atomic_exchange:
+ case AtomicExpr::AO__scoped_atomic_exchange:
Val1 = EmitPointerWithAlignment(E->getVal1());
Dest = EmitPointerWithAlignment(E->getVal2());
break;
@@ -888,14 +919,19 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
case AtomicExpr::AO__atomic_compare_exchange_n:
case AtomicExpr::AO__atomic_compare_exchange:
+ case AtomicExpr::AO__scoped_atomic_compare_exchange_n:
+ case AtomicExpr::AO__scoped_atomic_compare_exchange:
Val1 = EmitPointerWithAlignment(E->getVal1());
- if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange)
+ if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange ||
+ E->getOp() == AtomicExpr::AO__scoped_atomic_compare_exchange)
Val2 = EmitPointerWithAlignment(E->getVal2());
else
Val2 = EmitValToTemp(*this, E->getVal2());
OrderFail = EmitScalarExpr(E->getOrderFail());
if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange_n ||
- E->getOp() == AtomicExpr::AO__atomic_compare_exchange)
+ E->getOp() == AtomicExpr::AO__atomic_compare_exchange ||
+ E->getOp() == AtomicExpr::AO__scoped_atomic_compare_exchange_n ||
+ E->getOp() == AtomicExpr::AO__scoped_atomic_compare_exchange)
IsWeak = EmitScalarExpr(E->getWeak());
break;
@@ -929,6 +965,14 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__atomic_max_fetch:
case AtomicExpr::AO__atomic_min_fetch:
case AtomicExpr::AO__atomic_sub_fetch:
+ case AtomicExpr::AO__scoped_atomic_fetch_add:
+ case AtomicExpr::AO__scoped_atomic_fetch_max:
+ case AtomicExpr::AO__scoped_atomic_fetch_min:
+ case AtomicExpr::AO__scoped_atomic_fetch_sub:
+ case AtomicExpr::AO__scoped_atomic_add_fetch:
+ case AtomicExpr::AO__scoped_atomic_max_fetch:
+ case AtomicExpr::AO__scoped_atomic_min_fetch:
+ case AtomicExpr::AO__scoped_atomic_sub_fetch:
case AtomicExpr::AO__c11_atomic_fetch_max:
case AtomicExpr::AO__c11_atomic_fetch_min:
case AtomicExpr::AO__opencl_atomic_fetch_max:
@@ -946,6 +990,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__hip_atomic_exchange:
case AtomicExpr::AO__atomic_...
[truncated]
|
Looks solid to me. The patch to clang is long but straightforward and the tests look reassuringly exhaustive. Probably good that you ignored my name suggestion of integers 0 through N. This patch is partly motivated by us wanting device scope atomics in libc. It removes one of the remaining stumbling blocks for people who like freestanding C++ as a GPU programming language. Hopefully the clang people consider the extension acceptable. |
Just a FYI, that recent NVIDIA GPUs have introduced a concept of thread block cluster. We may need another level of granularity between the block and device. |
Should be easy enough, though the numbers would no longer be incremental if we put it between there. It's somewhat difficult to decide what these things should be called. Also I was somewhat tempted to keep the names all the same length like the That being said, As far as I'm aware the Nvidia backend doesn't handle scoped atomics at all yet, we simply emit |
Yeah, it's on my ever growing todo. :-( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there any actual difference now between these and the HIP/OpenCL flavors other than dropping the language from the name?
@@ -904,6 +904,32 @@ BUILTIN(__atomic_signal_fence, "vi", "n") | |||
BUILTIN(__atomic_always_lock_free, "bzvCD*", "nE") | |||
BUILTIN(__atomic_is_lock_free, "bzvCD*", "nE") | |||
|
|||
// GNU atomic builtins with atomic scopes. | |||
ATOMIC_BUILTIN(__scoped_atomic_load, "v.", "t") |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder if it would be better to preserve __atomic as the prefix, and move the _scope part to the name suffix
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Naming things is hard, we could do
__atomic_scoped_load
__scoped_atomic_load
__atomic_load_scoped
Unsure which is the best.
@@ -54,6 +59,16 @@ enum class SyncScope { | |||
|
|||
inline llvm::StringRef getAsString(SyncScope S) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess this is a pre-existing problem, but why don't these just match the backend string names?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it's because this is for AST printing purposes, while the backend strings vary per target.
case Single: | ||
return SyncScope::SingleScope; | ||
} | ||
llvm_unreachable("Invalid language sync scope value"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe it would be better to just assume anything else is system?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Mostly just copying the existing code for this, but we have semantic checks to ensure that the value is valid. So there's no chance that a user will actually get to specify anything different from the macros provided.
Builder.defineMacro("__MEMORY_SCOPE_WRKGRP", "2"); | ||
Builder.defineMacro("__MEMORY_SCOPE_WVFRNT", "3"); | ||
Builder.defineMacro("__MEMORY_SCOPE_SINGLE", "4"); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should the HIP flavors be defined in terms of these?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We could, though I might need to think of some better names. It's difficult to cover all the cases people might need. I think that cleanup would best be done in a follow-up patch.
Yes, these directly copy the GNU functions and names. The OpenCL / HIP ones use a different format. |
Overall I think it is the right way to go. Memory scope has been used by different offloading languages and the atomic clang builtins are essentially the same. Adding a generic clang atomic builtins with memory scope allows code sharing among offloading languages. |
I agree, I'm hoping to hear something from people more familiar with C/C++ or GNU stuff to see if they agree with this direction. Also it might help to decide on some better names for the memory scopes. |
I'm a little wary of adding these without actually going through any sort of standardization process; if other vendors don't support the same interface, we just have more variations. (See also https://clang.llvm.org/get_involved.html#criteria ) How consistent are the various scopes across targets? How likely is it that some target will need additional scopes that you haven't specified? |
I figured we can just treat these as |
I'm not sure what you mean here. If you mean that users are expected to use the OpenCL/HIP/etc. standard APIs, and you only expect to use these as part of language runtimes, then maybe we don't care so much if it's clang-only. It might be worth considering using string literals instead of numbers for the different scopes. It removes any question of whether the list of scopes is complete and the order of numbers on the list. And it doesn't require defining a bunch of new preprocessor macros. |
They should be available to users, but this level of programming is highly compiler-dependent already so I don't see it as much different.
The underlying implementation is a string literal in the LLVM |
We don't have to use the same set of strings as syncscope if that doesn't make sense. |
I don't think there's much of a point to making them strings if it's not directly invoking the syncscope name for the backend. Realistically as long as we give them descriptive names we can just ignore ones that don't apply on various targets. Like right now you can use these scoped variants in x64 code but it has no effect. Either that or we could use logic to go to the next hierarchy level that makes sense. As always, naming stuff is hard. |
There is another reason that string syncscope is not favored is that some languages uses enums for syncscope in their own atomic APIs, e.g. OpenCL. Currently clang is able to define macros that matches the language's enum for syncscope so that implementing OpenCL atomic API is just a trivial mapping to clang's builtin. However, if clang's atomic builtin uses string for syncscope, that would be quite inconvenient to use those builtin's to implement OpenCL's atomic API. |
We're already assigning names to the different scopes; we're just doing it in the preprocessor macros. If these are supposed to match the OpenCL enum values, though, I guess that's an argument for using numbers. |
It would be good to be able to provide an easy way to interface with other languages. A lot of this offloading stuff is very common, but was unfortunately tied to OpenCL directly. Are there any conceptual issues with this patch besides potential naming issues? I'm trying to think of a good catch-all term for all the hierarchies between a device and a single thread but it's difficult. |
Missing change to clang/docs/LanguageExtensions.rst describing the new builtins. Are there any other projects that we might want to coordinate with here? gcc, maybe? |
Will do.
Unknown, I've never collaborated with anyone outside of LLVM. I know they have handling of GPU programming for NVPTX and AMDGPU targets, but I don't know what level of support they have for this. I think it's sufficient to keep it as a |
e08deb9
to
b244d36
Compare
My primary concerns here are:
That said, I don't want to block progress here; this is clearly important functionality to have. |
Technically these are "standardized" in the case of OpenCL. The problem is that the underlying concept is much broader so we'd need something that covers all use-cases in a familiar manner. IMHO this is probably as close as we'll get since all we're doing is expanding the already existing GCC intrinsics, which are well defined and understood. The thing which may change is simply the names we call them, or whatever mapping of integer to behavior we expect. I think that's fairly easy enough to change even in the future.
My first thought was to make these an overloaded version of the GCC intrinsics, but there was apparently some resistance to that, as it made a bit more of an implied divergence between source codes or something. We'd still need the memory scopes however. That being said, we already do this for HIP and OpenCL, so if we can rewrite those in terms of this it would be a net reduction in preprocessor macros.
Yeah, my main use-case is for use inside my |
Summary: The standard GNU atomic operations are a very common way to target hardware atomics on the device. With more heterogeneous devices being introduced, the concept of memory scopes has been in the LLVM language for a while via the `syncscope` modifier. For targets, such as the GPU, this can change code generation depending on whether or not we only need to be consistent with the memory ordering with the entire system, the single GPU device, or lower. Previously these scopes were only exported via the `opencl` and `hip` variants of these functions. However, this made it difficult to use outside of those languages and the semantics were different from the standard GNU versions. This patch introduces a `__scoped_atomic` variant for the common functions. There was some discussion over whether or not these should be overloads of the existing ones, or simply new variants. I leant towards new variants to be less disruptive. The scope here can be one of the following ``` __MEMORY_SCOPE_SYSTEM // All devices and systems __MEMORY_SCOPE_DEVICE // Just this device __MEMORY_SCOPE_WRKGRP // A 'work-group' AKA CUDA block __MEMORY_SCOPE_WVFRNT // A 'wavefront' AKA CUDA warp __MEMORY_SCOPE_SINGLE // A single thread. ``` Naming consistency was attempted, but it is difficult to capture the full spectrum with so many names. Suggestions appreciated.
b244d36
to
ce494cd
Compare
The capability is more important than the naming. If GCC picks a different set of names - maybe they go with defines for scope and we go with strings, and the names differ - we get to pick between renaming ours, adding aliases, ignoring the divergence. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is functionally correct and useful as is - if gcc decide to do something divergent we can change it later, it's basically an internal interface anyway. Let's have it now and change the names if we come up with better ideas later.
Summary: A recent patch in llvm#72280 provided `clang` the ability to easily use scoped atomics. These are a special modifier on atomics that some backends support. They are intended for providing more fine-grained control over the affected memory of an atomic action. The default is a "system" scope, e.g. coherence with the GPU and CPU memory on a heterogeneous system. If we use "device" scope, that implies that the memory is only ordered with respect to the current GPU. These builtins are direct replacements for the GCC atomic builtins in cases where the backend doesn't do anything with the information, so these should be a drop-in. This introduces some noise, but hopefully it isn't too contentious.
Summary: A recent patch in llvm#72280 provided `clang` the ability to easily use scoped atomics. These are a special modifier on atomics that some backends support. They are intended for providing more fine-grained control over the affected memory of an atomic action. The default is a "system" scope, e.g. coherence with the GPU and CPU memory on a heterogeneous system. If we use "device" scope, that implies that the memory is only ordered with respect to the current GPU. These builtins are direct replacements for the GCC atomic builtins in cases where the backend doesn't do anything with the information, so these should be a drop-in. This introduces some noise, but hopefully it isn't too contentious.
) Summary: A recent patch in #72280 provided `clang` the ability to easily use scoped atomics. These are a special modifier on atomics that some backends support. They are intended for providing more fine-grained control over the affected memory of an atomic action. The default is a "system" scope, e.g. coherence with the GPU and CPU memory on a heterogeneous system. If we use "device" scope, that implies that the memory is only ordered with respect to the current GPU. These builtins are direct replacements for the GCC atomic builtins in cases where the backend doesn't do anything with the information, so these should be a drop-in. This introduces some noise, but hopefully it isn't too contentious.
…cs if available from the compiler (#74769) Summary: A recent patch in llvm/llvm-project#72280 provided `clang` the ability to easily use scoped atomics. These are a special modifier on atomics that some backends support. They are intended for providing more fine-grained control over the affected memory of an atomic action. The default is a "system" scope, e.g. coherence with the GPU and CPU memory on a heterogeneous system. If we use "device" scope, that implies that the memory is only ordered with respect to the current GPU. These builtins are direct replacements for the GCC atomic builtins in cases where the backend doesn't do anything with the information, so these should be a drop-in. This introduces some noise, but hopefully it isn't too contentious. GitOrigin-RevId: 80ec4fb9b13f72c5666304b7150b0a663ad34f5f Original-Revision: 0c92cb5d4602b275fe8faf86149a641f08e9c6f3 Roller-URL: https://ci.chromium.org/b/8758523691713229585 CQ-Do-Not-Cancel-Tryjobs: true Change-Id: I384af94d4c90e13ab3461b80377ce0f20020bfd3 Reviewed-on: https://fuchsia-review.googlesource.com/c/fuchsia/+/976539
Summary:
The standard GNU atomic operations are a very common way to target
hardware atomics on the device. With more heterogeneous devices being
introduced, the concept of memory scopes has been in the LLVM language
for awhile via the
syncscope
modifier. For targets, such as the GPU,this can change code generation depending on whether or not we only need
to be consistent with the memory ordering with the entire system, the
single GPU device, or lower.
Previously these scopes were only exported via the
opencl
andhip
variants of these functions. However, this made it difficult to use
outside of those languages and the semantics were different from the
standard GNU versions. This patch introduces a
__scoped_atomic
variantfor the common functions. There was some discussion over whether or not
these should be overloads of the existing ones, or simply new variants.
I leant towards new variants to be less disruptive.
The scope here can be one of the following
Naming consistency was attempted, but it is difficult to capture the full
spectrum with so many names. Suggestions appreciated.