-
Notifications
You must be signed in to change notification settings - Fork 11.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[FMV] Emit the resolver along with the default version definition. #84405
[FMV] Emit the resolver along with the default version definition. #84405
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-codegen Author: Alexandros Lamprineas (labrinea) ChangesWe would like the resolver to be generated eagerly, even if the versioned function is not called from the current translation unit. Fixes #81494. Patch is 42.97 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/84405.diff 3 Files Affected:
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 967319bdfc4571..ee688a4c17c1d4 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -3969,8 +3969,11 @@ void CodeGenModule::EmitMultiVersionFunctionDefinition(GlobalDecl GD,
EmitGlobalFunctionDefinition(GD.getWithMultiVersionIndex(I), nullptr);
// Ensure that the resolver function is also emitted.
GetOrCreateMultiVersionResolver(GD);
- } else if (FD->hasAttr<TargetVersionAttr>()) {
- GetOrCreateMultiVersionResolver(GD);
+ } else if (auto *TVA = FD->getAttr<TargetVersionAttr>()) {
+ EmitGlobalFunctionDefinition(GD, GV);
+ // Emit the resolver alongside with the default version definition.
+ if (TVA->isDefaultVersion() && FD->doesThisDeclarationHaveABody())
+ GetOrCreateMultiVersionResolver(GD);
} else
EmitGlobalFunctionDefinition(GD, GV);
}
diff --git a/clang/test/CodeGen/attr-target-version.c b/clang/test/CodeGen/attr-target-version.c
index b7112c783da913..cb425b12702db3 100644
--- a/clang/test/CodeGen/attr-target-version.c
+++ b/clang/test/CodeGen/attr-target-version.c
@@ -85,7 +85,21 @@ int hoo(void) {
}
+// This should generate one target version but no resolver.
+__attribute__((target_version("default"))) int unused_with_forward_default_decl(void);
+__attribute__((target_version("mops"))) int unused_with_forward_default_decl(void) { return 0; }
+// FIXME: If the default declaration follows the non-default definition,
+// then target versioning does not trigger.
+__attribute__((target_version("aes"))) int unused_with_default_decl(void) { return 0; }
+__attribute__((target_version("default"))) int unused_with_default_decl(void);
+
+// This should generate two target versions and the resolver.
+__attribute__((target_version("sve"))) int unused_with_default_def(void) { return 0; }
+__attribute__((target_version("default"))) int unused_with_default_def(void) { return 1; }
+
+// This should generate a normal function.
+__attribute__((target_version("rdm"))) int unused_without_default(void) { return 0; }
//.
@@ -97,6 +111,7 @@ int hoo(void) {
// CHECK: @fmv_c.ifunc = weak_odr alias void (), ptr @fmv_c
// CHECK: @fmv_inline.ifunc = weak_odr alias i32 (), ptr @fmv_inline
// CHECK: @fmv_d.ifunc = internal alias i32 (), ptr @fmv_d
+// CHECK: @unused_with_default_def.ifunc = weak_odr alias i32 (), ptr @unused_with_default_def
// CHECK: @fmv = weak_odr ifunc i32 (), ptr @fmv.resolver
// CHECK: @fmv_one = weak_odr ifunc i32 (), ptr @fmv_one.resolver
// CHECK: @fmv_two = weak_odr ifunc i32 (), ptr @fmv_two.resolver
@@ -104,6 +119,7 @@ int hoo(void) {
// CHECK: @fmv_c = weak_odr ifunc void (), ptr @fmv_c.resolver
// CHECK: @fmv_inline = weak_odr ifunc i32 (), ptr @fmv_inline.resolver
// CHECK: @fmv_d = internal ifunc i32 (), ptr @fmv_d.resolver
+// CHECK: @unused_with_default_def = weak_odr ifunc i32 (), ptr @unused_with_default_def.resolver
//.
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv._MflagmMfp16fmlMrng
@@ -113,29 +129,66 @@ int hoo(void) {
//
//
// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@fmv_one._Mls64Msimd
+// CHECK-LABEL: define {{[^@]+}}@fmv._Mflagm2Msme-i16i64
// CHECK-SAME: () #[[ATTR1:[0-9]+]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret i32 1
+// CHECK-NEXT: ret i32 2
//
//
// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@fmv_two._Mfp
-// CHECK-SAME: () #[[ATTR1]] {
+// CHECK-LABEL: define {{[^@]+}}@fmv._MlseMsha2
+// CHECK-SAME: () #[[ATTR2:[0-9]+]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret i32 1
+// CHECK-NEXT: ret i32 3
//
//
// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@foo
-// CHECK-SAME: () #[[ATTR2:[0-9]+]] {
+// CHECK-LABEL: define {{[^@]+}}@fmv._MdotprodMls64_accdata
+// CHECK-SAME: () #[[ATTR3:[0-9]+]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[CALL:%.*]] = call i32 @fmv()
-// CHECK-NEXT: [[CALL1:%.*]] = call i32 @fmv_one()
-// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]]
-// CHECK-NEXT: [[CALL2:%.*]] = call i32 @fmv_two()
-// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CALL2]]
-// CHECK-NEXT: ret i32 [[ADD3]]
+// CHECK-NEXT: ret i32 4
+//
+//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@fmv._Mfp16fmlMmemtag
+// CHECK-SAME: () #[[ATTR4:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 5
+//
+//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@fmv._MaesMfp
+// CHECK-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 6
+//
+//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@fmv._McrcMls64_v
+// CHECK-SAME: () #[[ATTR6:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 7
+//
+//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@fmv._Mbti
+// CHECK-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 8
+//
+//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@fmv._Msme2
+// CHECK-SAME: () #[[ATTR8:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 9
+//
+//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@fmv.default
+// CHECK-SAME: () #[[ATTR9:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 0
//
//
// CHECK-LABEL: define {{[^@]+}}@fmv.resolver() comdat {
@@ -216,16 +269,91 @@ int hoo(void) {
// CHECK-NEXT: ret ptr @fmv.default
//
//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@fmv_one._Mls64Msimd
+// CHECK-SAME: () #[[ATTR5]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 1
+//
+//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@fmv_one._Mdpb
+// CHECK-SAME: () #[[ATTR10:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 2
+//
+//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@fmv_one.default
+// CHECK-SAME: () #[[ATTR9]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 0
+//
+//
// CHECK-LABEL: define {{[^@]+}}@fmv_one.resolver() comdat {
// CHECK-NEXT: resolver_entry:
// CHECK-NEXT: ret ptr @fmv_one._Mls64Msimd
//
//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@fmv_two._Mfp
+// CHECK-SAME: () #[[ATTR5]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 1
+//
+//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@fmv_two._Msimd
+// CHECK-SAME: () #[[ATTR5]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 2
+//
+//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@fmv_two._Mdgh
+// CHECK-SAME: () #[[ATTR9]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 3
+//
+//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@fmv_two._Mfp16Msimd
+// CHECK-SAME: () #[[ATTR5]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 4
+//
+//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@fmv_two.default
+// CHECK-SAME: () #[[ATTR9]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 0
+//
+//
// CHECK-LABEL: define {{[^@]+}}@fmv_two.resolver() comdat {
// CHECK-NEXT: resolver_entry:
// CHECK-NEXT: ret ptr @fmv_two._Mfp16Msimd
//
//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@foo
+// CHECK-SAME: () #[[ATTR9]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[CALL:%.*]] = call i32 @fmv()
+// CHECK-NEXT: [[CALL1:%.*]] = call i32 @fmv_one()
+// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]]
+// CHECK-NEXT: [[CALL2:%.*]] = call i32 @fmv_two()
+// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CALL2]]
+// CHECK-NEXT: ret i32 [[ADD3]]
+//
+//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@fmv_e.default
+// CHECK-SAME: () #[[ATTR9]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 20
+//
+//
// CHECK-LABEL: define {{[^@]+}}@fmv_e.resolver() comdat {
// CHECK-NEXT: resolver_entry:
// CHECK-NEXT: ret ptr @fmv_e._Mls64
@@ -233,11 +361,25 @@ int hoo(void) {
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_default
-// CHECK-SAME: () #[[ATTR2]] {
+// CHECK-SAME: () #[[ATTR9]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 111
//
//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@fmv_c._Mssbs
+// CHECK-SAME: () #[[ATTR9]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret void
+//
+//
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@fmv_c.default
+// CHECK-SAME: () #[[ATTR9]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret void
+//
+//
// CHECK-LABEL: define {{[^@]+}}@fmv_c.resolver() comdat {
// CHECK-NEXT: resolver_entry:
// CHECK-NEXT: call void @__init_cpu_features_resolver()
@@ -254,7 +396,7 @@ int hoo(void) {
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@goo
-// CHECK-SAME: () #[[ATTR2]] {
+// CHECK-SAME: () #[[ATTR9]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[CALL:%.*]] = call i32 @fmv_inline()
// CHECK-NEXT: [[CALL1:%.*]] = call i32 @fmv_e()
@@ -414,7 +556,7 @@ int hoo(void) {
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@recur
-// CHECK-SAME: () #[[ATTR2]] {
+// CHECK-SAME: () #[[ATTR9]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: call void @reca()
// CHECK-NEXT: ret void
@@ -422,7 +564,7 @@ int hoo(void) {
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@main
-// CHECK-SAME: () #[[ATTR2]] {
+// CHECK-SAME: () #[[ATTR9]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
// CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4
@@ -433,7 +575,7 @@ int hoo(void) {
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@hoo
-// CHECK-SAME: () #[[ATTR2]] {
+// CHECK-SAME: () #[[ATTR9]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[FP1:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[FP2:%.*]] = alloca ptr, align 8
@@ -449,260 +591,183 @@ int hoo(void) {
//
//
// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@fmv._Mflagm2Msme-i16i64
-// CHECK-SAME: () #[[ATTR4:[0-9]+]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret i32 2
-//
-//
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@fmv._MlseMsha2
-// CHECK-SAME: () #[[ATTR5:[0-9]+]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret i32 3
-//
-//
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@fmv._MdotprodMls64_accdata
-// CHECK-SAME: () #[[ATTR6:[0-9]+]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret i32 4
-//
-//
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@fmv._Mfp16fmlMmemtag
-// CHECK-SAME: () #[[ATTR7:[0-9]+]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret i32 5
-//
-//
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@fmv._MaesMfp
-// CHECK-SAME: () #[[ATTR1]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret i32 6
-//
-//
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@fmv._McrcMls64_v
-// CHECK-SAME: () #[[ATTR8:[0-9]+]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret i32 7
-//
-//
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@fmv._Mbti
-// CHECK-SAME: () #[[ATTR9:[0-9]+]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret i32 8
-//
-//
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@fmv._Msme2
-// CHECK-SAME: () #[[ATTR10:[0-9]+]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret i32 9
-//
-//
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@fmv.default
-// CHECK-SAME: () #[[ATTR2]] {
+// CHECK-LABEL: define {{[^@]+}}@unused_with_forward_default_decl._Mmops
+// CHECK-SAME: () #[[ATTR12:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 0
//
//
// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@fmv_one._Mdpb
-// CHECK-SAME: () #[[ATTR11:[0-9]+]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret i32 2
-//
-//
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@fmv_one.default
-// CHECK-SAME: () #[[ATTR2]] {
+// CHECK-LABEL: define {{[^@]+}}@unused_with_default_decl
+// CHECK-SAME: () #[[ATTR5]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 0
//
//
// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@fmv_two._Msimd
-// CHECK-SAME: () #[[ATTR1]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret i32 2
-//
-//
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@fmv_two._Mdgh
-// CHECK-SAME: () #[[ATTR2]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret i32 3
-//
-//
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@fmv_two._Mfp16Msimd
-// CHECK-SAME: () #[[ATTR1]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret i32 4
-//
-//
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@fmv_two.default
-// CHECK-SAME: () #[[ATTR2]] {
+// CHECK-LABEL: define {{[^@]+}}@unused_with_default_def._Msve
+// CHECK-SAME: () #[[ATTR13:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 0
//
//
// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@fmv_e.default
-// CHECK-SAME: () #[[ATTR2]] {
+// CHECK-LABEL: define {{[^@]+}}@unused_with_default_def.default
+// CHECK-SAME: () #[[ATTR9]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret i32 20
+// CHECK-NEXT: ret i32 1
//
//
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@fmv_c.default
-// CHECK-SAME: () #[[ATTR2]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: ret void
+// CHECK-LABEL: define {{[^@]+}}@unused_with_default_def.resolver() comdat {
+// CHECK-NEXT: resolver_entry:
+// CHECK-NEXT: call void @__init_cpu_features_resolver()
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1073741824
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1073741824
+// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK: resolver_return:
+// CHECK-NEXT: ret ptr @unused_with_default_def._Msve
+// CHECK: resolver_else:
+// CHECK-NEXT: ret ptr @unused_with_default_def.default
//
//
// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@fmv_c._Mssbs
-// CHECK-SAME: () #[[ATTR2]] {
+// CHECK-LABEL: define {{[^@]+}}@unused_without_default
+// CHECK-SAME: () #[[ATTR14:[0-9]+]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret void
+// CHECK-NEXT: ret i32 0
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._Mf64mmMpmullMsha1
-// CHECK-SAME: () #[[ATTR12:[0-9]+]] {
+// CHECK-SAME: () #[[ATTR15:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 1
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._MfcmaMfp16MrdmMsme
-// CHECK-SAME: () #[[ATTR13:[0-9]+]] {
+// CHECK-SAME: () #[[ATTR16:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 2
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._Mf32mmMi8mmMsha3
-// CHECK-SAME: () #[[ATTR14:[0-9]+]] {
+// CHECK-SAME: () #[[ATTR17:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 12
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._MditMsve-ebf16
-// CHECK-SAME: () #[[ATTR15:[0-9]+]] {
+// CHECK-SAME: () #[[ATTR18:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 8
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._MdpbMrcpc2
-// CHECK-SAME: () #[[ATTR16:[0-9]+]] {
+// CHECK-SAME: () #[[ATTR19:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 6
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._Mdpb2Mjscvt
-// CHECK-SAME: () #[[ATTR17:[0-9]+]] {
+// CHECK-SAME: () #[[ATTR20:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 7
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._MfrinttsMrcpc
-// CHECK-SAME: () #[[ATTR18:[0-9]+]] {
+// CHECK-SAME: () #[[ATTR21:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 3
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._MsveMsve-bf16
-// CHECK-SAME: () #[[ATTR19:[0-9]+]] {
+// CHECK-SAME: () #[[ATTR22:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 4
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._Msve2-aesMsve2-sha3
-// CHECK-SAME: () #[[ATTR20:[0-9]+]] {
+// CHECK-SAME: () #[[ATTR23:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 5
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._Msve2Msve2-bitpermMsve2-pmull128
-// CHECK-SAME: () #[[ATTR21:[0-9]+]] {
+// CHECK-SAME: () #[[ATTR24:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 9
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._Mmemtag2Msve2-sm4
-// CHECK-SAME: () #[[ATTR22:[0-9]+]] {
+// CHECK-SAME: () #[[ATTR25:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 10
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._Mmemtag3MmopsMrcpc3
-// CHECK-SAME: () #[[ATTR23:[0-9]+]] {
+// CHECK-SAME: () #[[ATTR26:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 11
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._MaesMdotprod
-// CHECK-SAME: () #[[ATTR6]] {
+// CHECK-SAME: () #[[ATTR3]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 13
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._Mfp16fmlMsimd
-// CHECK-SAME: () #[[ATTR7]] {
+// CHECK-SAME: () #[[ATTR4]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 14
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._MfpMsm4
-// CHECK-SAME: () #[[ATTR24:[0-9]+]] {
+// CHECK-SAME: () #[[ATTR27:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 15
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._MlseMrdm
-// CHECK-SAME: () #[[ATTR25:[0-9]+]] {
+// CHECK-SAME: () #[[ATTR28:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 16
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_inline.default
-// CHECK-SAME: () #[[ATTR2]] {
+// CHECK-SAME: () #[[ATTR9]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 3
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_d._Msb
-// CHECK-SAME: () #[[ATTR26:[0-9]+]] {
+// CHECK-SAME: () #[[ATTR29:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 0
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_d.default
-// CHECK-SAME: () #[[ATTR2]] {
+// CHECK-SAME: () #[[ATTR9]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 1
//
@@ -815,34 +880,44 @@ int hoo(void) {
// CHECK-NOFMV-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]]
// CHECK-NOFMV-NEXT: ret i32 [[ADD]]
//
+//
+// CHECK-NOFMV: Function Attrs: noinline nounwind optnone
+// CHECK-NOFMV-LABEL: define {{[^@]+}}@unused_with_default_def
+// CHECK-NOFMV-SAME: () #[[ATTR0]] {
...
[truncated]
|
|
||
// FIXME: If the default declaration follows the non-default definition, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This bug (if we consider it as such) seems unrelated to this patch, but since I found it it deserves a test at least to monitor what we are currently generating for it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ouch. Yeah, I think we should consider that a bug.
|
||
// FIXME: If the default declaration follows the non-default definition, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ouch. Yeah, I think we should consider that a bug.
d257243
to
bde335c
Compare
The key takeaway from my conversation with @DanielKristofKiss was that it is important for Function Multi Versioning to work even when the default target version attribute is omitted. It was quite a headache to get this working in clang, but the latest revision supports the desired behavior. It also fixes the previously discovered bug I had mentioned in the test file. |
Can you elaborate on that conversation a bit more? I want to make sure we cover all the corner cases. |
I'd like to support FMV in existing codebases as lean as possible, so the default version attribute would be optional to write as not all version/toolchain will support it. smallest possible codebase change to introduce multi versioning: int foo(void);
+ #ifdef __HAVE_FUNCTION_MULTI_VERSIONING
+ int __attribute__((target_version("feature"))) foo(void);
+ #endif |
bde335c
to
2576857
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
We would like the resolver to be generated eagerly, even if the versioned function is not called from the current translation unit. Fixes llvm#81494. It further allows Multi Versioning to work even if the default target version attribute is omitted from function declarations.
2576857
to
6a49501
Compare
This was a limitation which has now been lifted upon request. Please read the thread below for more details: llvm#84405 (comment) Basically it allows to separate versioned implementations across different TUs without having to share private header files which contain the default declaration.
The current patch looks mostly good, but I'm still hung up on this:
Are you imagining this would multi-version between the un-decorated |
I am not sure I understand. Are you refering to the StringSet I added which keeps non-mangled names of versioned functions? The declarations corresponding to those unique non-mangled names will be appended to |
I'm referring to:
and:
and am confused why |
Ah, I see. That is addressed in #85454 but I have already expressed a concern there about the resolver's emission. |
This was a limitation which has now been lifted upon request. Please read the thread below for more details: llvm#84405 (comment) Basically it allows to separate versioned implementations across different TUs without having to share private header files which contain the default declaration. If a resolver is required (because there is a caller in the TU), then a default declaration is implicitly generated.
This was a limitation which has now been lifted upon request. Please read the thread below for more details: llvm#84405 (comment) Basically it allows to separate versioned implementations across different TUs without having to share private header files which contain the default declaration. If a resolver is required (because there is a caller in the TU), then a default declaration is implicitly generated.
I have raised two related pull requests stacked on this one:
Is there anything else we would like to address in this patch? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
All three LGTM.
This was a limitation which has now been lifted. Please read the thread below for more details: llvm#84405 (comment) Basically it allows to separate versioned implementations across different TUs without having to share private header files which contain the default declaration. The ACLE spec has been updated accordingly to make this explicit: "Each version declaration should be visible at the translation unit in which the corresponding function version resides." ARM-software/acle#310 If a resolver is required (because there is a caller in the TU), then a default declaration is implicitly generated.
…lvm#84405) We would like the resolver to be generated eagerly, even if the versioned function is not called from the current translation unit. Fixes llvm#81494. It further allows Multi Versioning to work even if the default target version attribute is omitted from function declarations.
…tion." (llvm#85914) Reverts llvm#84405 In between of passing the precommit tests on github and being merged some change (perhaps in the AArch64 backend?) landed which resulted in altering the generated resolver. I will regenerate the tests perhaps using a less sensitive runline to such changes.
This was a limitation which has now been lifted. Please read the thread below for more details: llvm#84405 (comment) Basically it allows to separate versioned implementations across different TUs without having to share private header files which contain the default declaration. The ACLE spec has been updated accordingly to make this explicit: "Each version declaration should be visible at the translation unit in which the corresponding function version resides." ARM-software/acle#310 If a resolver is required (because there is a caller in the TU), then a default declaration is implicitly generated.
This was a limitation which has now been lifted. Please read the thread below for more details: #84405 (comment) Basically it allows to separate versioned implementations across different TUs without having to share private header files which contain the default declaration. The ACLE spec has been updated accordingly to make this explicit: "Each version declaration should be visible at the translation unit in which the corresponding function version resides." ARM-software/acle#310 If a resolver is required (because there is a caller in the TU), then a default declaration is implicitly generated.
…tion." (#85914) Reverts llvm/llvm-project#84405 In between of passing the precommit tests on github and being merged some change (perhaps in the AArch64 backend?) landed which resulted in altering the generated resolver. I will regenerate the tests perhaps using a less sensitive runline to such changes.
It was raised in llvm#81494 that we are not generating correct code when there is no TU-local caller. The suggestion was to emit a resolver: * Whenever there is a use in the TU. * When the TU has a definition of the default version. See the comment for more details: llvm#81494 (comment) This got addressed with llvm#84405. Generating a resolver on use means that we may end up with multiple resolvers across different translation units. Those resolvers may not be the same because each translation unit may contain different version declarations (user's fault). Therefore the order of linking the final image determines which of these weak symbols gets selected, resulting in non consisted behavior. I am proposing to stop emitting a resolver on use and only do so in the translation unit which contains the default defition. This way we guarantee the existance of a single resolver. Now, when a versioned function is used we want to emit a declaration of the function symbol omitting the multiversion mangling. I have added a requirement to ACLE mandating that all the function versions are declared in the translation unit which contains the default definition: ARM-software/acle#328
It was raised in #81494 that we are not generating correct code when there is no TU-local caller. The suggestion was to emit a resolver: * Whenever there is a use in the TU. * When the TU has a definition of the default version. See the comment for more details: #81494 (comment) This got addressed with #84405. Generating a resolver on use means that we may end up with multiple resolvers across different translation units. Those resolvers may not be the same because each translation unit may contain different version declarations (user's fault). Therefore the order of linking the final image determines which of these weak symbols gets selected, resulting in non consisted behavior. I am proposing to stop emitting a resolver on use and only do so in the translation unit which contains the default definition. This way we guarantee the existence of a single resolver. Now, when a versioned function is used we want to emit a declaration of the function symbol omitting the multiversion mangling. I have added a requirement to ACLE mandating that all the function versions are declared in the translation unit which contains the default definition: ARM-software/acle#328
It was raised in llvm#81494 that we are not generating correct code when there is no TU-local caller. The suggestion was to emit a resolver: * Whenever there is a use in the TU. * When the TU has a definition of the default version. See the comment for more details: llvm#81494 (comment) This got addressed with llvm#84405. Generating a resolver on use means that we may end up with multiple resolvers across different translation units. Those resolvers may not be the same because each translation unit may contain different version declarations (user's fault). Therefore the order of linking the final image determines which of these weak symbols gets selected, resulting in non consisted behavior. I am proposing to stop emitting a resolver on use and only do so in the translation unit which contains the default definition. This way we guarantee the existence of a single resolver. Now, when a versioned function is used we want to emit a declaration of the function symbol omitting the multiversion mangling. I have added a requirement to ACLE mandating that all the function versions are declared in the translation unit which contains the default definition: ARM-software/acle#328
It was raised in llvm#81494 that we are not generating correct code when there is no TU-local caller. The suggestion was to emit a resolver: * Whenever there is a use in the TU. * When the TU has a definition of the default version. See the comment for more details: llvm#81494 (comment) This got addressed with llvm#84405. Generating a resolver on use means that we may end up with multiple resolvers across different translation units. Those resolvers may not be the same because each translation unit may contain different version declarations (user's fault). Therefore the order of linking the final image determines which of these weak symbols gets selected, resulting in non consisted behavior. I am proposing to stop emitting a resolver on use and only do so in the translation unit which contains the default definition. This way we guarantee the existence of a single resolver. Now, when a versioned function is used we want to emit a declaration of the function symbol omitting the multiversion mangling. I have added a requirement to ACLE mandating that all the function versions are declared in the translation unit which contains the default definition: ARM-software/acle#328
Summary: It was raised in #81494 that we are not generating correct code when there is no TU-local caller. The suggestion was to emit a resolver: * Whenever there is a use in the TU. * When the TU has a definition of the default version. See the comment for more details: #81494 (comment) This got addressed with #84405. Generating a resolver on use means that we may end up with multiple resolvers across different translation units. Those resolvers may not be the same because each translation unit may contain different version declarations (user's fault). Therefore the order of linking the final image determines which of these weak symbols gets selected, resulting in non consisted behavior. I am proposing to stop emitting a resolver on use and only do so in the translation unit which contains the default definition. This way we guarantee the existence of a single resolver. Now, when a versioned function is used we want to emit a declaration of the function symbol omitting the multiversion mangling. I have added a requirement to ACLE mandating that all the function versions are declared in the translation unit which contains the default definition: ARM-software/acle#328 Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60250878
We would like the resolver to be generated eagerly, even if the
versioned function is not called from the current translation
unit. Fixes #81494. It further allows Multi Versioning to work
even if the default target version attribute is omitted from
function declarations.