Skip to content

Commit

Permalink
[FMV][compiler-rt] Fix cpu features initialization.
Browse files Browse the repository at this point in the history
To detect features we either use HWCAPs or directly extract system register
bitfields and compare them with a value. In many cases equality comparisons give
wrong results; for example, FEAT_SVE is not set if SVE2 is available (see
issue llvm#93651). I am also making the access to __aarch64_cpu_features atomic.

The corresponding PR for the ACLE specification is
ARM-software/acle#322.
  • Loading branch information
labrinea committed Jun 11, 2024
1 parent b1fe03f commit 6c1433f
Show file tree
Hide file tree
Showing 7 changed files with 68 additions and 105 deletions.
12 changes: 8 additions & 4 deletions compiler-rt/lib/builtins/aarch64/sme-abi-vg.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include "../cpu_model/aarch64.h"

struct FEATURES {
long long features;
unsigned long long features;
};

extern struct FEATURES __aarch64_cpu_features;
Expand All @@ -23,14 +23,18 @@ extern bool __aarch64_has_sme_and_tpidr2_el0;
#pragma GCC diagnostic ignored "-Wprio-ctor-dtor"
#endif
__attribute__((constructor(90))) static void get_aarch64_cpu_features(void) {
if (!__aarch64_cpu_features.features)
__init_cpu_features();
if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;

__init_cpu_features();
}

__attribute__((target("sve"))) long
__arm_get_current_vg(void) __arm_streaming_compatible {
struct SME_STATE State = __arm_sme_state();
bool HasSVE = __aarch64_cpu_features.features & (1ULL << FEAT_SVE);
unsigned long long features =
__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED);
bool HasSVE = features & (1ULL << FEAT_SVE);

if (!HasSVE && !__aarch64_has_sme_and_tpidr2_el0)
return 0;
Expand Down
4 changes: 2 additions & 2 deletions compiler-rt/lib/builtins/cpu_model/aarch64/fmv/android.inc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
void __init_cpu_features_resolver(unsigned long hwcap,
const __ifunc_arg_t *arg) {
if (__aarch64_cpu_features.features)
if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;

// ifunc resolvers don't have hwcaps in arguments on Android API lower
Expand All @@ -17,7 +17,7 @@ void __init_cpu_features_resolver(unsigned long hwcap,

void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
// CPU features already initialized.
if (__aarch64_cpu_features.features)
if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;

// Don't set any CPU features,
Expand Down
4 changes: 2 additions & 2 deletions compiler-rt/lib/builtins/cpu_model/aarch64/fmv/freebsd.inc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
void __init_cpu_features_resolver(unsigned long hwcap,
const __ifunc_arg_t *arg) {
if (__aarch64_cpu_features.features)
if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;

__init_cpu_features_constructor(hwcap, arg);
Expand All @@ -10,7 +10,7 @@ void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
unsigned long hwcap = 0;
unsigned long hwcap2 = 0;
// CPU features already initialized.
if (__aarch64_cpu_features.features)
if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;

int res = 0;
Expand Down
8 changes: 5 additions & 3 deletions compiler-rt/lib/builtins/cpu_model/aarch64/fmv/fuchsia.inc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#include <zircon/syscalls.h>

void __init_cpu_features_resolver() {
if (__aarch64_cpu_features.features)
if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;

// This ensures the vDSO is a direct link-time dependency of anything that
Expand All @@ -13,8 +13,8 @@ void __init_cpu_features_resolver() {
if (status != ZX_OK)
return;

#define setCPUFeature(cpu_feature) \
__aarch64_cpu_features.features |= 1ULL << cpu_feature
unsigned long long feat = 0;
#define setCPUFeature(cpu_feature) feat |= 1ULL << cpu_feature

if (features & ZX_ARM64_FEATURE_ISA_FP)
setCPUFeature(FEAT_FP);
Expand Down Expand Up @@ -48,4 +48,6 @@ void __init_cpu_features_resolver() {
setCPUFeature(FEAT_SVE);

setCPUFeature(FEAT_INIT);

__atomic_store_n(&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED);
}
135 changes: 43 additions & 92 deletions compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
#define HAVE_SYS_AUXV_H
#endif



static void __init_cpu_features_constructor(unsigned long hwcap,
const __ifunc_arg_t *arg) {
#define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F
unsigned long long feat = 0;
#define setCPUFeature(F) feat |= 1ULL << F
#define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr))
#define extractBits(val, start, number) \
(val & ((1ULL << number) - 1ULL) << start) >> start
Expand All @@ -20,26 +19,20 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
setCPUFeature(FEAT_PMULL);
if (hwcap & HWCAP_FLAGM)
setCPUFeature(FEAT_FLAGM);
if (hwcap2 & HWCAP2_FLAGM2) {
setCPUFeature(FEAT_FLAGM);
if (hwcap2 & HWCAP2_FLAGM2)
setCPUFeature(FEAT_FLAGM2);
}
if (hwcap & HWCAP_SM3 && hwcap & HWCAP_SM4)
if (hwcap & HWCAP_SM4)
setCPUFeature(FEAT_SM4);
if (hwcap & HWCAP_ASIMDDP)
setCPUFeature(FEAT_DOTPROD);
if (hwcap & HWCAP_ASIMDFHM)
setCPUFeature(FEAT_FP16FML);
if (hwcap & HWCAP_FPHP) {
if (hwcap & HWCAP_FPHP)
setCPUFeature(FEAT_FP16);
setCPUFeature(FEAT_FP);
}
if (hwcap & HWCAP_DIT)
setCPUFeature(FEAT_DIT);
if (hwcap & HWCAP_ASIMDRDM)
setCPUFeature(FEAT_RDM);
if (hwcap & HWCAP_ILRCPC)
setCPUFeature(FEAT_RCPC2);
if (hwcap & HWCAP_AES)
setCPUFeature(FEAT_AES);
if (hwcap & HWCAP_SHA1)
Expand All @@ -52,23 +45,20 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
setCPUFeature(FEAT_FCMA);
if (hwcap & HWCAP_SB)
setCPUFeature(FEAT_SB);
if (hwcap & HWCAP_SSBS)
if (hwcap & HWCAP_SSBS) {
setCPUFeature(FEAT_SSBS);
setCPUFeature(FEAT_SSBS2);
}
if (hwcap2 & HWCAP2_MTE) {
setCPUFeature(FEAT_MEMTAG);
setCPUFeature(FEAT_MEMTAG2);
}
if (hwcap2 & HWCAP2_MTE3) {
setCPUFeature(FEAT_MEMTAG);
setCPUFeature(FEAT_MEMTAG2);
if (hwcap2 & HWCAP2_MTE3)
setCPUFeature(FEAT_MEMTAG3);
}
if (hwcap2 & HWCAP2_SVEAES)
setCPUFeature(FEAT_SVE_AES);
if (hwcap2 & HWCAP2_SVEPMULL) {
setCPUFeature(FEAT_SVE_AES);
if (hwcap2 & HWCAP2_SVEPMULL)
setCPUFeature(FEAT_SVE_PMULL128);
}
if (hwcap2 & HWCAP2_SVEBITPERM)
setCPUFeature(FEAT_SVE_BITPERM);
if (hwcap2 & HWCAP2_SVESHA3)
Expand Down Expand Up @@ -105,6 +95,8 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
setCPUFeature(FEAT_WFXT);
if (hwcap2 & HWCAP2_SME)
setCPUFeature(FEAT_SME);
if (hwcap2 & HWCAP2_SME2)
setCPUFeature(FEAT_SME2);
if (hwcap2 & HWCAP2_SME_I16I64)
setCPUFeature(FEAT_SME_I64);
if (hwcap2 & HWCAP2_SME_F64F64)
Expand All @@ -113,86 +105,45 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
setCPUFeature(FEAT_MOPS);
if (hwcap & HWCAP_CPUID) {
unsigned long ftr;
getCPUFeature(ID_AA64PFR1_EL1, ftr);
// ID_AA64PFR1_EL1.MTE >= 0b0001
if (extractBits(ftr, 8, 4) >= 0x1)
setCPUFeature(FEAT_MEMTAG);
// ID_AA64PFR1_EL1.SSBS == 0b0001
if (extractBits(ftr, 4, 4) == 0x1)
setCPUFeature(FEAT_SSBS);
// ID_AA64PFR1_EL1.SME == 0b0010
if (extractBits(ftr, 24, 4) == 0x2)
setCPUFeature(FEAT_SME2);
getCPUFeature(ID_AA64PFR0_EL1, ftr);
// ID_AA64PFR0_EL1.FP != 0b1111
if (extractBits(ftr, 16, 4) != 0xF) {
setCPUFeature(FEAT_FP);
// ID_AA64PFR0_EL1.AdvSIMD has the same value as ID_AA64PFR0_EL1.FP
setCPUFeature(FEAT_SIMD);
}
// ID_AA64PFR0_EL1.SVE != 0b0000
if (extractBits(ftr, 32, 4) != 0x0) {
// get ID_AA64ZFR0_EL1, that name supported
// if sve enabled only
getCPUFeature(S3_0_C0_C4_4, ftr);
// ID_AA64ZFR0_EL1.SVEver == 0b0000
if (extractBits(ftr, 0, 4) == 0x0)
setCPUFeature(FEAT_SVE);
// ID_AA64ZFR0_EL1.SVEver == 0b0001
if (extractBits(ftr, 0, 4) == 0x1)
setCPUFeature(FEAT_SVE2);
// ID_AA64ZFR0_EL1.BF16 != 0b0000
if (extractBits(ftr, 20, 4) != 0x0)
setCPUFeature(FEAT_SVE_BF16);
}
getCPUFeature(ID_AA64ISAR0_EL1, ftr);
// ID_AA64ISAR0_EL1.SHA3 != 0b0000
if (extractBits(ftr, 32, 4) != 0x0)
setCPUFeature(FEAT_SHA3);

getCPUFeature(ID_AA64ISAR1_EL1, ftr);
// ID_AA64ISAR1_EL1.DPB >= 0b0001
if (extractBits(ftr, 0, 4) >= 0x1)
setCPUFeature(FEAT_DPB);
// ID_AA64ISAR1_EL1.LRCPC != 0b0000
if (extractBits(ftr, 20, 4) != 0x0)
setCPUFeature(FEAT_RCPC);
// ID_AA64ISAR1_EL1.LRCPC == 0b0011
if (extractBits(ftr, 20, 4) == 0x3)
setCPUFeature(FEAT_RCPC3);
// ID_AA64ISAR1_EL1.SPECRES == 0b0001
if (extractBits(ftr, 40, 4) == 0x2)
/* ID_AA64ISAR1_EL1.SPECRES >= 0b0001 */
if (extractBits(ftr, 40, 4) >= 0x1)
setCPUFeature(FEAT_PREDRES);
// ID_AA64ISAR1_EL1.BF16 != 0b0000
if (extractBits(ftr, 44, 4) != 0x0)
setCPUFeature(FEAT_BF16);
// ID_AA64ISAR1_EL1.LS64 >= 0b0001
/* ID_AA64ISAR1_EL1.LS64 >= 0b0001 */
if (extractBits(ftr, 60, 4) >= 0x1)
setCPUFeature(FEAT_LS64);
// ID_AA64ISAR1_EL1.LS64 >= 0b0010
/* ID_AA64ISAR1_EL1.LS64 >= 0b0010 */
if (extractBits(ftr, 60, 4) >= 0x2)
setCPUFeature(FEAT_LS64_V);
// ID_AA64ISAR1_EL1.LS64 >= 0b0011
/* ID_AA64ISAR1_EL1.LS64 >= 0b0011 */
if (extractBits(ftr, 60, 4) >= 0x3)
setCPUFeature(FEAT_LS64_ACCDATA);
} else {
// Set some features in case of no CPUID support
if (hwcap & (HWCAP_FP | HWCAP_FPHP)) {
setCPUFeature(FEAT_FP);
// FP and AdvSIMD fields have the same value
setCPUFeature(FEAT_SIMD);
}
if (hwcap & HWCAP_DCPOP || hwcap2 & HWCAP2_DCPODP)
setCPUFeature(FEAT_DPB);
if (hwcap & HWCAP_LRCPC || hwcap & HWCAP_ILRCPC)
setCPUFeature(FEAT_RCPC);
if (hwcap2 & HWCAP2_BF16 || hwcap2 & HWCAP2_EBF16)
setCPUFeature(FEAT_BF16);
if (hwcap2 & HWCAP2_SVEBF16)
setCPUFeature(FEAT_SVE_BF16);
if (hwcap2 & HWCAP2_SVE2 && hwcap & HWCAP_SVE)
setCPUFeature(FEAT_SVE2);
if (hwcap & HWCAP_SHA3)
setCPUFeature(FEAT_SHA3);
}
if (hwcap & HWCAP_FP) {
setCPUFeature(FEAT_FP);
// FP and AdvSIMD fields have the same value
setCPUFeature(FEAT_SIMD);
}
if (hwcap & HWCAP_DCPOP)
setCPUFeature(FEAT_DPB);
if (hwcap & HWCAP_LRCPC)
setCPUFeature(FEAT_RCPC);
if (hwcap & HWCAP_ILRCPC)
setCPUFeature(FEAT_RCPC2);
if (hwcap2 & HWCAP2_LRCPC3)
setCPUFeature(FEAT_RCPC3);
if (hwcap2 & HWCAP2_BF16)
setCPUFeature(FEAT_BF16);
if (hwcap2 & HWCAP2_SVEBF16)
setCPUFeature(FEAT_SVE_BF16);
if (hwcap & HWCAP_SVE)
setCPUFeature(FEAT_SVE);
if (hwcap2 & HWCAP2_SVE2)
setCPUFeature(FEAT_SVE2);
if (hwcap & HWCAP_SHA3)
setCPUFeature(FEAT_SHA3);
setCPUFeature(FEAT_INIT);

__atomic_store_n(&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED);
}
4 changes: 2 additions & 2 deletions compiler-rt/lib/builtins/cpu_model/aarch64/fmv/sysauxv.inc
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
void __init_cpu_features_resolver(unsigned long hwcap,
const __ifunc_arg_t *arg) {
if (__aarch64_cpu_features.features)
if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;
__init_cpu_features_constructor(hwcap, arg);
}

void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
// CPU features already initialized.
if (__aarch64_cpu_features.features)
if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;

unsigned long hwcap = getauxval(AT_HWCAP);
Expand Down
6 changes: 6 additions & 0 deletions compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,12 @@
// Fallback definitions, used only when the macro is not already defined.
// Every mask is built from 1ULL so the shift is carried out in a 64-bit
// type even where unsigned long is only 32 bits wide (a shift count of 37
// or 46 on a 32-bit operand would be undefined behaviour).
#ifndef HWCAP2_SVE_EBF16
#define HWCAP2_SVE_EBF16 (1ULL << 33)
#endif
#ifndef HWCAP2_SME2
#define HWCAP2_SME2 (1ULL << 37)
#endif
#ifndef HWCAP2_MOPS
#define HWCAP2_MOPS (1ULL << 43)
#endif
#ifndef HWCAP2_LRCPC3
#define HWCAP2_LRCPC3 (1ULL << 46)
#endif

0 comments on commit 6c1433f

Please sign in to comment.