diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs index 7db188808e26a..2a8a462c977b8 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs @@ -25,9 +25,15 @@ class AsmOffsets public const int OFFSETOF__REGDISPLAY__ControlPC = 0xbe0; #endif // TARGET_UNIX #elif TARGET_ARM64 +#if TARGET_UNIX + public const int SIZEOF__REGDISPLAY = 0x9e0; + public const int OFFSETOF__REGDISPLAY__SP = 0x938; + public const int OFFSETOF__REGDISPLAY__ControlPC = 0x940; +#else // TARGET_UNIX public const int SIZEOF__REGDISPLAY = 0x940; public const int OFFSETOF__REGDISPLAY__SP = 0x898; public const int OFFSETOF__REGDISPLAY__ControlPC = 0x8a0; +#endif // TARGET_UNIX #elif TARGET_ARM public const int SIZEOF__REGDISPLAY = 0x410; public const int OFFSETOF__REGDISPLAY__SP = 0x3ec; @@ -71,9 +77,15 @@ class AsmOffsets public const int OFFSETOF__REGDISPLAY__ControlPC = 0xbd8; #endif // TARGET_UNIX #elif TARGET_ARM64 +#if TARGET_UNIX + public const int SIZEOF__REGDISPLAY = 0x9d0; + public const int OFFSETOF__REGDISPLAY__SP = 0x930; + public const int OFFSETOF__REGDISPLAY__ControlPC = 0x938; +#else // TARGET_UNIX public const int SIZEOF__REGDISPLAY = 0x930; public const int OFFSETOF__REGDISPLAY__SP = 0x890; public const int OFFSETOF__REGDISPLAY__ControlPC = 0x898; +#endif // TARGET_UNIX #elif TARGET_ARM public const int SIZEOF__REGDISPLAY = 0x408; public const int OFFSETOF__REGDISPLAY__SP = 0x3e8; @@ -111,9 +123,13 @@ class AsmOffsets public const int SIZEOF__PAL_LIMITED_CONTEXT = 0xc20; #else // TARGET_UNIX public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x4d0; -#endif // TARGET_UNIx +#endif // TARGET_UNIX #elif TARGET_ARM64 +#if TARGET_UNIX + public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x3e0; +#else // TARGET_UNIX public 
const int SIZEOF__PAL_LIMITED_CONTEXT = 0x390; +#endif // TARGET_UNIX #elif TARGET_ARM public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x1a0; #elif TARGET_X86 diff --git a/src/coreclr/debug/inc/dbgtargetcontext.h b/src/coreclr/debug/inc/dbgtargetcontext.h index e43cf5c3e5d3a..dab7ca29c7db3 100644 --- a/src/coreclr/debug/inc/dbgtargetcontext.h +++ b/src/coreclr/debug/inc/dbgtargetcontext.h @@ -470,7 +470,12 @@ typedef DECLSPEC_ALIGN(16) struct { } DT_CONTEXT; + +#if !defined(CROSS_COMPILE) && !defined(TARGET_WINDOWS) +static_assert(sizeof(DT_CONTEXT) == offsetof(T_CONTEXT, XStateFeaturesMask), "DT_CONTEXT must not include the SVE registers on ARM64"); +#else static_assert(sizeof(DT_CONTEXT) == sizeof(T_CONTEXT), "DT_CONTEXT size must equal the T_CONTEXT size on ARM64"); +#endif #elif defined(DTCONTEXT_IS_LOONGARCH64) diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index 920c44f98cd4c..05c27cf185bb6 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -1844,6 +1844,12 @@ typedef struct _IMAGE_ARM_RUNTIME_FUNCTION_ENTRY { #define CONTEXT_EXCEPTION_REQUEST 0x40000000L #define CONTEXT_EXCEPTION_REPORTING 0x80000000L +#define CONTEXT_XSTATE (CONTEXT_ARM64 | 0x40L) + +#define XSTATE_SVE (0) + +#define XSTATE_MASK_SVE (UI64(1) << (XSTATE_SVE)) + // // This flag is set by the unwinder if it has unwound to a call // site, and cleared whenever it unwinds through a trap frame. @@ -1944,7 +1950,18 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT { /* +0x338 */ DWORD64 Bvr[ARM64_MAX_BREAKPOINTS]; /* +0x378 */ DWORD Wcr[ARM64_MAX_WATCHPOINTS]; /* +0x380 */ DWORD64 Wvr[ARM64_MAX_WATCHPOINTS]; - /* +0x390 */ + + /* +0x390 */ DWORD64 XStateFeaturesMask; + + // + // Sve Registers + // + // TODO-SVE: Support Vector register sizes >128bit + // For 128bit, Z and V registers fully overlap, so there is no need to load/store both. 
+ /* +0x398 */ DWORD Vl; + /* +0x39c */ DWORD Ffr; + /* +0x3a0 */ DWORD P[16]; + /* +0x3e0 */ } CONTEXT, *PCONTEXT, *LPCONTEXT; diff --git a/src/coreclr/pal/src/CMakeLists.txt b/src/coreclr/pal/src/CMakeLists.txt index 2398334e89989..28fc8765768a5 100644 --- a/src/coreclr/pal/src/CMakeLists.txt +++ b/src/coreclr/pal/src/CMakeLists.txt @@ -90,6 +90,11 @@ if(CLR_CMAKE_HOST_ARCH_AMD64 AND CLR_CMAKE_TARGET_LINUX AND NOT CLR_CMAKE_TARGET add_definitions(-DXSTATE_SUPPORTED) endif(CLR_CMAKE_HOST_ARCH_AMD64 AND CLR_CMAKE_TARGET_LINUX AND NOT CLR_CMAKE_TARGET_LINUX_MUSL) +if(CLR_CMAKE_HOST_ARCH_ARM64 AND CLR_CMAKE_TARGET_LINUX AND NOT CLR_CMAKE_TARGET_LINUX_MUSL) + # Currently the SVE signal context (sve_context) is not available on Alpine Linux + add_definitions(-DXSTATE_SUPPORTED) +endif(CLR_CMAKE_HOST_ARCH_ARM64 AND CLR_CMAKE_TARGET_LINUX AND NOT CLR_CMAKE_TARGET_LINUX_MUSL) + if(CLR_CMAKE_TARGET_LINUX_MUSL) # Setting RLIMIT_NOFILE breaks debugging of coreclr on Alpine Linux for some reason add_definitions(-DDONT_SET_RLIMIT_NOFILE) diff --git a/src/coreclr/pal/src/arch/arm64/asmconstants.h b/src/coreclr/pal/src/arch/arm64/asmconstants.h index a657b8e5eb1c5..d6379a28ce801 100644 --- a/src/coreclr/pal/src/arch/arm64/asmconstants.h +++ b/src/coreclr/pal/src/arch/arm64/asmconstants.h @@ -18,6 +18,13 @@ #define CONTEXT_FULL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT) +#define CONTEXT_XSTATE_BIT (6) +#define CONTEXT_XSTATE (1 << CONTEXT_XSTATE_BIT) + +#define XSTATE_SVE_BIT (0) + +#define XSTATE_MASK_SVE (UI64(1) << (XSTATE_SVE_BIT)) + #define CONTEXT_ContextFlags 0 #define CONTEXT_Cpsr CONTEXT_ContextFlags+4 @@ -54,6 +61,7 @@ #define CONTEXT_Lr CONTEXT_Fp+8 #define CONTEXT_Sp CONTEXT_Lr+8 #define CONTEXT_Pc CONTEXT_Sp+8 + #define CONTEXT_NEON_OFFSET CONTEXT_Pc+8 #define CONTEXT_V0 0 #define CONTEXT_V1 CONTEXT_V0+16 @@ -89,7 +97,42 @@ #define CONTEXT_V31 CONTEXT_V30+16 #define CONTEXT_FLOAT_CONTROL_OFFSET CONTEXT_V31+16 #define CONTEXT_Fpcr 0 -#define CONTEXT_Fpsr CONTEXT_Fpcr+8 -#define 
CONTEXT_Size ((CONTEXT_NEON_OFFSET + CONTEXT_Fpsr + 8 + 0xf) & ~0xf) +#define CONTEXT_Fpsr CONTEXT_Fpcr+4 +#define CONTEXT_NEON_SIZE CONTEXT_FLOAT_CONTROL_OFFSET+CONTEXT_Fpsr+4 + +#define CONTEXT_DEBUG_OFFSET CONTEXT_NEON_OFFSET+CONTEXT_NEON_SIZE +#define CONTEXT_DEBUG_SIZE 120 // (8*4)+(8*8)+(2*4)+(2*8) + +#define CONTEXT_XSTATEFEATURESMASK_OFFSET CONTEXT_DEBUG_OFFSET+CONTEXT_DEBUG_SIZE + +// TODO-SVE: Support Vector register sizes >128bit + +#define CONTEXT_SVE_OFFSET CONTEXT_XSTATEFEATURESMASK_OFFSET+8 +#define CONTEXT_VL_OFFSET 0 + +// SVE register offsets are multiples of the vector length +#define CONTEXT_SVE_REGS_OFFSET CONTEXT_VL_OFFSET+4 +#define CONTEXT_FFR_VL 0 +#define CONTEXT_P0_VL CONTEXT_FFR_VL+1 +#define CONTEXT_P1_VL CONTEXT_P0_VL+1 +#define CONTEXT_P2_VL CONTEXT_P1_VL+1 +#define CONTEXT_P3_VL CONTEXT_P2_VL+1 +#define CONTEXT_P4_VL CONTEXT_P3_VL+1 +#define CONTEXT_P5_VL CONTEXT_P4_VL+1 +#define CONTEXT_P6_VL CONTEXT_P5_VL+1 +#define CONTEXT_P7_VL CONTEXT_P6_VL+1 +#define CONTEXT_P8_VL CONTEXT_P7_VL+1 +#define CONTEXT_P9_VL CONTEXT_P8_VL+1 +#define CONTEXT_P10_VL CONTEXT_P9_VL+1 +#define CONTEXT_P11_VL CONTEXT_P10_VL+1 +#define CONTEXT_P12_VL CONTEXT_P11_VL+1 +#define CONTEXT_P13_VL CONTEXT_P12_VL+1 +#define CONTEXT_P14_VL CONTEXT_P13_VL+1 +#define CONTEXT_P15_VL CONTEXT_P14_VL+1 + +#define CONTEXT_SVE_REGS_SIZE ((CONTEXT_P15_VL+1) * 4) +#define CONTEXT_SVE_SIZE CONTEXT_SVE_REGS_SIZE + 8 + +#define CONTEXT_Size CONTEXT_SVE_OFFSET + CONTEXT_SVE_SIZE #endif diff --git a/src/coreclr/pal/src/arch/arm64/context2.S b/src/coreclr/pal/src/arch/arm64/context2.S index 23bc0c065581e..4bfde2f19fbcb 100644 --- a/src/coreclr/pal/src/arch/arm64/context2.S +++ b/src/coreclr/pal/src/arch/arm64/context2.S @@ -1,7 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // -// Implementation of _CONTEXT_CaptureContext for the ARM platform. 
+// Implementation of _CONTEXT_CaptureContext for the ARM64 platform. // This function is processor dependent. It is used by exception handling, // and is always apply to the current thread. // @@ -12,6 +12,7 @@ // Incoming: // x0: Context* // +.arch_extension sve LEAF_ENTRY CONTEXT_CaptureContext, _TEXT PROLOG_STACK_ALLOC 32 .cfi_adjust_cfa_offset 32 @@ -69,7 +70,6 @@ LOCAL_LABEL(Done_CONTEXT_CONTROL): stp x26, x27, [x0, CONTEXT_X26] str x28, [x0, CONTEXT_X28] - LOCAL_LABEL(Done_CONTEXT_INTEGER): ldr w1, [x0, CONTEXT_ContextFlags] // clangs assembler doesn't seem to support the mov Wx, imm32 yet @@ -104,6 +104,41 @@ LOCAL_LABEL(Done_CONTEXT_INTEGER): sub x0, x0, CONTEXT_FLOAT_CONTROL_OFFSET + CONTEXT_NEON_OFFSET LOCAL_LABEL(Done_CONTEXT_FLOATING_POINT): + ldr w1, [x0, CONTEXT_ContextFlags] + // clangs assembler doesn't seem to support the mov Wx, imm32 yet + movz w2, #0x40, lsl #16 + movk w2, #0x40 + mov w3, w2 + and w2, w1, w2 + cmp w2, w3 + b.ne LOCAL_LABEL(Done_CONTEXT_SVE) + + ldr x1, [x0, CONTEXT_XSTATEFEATURESMASK_OFFSET] + tbz x1, #XSTATE_SVE_BIT, LOCAL_LABEL(Done_CONTEXT_SVE) + + add x0, x0, CONTEXT_SVE_OFFSET + str p0, [x0, CONTEXT_P0_VL, MUL VL] + str p1, [x0, CONTEXT_P1_VL, MUL VL] + str p2, [x0, CONTEXT_P2_VL, MUL VL] + str p3, [x0, CONTEXT_P3_VL, MUL VL] + str p4, [x0, CONTEXT_P4_VL, MUL VL] + str p5, [x0, CONTEXT_P5_VL, MUL VL] + str p6, [x0, CONTEXT_P6_VL, MUL VL] + str p7, [x0, CONTEXT_P7_VL, MUL VL] + str p8, [x0, CONTEXT_P8_VL, MUL VL] + str p9, [x0, CONTEXT_P9_VL, MUL VL] + str p10, [x0, CONTEXT_P10_VL, MUL VL] + str p11, [x0, CONTEXT_P11_VL, MUL VL] + str p12, [x0, CONTEXT_P12_VL, MUL VL] + str p13, [x0, CONTEXT_P13_VL, MUL VL] + str p14, [x0, CONTEXT_P14_VL, MUL VL] + str p15, [x0, CONTEXT_P15_VL, MUL VL] + rdffr p0.b + str p0, [x0, CONTEXT_FFR_VL, MUL VL] + ldr p0, [x0, CONTEXT_P0_VL, MUL VL] + sub x0, x0, CONTEXT_SVE_OFFSET + +LOCAL_LABEL(Done_CONTEXT_SVE): EPILOG_STACK_FREE 32 ret @@ -124,6 +159,7 @@ LEAF_ENTRY RtlCaptureContext, _TEXT orr w1, w1, #0x4 
orr w1, w1, #0x8 str w1, [x0, CONTEXT_ContextFlags] + str xzr, [x0, CONTEXT_XSTATEFEATURESMASK_OFFSET] ldr x1, [sp] EPILOG_STACK_FREE 16 b C_FUNC(CONTEXT_CaptureContext) @@ -133,6 +169,7 @@ LEAF_END RtlCaptureContext, _TEXT // x0: Context* // x1: Exception* // +.arch_extension sve LEAF_ENTRY RtlRestoreContext, _TEXT #ifdef HAS_ADDRESS_SANITIZER @@ -154,6 +191,34 @@ LOCAL_LABEL(Restore_CONTEXT_FLOATING_POINT): // since we potentially clobber x0 below, we'll bank it in x16 mov x16, x0 + ldr w17, [x16, CONTEXT_ContextFlags] + tbz w17, #CONTEXT_XSTATE_BIT, LOCAL_LABEL(No_Restore_CONTEXT_SVE) + + ldr w17, [x16, CONTEXT_XSTATEFEATURESMASK_OFFSET] + tbz w17, #XSTATE_SVE_BIT, LOCAL_LABEL(No_Restore_CONTEXT_SVE) + + add x16, x16, CONTEXT_SVE_OFFSET + ldr p0, [x16, CONTEXT_FFR_VL, MUL VL] + wrffr p0.b + ldr p0, [x16, CONTEXT_P0_VL, MUL VL] + ldr p1, [x16, CONTEXT_P1_VL, MUL VL] + ldr p2, [x16, CONTEXT_P2_VL, MUL VL] + ldr p3, [x16, CONTEXT_P3_VL, MUL VL] + ldr p4, [x16, CONTEXT_P4_VL, MUL VL] + ldr p5, [x16, CONTEXT_P5_VL, MUL VL] + ldr p6, [x16, CONTEXT_P6_VL, MUL VL] + ldr p7, [x16, CONTEXT_P7_VL, MUL VL] + ldr p8, [x16, CONTEXT_P8_VL, MUL VL] + ldr p9, [x16, CONTEXT_P9_VL, MUL VL] + ldr p10, [x16, CONTEXT_P10_VL, MUL VL] + ldr p11, [x16, CONTEXT_P11_VL, MUL VL] + ldr p12, [x16, CONTEXT_P12_VL, MUL VL] + ldr p13, [x16, CONTEXT_P13_VL, MUL VL] + ldr p14, [x16, CONTEXT_P14_VL, MUL VL] + ldr p15, [x16, CONTEXT_P15_VL, MUL VL] + sub x16, x16, CONTEXT_SVE_OFFSET + +LOCAL_LABEL(No_Restore_CONTEXT_SVE): ldr w17, [x16, CONTEXT_ContextFlags] tbz w17, #CONTEXT_FLOATING_POINT_BIT, LOCAL_LABEL(No_Restore_CONTEXT_FLOATING_POINT) @@ -230,3 +295,12 @@ LEAF_ENTRY RestoreCompleteContext, _TEXT LEAF_END RestoreCompleteContext, _TEXT #endif // __APPLE__ + +// Incoming: +// None +// +.arch_extension sve + LEAF_ENTRY CONTEXT_GetSveLengthFromOS, _TEXT + rdvl x0, 1 + ret lr + LEAF_END CONTEXT_GetSveLengthFromOS, _TEXT diff --git a/src/coreclr/pal/src/exception/signal.cpp 
b/src/coreclr/pal/src/exception/signal.cpp index b0b0662ff0f22..5dd071b5c61dc 100644 --- a/src/coreclr/pal/src/exception/signal.cpp +++ b/src/coreclr/pal/src/exception/signal.cpp @@ -863,7 +863,7 @@ static void inject_activation_handler(int code, siginfo_t *siginfo, void *contex ULONG contextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT; -#if defined(HOST_AMD64) +#if defined(HOST_AMD64) || defined(HOST_ARM64) contextFlags |= CONTEXT_XSTATE; #endif @@ -1053,7 +1053,7 @@ static bool common_signal_handler(int code, siginfo_t *siginfo, void *sigcontext ULONG contextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT; -#if defined(HOST_AMD64) +#if defined(HOST_AMD64) || defined(HOST_ARM64) contextFlags |= CONTEXT_XSTATE; #endif diff --git a/src/coreclr/pal/src/include/pal/context.h b/src/coreclr/pal/src/include/pal/context.h index 6eeeaa6fed745..932b40c52a821 100644 --- a/src/coreclr/pal/src/include/pal/context.h +++ b/src/coreclr/pal/src/include/pal/context.h @@ -42,14 +42,14 @@ typedef ucontext_t native_context_t; #if !HAVE_MACH_EXCEPTIONS -#if defined(XSTATE_SUPPORTED) && !HAVE_PUBLIC_XSTATE_STRUCT +#if defined(XSTATE_SUPPORTED) && defined(HOST_AMD64) && !HAVE_PUBLIC_XSTATE_STRUCT namespace asm_sigcontext { #include }; using asm_sigcontext::_fpx_sw_bytes; using asm_sigcontext::_xstate; -#endif // defined(XSTATE_SUPPORTED) && !HAVE_PUBLIC_XSTATE_STRUCT +#endif // XSTATE_SUPPORTED && HOST_AMD64 && !HAVE_PUBLIC_XSTATE_STRUCT #else // !HAVE_MACH_EXCEPTIONS #include @@ -60,6 +60,90 @@ using asm_sigcontext::_xstate; bool Xstate_IsAvx512Supported(); #endif // XSTATE_SUPPORTED || (HOST_AMD64 && HAVE_MACH_EXCEPTIONS) +#if defined(HOST_64BIT) && defined(HOST_ARM64) && !defined(TARGET_FREEBSD) && !defined(TARGET_OSX) +#if !defined(SVE_MAGIC) + +// Add the missing SVE defines + +#define EXTRA_MAGIC 0x45585401 + +struct extra_context { + struct _aarch64_ctx head; + __u64 datap; /* 16-byte aligned pointer to extra space cast to __u64 */ + __u32 
size; /* size in bytes of the extra space */ + __u32 __reserved[3]; +}; + +#define SVE_MAGIC 0x53564501 + +struct sve_context { + struct _aarch64_ctx head; + __u16 vl; + __u16 flags; + __u16 __reserved[2]; +}; + +#define __SVE_VQ_BYTES 16 /* number of bytes per quadword */ + +#define __SVE_NUM_ZREGS 32 +#define __SVE_NUM_PREGS 16 + +#define sve_vq_from_vl(vl) ((vl) / __SVE_VQ_BYTES) +#define sve_vl_from_vq(vq) ((vq) * __SVE_VQ_BYTES) + +#define __SVE_ZREG_SIZE(vq) ((__u32)(vq) * __SVE_VQ_BYTES) +#define __SVE_PREG_SIZE(vq) ((__u32)(vq) * (__SVE_VQ_BYTES / 8)) +#define __SVE_FFR_SIZE(vq) __SVE_PREG_SIZE(vq) + +#define __SVE_ZREGS_OFFSET 0 +#define __SVE_ZREG_OFFSET(vq, n) \ + (__SVE_ZREGS_OFFSET + __SVE_ZREG_SIZE(vq) * (n)) +#define __SVE_ZREGS_SIZE(vq) \ + (__SVE_ZREG_OFFSET(vq, __SVE_NUM_ZREGS) - __SVE_ZREGS_OFFSET) + +#define __SVE_PREGS_OFFSET(vq) \ + (__SVE_ZREGS_OFFSET + __SVE_ZREGS_SIZE(vq)) +#define __SVE_PREG_OFFSET(vq, n) \ + (__SVE_PREGS_OFFSET(vq) + __SVE_PREG_SIZE(vq) * (n)) +#define __SVE_PREGS_SIZE(vq) \ + (__SVE_PREG_OFFSET(vq, __SVE_NUM_PREGS) - __SVE_PREGS_OFFSET(vq)) + +#define __SVE_FFR_OFFSET(vq) \ + (__SVE_PREGS_OFFSET(vq) + __SVE_PREGS_SIZE(vq)) + + +#define SVE_SIG_ZREG_SIZE(vq) __SVE_ZREG_SIZE(vq) +#define SVE_SIG_PREG_SIZE(vq) __SVE_PREG_SIZE(vq) +#define SVE_SIG_FFR_SIZE(vq) __SVE_FFR_SIZE(vq) + +#define SVE_SIG_REGS_OFFSET \ + ((sizeof(struct sve_context) + (__SVE_VQ_BYTES - 1)) \ + / __SVE_VQ_BYTES * __SVE_VQ_BYTES) + +#define SVE_SIG_ZREGS_OFFSET \ + (SVE_SIG_REGS_OFFSET + __SVE_ZREGS_OFFSET) +#define SVE_SIG_ZREG_OFFSET(vq, n) \ + (SVE_SIG_REGS_OFFSET + __SVE_ZREG_OFFSET(vq, n)) +#define SVE_SIG_ZREGS_SIZE(vq) __SVE_ZREGS_SIZE(vq) + +#define SVE_SIG_PREGS_OFFSET(vq) \ + (SVE_SIG_REGS_OFFSET + __SVE_PREGS_OFFSET(vq)) +#define SVE_SIG_PREG_OFFSET(vq, n) \ + (SVE_SIG_REGS_OFFSET + __SVE_PREG_OFFSET(vq, n)) +#define SVE_SIG_PREGS_SIZE(vq) __SVE_PREGS_SIZE(vq) + +#define SVE_SIG_FFR_OFFSET(vq) \ + (SVE_SIG_REGS_OFFSET + 
__SVE_FFR_OFFSET(vq)) + +#define SVE_SIG_REGS_SIZE(vq) \ + (__SVE_FFR_OFFSET(vq) + __SVE_FFR_SIZE(vq)) + +#define SVE_SIG_CONTEXT_SIZE(vq) \ + (SVE_SIG_REGS_OFFSET + SVE_SIG_REGS_SIZE(vq)) + +#endif // SVE_MAGIC +#endif // HOST_64BIT && HOST_ARM64 && !TARGET_FREEBSD && !TARGET_OSX + #ifdef HOST_S390X #define MCREG_PSWMask(mc) ((mc).psw.mask) @@ -351,7 +435,7 @@ bool Xstate_IsAvx512Supported(); ///////////////////// // Extended state -#ifdef XSTATE_SUPPORTED +#if defined(XSTATE_SUPPORTED) && defined(HOST_AMD64) #if HAVE_FPSTATE_GLIBC_RESERVED1 #define FPSTATE_RESERVED __glibc_reserved1 @@ -542,7 +626,7 @@ inline void *FPREG_Xstate_Hi16Zmm(const ucontext_t *uc, uint32_t *featureSize) _ASSERTE(FPREG_HasAvx512Registers(uc)); return FPREG_Xstate_ExtendedFeature(uc, featureSize, XSTATE_AVX512_ZMM); } -#endif // XSTATE_SUPPORTED +#endif // XSTATE_SUPPORTED && HOST_AMD64 ///////////////////// @@ -662,41 +746,18 @@ const struct fpregs* GetConstNativeSigSimdContext(const native_context_t *mc) #define MCREG_Pc(mc) ((mc).pc) #define MCREG_Cpsr(mc) ((mc).pstate) +void _GetNativeSigSimdContext(uint8_t *data, uint32_t size, fpsimd_context **fp_ptr, sve_context **sve_ptr); inline -fpsimd_context* GetNativeSigSimdContext(native_context_t *mc) +void GetNativeSigSimdContext(native_context_t *mc, fpsimd_context **fp_ptr, sve_context **sve_ptr) { - size_t size = 0; - - do - { - fpsimd_context* fp = reinterpret_cast(&mc->uc_mcontext.__reserved[size]); - - if(fp->head.magic == FPSIMD_MAGIC) - { - _ASSERTE(fp->head.size >= sizeof(fpsimd_context)); - _ASSERTE(size + fp->head.size <= sizeof(mc->uc_mcontext.__reserved)); - - return fp; - } - - if (fp->head.size == 0) - { - break; - } - - size += fp->head.size; - } while (size + sizeof(fpsimd_context) <= sizeof(mc->uc_mcontext.__reserved)); - - _ASSERTE(false); - - return nullptr; + _GetNativeSigSimdContext((uint8_t *)&mc->uc_mcontext.__reserved[0], sizeof(mc->uc_mcontext.__reserved), fp_ptr, sve_ptr); } inline -const fpsimd_context* 
GetConstNativeSigSimdContext(const native_context_t *mc) +void GetConstNativeSigSimdContext(const native_context_t *mc, fpsimd_context const **fp_ptr, sve_context const **sve_ptr) { - return GetNativeSigSimdContext(const_cast(mc)); + GetNativeSigSimdContext(const_cast(mc), const_cast(fp_ptr), const_cast(sve_ptr)); } #else // TARGET_OSX @@ -1483,6 +1544,22 @@ DWORD CONTEXTGetExceptionCodeForSignal(const siginfo_t *siginfo, #endif // HAVE_MACH_EXCEPTIONS else +#if defined(HOST_ARM64) +/*++ +Function : + CONTEXT_GetSveLengthFromOS + + Gets the SVE vector length +Parameters : + None +Return value : + The SVE vector length in bytes +--*/ +DWORD64 +CONTEXT_GetSveLengthFromOS( + ); +#endif // HOST_ARM64 + #ifdef __cplusplus } #endif // __cplusplus diff --git a/src/coreclr/pal/src/thread/context.cpp b/src/coreclr/pal/src/thread/context.cpp index 04fabab0e7253..48fd7e94d3c3d 100644 --- a/src/coreclr/pal/src/thread/context.cpp +++ b/src/coreclr/pal/src/thread/context.cpp @@ -384,9 +384,9 @@ bool Xstate_IsAvx512Supported() #if !HAVE_MACH_EXCEPTIONS -#ifdef XSTATE_SUPPORTED +#if defined(XSTATE_SUPPORTED) && defined(HOST_AMD64) Xstate_ExtendedFeature Xstate_ExtendedFeatures[Xstate_ExtendedFeatures_Count]; -#endif // XSTATE_SUPPORTED +#endif // XSTATE_SUPPORTED && HOST_AMD64 /*++ Function: @@ -660,6 +660,16 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) #endif // (HAVE_GREGSET_T || HAVE___GREGSET_T) && !HOST_S390X && !HOST_LOONGARCH64 && !HOST_RISCV64 && !HOST_POWERPC64 #endif // !HAVE_FPREGS_WITH_CW +#if defined(HOST_ARM64) && !defined(TARGET_OSX) && !defined(TARGET_FREEBSD) + sve_context* sve = nullptr; + fpsimd_context* fp = nullptr; + if (((lpContext->ContextFlags & CONTEXT_FLOATING_POINT) == CONTEXT_FLOATING_POINT) || + ((lpContext->ContextFlags & CONTEXT_XSTATE) == CONTEXT_XSTATE)) + { + GetNativeSigSimdContext(native, &fp, &sve); + } +#endif // HOST_ARM64 && !TARGET_OSX && !TARGET_FREEBSD + if ((lpContext->ContextFlags & 
CONTEXT_FLOATING_POINT) == CONTEXT_FLOATING_POINT) { #ifdef HOST_AMD64 @@ -708,7 +718,6 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) } } #else // TARGET_OSX - fpsimd_context* fp = GetNativeSigSimdContext(native); if (fp) { fp->fpsr = lpContext->Fpsr; @@ -770,9 +779,10 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) } // TODO: Enable for all Unix systems -#if defined(HOST_AMD64) && defined(XSTATE_SUPPORTED) +#if defined(XSTATE_SUPPORTED) if ((lpContext->ContextFlags & CONTEXT_XSTATE) == CONTEXT_XSTATE) { +#if defined(HOST_AMD64) if (FPREG_HasYmmRegisters(native)) { _ASSERT((lpContext->XStateFeaturesMask & XSTATE_MASK_AVX) == XSTATE_MASK_AVX); @@ -801,9 +811,129 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) memcpy_s(dest, sizeof(M512) * 16, &lpContext->Zmm16, sizeof(M512) * 16); } } +#elif defined(HOST_ARM64) + if (sve && sve->head.size >= SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve->vl))) + { + //TODO-SVE: This only handles vector lengths of 128bits. + if (CONTEXT_GetSveLengthFromOS() == 16) + { + _ASSERT((lpContext->XStateFeaturesMask & XSTATE_MASK_SVE) == XSTATE_MASK_SVE); + + uint16_t vq = sve_vq_from_vl(lpContext->Vl); + + // Vector length should not have changed. + _ASSERTE(lpContext->Vl == sve->vl); + + //Note: Size of ffr register is SVE_SIG_FFR_SIZE(vq) bytes. + *(WORD*) (((uint8_t*)sve) + SVE_SIG_FFR_OFFSET(vq)) = lpContext->Ffr; + + //TODO-SVE: Copy SVE registers once they are >128bits + //Note: Size of a Z register is SVE_SIG_ZREGS_SIZE(vq) bytes. + + for (int i = 0; i < 16; i++) + { + //Note: Size of a P register is SVE_SIG_PREGS_SIZE(vq) bytes. 
+ *(WORD*) (((uint8_t*)sve) + SVE_SIG_PREG_OFFSET(vq, i)) = lpContext->P[i]; + } + } + } +#endif //HOST_AMD64 + } +#endif //XSTATE_SUPPORTED + +} + +#if defined(HOST_64BIT) && defined(HOST_ARM64) && !defined(TARGET_FREEBSD) && !defined(TARGET_OSX) +/*++ +Function : + _GetNativeSigSimdContext + + Finds the FP and SVE context from the reserved data section of a native context. + +Parameters : + uint8_t *data : native context reserved data. + uint32_t size : size of the reserved data. + fpsimd_context **fp_ptr : returns a pointer to the FP context. + sve_context **sve_ptr : returns a pointer to the SVE context. + +Return value : + None. + +--*/ +void _GetNativeSigSimdContext(uint8_t *data, uint32_t size, fpsimd_context **fp_ptr, sve_context **sve_ptr) +{ + size_t position = 0; + fpsimd_context *fp = nullptr; + sve_context *sve = nullptr; + extra_context *extra = nullptr; + bool done = false; + + while (!done) + { + _aarch64_ctx *ctx = reinterpret_cast<_aarch64_ctx *>(&data[position]); + + _ASSERTE(position + ctx->size <= size); + + switch (ctx->magic) + { + case FPSIMD_MAGIC: + _ASSERTE(fp == nullptr); + _ASSERTE(ctx->size >= sizeof(fpsimd_context)); + fp = reinterpret_cast(&data[position]); + break; + + case SVE_MAGIC: + _ASSERTE(sve == nullptr); + _ASSERTE(ctx->size >= sizeof(sve_context)); + sve = reinterpret_cast(&data[position]); + break; + + case EXTRA_MAGIC: + { + // Points to an additional section of reserved data. + _ASSERTE(extra == nullptr); + _ASSERTE(ctx->size >= sizeof(extra_context)); + fpsimd_context *fpOrig = fp; + sve_context *sveOrig = sve; + + extra = reinterpret_cast(&data[position]); + _GetNativeSigSimdContext((uint8_t*)extra->datap, extra->size, &fp, &sve); + + // There should only be one block of each type. + _ASSERTE(fpOrig == nullptr || fp == fpOrig); + _ASSERTE(sveOrig == nullptr || sve == sveOrig); + break; + } + + case 0: + _ASSERTE(ctx->size == 0); + done = true; + break; + + default: + // Any other section. 
+ _ASSERTE(ctx->size != 0); + break; + } + + position += ctx->size; + } + + if (fp) + { + *fp_ptr = fp; + } + if (sve) + { + // If this ever fires then we have an SVE context but no FP context. Given that V and Z + // registers overlap, then when propagating this data to other structures, the SVE + // context should be used to fill the FP data. + _ASSERTE(fp != nullptr); + + *sve_ptr = sve; } -#endif //HOST_AMD64 && XSTATE_SUPPORTED } +#endif // HOST_64BIT && HOST_ARM64 && !TARGET_FREEBSD && !TARGET_OSX /*++ Function : @@ -870,6 +1000,16 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex #endif // (HAVE_GREGSET_T || HAVE___GREGSET_T) && !HOST_S390X && !HOST_LOONGARCH64 && !HOST_RISCV64 && !HOST_POWERPC64 && !HOST_POWERPC64 #endif // !HAVE_FPREGS_WITH_CW +#if defined(HOST_ARM64) && !defined(TARGET_OSX) && !defined(TARGET_FREEBSD) + const fpsimd_context* fp = nullptr; + const sve_context* sve = nullptr; + if (((lpContext->ContextFlags & CONTEXT_FLOATING_POINT) == CONTEXT_FLOATING_POINT) || + ((lpContext->ContextFlags & CONTEXT_XSTATE) == CONTEXT_XSTATE)) + { + GetConstNativeSigSimdContext(native, &fp, &sve); + } +#endif // HOST_ARM64 && !TARGET_OSX && !TARGET_FREEBSD + if ((contextFlags & CONTEXT_FLOATING_POINT) == CONTEXT_FLOATING_POINT) { #ifdef HOST_AMD64 @@ -917,7 +1057,6 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex } } #else // TARGET_OSX - const fpsimd_context* fp = GetConstNativeSigSimdContext(native); if (fp) { lpContext->Fpsr = fp->fpsr; @@ -985,11 +1124,12 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex #endif } -#ifdef HOST_AMD64 +#if defined(HOST_AMD64) || defined(HOST_ARM64) if ((contextFlags & CONTEXT_XSTATE) == CONTEXT_XSTATE) { // TODO: Enable for all Unix systems -#if XSTATE_SUPPORTED +#if defined(XSTATE_SUPPORTED) +#if defined(HOST_AMD64) if (FPREG_HasYmmRegisters(native)) { uint32_t size; @@ -1018,6 +1158,33 @@ void 
CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex lpContext->XStateFeaturesMask |= XSTATE_MASK_AVX512; } } +#elif defined(HOST_ARM64) + if (sve && sve->head.size >= SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve->vl))) + { + //TODO-SVE: This only handles vector lengths of 128bits. + if (CONTEXT_GetSveLengthFromOS() == 16) + { + _ASSERTE((sve->vl > 0) && (sve->vl % 16 == 0)); + lpContext->Vl = sve->vl; + + uint16_t vq = sve_vq_from_vl(sve->vl); + + lpContext->XStateFeaturesMask |= XSTATE_MASK_SVE; + + //Note: Size of ffr register is SVE_SIG_FFR_SIZE(vq) bytes. + lpContext->Ffr = *(WORD*) (((uint8_t*)sve) + SVE_SIG_FFR_OFFSET(vq)); + + //TODO-SVE: Copy SVE registers once they are >128bits + //Note: Size of a Z register is SVE_SIG_ZREGS_SIZE(vq) bytes. + + for (int i = 0; i < 16; i++) + { + //Note: Size of a P register is SVE_SIG_PREGS_SIZE(vq) bytes. + lpContext->P[i] = *(WORD*) (((uint8_t*)sve) + SVE_SIG_PREG_OFFSET(vq, i)); + } + } + } +#endif // HOST_AMD64 else #endif // XSTATE_SUPPORTED { @@ -1027,7 +1194,7 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex lpContext->ContextFlags &= ~xstateFlags; } } -#endif // HOST_AMD64 +#endif // HOST_AMD64 || HOST_ARM64 } #if !HAVE_MACH_EXCEPTIONS diff --git a/src/coreclr/vm/arm64/asmconstants.h b/src/coreclr/vm/arm64/asmconstants.h index bcb8424b70246..eb2462f8ec78e 100644 --- a/src/coreclr/vm/arm64/asmconstants.h +++ b/src/coreclr/vm/arm64/asmconstants.h @@ -121,7 +121,11 @@ ASMCONSTANTS_C_ASSERT(SIZEOF__GSCookie == sizeof(GSCookie)); #define SIZEOF__Frame 0x10 ASMCONSTANTS_C_ASSERT(SIZEOF__Frame == sizeof(Frame)); +#if !defined(HOST_WINDOWS) +#define SIZEOF__CONTEXT 0x3e0 +#else #define SIZEOF__CONTEXT 0x390 +#endif ASMCONSTANTS_C_ASSERT(SIZEOF__CONTEXT == sizeof(T_CONTEXT));