From 5b0b7909388585aafc159ad8496a84442a7d8fa9 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Thu, 1 Aug 2024 10:12:57 +0100 Subject: [PATCH 1/2] Fix SVE2.1 quadword gather load/scatter store intrinsics --- main/acle.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/main/acle.md b/main/acle.md index 96957a27..9ff5c3f8 100644 --- a/main/acle.md +++ b/main/acle.md @@ -404,6 +404,7 @@ Armv8.4-A [[ARMARMv84]](#ARMARMv84). Support is added for the Dot Product intrin level of the [Function Multi Versioning](#function-multi-versioning). * Added [**Alpha**](#current-status-and-anticipated-changes) support for SME2.1 (FEAT_SME2p1). +* Fixed SVE2.1 quadword gather load/scatter store intrinsics. ### References @@ -8959,14 +8960,14 @@ Gather Load Quadword. // _bf16, _f16, _f32, _f64 svint8_t svld1q_gather[_u64base]_s8(svbool_t pg, svuint64_t zn); svint8_t svld1q_gather[_u64base]_offset_s8(svbool_t pg, svuint64_t zn, int64_t offset); - svint8_t svld1q_gather_[u64]offset[_s8](svbool_t pg, const int8_t *base, svuint64_t offset); + svint8_t svld1q_gather[_u64]_offset[_s8](svbool_t pg, const int8_t *base, svuint64_t offset); // Variants are also available for: // _u16, _u32, _s32, _u64, _s64 // _bf16, _f16, _f32, _f64 - svint16_t svld1q_gather_[u64]index[_s16](svbool_t pg, const int16_t *base, svuint64_t index); - svint8_t svld1q_gather[_u64base]_index_s8(svbool_t pg, svuint64_t zn, int64_t index); + svint16_t svld1q_gather[_u64]_index[_s16](svbool_t pg, const int16_t *base, svuint64_t index); + svint16_t svld1q_gather[_u64base]_index_s16(svbool_t pg, svuint64_t zn, int64_t index); ``` #### LD2Q, LD3Q, LD4Q @@ -9054,14 +9055,14 @@ Scatter store quadwords. // _bf16, _f16, _f32, _f64 void svst1q_scatter[_u64base][_s8](svbool_t pg, svuint64_t zn, svint8_t data); void svst1q_scatter[_u64base]_offset[_s8](svbool_t pg, svuint64_t zn, int64_t offset, svint8_t data); - void svst1q_scatter_[u64]offset[_s8](svbool_t pg, const uint8_t *base, svuint64_t offset, svint8_t data); + void svst1q_scatter[_u64]_offset[_s8](svbool_t pg, const uint8_t *base, svuint64_t offset, svint8_t data); // Variants are also available for: // _u16, _u32, _s32, _u64, _s64 // _bf16, _f16, _f32, _f64 - void svst1q_scatter[_u64base]_index[_s8](svbool_t pg, svuint64_t zn, int64_t index, svint8_t data); - void svst1q_scatter_[u64]index_[s16](svbool_t pg, const int16_t *base, svuint64_t index, svint16_t data); - ``` + void svst1q_scatter[_u64]_index[_s16](svbool_t pg, const int16_t *base, svuint64_t index, svint16_t data); + void svst1q_scatter[_u64base]_index[_s16](svbool_t pg, svuint64_t zn, int64_t index, svint16_t data); +``` #### ST2Q, ST3Q, ST4Q From eca37fbe83aebf995257f1139f0f8e8675a04f26 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 3 Sep 2024 13:42:00 +0100 Subject: [PATCH 2/2] [fixup] Revert bad changes to scatter/gather u64offset --- main/acle.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/main/acle.md b/main/acle.md index 9ff5c3f8..46609fb9 100644 --- a/main/acle.md +++ b/main/acle.md @@ -8960,13 +8960,13 @@ Gather Load Quadword. // _bf16, _f16, _f32, _f64 svint8_t svld1q_gather[_u64base]_s8(svbool_t pg, svuint64_t zn); svint8_t svld1q_gather[_u64base]_offset_s8(svbool_t pg, svuint64_t zn, int64_t offset); - svint8_t svld1q_gather[_u64]_offset[_s8](svbool_t pg, const int8_t *base, svuint64_t offset); + svint8_t svld1q_gather_[u64]offset[_s8](svbool_t pg, const int8_t *base, svuint64_t offset); // Variants are also available for: // _u16, _u32, _s32, _u64, _s64 // _bf16, _f16, _f32, _f64 - svint16_t svld1q_gather[_u64]_index[_s16](svbool_t pg, const int16_t *base, svuint64_t index); + svint16_t svld1q_gather_[u64]index[_s16](svbool_t pg, const int16_t *base, svuint64_t index); svint16_t svld1q_gather[_u64base]_index_s16(svbool_t pg, svuint64_t zn, int64_t index); ``` @@ -9055,12 +9055,12 @@ Scatter store quadwords. // _bf16, _f16, _f32, _f64 void svst1q_scatter[_u64base][_s8](svbool_t pg, svuint64_t zn, svint8_t data); void svst1q_scatter[_u64base]_offset[_s8](svbool_t pg, svuint64_t zn, int64_t offset, svint8_t data); - void svst1q_scatter[_u64]_offset[_s8](svbool_t pg, const uint8_t *base, svuint64_t offset, svint8_t data); + void svst1q_scatter_[u64]offset[_s8](svbool_t pg, const uint8_t *base, svuint64_t offset, svint8_t data); // Variants are also available for: // _u16, _u32, _s32, _u64, _s64 // _bf16, _f16, _f32, _f64 - void svst1q_scatter[_u64]_index[_s16](svbool_t pg, const int16_t *base, svuint64_t index, svint16_t data); + void svst1q_scatter_[u64]index[_s16](svbool_t pg, const int16_t *base, svuint64_t index, svint16_t data); void svst1q_scatter[_u64base]_index[_s16](svbool_t pg, svuint64_t zn, int64_t index, svint16_t data); ```