Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

code format modify #4

Merged
merged 1 commit into from
Mar 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
code format modify
  • Loading branch information
Enigmo-g committed Mar 28, 2020
commit c04c72c7a37497bc0017e0875593e578e4a7a517
82 changes: 41 additions & 41 deletions avx512intrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -1835,72 +1835,72 @@ FORCE_INLINE __m512i _mm512_inserti64x4 (__m512i a, __m256i b, int imm8)

/*
 * Load 16 consecutive 32-bit integers starting at mem_addr into a __m512i.
 * Each of the four vect_s32 lanes is filled by one vld1q_s32 call, at
 * int32_t element offsets 0, 4, 8 and 12.
 * No alignment check is performed on mem_addr here.
 */
FORCE_INLINE __m512i _mm512_load_epi32 (void const* mem_addr)
{
    const int32_t *src = (const int32_t *)mem_addr;
    __m512i res;
    res.vect_s32[0] = vld1q_s32(src);
    res.vect_s32[1] = vld1q_s32(src + 4);
    res.vect_s32[2] = vld1q_s32(src + 8);
    res.vect_s32[3] = vld1q_s32(src + 12);
    return res;
}

/*
 * Load 8 consecutive 64-bit integers starting at mem_addr into a __m512i.
 * Each of the four vect_s64 lanes is filled by one vld1q_s64 call, at
 * int64_t element offsets 0, 2, 4 and 6.
 * No alignment check is performed on mem_addr here.
 */
FORCE_INLINE __m512i _mm512_load_epi64 (void const* mem_addr)
{
    const int64_t *src = (const int64_t *)mem_addr;
    __m512i res;
    res.vect_s64[0] = vld1q_s64(src);
    res.vect_s64[1] = vld1q_s64(src + 2);
    res.vect_s64[2] = vld1q_s64(src + 4);
    res.vect_s64[3] = vld1q_s64(src + 6);
    return res;
}

/*
 * Load 8 consecutive doubles starting at mem_addr into a __m512d.
 * Each of the four vect_f64 lanes is filled by one vld1q_f64 call, at
 * double element offsets 0, 2, 4 and 6.
 * No alignment check is performed on mem_addr here.
 */
FORCE_INLINE __m512d _mm512_load_pd (void const* mem_addr)
{
    const double *src = (const double *)mem_addr;
    __m512d res;
    res.vect_f64[0] = vld1q_f64(src);
    res.vect_f64[1] = vld1q_f64(src + 2);
    res.vect_f64[2] = vld1q_f64(src + 4);
    res.vect_f64[3] = vld1q_f64(src + 6);
    return res;
}

/*
 * Load 16 consecutive floats starting at mem_addr into a __m512.
 * Each of the four vect_f32 lanes is filled by one vld1q_f32 call, at
 * float element offsets 0, 4, 8 and 12.
 * No alignment check is performed on mem_addr here.
 */
FORCE_INLINE __m512 _mm512_load_ps (void const* mem_addr)
{
    const float *src = (const float *)mem_addr;
    __m512 res;
    res.vect_f32[0] = vld1q_f32(src);
    res.vect_f32[1] = vld1q_f32(src + 4);
    res.vect_f32[2] = vld1q_f32(src + 8);
    res.vect_f32[3] = vld1q_f32(src + 12);
    return res;
}

/*
 * Store the four vect_s32 lanes of a to memory starting at mem_addr.
 * Lane i is written with vst1q_s32 at int32_t element offset 4 * i,
 * so 16 consecutive int32_t values are written in total.
 * No alignment check is performed on mem_addr here.
 */
FORCE_INLINE void _mm512_store_epi32 (void* mem_addr, __m512i a)
{
    int32_t *dst = (int32_t *)mem_addr;
    vst1q_s32(dst, a.vect_s32[0]);
    vst1q_s32(dst + 4, a.vect_s32[1]);
    vst1q_s32(dst + 8, a.vect_s32[2]);
    vst1q_s32(dst + 12, a.vect_s32[3]);
}

/*
 * Store the four vect_s64 lanes of a to memory starting at mem_addr.
 * Lane i is written with vst1q_s64 at int64_t element offset 2 * i,
 * so 8 consecutive int64_t values are written in total.
 * No alignment check is performed on mem_addr here.
 */
FORCE_INLINE void _mm512_store_epi64 (void* mem_addr, __m512i a)
{
    int64_t *dst = (int64_t *)mem_addr;
    vst1q_s64(dst, a.vect_s64[0]);
    vst1q_s64(dst + 2, a.vect_s64[1]);
    vst1q_s64(dst + 4, a.vect_s64[2]);
    vst1q_s64(dst + 6, a.vect_s64[3]);
}

/*
 * Store the four vect_f64 lanes of a to memory starting at mem_addr.
 * Lane i is written with vst1q_f64 at double element offset 2 * i,
 * so 8 consecutive doubles are written in total.
 * No alignment check is performed on mem_addr here.
 */
FORCE_INLINE void _mm512_store_pd (double * mem_addr, __m512d a)
{
    vst1q_f64(mem_addr, a.vect_f64[0]);
    vst1q_f64(mem_addr + 2, a.vect_f64[1]);
    vst1q_f64(mem_addr + 4, a.vect_f64[2]);
    vst1q_f64(mem_addr + 6, a.vect_f64[3]);
}

/*
 * Store the four vect_f32 lanes of a to memory starting at mem_addr.
 * Lane i is written with vst1q_f32 at float element offset 4 * i,
 * so 16 consecutive floats are written in total.
 * No alignment check is performed on mem_addr here.
 */
FORCE_INLINE void _mm512_store_ps (float * mem_addr, __m512 a)
{
    vst1q_f32(mem_addr, a.vect_f32[0]);
    vst1q_f32(mem_addr + 4, a.vect_f32[1]);
    vst1q_f32(mem_addr + 8, a.vect_f32[2]);
    vst1q_f32(mem_addr + 12, a.vect_f32[3]);
}
48 changes: 24 additions & 24 deletions avxintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -2160,55 +2160,55 @@ FORCE_INLINE __m256 _mm256_cmp_ps(__m256 a, __m256 b, const int imm8)
/*
 * Load 8 consecutive 32-bit integers starting at mem_addr into a __m256i.
 * The two vect_s32 lanes are filled by vld1q_s32 at int32_t element
 * offsets 0 and 4. No alignment check is performed on mem_addr here.
 */
FORCE_INLINE __m256i _mm256_load_epi32 (void const* mem_addr)
{
    const int32_t *src = (const int32_t *)mem_addr;
    __m256i res;
    res.vect_s32[0] = vld1q_s32(src);
    res.vect_s32[1] = vld1q_s32(src + 4);
    return res;
}

/*
 * Load 4 consecutive 64-bit integers starting at mem_addr into a __m256i.
 * The two vect_s64 lanes are filled by vld1q_s64 at int64_t element
 * offsets 0 and 2. No alignment check is performed on mem_addr here.
 */
FORCE_INLINE __m256i _mm256_load_epi64 (void const* mem_addr)
{
    const int64_t *src = (const int64_t *)mem_addr;
    __m256i res;
    res.vect_s64[0] = vld1q_s64(src);
    res.vect_s64[1] = vld1q_s64(src + 2);
    return res;
}

/*
 * Load 4 consecutive doubles starting at mem_addr into a __m256d.
 * The two vect_f64 lanes are filled by vld1q_f64 at double element
 * offsets 0 and 2. No alignment check is performed on mem_addr here.
 */
FORCE_INLINE __m256d _mm256_load_pd (double const * mem_addr)
{
    __m256d res;
    /* mem_addr is already a const double pointer, so no cast is needed. */
    res.vect_f64[0] = vld1q_f64(mem_addr);
    res.vect_f64[1] = vld1q_f64(mem_addr + 2);
    return res;
}

/*
 * Load 8 consecutive floats starting at mem_addr into a __m256.
 * The two vect_f32 lanes are filled by vld1q_f32 at float element
 * offsets 0 and 4. No alignment check is performed on mem_addr here.
 */
FORCE_INLINE __m256 _mm256_load_ps (float const * mem_addr)
{
    __m256 res;
    /* mem_addr is already a const float pointer, so no cast is needed. */
    res.vect_f32[0] = vld1q_f32(mem_addr);
    res.vect_f32[1] = vld1q_f32(mem_addr + 4);
    return res;
}

/*
 * Store the two vect_s32 lanes of a to memory starting at mem_addr,
 * using vst1q_s32 at int32_t element offsets 0 and 4.
 * No alignment check is performed on mem_addr here.
 */
FORCE_INLINE void _mm256_store_epi32 (void* mem_addr, __m256i a)
{
    int32_t *dst = (int32_t *)mem_addr;
    vst1q_s32(dst, a.vect_s32[0]);
    vst1q_s32(dst + 4, a.vect_s32[1]);
}

/*
 * Store the two vect_s64 lanes of a to memory starting at mem_addr,
 * using vst1q_s64 at int64_t element offsets 0 and 2.
 * No alignment check is performed on mem_addr here.
 */
FORCE_INLINE void _mm256_store_epi64 (void* mem_addr, __m256i a)
{
    int64_t *dst = (int64_t *)mem_addr;
    vst1q_s64(dst, a.vect_s64[0]);
    vst1q_s64(dst + 2, a.vect_s64[1]);
}

/*
 * Store the two vect_f64 lanes of a to memory starting at mem_addr,
 * using vst1q_f64 at double element offsets 0 and 2.
 * No alignment check is performed on mem_addr here.
 */
FORCE_INLINE void _mm256_store_pd (double * mem_addr, __m256d a)
{
    vst1q_f64(mem_addr, a.vect_f64[0]);
    vst1q_f64(mem_addr + 2, a.vect_f64[1]);
}

/*
 * Store the two vect_f32 lanes of a to memory starting at mem_addr,
 * using vst1q_f32 at float element offsets 0 and 4.
 * No alignment check is performed on mem_addr here.
 */
FORCE_INLINE void _mm256_store_ps (float * mem_addr, __m256 a)
{
    vst1q_f32(mem_addr, a.vect_f32[0]);
    vst1q_f32(mem_addr + 4, a.vect_f32[1]);
}
16 changes: 8 additions & 8 deletions emmintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -857,39 +857,39 @@ FORCE_INLINE __m128i _mm_load_si128 (__m128i const* mem_addr)

/*
 * Load two consecutive doubles starting at mem_addr into a __m128d
 * with a single vld1q_f64 call.
 * No alignment check is performed on mem_addr here.
 */
FORCE_INLINE __m128d _mm_load_pd (double const* mem_addr)
{
    /* mem_addr is already a const double pointer, so no cast is needed. */
    return vld1q_f64(mem_addr);
}

/*
 * Load four consecutive floats starting at mem_addr into a __m128
 * with a single vld1q_f32 call.
 * No alignment check is performed on mem_addr here.
 */
FORCE_INLINE __m128 _mm_load_ps (float const* mem_addr)
{
    /* mem_addr is already a const float pointer, so no cast is needed. */
    return vld1q_f32(mem_addr);
}

/*
 * Store the vect_s32 view of a to memory at mem_addr with one vst1q_s32
 * call (four int32_t values).
 * No alignment check is performed on mem_addr here.
 */
FORCE_INLINE void _mm_store_epi32 (void* mem_addr, __m128i a)
{
    vst1q_s32((int32_t *)mem_addr, a.vect_s32);
}

/*
 * Store the vect_s64 view of a to memory at mem_addr with one vst1q_s64
 * call (two int64_t values).
 * No alignment check is performed on mem_addr here.
 */
FORCE_INLINE void _mm_store_epi64 (void* mem_addr, __m128i a)
{
    vst1q_s64((int64_t *)mem_addr, a.vect_s64);
}

/*
 * Store the 128-bit integer vector a to *mem_addr.
 * The destination is written through an int32_t pointer using the
 * vect_s32 view of a and a single vst1q_s32 call.
 * No alignment check is performed on mem_addr here.
 */
FORCE_INLINE void _mm_store_si128 (__m128i* mem_addr, __m128i a)
{
    vst1q_s32((int32_t *)mem_addr, a.vect_s32);
}

/*
 * Store the two doubles held in a to memory at mem_addr with a single
 * vst1q_f64 call.
 * No alignment check is performed on mem_addr here.
 */
FORCE_INLINE void _mm_store_pd (double* mem_addr, __m128d a)
{
    vst1q_f64(mem_addr, a);
}

/*
 * Store the four floats held in a to memory at mem_addr with a single
 * vst1q_f32 call.
 * No alignment check is performed on mem_addr here.
 */
FORCE_INLINE void _mm_store_ps (float* mem_addr, __m128 a)
{
    vst1q_f32(mem_addr, a);
}
Loading