Skip to content

Commit

Permalink
Avoid use of SSE4.1 intrinsic for SSE2
Browse files Browse the repository at this point in the history
_mm_extract_epi32 is an SSE4.1 intrinsic, though I haven't seen the compiler
actually emit an SSE4.1 instruction for it. Replace it with _mm_cvtsi128_si32
and _mm_shuffle_epi32, which are both SSE2 intrinsics.
  • Loading branch information
Freaky committed Sep 20, 2023
1 parent 3281c83 commit 1e34249
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions src/simd/x86_sse2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@ use std::arch::x86::{
__m128i,
_mm_and_si128,
_mm_cmpeq_epi8,
_mm_extract_epi32,
_mm_cvtsi128_si32,
_mm_loadu_si128,
_mm_sad_epu8,
_mm_set1_epi8,
_mm_setzero_si128,
_mm_shuffle_epi32,
_mm_sub_epi8,
_mm_xor_si128,
};
Expand All @@ -17,11 +18,12 @@ use std::arch::x86_64::{
__m128i,
_mm_and_si128,
_mm_cmpeq_epi8,
_mm_extract_epi32,
_mm_cvtsi128_si32,
_mm_loadu_si128,
_mm_sad_epu8,
_mm_set1_epi8,
_mm_setzero_si128,
_mm_shuffle_epi32,
_mm_sub_epi8,
_mm_xor_si128,
};
Expand Down Expand Up @@ -49,7 +51,7 @@ unsafe fn mm_from_offset(slice: &[u8], offset: usize) -> __m128i {
/// Adds up the unsigned bytes held in `u8s` and returns the total as a `usize`.
///
/// NOTE: this listing is a rendered diff without +/- markers, so the body below
/// shows both the pre-commit and the post-commit return expression; only the
/// second one exists in the file after the commit.
///
/// # Safety
///
/// Compiled with `#[target_feature(enable = "sse2")]`; presumably the caller
/// must guarantee that SSE2 is available on the running CPU — confirm against
/// the call sites.
#[target_feature(enable = "sse2")]
unsafe fn sum(u8s: &__m128i) -> usize {
// Sum of absolute differences against an all-zero vector: per the Intel
// intrinsic documentation this leaves the byte sum of each 8-byte half in the
// low bits of the corresponding 64-bit lane, i.e. in 32-bit elements 0 and 2.
let sums = _mm_sad_epu8(*u8s, _mm_setzero_si128());
// Pre-commit (removed) line: reads 32-bit elements 0 and 2 directly, but
// `_mm_extract_epi32` is an SSE4.1 intrinsic, not an SSE2 one.
(_mm_extract_epi32(sums, 0) + _mm_extract_epi32(sums, 2)) as usize
// Post-commit (added) line: `_mm_cvtsi128_si32` reads element 0; the shuffle
// with 0xaa (0b10_10_10_10) copies element 2 into every lane so the same
// intrinsic can read it as well.
(_mm_cvtsi128_si32(sums) + _mm_cvtsi128_si32(_mm_shuffle_epi32(sums, 0xaa))) as usize
}

#[target_feature(enable = "sse2")]
Expand Down

0 comments on commit 1e34249

Please sign in to comment.