Skip to content

Commit

Permalink
ssse3 (rust-lang#224)
Browse files Browse the repository at this point in the history
* ssse3: _mm_abs_pi8 failing

Intrinsic has incorrect return type!
<8 x i8> (<8 x i8>)* @llvm.x86.ssse3.pabs.b

* Introduce a x86_mmx type

And make it compatible with i8x8 and u8x8.
Alex suggested to change the i8x8 declaration as:
```
struct i8x8(i64);
```
But I don't see how to make it compatible with the
existing code/macros.

* ssse3: _mm_abs_pi16, _mm_abs_pi32, _mm_shuffle_pi8

* ssse3: _mm_abs_pi16, _mm_abs_pi32, _mm_shuffle_pi8 tests

* Replace x86_mmx by __m64

* ssse3: _mm_sign_pi8, _mm_sign_pi16, _mm_sign_pi32

* ssse3: _mm_mulhrs_pi16

* ssse3: _mm_maddubs_pi16

* ssse3: _mm_hsub_pi16, _mm_hsub_pi32, _mm_hsubs_pi16

* ssse3: _mm_hadd_pi16, _mm_hadd_pi32, _mm_hadds_pi16

* Move some ssse3 intrinsics from i586 to i686
  • Loading branch information
gwenn authored and alexcrichton committed Dec 3, 2017
1 parent f8f6797 commit 61cab5d
Show file tree
Hide file tree
Showing 2 changed files with 375 additions and 0 deletions.
3 changes: 3 additions & 0 deletions coresimd/src/x86/i686/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ pub use self::sse::*;
mod sse2;
pub use self::sse2::*;

mod ssse3;
pub use self::ssse3::*;

mod sse41;
pub use self::sse41::*;

Expand Down
372 changes: 372 additions & 0 deletions coresimd/src/x86/i686/ssse3.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,372 @@
//! Supplemental Streaming SIMD Extensions 3 (SSSE3)

#[cfg(test)]
use stdsimd_test::assert_instr;

use core::mem;
use v64::*;
use x86::__m64;

/// Compute the absolute value of packed 8-bit integers in `a` and
/// return the unsigned results.
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(pabsb))]
pub unsafe fn _mm_abs_pi8(a: i8x8) -> u8x8 {
mem::transmute(pabsb(mem::transmute(a)))
}

/// Compute the absolute value of packed 8-bit integers in `a`, and return the
/// unsigned results.
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(pabsw))]
pub unsafe fn _mm_abs_pi16(a: i16x4) -> u16x4 {
mem::transmute(pabsw(mem::transmute(a)))
}

/// Compute the absolute value of packed 32-bit integers in `a`, and return the
/// unsigned results.
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(pabsd))]
pub unsafe fn _mm_abs_pi32(a: i32x2) -> u32x2 {
mem::transmute(pabsd(mem::transmute(a)))
}

/// Shuffle packed 8-bit integers in `a` according to shuffle control mask in
/// the corresponding 8-bit element of `b`, and return the results
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(pshufb))]
pub unsafe fn _mm_shuffle_pi8(a: u8x8, b: u8x8) -> u8x8 {
mem::transmute(pshufb(mem::transmute(a), mem::transmute(b)))
}

/// Concatenates the two 64-bit integer vector operands, and right-shifts
/// the result by the number of bytes specified in the immediate operand.
/*#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(palignr, n = 15))]
pub unsafe fn _mm_alignr_pi8(a: i8x8, b: i8x8, n: i32) -> i8x8 {
mem::transmute(palignrb(mem::transmute(a), mem::transmute(b), n))
}*/

/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of [4 x i16].
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(phaddw))]
pub unsafe fn _mm_hadd_pi16(a: i16x4, b: i16x4) -> i16x4 {
mem::transmute(phaddw(mem::transmute(a), mem::transmute(b)))
}

/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of [2 x i32].
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(phaddd))]
pub unsafe fn _mm_hadd_pi32(a: i32x2, b: i32x2) -> i32x2 {
mem::transmute(phaddd(mem::transmute(a), mem::transmute(b)))
}

/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of [4 x i16]. Positive sums greater than 7FFFh are
/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(phaddsw))]
pub unsafe fn _mm_hadds_pi16(a: i16x4, b: i16x4) -> i16x4 {
mem::transmute(phaddsw(mem::transmute(a), mem::transmute(b)))
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of [4 x i16].
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(phsubsw))]
pub unsafe fn _mm_hsub_pi16(a: i16x4, b: i16x4) -> i16x4 {
mem::transmute(phsubsw(mem::transmute(a), mem::transmute(b)))
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of [2 x i32].
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(phsubd))]
pub unsafe fn _mm_hsub_pi32(a: i32x2, b: i32x2) -> i32x2 {
mem::transmute(phsubd(mem::transmute(a), mem::transmute(b)))
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of [4 x i16]. Positive differences greater than
/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
/// saturated to 8000h.
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(phsubsw))]
pub unsafe fn _mm_hsubs_pi16(a: i16x4, b: i16x4) -> i16x4 {
mem::transmute(phsubsw(mem::transmute(a), mem::transmute(b)))
}

/// Multiplies corresponding pairs of packed 8-bit unsigned integer
/// values contained in the first source operand and packed 8-bit signed
/// integer values contained in the second source operand, adds pairs of
/// contiguous products with signed saturation, and writes the 16-bit sums to
/// the corresponding bits in the destination.
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(pmaddubsw))]
pub unsafe fn _mm_maddubs_pi16(a: u8x8, b: i8x8) -> i16x4 {
mem::transmute(pmaddubsw(mem::transmute(a), mem::transmute(b)))
}

/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
/// products to the 18 most significant bits by right-shifting, rounds the
/// truncated value by adding 1, and writes bits [16:1] to the destination.
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(pmulhrsw))]
pub unsafe fn _mm_mulhrs_pi16(a: i16x4, b: i16x4) -> i16x4 {
mem::transmute(pmulhrsw(mem::transmute(a), mem::transmute(b)))
}

/// Negate packed 8-bit integers in `a` when the corresponding signed 8-bit
/// integer in `b` is negative, and return the results.
/// Element in result are zeroed out when the corresponding element in `b` is
/// zero.
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(psignb))]
pub unsafe fn _mm_sign_pi8(a: i8x8, b: i8x8) -> i8x8 {
mem::transmute(psignb(mem::transmute(a), mem::transmute(b)))
}

/// Negate packed 16-bit integers in `a` when the corresponding signed 16-bit
/// integer in `b` is negative, and return the results.
/// Element in result are zeroed out when the corresponding element in `b` is
/// zero.
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(psignw))]
pub unsafe fn _mm_sign_pi16(a: i16x4, b: i16x4) -> i16x4 {
mem::transmute(psignw(mem::transmute(a), mem::transmute(b)))
}

/// Negate packed 32-bit integers in `a` when the corresponding signed 32-bit
/// integer in `b` is negative, and return the results.
/// Element in result are zeroed out when the corresponding element in `b` is
/// zero.
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(psignd))]
pub unsafe fn _mm_sign_pi32(a: i32x2, b: i32x2) -> i32x2 {
mem::transmute(psignd(mem::transmute(a), mem::transmute(b)))
}

#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.ssse3.pabs.b"]
fn pabsb(a: __m64) -> __m64;

#[link_name = "llvm.x86.ssse3.pabs.w"]
fn pabsw(a: __m64) -> __m64;

#[link_name = "llvm.x86.ssse3.pabs.d"]
fn pabsd(a: __m64) -> __m64;

#[link_name = "llvm.x86.ssse3.pshuf.b"]
fn pshufb(a: __m64, b: __m64) -> __m64;

/*#[link_name = "llvm.x86.mmx.palignr.b"]
fn palignrb(a: __m64, b: __m64, n: i32) -> __m64;*/

#[link_name = "llvm.x86.ssse3.phadd.w"]
fn phaddw(a: __m64, b: __m64) -> __m64;

#[link_name = "llvm.x86.ssse3.phadd.d"]
fn phaddd(a: __m64, b: __m64) -> __m64;

#[link_name = "llvm.x86.ssse3.phadd.sw"]
fn phaddsw(a: __m64, b: __m64) -> __m64;

#[link_name = "llvm.x86.ssse3.phsub.w"]
fn phsubw(a: __m64, b: __m64) -> __m64;

#[link_name = "llvm.x86.ssse3.phsub.d"]
fn phsubd(a: __m64, b: __m64) -> __m64;

#[link_name = "llvm.x86.ssse3.phsub.sw"]
fn phsubsw(a: __m64, b: __m64) -> __m64;

#[link_name = "llvm.x86.ssse3.pmadd.ub.sw"]
fn pmaddubsw(a: __m64, b: __m64) -> __m64;

#[link_name = "llvm.x86.ssse3.pmul.hr.sw"]
fn pmulhrsw(a: __m64, b: __m64) -> __m64;

#[link_name = "llvm.x86.ssse3.psign.b"]
fn psignb(a: __m64, b: __m64) -> __m64;

#[link_name = "llvm.x86.ssse3.psign.w"]
fn psignw(a: __m64, b: __m64) -> __m64;

#[link_name = "llvm.x86.ssse3.psign.d"]
fn psignd(a: __m64, b: __m64) -> __m64;
}

#[cfg(test)]
mod tests {
use stdsimd_test::simd_test;

use v64::*;
use x86::i686::ssse3;

#[simd_test = "ssse3"]
unsafe fn _mm_abs_pi8() {
let r = ssse3::_mm_abs_pi8(i8x8::splat(-5));
assert_eq!(r, u8x8::splat(5));
}

#[simd_test = "ssse3"]
unsafe fn _mm_abs_pi16() {
let r = ssse3::_mm_abs_pi16(i16x4::splat(-5));
assert_eq!(r, u16x4::splat(5));
}

#[simd_test = "ssse3"]
unsafe fn _mm_abs_pi32() {
let r = ssse3::_mm_abs_pi32(i32x2::splat(-5));
assert_eq!(r, u32x2::splat(5));
}

#[simd_test = "ssse3"]
unsafe fn _mm_shuffle_pi8() {
let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let b = u8x8::new(4, 128, 4, 3, 24, 12, 6, 19);
let expected = u8x8::new(5, 0, 5, 4, 1, 5, 7, 4);
let r = ssse3::_mm_shuffle_pi8(a, b);
assert_eq!(r, expected);
}

/*#[simd_test = "ssse3"]
unsafe fn _mm_alignr_pi8() {
let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let b = i8x8::new(4, 63, 4, 3, 24, 12, 6, 19);
let r = ssse3::_mm_alignr_pi8(a, b, 33);
assert_eq!(r, i8x8::splat(0));
let r = ssse3::_mm_alignr_pi8(a, b, 17);
let expected = i8x8::new(2, 3, 4, 5, 6, 7, 8, 0);
assert_eq!(r, expected);
let r = ssse3::_mm_alignr_pi8(a, b, 16);
assert_eq!(r, a);
let r = ssse3::_mm_alignr_pi8(a, b, 15);
let expected = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
assert_eq!(r, expected);
let r = ssse3::_mm_alignr_pi8(a, b, 0);
assert_eq!(r, b);
}*/

#[simd_test = "ssse3"]
unsafe fn _mm_hadd_pi16() {
let a = i16x4::new(1, 2, 3, 4);
let b = i16x4::new(4, 128, 4, 3);
let expected = i16x4::new(3, 7, 132, 7);
let r = ssse3::_mm_hadd_pi16(a, b);
assert_eq!(r, expected);
}

#[simd_test = "ssse3"]
unsafe fn _mm_hadd_pi32() {
let a = i32x2::new(1, 2);
let b = i32x2::new(4, 128);
let expected = i32x2::new(3, 132);
let r = ssse3::_mm_hadd_pi32(a, b);
assert_eq!(r, expected);
}

#[simd_test = "ssse3"]
unsafe fn _mm_hadds_pi16() {
let a = i16x4::new(1, 2, 3, 4);
let b = i16x4::new(32767, 1, -32768, -1);
let expected = i16x4::new(3, 7, 32767, -32768);
let r = ssse3::_mm_hadds_pi16(a, b);
assert_eq!(r, expected);
}

#[simd_test = "ssse3"]
unsafe fn _mm_hsub_pi16() {
let a = i16x4::new(1, 2, 3, 4);
let b = i16x4::new(4, 128, 4, 3);
let expected = i16x4::new(-1, -1, -124, 1);
let r = ssse3::_mm_hsub_pi16(a, b);
assert_eq!(r, expected);
}

#[simd_test = "ssse3"]
unsafe fn _mm_hsub_pi32() {
let a = i32x2::new(1, 2);
let b = i32x2::new(4, 128);
let expected = i32x2::new(-1, -124);
let r = ssse3::_mm_hsub_pi32(a, b);
assert_eq!(r, expected);
}

#[simd_test = "ssse3"]
unsafe fn _mm_hsubs_pi16() {
let a = i16x4::new(1, 2, 3, 4);
let b = i16x4::new(4, 128, 4, 3);
let expected = i16x4::new(-1, -1, -124, 1);
let r = ssse3::_mm_hsubs_pi16(a, b);
assert_eq!(r, expected);
}

#[simd_test = "ssse3"]
unsafe fn _mm_maddubs_pi16() {
let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let b = i8x8::new(4, 63, 4, 3, 24, 12, 6, 19);
let expected = i16x4::new(130, 24, 192, 194);
let r = ssse3::_mm_maddubs_pi16(a, b);
assert_eq!(r, expected);
}

#[simd_test = "ssse3"]
unsafe fn _mm_mulhrs_pi16() {
let a = i16x4::new(1, 2, 3, 4);
let b = i16x4::new(4, 32767, -1, -32768);
let expected = i16x4::new(0, 2, 0, -4);
let r = ssse3::_mm_mulhrs_pi16(a, b);
assert_eq!(r, expected);
}

#[simd_test = "ssse3"]
unsafe fn _mm_sign_pi8() {
let a = i8x8::new(1, 2, 3, 4, -5, -6, 7, 8);
let b = i8x8::new(4, 64, 0, 3, 1, -1, -2, 1);
let expected = i8x8::new(1, 2, 0, 4, -5, 6, -7, 8);
let r = ssse3::_mm_sign_pi8(a, b);
assert_eq!(r, expected);
}

#[simd_test = "ssse3"]
unsafe fn _mm_sign_pi16() {
let a = i16x4::new(-1, 2, 3, 4);
let b = i16x4::new(1, -1, 1, 0);
let expected = i16x4::new(-1, -2, 3, 0);
let r = ssse3::_mm_sign_pi16(a, b);
assert_eq!(r, expected);
}

#[simd_test = "ssse3"]
unsafe fn _mm_sign_pi32() {
let a = i32x2::new(-1, 2);
let b = i32x2::new(1, 0);
let expected = i32x2::new(-1, 0);
let r = ssse3::_mm_sign_pi32(a, b);
assert_eq!(r, expected);
}
}

0 comments on commit 61cab5d

Please sign in to comment.