Skip to content

Commit

Permalink
movaps, movss, movsd, ucomiss, pxor, xorps
Browse files Browse the repository at this point in the history
  • Loading branch information
MatthewMerrill committed Oct 20, 2019
1 parent c0f2ebc commit d8b08da
Show file tree
Hide file tree
Showing 9 changed files with 285 additions and 8 deletions.
39 changes: 31 additions & 8 deletions emu/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,14 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
switch (insn) {
case 0x18 ... 0x1f: TRACEI("nop modrm\t"); READMODRM; break;

case 0x28: TRACEI("movp modrm, reg");
READMODRM; MOV(modrm_val, modrm_reg,128); break;
case 0x29: TRACEI("movp reg, modrm");
READMODRM; MOV(modrm_reg, modrm_val,128); break;
case 0x28: TRACEI("movaps xmm:modrm, xmm");
READMODRM; VLOAD(xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x29: TRACEI("movaps xmm, xmm:modrm");
READMODRM; VSTORE(xmm_modrm_reg, xmm_modrm_val,128); break;

case 0x2e: TRACEI("ucomiss xmm, xmm:modrm");
READMODRM; VCOMPARE(xmm_modrm_val, xmm_modrm_reg,32);
break;

case 0x31: TRACEI("rdtsc");
RDTSC; break;
Expand Down Expand Up @@ -99,6 +103,10 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
case 0x4f: TRACEI("cmovnle modrm, reg");
READMODRM; CMOVN(LE, modrm_val, modrm_reg,oz); break;

case 0x57: TRACEI("xorps xmm, xmm:modrm");
READMODRM; VXOR(xmm_modrm_val, xmm_modrm_reg,128);
break;

case 0x80: TRACEI("jo rel\t");
READIMM; J_REL(O, imm); break;
case 0x81: TRACEI("jno rel\t");
Expand Down Expand Up @@ -249,6 +257,12 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
BSWAP(reg_di); break;
#endif

#if OP_SIZE == 16
case 0xef: TRACEI("pxor xmm:modrm xmm");
READMODRM; VXOR(xmm_modrm_val, xmm_modrm_reg,128);
break;
#endif

default: TRACEI("undefined");
UNDEFINED;
}
Expand Down Expand Up @@ -788,10 +802,12 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
case 0x0f:
READINSN;
switch (insn) {
case 0x10:
TRACEI("movsd xmm:modrm, xmm");
READMODRM; VLOAD(xmm_modrm_val, xmm_modrm_reg,64);
break;
case 0x10: TRACEI("movsd xmm:modrm, xmm");
READMODRM; VLOAD_PADMEM(xmm_modrm_val, xmm_modrm_reg,64);
break;
case 0x11: TRACEI("movsd xmm, xmm:modrm");
READMODRM; VSTORE(xmm_modrm_reg, xmm_modrm_val,64);
break;

case 0x18 ... 0x1f: TRACEI("rep nop modrm\t"); READMODRM; break;
default: TRACE("undefined"); UNDEFINED;
Expand All @@ -814,6 +830,13 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
// after a rep prefix, means we have sse/mmx insanity
READINSN;
switch (insn) {
case 0x10: TRACEI("movss xmm:modrm, xmm");
READMODRM; VLOAD_PADMEM(xmm_modrm_val, xmm_modrm_reg,32);
break;
case 0x11: TRACEI("movss xmm, xmm:modrm");
READMODRM; VSTORE(xmm_modrm_reg, xmm_modrm_val,32);
break;

case 0x18 ... 0x1f: TRACEI("repz nop modrm\t"); READMODRM; break;

// tzcnt is like bsf but the result when the input is zero is defined as the operand size
Expand Down
61 changes: 61 additions & 0 deletions emu/sse.c
Original file line number Diff line number Diff line change
@@ -1,9 +1,70 @@
#include <math.h>

#include "emu/cpu.h"

/*
 * Scalar single-precision compare that reports its result through the
 * ZF/PF/CF flags in cpu (presumably reached via the VCOMPARE(..., 32)
 * decoder macro used for "ucomiss" -- confirm against jit/gen.c).
 *
 * *f1 is compared against *f2:
 *
 *   unordered (either is NaN)   ZF=1 PF=1 CF=1
 *   *f1 >  *f2                  ZF=0 PF=0 CF=0
 *   *f1 <  *f2                  ZF=0 PF=0 CF=1
 *   *f1 == *f2                  ZF=1 PF=0 CF=0
 *
 * zf_res and pf_res are cleared afterwards.
 * NOTE(review): presumably those bits mean "derive ZF/PF lazily from the last
 * result" and must be off so the explicit values above win -- confirm in
 * emu/cpu.h.
 * NOTE(review): Intel documents UCOMISS as also clearing OF, SF and AF; this
 * function does not touch them -- confirm whether that is required here.
 */
void vec_compare32(struct cpu_state *cpu, float *f2, float *f1) {
    const float lhs = *f1;
    const float rhs = *f2;

    // For ordered operands exactly one of <, ==, > holds, so together with the
    // NaN check the three assignments below cover every possible input; there
    // is no "impossible" case left that would need a diagnostic branch.
    const int unordered = isnan(lhs) || isnan(rhs);

    cpu->zf = unordered || lhs == rhs;
    cpu->pf = unordered;
    cpu->cf = unordered || lhs < rhs;

    cpu->zf_res = 0;
    cpu->pf_res = 0;
}

// Register loads: copy the low 32 / low 64 bits / the whole register from src
// into dst. Anything in dst that is not explicitly written is left untouched.

// Transfers dw[0] only.
void vec_load32(struct cpu_state *UNUSED(cpu), union xmm_reg *src, union xmm_reg *dst) {
    dst->dw[0] = src->dw[0];
}

// Transfers qw[0] only.
void vec_load64(struct cpu_state *UNUSED(cpu), union xmm_reg *src, union xmm_reg *dst) {
    dst->qw[0] = src->qw[0];
}

// Transfers the entire union in one assignment.
void vec_load128(struct cpu_state *UNUSED(cpu), union xmm_reg *src, union xmm_reg *dst) {
    *dst = *src;
}

// Clears qw[0] and qw[1] of the given register (the same two lanes that
// vec_xor128 treats as the full 128 bits).
static inline void zero_xmm(union xmm_reg *xmm) {
    xmm->qw[1] = 0;
    xmm->qw[0] = 0;
}

// Generates vec_zload<bits>(): wipe dst first, then hand over to the matching
// vec_load<bits>() so that everything the load does not write ends up zero.
// The generator macro is local to this file and removed again right after use.
#define DEFINE_ZLOAD(bits) \
    void vec_zload##bits(struct cpu_state *cpu, union xmm_reg *src, union xmm_reg *dst) { \
        zero_xmm(dst); \
        vec_load##bits(cpu, src, dst); \
    }
DEFINE_ZLOAD(32)
DEFINE_ZLOAD(64)
DEFINE_ZLOAD(128)
#undef DEFINE_ZLOAD

// Register stores: same copies as the loads, but the destination is the first
// register argument and the source the second. Bits of dst that are not
// explicitly written keep their previous value.

// Writes dw[0] only.
void vec_store32(struct cpu_state *UNUSED(cpu), union xmm_reg *dst, union xmm_reg *src) {
    dst->dw[0] = src->dw[0];
}

// Writes qw[0] only.
void vec_store64(struct cpu_state *UNUSED(cpu), union xmm_reg *dst, union xmm_reg *src) {
    dst->qw[0] = src->qw[0];
}

// Writes the entire union in one assignment.
void vec_store128(struct cpu_state *UNUSED(cpu), union xmm_reg *dst, union xmm_reg *src) {
    *dst = *src;
}

// Bitwise XOR of src into dst, applied to qw[0] and then qw[1].
// XOR-ing a register with itself (src == dst) therefore leaves it all zero.
void vec_xor128(struct cpu_state *UNUSED(cpu), union xmm_reg *src, union xmm_reg *dst) {
    for (int lane = 0; lane < 2; lane++) {
        dst->qw[lane] ^= src->qw[lane];
    }
}
15 changes: 15 additions & 0 deletions emu/sse.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,22 @@

#include "emu/cpu.h"

// Scalar float compare of *f1 against *f2; the result is written to the
// zf/pf/cf flags in cpu, so cpu is genuinely used by this helper.
void vec_compare32(struct cpu_state *cpu, float *f2, float *f1);

// Parameter names below mirror the definitions in emu/sse.c.
// Loads take (src, dst): data flows from the first register argument to the second.
void vec_load32(struct cpu_state *UNUSED(cpu), union xmm_reg *src, union xmm_reg *dst);
void vec_load64(struct cpu_state *UNUSED(cpu), union xmm_reg *src, union xmm_reg *dst);
void vec_load128(struct cpu_state *UNUSED(cpu), union xmm_reg *src, union xmm_reg *dst);

// Zeroes out the destination before loading.
// Used in some instructions like movss when the src is memory.
// cpu is forwarded to the matching vec_load helper.
void vec_zload32(struct cpu_state *cpu, union xmm_reg *src, union xmm_reg *dst);
void vec_zload64(struct cpu_state *cpu, union xmm_reg *src, union xmm_reg *dst);
void vec_zload128(struct cpu_state *cpu, union xmm_reg *src, union xmm_reg *dst);

// Stores take (dst, src): data flows from the second register argument to the first.
void vec_store32(struct cpu_state *UNUSED(cpu), union xmm_reg *dst, union xmm_reg *src);
void vec_store64(struct cpu_state *UNUSED(cpu), union xmm_reg *dst, union xmm_reg *src);
void vec_store128(struct cpu_state *UNUSED(cpu), union xmm_reg *dst, union xmm_reg *src);

// dst ^= src over the whole register.
void vec_xor128(struct cpu_state *cpu, union xmm_reg *src, union xmm_reg *dst);

#endif
10 changes: 10 additions & 0 deletions jit/gen.c
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,17 @@ static inline bool gen_vec(enum arg rm, enum arg reg, void (*helper)(), gadget_t
// Emits a vector operation through _v() using helper vec_<op><z> and the
// vec_helper_store<z>_gadgets table. Note the argument order: arg_<dst> is
// passed to _v() first, arg_<src> second.
#define v_write(op, src, dst,z) _v(arg_##dst, arg_##src, vec_##op##z, vec_helper_store##z##_gadgets, z)

// Plain load: forwards to v() with the vec_load<z> helper family.
#define VLOAD(src, dst,z) v(load, src, dst,z)
// Zeroing load (vec_zload<z>). src and dst are deliberately swapped when
// forwarding to v_write; combined with v_write's own swap this puts arg_<src>
// first and arg_<dst> second in the resulting _v() call.
#define VZLOAD(src, dst,z) v_write(zload, dst, src,z)
// Load that zero-pads the destination only for memory sources: picks VZLOAD
// when src is xmm_modrm_val and the decoded modrm is not a register operand,
// and plain VLOAD otherwise. Used by the movss/movsd load cases.
// Relies on a `modrm` variable being in scope at the expansion site.
#define VLOAD_PADMEM(src, dst, z) do { \
if (arg_##src == arg_xmm_modrm_val && modrm.type != modrm_reg) { \
VZLOAD(src, dst, z); \
} else { \
VLOAD(src, dst, z); \
} \
} while (0)
// Store: vec_store<z> helper, routed through v_write.
#define VSTORE(src, dst,z) v_write(store, src, dst,z)
// Compare: vec_compare<z> helper, routed through v().
#define VCOMPARE(src, dst,z) v(compare, src, dst,z)
// XOR: vec_xor<z> helper, routed through v().
#define VXOR(src, dst,z) v(xor, src, dst,z)

#define DECODER_RET int
#define DECODER_NAME gen_step
Expand Down
19 changes: 19 additions & 0 deletions tests/e2e/sse2/expected.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
movaps
11.11 22.22 33.33 44.44
55.55 66.66 77.77 88.88
55.55 66.66 77.77 88.88
movss
11.11 22.22 33.33 44.44
55.55 66.66 77.77 88.88
55.55 22.22 33.33 44.44
16.12 00.00 00.00 00.00
16.12
xorps
0.00E+00 0.00E+00 0.00E+00 0.00E+00
1.11E+01 2.22E+01 3.33E+01 4.44E+01
5.55E+01 6.67E+01 7.78E+01 8.89E+01
1.11E+01 2.22E+01 3.33E+01 4.44E+01
0.00E+00 0.00E+00 0.00E+00 0.00E+00
7.05E-37 5.31E-37 1.46E-38 1.18E-38
1.11E+01 2.22E+01 3.33E+01 4.44E+01
0.00E+00 0.00E+00 0.00E+00 0.00E+00
29 changes: 29 additions & 0 deletions tests/e2e/sse2/movaps.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#include <stdio.h>
#include <xmmintrin.h>

// Prints the four floats currently held in the local array `out`.
#define printout() printf("%05.2f %05.2f %05.2f %05.2f\n", out[0], out[1], out[2], out[3])

/*
 * End-to-end check for register-to-register movaps.
 *
 * Prints the initial contents of two vectors, then copies the second onto the
 * first with an explicit movaps instruction and prints the result.
 * Returns 0 so the process exit status is well defined.
 */
int main(void) {
    // _mm_load_ps/_mm_store_ps are aligned accesses and require 16-byte
    // aligned addresses; a plain float[4] on the stack does not guarantee that.
    float out[4] __attribute__((aligned(16))) = { 0, 0, 0, 0 };
    float buf1234[4] __attribute__((aligned(16))) = { 11.11, 22.22, 33.33, 44.44 };
    float buf5678[4] __attribute__((aligned(16))) = { 55.55, 66.66, 77.77, 88.88 };

    // xmm1 initially 1234
    __m128 xmm1 = _mm_load_ps(buf1234);
    _mm_store_ps(out, xmm1);
    printout();

    // xmm2 initially 5678
    __m128 xmm2 = _mm_load_ps(buf5678);
    _mm_store_ps(out, xmm2);
    printout();

    // Move xmm2 onto xmm1 (AT&T operand order: source first, destination second).
    asm volatile( "movaps %[vec2], %[vec1]\n\t"
                  : [vec1] "+x" (xmm1)
                  : [vec2] "x" (xmm2));

    _mm_store_ps(out, xmm1);
    printout();
    return 0;
}
62 changes: 62 additions & 0 deletions tests/e2e/sse2/movss.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#include <stdio.h>
#include <xmmintrin.h>

// Keeps the asm helpers as real out-of-line functions.
#define NOINLINE __attribute__ ((noinline))
// Prints the four floats currently held in the local array `out`.
#define printout() printf("%05.2f %05.2f %05.2f %05.2f\n", out[0], out[1], out[2], out[3])

void move5(__m128 *xmm1, __m128 *xmm2);
void move1612(__m128 *xmm1, float fa);
void store1612(__m128 *xmm1, float *fa);

/*
 * End-to-end check for movss in its three forms:
 *   - register to register (low float replaced, upper three kept),
 *   - memory to register   (low float loaded, upper three zeroed),
 *   - register to memory   (low float stored).
 * Returns 0 so the process exit status is well defined.
 */
int main(void) {
    // _mm_load_ps/_mm_store_ps are aligned accesses and require 16-byte
    // aligned addresses; a plain float[4] on the stack does not guarantee that.
    float out[4] __attribute__((aligned(16))) = { 0, 0, 0, 0 };
    float buf1234[4] __attribute__((aligned(16))) = { 11.11, 22.22, 33.33, 44.44 };
    float buf5678[4] __attribute__((aligned(16))) = { 55.55, 66.66, 77.77, 88.88 };
    float fa = 16.12;

    // xmm1 initially 1234
    __m128 xmm1 = _mm_load_ps(buf1234);
    _mm_store_ps(out, xmm1);
    printout();

    // xmm2 initially 5678
    __m128 xmm2 = _mm_load_ps(buf5678);
    _mm_store_ps(out, xmm2);
    printout();

    // Register-to-register form via the intrinsic (the hand-written asm
    // variant, move5(&xmm1, &xmm2), is currently not exercised): expect 5234.
    __m128 xmm3 = _mm_move_ss(xmm1, xmm2);
    _mm_store_ps(out, xmm3);
    printout();

    // Memory-to-register form: expect 16.12 followed by three zeros.
    move1612(&xmm1, fa);
    _mm_store_ps(out, xmm1);
    printout();

    // Register-to-memory form: fa is reset first so the printed 16.12 must
    // have come from the store.
    fa = 00.00;
    store1612(&xmm1, &fa);
    printf("%05.2f\n", fa);
    return 0;
}

// Register-to-register movss written as inline asm: copies the lowest float of
// *xmm2 into the lowest float of *xmm1.
// With the test data this moves the 5 from 5678; the rest should remain: 5234.
// *xmm1 is a read-write ("+x") operand because its other elements must be
// preserved across the instruction.
// Intrinsic equivalent: *xmm1 = _mm_move_ss(*xmm1, *xmm2);
// NOTE(review): main() in this file has its call to move5 commented out, so
// this helper is currently not exercised.
void NOINLINE move5(__m128 *xmm1, __m128 *xmm2) {
asm volatile( "movss %[vec2], %[vec1]\n\t"
: [vec1] "+x" (*xmm1)
: [vec2] "x" (*xmm2));
}

// Memory-to-register movss written as inline asm: loads the float `fa` into the
// first position of *xmm1.
// fa is passed by value and bound with an "m" constraint, so the instruction
// reads this function's own copy from memory.
void NOINLINE move1612(__m128 *xmm1, float fa) {
// Move the 16.12 into first position of xmm1.
// The source is memory, so the remaining three elements should be zeroed.
asm volatile( "movss %[flt], %[vec]\n\t"
: [vec] "+x" (*xmm1)
: [flt] "m" (fa));
}

// Register-to-memory movss written as inline asm: stores the lowest float of
// *xmm1 into *fa.
// *fa is declared as a read-write memory operand ("+m"); *xmm1 is input only.
void NOINLINE store1612(__m128 *xmm1, float *fa) {
// Store the 16.12 into float.
asm volatile( "movss %[vec], %[flt]\n\t"
: [flt] "+m" (*fa)
: [vec] "x" (*xmm1));
}
12 changes: 12 additions & 0 deletions tests/e2e/sse2/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/sh
# Builds and runs each SSE end-to-end test program in turn.
# On stdout every program's output is preceded by a line holding its name;
# presumably the harness compares that stream with expected.txt -- confirm.
for t in movaps movss xorps; do
    # Stop on a failed build instead of running a stale or missing binary.
    gcc -msse2 "$t.c" -o "test_$t" || exit 1
    echo "$t"
    # The exit status of the test binary is intentionally not checked here;
    # only its output matters for the comparison.
    "./test_$t"
done
46 changes: 46 additions & 0 deletions tests/e2e/sse2/xorps.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#include <stdio.h>
#include <xmmintrin.h>

// Prints the four floats currently held in the local array `out`, in
// scientific notation.
#define printout() printf("%05.2E %05.2E %05.2E %05.2E\n", out[0], out[1], out[2], out[3])

/*
 * End-to-end check for xorps via _mm_xor_ps / _mm_setzero_ps.
 *
 * Prints three input vectors, then a series of XOR results whose values are
 * known from the identities x^0 = x, x^x = 0 and b^(a^b) = a.
 * Returns 0 so the process exit status is well defined.
 */
int main(void) {
    // _mm_load_ps/_mm_store_ps are aligned accesses and require 16-byte
    // aligned addresses; a plain float[4] on the stack does not guarantee that.
    float out[4] __attribute__((aligned(16))) = { 0, 0, 0, 0 };
    float buf0000[4] __attribute__((aligned(16))) = { 00.00, 00.00, 00.00, 00.00 };
    float buf1234[4] __attribute__((aligned(16))) = { 11.11, 22.22, 33.33, 44.44 };
    float buf5678[4] __attribute__((aligned(16))) = { 55.55, 66.66, 77.77, 88.88 };

    // xmm0 initially 0000
    __m128 xmm0 = _mm_load_ps(buf0000);
    _mm_store_ps(out, xmm0);
    printout();

    // xmm1 initially 1234
    __m128 xmm1 = _mm_load_ps(buf1234);
    _mm_store_ps(out, xmm1);
    printout();

    // xmm2 initially 5678
    __m128 xmm2 = _mm_load_ps(buf5678);
    _mm_store_ps(out, xmm2);
    printout();

    // 0000 ^ 1234 = 1234
    _mm_store_ps(out, _mm_xor_ps(xmm0, xmm1));
    printout();

    // 1234 ^ 1234 = 0000
    _mm_store_ps(out, _mm_xor_ps(xmm1, xmm1));
    printout();

    // 1234 ^ 5678 = some known value
    _mm_store_ps(out, _mm_xor_ps(xmm1, xmm2));
    printout();

    // 5678 ^ (1234 ^ 5678) = 1234
    _mm_store_ps(out, _mm_xor_ps(xmm2, _mm_xor_ps(xmm1, xmm2)));
    printout();

    // setzero with xorps = 0000
    _mm_store_ps(out, _mm_setzero_ps());
    printout();
    return 0;
}

0 comments on commit d8b08da

Please sign in to comment.