diff --git a/emu/cpu.h b/emu/cpu.h index a4628288d7..91fdd64c78 100644 --- a/emu/cpu.h +++ b/emu/cpu.h @@ -18,10 +18,11 @@ union mm_reg { }; union xmm_reg { qword_t qw[2]; - dword_t dw[4]; + uint32_t u32[4]; + uint16_t u16[8]; + uint8_t u8[16]; float f32[4]; double f64[2]; - // TODO more forms }; static_assert(sizeof(union xmm_reg) == 16, "xmm_reg size"); static_assert(sizeof(union mm_reg) == 8, "mm_reg size"); diff --git a/emu/decode.h b/emu/decode.h index a8d3f2cd38..1be25cf4c4 100644 --- a/emu/decode.h +++ b/emu/decode.h @@ -30,6 +30,7 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) { #define READIMM8 READIMM_(imm, 8); imm = (int8_t) (uint8_t) imm #define READIMM16 READIMM_(imm, 16) #define READMODRM_MEM READMODRM; if (modrm.type == modrm_reg) UNDEFINED +#define READMODRM_NOMEM READMODRM; if (modrm.type != modrm_reg) UNDEFINED restart: TRACEIP(); @@ -266,13 +267,18 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) { #endif #if OP_SIZE == 16 + case 0x60: TRACEI("punpcklbw xmm:modrm, xmm"); + READMODRM; V_OP(unpack_bw, xmm_modrm_val, xmm_modrm_reg,128); break; + case 0x6e: TRACEI("movd modrm, xmm"); - // TODO: this is supposed to use general registers! 
-                           READMODRM; VMOV(xmm_modrm_val, xmm_modrm_reg,32); break;
+                           READMODRM; VMOV(modrm_val, xmm_modrm_reg,32); break;
                 case 0x6f: TRACEI("movdqa xmm:modrm, xmm");
                            READMODRM; VMOV(xmm_modrm_val, xmm_modrm_reg,128); break;
 
+                case 0x70: TRACEI("pshufd xmm:modrm, xmm, imm8");
+                           READMODRM; READIMM8; V_OP_IMM(shuffle_d, xmm_modrm_val, xmm_modrm_reg,128); break;
+
                 case 0x73: READMODRM;
                            switch (modrm.opcode) {
                                case 0x02: TRACEI("psrlq imm, xmm");
@@ -281,19 +287,34 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
                            }
                            break;
 
+                case 0x74: TRACEI("pcmpeqb xmm:modrm, xmm");
+                           READMODRM; V_OP(compare_eqb, xmm_modrm_val, xmm_modrm_reg,128); break;
+                case 0x76: TRACEI("pcmpeqd xmm:modrm, xmm");
+                           READMODRM; V_OP(compare_eqd, xmm_modrm_val, xmm_modrm_reg,128); break;
+
                 case 0x7e: TRACEI("movd xmm, modrm");
-                           // TODO: this is supposed to use general registers!
-                           READMODRM; VMOV(xmm_modrm_reg, xmm_modrm_val,32); break;
+                           READMODRM; VMOV(xmm_modrm_reg, modrm_val,32); break;
                 case 0x7f: TRACEI("movdqa xmm, xmm:modrm");
                            READMODRM; VMOV(xmm_modrm_reg, xmm_modrm_val,128); break;
 
+                case 0xc5: TRACEI("pextrw xmm:modrm, reg, imm8"); // register-only form: r/m selects the xmm source, reg the general-register destination
+                           READMODRM_NOMEM; READIMM8; V_OP_IMM(extract_w, xmm_modrm_val, modrm_reg,128); break;
+
                 case 0xd6: TRACEI("movq xmm, xmm:modrm");
                            READMODRM; VMOV(xmm_modrm_reg, xmm_modrm_val,64); break;
 
-                case 0xef: TRACEI("pxor xmm:modrm xmm");
+                case 0xd7: TRACEI("pmovmskb xmm:modrm, reg");
+                           READMODRM_NOMEM; V_OP(movmask_b, xmm_modrm_val, modrm_reg,128); break;
+
+                case 0xef: TRACEI("pxor xmm:modrm, xmm");
                            READMODRM; V_OP(xor, xmm_modrm_val, xmm_modrm_reg,128); break;
 
 #else
+                case 0x10: TRACEI("movups xmm:modrm, xmm");
+                           READMODRM; VMOV(xmm_modrm_val, xmm_modrm_reg,128); break;
+                case 0x11: TRACEI("movups xmm, xmm:modrm");
+                           READMODRM; VMOV(xmm_modrm_reg, xmm_modrm_val,128); break;
+
                 case 0x6f: TRACEI("movq modrm, mm");
                            READMODRM; VMOV(mm_modrm_val, mm_modrm_reg, 64); break;
                 case 0x7f: TRACEI("movq mm, modrm");
@@ -881,8 +902,8 @@ __no_instrument DECODER_RET 
glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) { case 0x2a: TRACEI("cvtsi2sd modrm, xmm"); READMODRM; V_OP(cvtsi2sd, modrm_val, xmm_modrm_reg,32); break; - case 0x2c: TRACEI("cvtsd2si reg, xmm:modrm"); - READMODRM; V_OP(cvtsd2si, xmm_modrm_val, modrm_reg,64); break; + case 0x2c: TRACEI("cvttsd2si reg, xmm:modrm"); + READMODRM; V_OP(cvttsd2si, xmm_modrm_val, modrm_reg,64); break; case 0x5a: TRACEI("cvtsd2ss xmm:modrm, xmm"); READMODRM; V_OP(cvtsd2ss, xmm_modrm_val, xmm_modrm_reg,64); break; @@ -895,6 +916,9 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) { case 0x5e: TRACEI("divsd xmm:modrm, xmm"); READMODRM; V_OP(fdivs, xmm_modrm_val, xmm_modrm_reg,64); break; + case 0x70: TRACEI("pshuflw xmm:modrm, xmm, imm8"); + READMODRM; READIMM8; V_OP_IMM(shuffle_lw, xmm_modrm_val, xmm_modrm_reg,128); break; + case 0x18 ... 0x1f: TRACEI("rep nop modrm\t"); READMODRM; break; default: TRACE("undefined"); UNDEFINED; } @@ -917,18 +941,21 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) { READINSN; switch (insn) { case 0x10: TRACEI("movss xmm:modrm, xmm"); - READMODRM; VMOV_MERGE_REG(xmm_modrm_val, xmm_modrm_reg,32); - break; + READMODRM; VMOV_MERGE_REG(xmm_modrm_val, xmm_modrm_reg,32); break; case 0x11: TRACEI("movss xmm, xmm:modrm"); - READMODRM; VMOV_MERGE_REG(xmm_modrm_reg, xmm_modrm_val,32); - break; + READMODRM; VMOV_MERGE_REG(xmm_modrm_reg, xmm_modrm_val,32); break; + + case 0x6f: TRACEI("movdqu xmm:modrm, xmm"); + READMODRM; VMOV(xmm_modrm_val, xmm_modrm_reg,128); break; case 0x7e: TRACEI("movq xmm:modrm, xmm"); - READMODRM; VMOV(xmm_modrm_val, xmm_modrm_reg,64); - break; + READMODRM; VMOV(xmm_modrm_val, xmm_modrm_reg,64); break; case 0x18 ... 
0x1f: TRACEI("repz nop modrm\t"); READMODRM; break;
 
+                    case 0x7f: TRACEI("movdqu xmm, xmm:modrm");
+                               READMODRM; VMOV(xmm_modrm_reg, xmm_modrm_val,128); break;
+
                     // tzcnt is like bsf but the result when the input is zero is defined as the operand size
                     // for now, it can just be an alias
                     case 0xbc: TRACEI("~~tzcnt~~ bsf modrm, reg");
diff --git a/emu/vec.c b/emu/vec.c
index 1a0612f2d7..b5b59b28f8 100644
--- a/emu/vec.c
+++ b/emu/vec.c
@@ -46,6 +46,7 @@ VEC_ZERO_COPY(128, 128)
 VEC_ZERO_COPY(128, 64)
 VEC_ZERO_COPY(128, 32)
 VEC_ZERO_COPY(64, 64)
+VEC_ZERO_COPY(32, 32)
 
 void vec_merge32(NO_CPU, const void *src, void *dst) {
     memcpy(dst, src, 4);
@@ -87,9 +88,63 @@ void vec_fdivs64(NO_CPU, const double *src, double *dst) {
 void vec_cvtsi2sd32(NO_CPU, const uint32_t *src, double *dst) {
-    *dst = *src;
+    // cvtsi2sd converts a signed doubleword, so reinterpret before widening
+    *dst = (int32_t) *src;
 }
-void vec_cvtsd2si64(NO_CPU, const double *src, uint32_t *dst) {
-    *dst = *src;
+void vec_cvttsd2si64(NO_CPU, const double *src, uint32_t *dst) {
+    // truncating conversion to a signed doubleword; NaN and values that do not
+    // fit produce the integer indefinite value instead of undefined behavior
+    if (*src > -2147483649.0 && *src < 2147483648.0)
+        *dst = (uint32_t) (int32_t) *src;
+    else
+        *dst = 0x80000000;
 }
 void vec_cvtsd2ss64(NO_CPU, const double *src, float *dst) {
     *dst = *src;
 }
+
+// punpcklbw: interleave the low 8 bytes of dst (even result bytes) with the low 8 bytes of src (odd result bytes).
+// Filling the result from the top down keeps the low bytes of dst intact until they have been read.
+void vec_unpack_bw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
+    for (int i = 7; i >= 0; i--) {
+        dst->u8[i*2 + 1] = src->u8[i];
+        dst->u8[i*2] = dst->u8[i];
+    }
+}
+
+// pshuflw: every 2-bit field of encoding picks one of the low four words of src;
+// the high quadword is copied through. src is snapshotted first in case it aliases dst.
+void vec_shuffle_lw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst, uint8_t encoding) {
+    union xmm_reg src_copy = *src;
+    for (int i = 0; i < 4; i++)
+        dst->u16[i] = src_copy.u16[(encoding >> (i*2)) % 4];
+    dst->qw[1] = src->qw[1];
+}
+// pshufd: every 2-bit field of encoding picks one of the four doublewords of src.
+void vec_shuffle_d128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst, uint8_t encoding) {
+    union xmm_reg src_copy = *src;
+    for (int i = 0; i < 4; i++)
+        dst->u32[i] = src_copy.u32[(encoding >> (i*2)) % 4];
+}
+
+// pcmpeqb/pcmpeqd: set each element of dst to all ones where dst and src are equal, else to zero.
+void vec_compare_eqb128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
+    for (unsigned i = 0; i < array_size(src->u8); i++)
+        dst->u8[i] = dst->u8[i] == src->u8[i] ? ~0 : 0;
+}
+void vec_compare_eqd128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
+    for (unsigned i = 0; i < array_size(src->u32); i++)
+        dst->u32[i] = dst->u32[i] == src->u32[i] ? ~0 : 0;
+}
+
+// pmovmskb: gather the top bit of every byte of src into the low 16 bits of *dst.
+void vec_movmask_b128(NO_CPU, const union xmm_reg *src, uint32_t *dst) {
+    *dst = 0;
+    for (unsigned i = 0; i < array_size(src->u8); i++) {
+        if (src->u8[i] & (1 << 7))
+            *dst |= 1 << i;
+    }
+}
+
+// pextrw: zero-extend the word of src selected by the low 3 bits of index.
+void vec_extract_w128(NO_CPU, const union xmm_reg *src, uint32_t *dst, uint8_t index) {
+    *dst = src->u16[index % 8];
+}
diff --git a/emu/vec.h b/emu/vec.h
index 4a5c8c7239..d51c554f87 100644
--- a/emu/vec.h
+++ b/emu/vec.h
@@ -12,7 +12,7 @@ void vec_zero128_copy128(NO_CPU, const void *src, void *dst);
 void vec_zero128_copy64(NO_CPU, const void *src, void *dst);
 void vec_zero128_copy32(NO_CPU, const void *src, void *dst);
 void vec_zero64_copy64(NO_CPU, const void *src, void *dst);
-
+void vec_zero32_copy32(NO_CPU, const void *src, void *dst);
 // "merge" means don't zero the register before writing to it
 void vec_merge32(NO_CPU, const void *src, void *dst);
 void vec_merge64(NO_CPU, const void *src, void *dst);
@@ -27,7 +27,16 @@ void vec_fsubs64(NO_CPU, const double *src, double *dst);
 void vec_fdivs64(NO_CPU, const double *src, double *dst);
 
 void vec_cvtsi2sd32(NO_CPU, const uint32_t *src, double *dst);
-void vec_cvtsd2si64(NO_CPU, const double *src, uint32_t *dst);
+void vec_cvttsd2si64(NO_CPU, const double *src, uint32_t *dst);
 void vec_cvtsd2ss64(NO_CPU, const double *src, float *dst);
 
+// TODO organize
+void vec_unpack_bw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
+void vec_shuffle_lw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst, uint8_t encoding);
+void vec_shuffle_d128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst, uint8_t encoding);
+void vec_compare_eqb128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
+void vec_compare_eqd128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
+void vec_movmask_b128(NO_CPU, const union 
xmm_reg *src, uint32_t *dst);
+void vec_extract_w128(NO_CPU, const union xmm_reg *src, uint32_t *dst, uint8_t index);
+
 #endif
diff --git a/jit/gadgets-aarch64/misc.S b/jit/gadgets-aarch64/misc.S
index 5a15086255..cf4715918c 100644
--- a/jit/gadgets-aarch64/misc.S
+++ b/jit/gadgets-aarch64/misc.S
@@ -188,9 +188,9 @@ do_helper 2
 .endr
 
-.macro do_vec_helper rm, size=
-    .gadget vec_helper_\rm\size
+.macro do_vec_helper rm, _imm, size=
+    .gadget vec_helper_\rm\size\_imm
     .ifin(\rm, read,write)
-        \rm\()_prep (\size), vec_helper_\rm\size
+        \rm\()_prep (\size), vec_helper_\rm\size\_imm
     .endifin
     save_regs
     save_c
@@ -227,6 +227,16 @@ do_helper 2
         add x2, x0, x2
     .endif
 
+    .ifc \_imm,_imm
+        # imm for third argument
+        .ifin(\rm, reg)
+            ldr w3, [_ip, 12]
+        .endifin
+        .ifin(\rm, read,write)
+            ldr w3, [_ip, 20]
+        .endifin
+    .endif
+
     .ifin(\rm, read,write)
         ldr x8, [_ip, 8]
     .endifin
@@ -238,7 +248,7 @@ do_helper 2
     restore_c
     load_regs
     .ifc \rm,write
-        write_done (\size), vec_helper_\rm\size
+        write_done (\size), vec_helper_\rm\size\_imm
     .endif
     .ifin(\rm, reg,imm)
         gret 2
@@ -247,18 +257,20 @@ do_helper 2
         gret 3
     .endifin
     .ifc \rm,read
-        read_bullshit (\size), vec_helper_\rm\size
+        read_bullshit (\size), vec_helper_\rm\size\_imm
     .else N .ifc \rm,write
-        write_bullshit (\size), vec_helper_\rm\size
+        write_bullshit (\size), vec_helper_\rm\size\_imm
     .endif N .endif
 .endm
 
-.irp rm, reg,imm
-    do_vec_helper \rm
-.endr
-.irp size, SIZE_LIST,64,128
-    do_vec_helper read, \size
-    do_vec_helper write, \size
+.irp _imm, ,_imm
+    .irp rm, reg,imm
+        do_vec_helper \rm, \_imm
+    .endr
+    .irp size, SIZE_LIST,64,128
+        do_vec_helper read, \_imm, \size
+        do_vec_helper write, \_imm, \size
+    .endr
 .endr
 
 .gadget fstsw_ax
diff --git a/jit/gadgets-x86_64/misc.S b/jit/gadgets-x86_64/misc.S
index d2579448a3..7d3e2d7b18 100644
--- a/jit/gadgets-x86_64/misc.S
+++ b/jit/gadgets-x86_64/misc.S
@@ -137,10 +137,10 @@ do_helper 2
     do_helper write, \size
 .endr
 
-.macro do_vec_helper rm, size=
+.macro do_vec_helper 
rm, _imm, size= + .gadget vec_helper_\rm\size\_imm .ifin(\rm, read,write) - \rm\()_prep (\size), vec_helper_\rm\size + \rm\()_prep (\size), vec_helper_\rm\size\_imm .endifin save_regs save_c @@ -178,6 +178,16 @@ do_helper 2 leaq (%_cpu,%r14), %rdx .endif + .ifc _imm,_imm + # imm for third argument + .ifin(\rm, reg) + movl 12(%_ip), %ecx + .endifin + .ifin(\rm, read,write) + movl 20(%_ip), %ecx + .endifin + .endif + .ifin(\rm, read,write) callq *8(%_ip) .endifin @@ -188,7 +198,7 @@ do_helper 2 restore_c load_regs .ifc \rm,write - write_done (\size), vec_helper_\rm\size + write_done (\size), vec_helper_\rm\size\_imm .endif .ifin(\rm, reg,imm) gret 2 @@ -198,12 +208,14 @@ do_helper 2 .endifin .endm -.irp rm, reg,imm - do_vec_helper \rm -.endr -.irp size, SIZE_LIST,64,128 - do_vec_helper read, \size - do_vec_helper write, \size +.irp _imm, ,_imm + .irp rm, reg,imm + do_vec_helper \rm, \_imm + .endr + .irp size, SIZE_LIST,64,128 + do_vec_helper read, \_imm, \size + do_vec_helper write, \_imm, \size + .endr .endr .gadget fstsw_ax diff --git a/jit/gen.c b/jit/gen.c index 9288a2164d..029d0314d8 100644 --- a/jit/gen.c +++ b/jit/gen.c @@ -442,7 +442,7 @@ static inline uint16_t cpu_reg_offset(enum arg arg, int index) { return 0; } -static inline bool gen_vec(enum arg src, enum arg dst, void (*helper)(), gadget_t read_mem_gadget, gadget_t write_mem_gadget, struct gen_state *state, struct modrm *modrm, uint8_t imm, dword_t saved_ip, bool seg_gs) { +static inline bool gen_vec(enum arg src, enum arg dst, void (*helper)(), gadget_t read_mem_gadget, gadget_t write_mem_gadget, struct gen_state *state, struct modrm *modrm, uint8_t imm, dword_t saved_ip, bool seg_gs, bool has_imm) { bool rm_is_src = !could_be_memory(dst); enum arg rm = rm_is_src ? src : dst; enum arg reg = rm_is_src ? 
dst : src; @@ -454,18 +454,27 @@ static inline bool gen_vec(enum arg src, enum arg dst, void (*helper)(), gadget_ if (could_be_memory(rm) && modrm->type != modrm_reg) rm = arg_mem; + uint64_t imm_arg = 0; + if (has_imm) + imm_arg = (uint64_t) imm << 32; + switch (rm) { case arg_xmm_modrm_val: case arg_mm_modrm_val: case arg_modrm_val: assert(rm_reg_offset != 0); - g(vec_helper_reg); + if (!has_imm) + g(vec_helper_reg); + else + g(vec_helper_reg_imm); GEN(helper); // first byte is src, second byte is dst + uint64_t arg; if (rm_is_src) - GEN(rm_reg_offset | (reg_offset << 16)); + arg = rm_reg_offset | (reg_offset << 16); else - GEN(reg_offset | (rm_reg_offset << 16)); + arg = reg_offset | (rm_reg_offset << 16); + GEN(arg | imm_arg); break; case arg_mem: @@ -473,7 +482,7 @@ static inline bool gen_vec(enum arg src, enum arg dst, void (*helper)(), gadget_ GEN(rm_is_src ? read_mem_gadget : write_mem_gadget); GEN(saved_ip); GEN(helper); - GEN(reg_offset); + GEN(reg_offset | imm_arg); break; case arg_imm: @@ -489,15 +498,18 @@ static inline bool gen_vec(enum arg src, enum arg dst, void (*helper)(), gadget_ return true; } -#define _v(src, dst, helper, z) do { \ - extern void gadget_vec_helper_read##z(void); \ - extern void gadget_vec_helper_write##z(void); \ - if (!gen_vec(src, dst, (void (*)()) helper, gadget_vec_helper_read##z, gadget_vec_helper_write##z, state, &modrm, imm, saved_ip, seg_gs)) return false; \ +#define has_imm_ false +#define has_imm__imm true +#define _v(src, dst, helper, _imm, z) do { \ + extern void gadget_vec_helper_read##z##_imm(void); \ + extern void gadget_vec_helper_write##z##_imm(void); \ + if (!gen_vec(src, dst, (void (*)()) helper, gadget_vec_helper_read##z##_imm, gadget_vec_helper_write##z##_imm, state, &modrm, imm, saved_ip, seg_gs, has_imm_##_imm)) return false; \ } while (0) -#define v_(op, src, dst,z) _v(arg_##src, arg_##dst, vec_##op##z, z) -#define v(op, src, dst,z) v_(op, src, dst,z) -#define v_imm(op, _imm, dst,z) do { imm = _imm; v(op, 
imm, dst,z); } while (0) +#define v_(op, src, dst, _imm,z) _v(arg_##src, arg_##dst, vec_##op##z, _imm,z) +#define v(op, src, dst,z) v_(op, src, dst,,z) +#define v_imm(op, src, dst,z) v_(op, src, dst, _imm,z) +#define vec_dst_size_modrm_val 32 #define vec_dst_size_mm_modrm_val 64 #define vec_dst_size_mm_modrm_reg 64 #define vec_dst_size_xmm_modrm_val 128 @@ -519,8 +531,9 @@ static inline bool gen_vec(enum arg src, enum arg dst, void (*helper)(), gadget_ } #define VCOMPARE(src, dst,z) v(compare, src, dst,z) -#define VSHIFTR_IMM(src, dst, z) v_imm(imm_shiftr, src, dst,z) +#define VSHIFTR_IMM(src, dst, z) v(imm_shiftr, src, dst,z) #define V_OP(op, src, dst, z) v(op, src, dst, z) +#define V_OP_IMM(op, src, dst, z) v_imm(op, src, dst, z) #define DECODER_RET int #define DECODER_NAME gen_step diff --git a/misc.h b/misc.h index bdb0ecaaac..0f8486d274 100644 --- a/misc.h +++ b/misc.h @@ -54,6 +54,8 @@ static inline void __use(int dummy __attribute__((unused)), ...) {} }) #endif +#define array_size(arr) (sizeof(arr)/sizeof((arr)[0])) + // types typedef int64_t sqword_t; typedef uint64_t qword_t; diff --git a/tests/e2e/qemu/expected.txt b/tests/e2e/qemu/expected.txt index 7d25fe4b32..9bf53ab0f3 100644 --- a/tests/e2e/qemu/expected.txt +++ b/tests/e2e/qemu/expected.txt @@ -4227,84 +4227,84 @@ cmpxchgw EAX=fffefdfc A=12345678 C=fbca7654 cmpxchgb EAX=fffefdfc A=12345678 C=fbca7654 cmpxchg8b: eax=65423456 edx=000fbca7 op1=000fbca765423456 CC=00 cmpxchg8b: eax=6789abcd edx=00012345 op1=0006532432432434 CC=40 -stosb ESI=0805e980 EDI=0805e991 EAX=12345678 ECX=00000011 EFL=0000 -stosw ESI=0805e980 EDI=0805e992 EAX=12345678 ECX=00000011 EFL=0000 -stosl ESI=0805e980 EDI=0805e994 EAX=12345678 ECX=00000011 EFL=0000 -stosb ESI=0805e980 EDI=0805e98f EAX=12345678 ECX=00000011 EFL=0000 -stosw ESI=0805e980 EDI=0805e98e EAX=12345678 ECX=00000011 EFL=0000 -stosl ESI=0805e980 EDI=0805e98c EAX=12345678 ECX=00000011 EFL=0000 -rep stosb ESI=0805e980 EDI=0805e9a1 EAX=12345678 ECX=00000000 EFL=0000 
-rep stosw ESI=0805e980 EDI=0805e9b2 EAX=12345678 ECX=00000000 EFL=0000 -rep stosl ESI=0805e980 EDI=0805e9d4 EAX=12345678 ECX=00000000 EFL=0000 -rep stosb ESI=0805e980 EDI=0805e97f EAX=12345678 ECX=00000000 EFL=0000 -rep stosw ESI=0805e980 EDI=0805e96e EAX=12345678 ECX=00000000 EFL=0000 -rep stosl ESI=0805e980 EDI=0805e94c EAX=12345678 ECX=00000000 EFL=0000 -lodsb ESI=0805e981 EDI=0805e990 EAX=12345678 ECX=00000011 EFL=0000 -lodsw ESI=0805e982 EDI=0805e990 EAX=12345678 ECX=00000011 EFL=0000 -lodsl ESI=0805e984 EDI=0805e990 EAX=12345678 ECX=00000011 EFL=0000 -lodsb ESI=0805e97f EDI=0805e990 EAX=12345678 ECX=00000011 EFL=0000 -lodsw ESI=0805e97e EDI=0805e990 EAX=12345678 ECX=00000011 EFL=0000 -lodsl ESI=0805e97c EDI=0805e990 EAX=12345678 ECX=00000011 EFL=0000 -rep lodsb ESI=0805e991 EDI=0805e990 EAX=12345678 ECX=00000000 EFL=0000 -rep lodsw ESI=0805e9a2 EDI=0805e990 EAX=12345678 ECX=00000000 EFL=0000 -rep lodsl ESI=0805e9c4 EDI=0805e990 EAX=12345678 ECX=00000000 EFL=0000 -rep lodsb ESI=0805e96f EDI=0805e990 EAX=12345678 ECX=00000000 EFL=0000 -rep lodsw ESI=0805e95e EDI=0805e990 EAX=12345678 ECX=00000000 EFL=0000 -rep lodsl ESI=0805e93c EDI=0805e990 EAX=19181716 ECX=00000000 EFL=0000 -movsb ESI=0805e981 EDI=0805e991 EAX=12345678 ECX=00000011 EFL=0000 -movsw ESI=0805e982 EDI=0805e992 EAX=12345678 ECX=00000011 EFL=0000 -movsl ESI=0805e984 EDI=0805e994 EAX=12345678 ECX=00000011 EFL=0000 -movsb ESI=0805e97f EDI=0805e98f EAX=12345678 ECX=00000011 EFL=0000 -movsw ESI=0805e97e EDI=0805e98e EAX=12345678 ECX=00000011 EFL=0000 -movsl ESI=0805e97c EDI=0805e98c EAX=12345678 ECX=00000011 EFL=0000 -rep movsb ESI=0805e991 EDI=0805e9a1 EAX=12345678 ECX=00000000 EFL=0000 -rep movsw ESI=0805e9a2 EDI=0805e9b2 EAX=12345678 ECX=00000000 EFL=0000 -rep movsl ESI=0805e9c4 EDI=0805e9d4 EAX=12345678 ECX=00000000 EFL=0000 -rep movsb ESI=0805e96f EDI=0805e97f EAX=12345678 ECX=00000000 EFL=0000 -rep movsw ESI=0805e95e EDI=0805e96e EAX=12345678 ECX=00000000 EFL=0000 -rep movsl ESI=0805e93c 
EDI=0805e94c EAX=12345678 ECX=00000000 EFL=0000 -lodsb ESI=0805e981 EDI=0805e990 EAX=12345678 ECX=00000011 EFL=0000 -lodsw ESI=0805e982 EDI=0805e990 EAX=12345678 ECX=00000011 EFL=0000 -lodsl ESI=0805e984 EDI=0805e990 EAX=12345678 ECX=00000011 EFL=0000 -lodsb ESI=0805e97f EDI=0805e990 EAX=12345678 ECX=00000011 EFL=0000 -lodsw ESI=0805e97e EDI=0805e990 EAX=12345678 ECX=00000011 EFL=0000 -lodsl ESI=0805e97c EDI=0805e990 EAX=12345678 ECX=00000011 EFL=0000 -scasb ESI=0805e980 EDI=0805e991 EAX=12345678 ECX=00000011 EFL=0044 -scasw ESI=0805e980 EDI=0805e992 EAX=12345678 ECX=00000011 EFL=0044 -scasl ESI=0805e980 EDI=0805e994 EAX=12345678 ECX=00000011 EFL=0044 -scasb ESI=0805e980 EDI=0805e98f EAX=12345678 ECX=00000011 EFL=0044 -scasw ESI=0805e980 EDI=0805e98e EAX=12345678 ECX=00000011 EFL=0044 -scasl ESI=0805e980 EDI=0805e98c EAX=12345678 ECX=00000011 EFL=0044 -repz scasb ESI=0805e980 EDI=0805e992 EAX=12345678 ECX=0000000f EFL=0095 -repz scasw ESI=0805e980 EDI=0805e994 EAX=12345678 ECX=0000000f EFL=0091 -repz scasl ESI=0805e980 EDI=0805e9d4 EAX=12345678 ECX=00000000 EFL=0044 -repz scasb ESI=0805e980 EDI=0805e98e EAX=12345678 ECX=0000000f EFL=0095 -repz scasw ESI=0805e980 EDI=0805e98c EAX=12345678 ECX=0000000f EFL=0091 -repz scasl ESI=0805e980 EDI=0805e958 EAX=12345678 ECX=00000003 EFL=0014 -repnz scasb ESI=0805e980 EDI=0805e991 EAX=12345678 ECX=00000010 EFL=0044 -repnz scasw ESI=0805e980 EDI=0805e992 EAX=12345678 ECX=00000010 EFL=0044 -repnz scasl ESI=0805e980 EDI=0805e994 EAX=12345678 ECX=00000010 EFL=0044 -repnz scasb ESI=0805e980 EDI=0805e98f EAX=12345678 ECX=00000010 EFL=0044 -repnz scasw ESI=0805e980 EDI=0805e98e EAX=12345678 ECX=00000010 EFL=0044 -repnz scasl ESI=0805e980 EDI=0805e98c EAX=12345678 ECX=00000010 EFL=0044 -cmpsb ESI=0805e981 EDI=0805e991 EAX=12345678 ECX=00000011 EFL=0044 -cmpsw ESI=0805e982 EDI=0805e992 EAX=12345678 ECX=00000011 EFL=0044 -cmpsl ESI=0805e984 EDI=0805e994 EAX=12345678 ECX=00000011 EFL=0044 -cmpsb ESI=0805e97f EDI=0805e98f EAX=12345678 
ECX=00000011 EFL=0044 -cmpsw ESI=0805e97e EDI=0805e98e EAX=12345678 ECX=00000011 EFL=0044 -cmpsl ESI=0805e97c EDI=0805e98c EAX=12345678 ECX=00000011 EFL=0044 -repz cmpsb ESI=0805e991 EDI=0805e9a1 EAX=12345678 ECX=00000000 EFL=0044 -repz cmpsw ESI=0805e9a2 EDI=0805e9b2 EAX=12345678 ECX=00000000 EFL=0044 -repz cmpsl ESI=0805e9c4 EDI=0805e9d4 EAX=12345678 ECX=00000000 EFL=0044 -repz cmpsb ESI=0805e96f EDI=0805e97f EAX=12345678 ECX=00000000 EFL=0044 -repz cmpsw ESI=0805e95e EDI=0805e96e EAX=12345678 ECX=00000000 EFL=0044 -repz cmpsl ESI=0805e958 EDI=0805e968 EAX=12345678 ECX=00000007 EFL=0014 -repnz cmpsb ESI=0805e981 EDI=0805e991 EAX=12345678 ECX=00000010 EFL=0044 -repnz cmpsw ESI=0805e982 EDI=0805e992 EAX=12345678 ECX=00000010 EFL=0044 -repnz cmpsl ESI=0805e984 EDI=0805e994 EAX=12345678 ECX=00000010 EFL=0044 -repnz cmpsb ESI=0805e97f EDI=0805e98f EAX=12345678 ECX=00000010 EFL=0044 -repnz cmpsw ESI=0805e97e EDI=0805e98e EAX=12345678 ECX=00000010 EFL=0044 -repnz cmpsl ESI=0805e97c EDI=0805e98c EAX=12345678 ECX=00000010 EFL=0044 +stosb ESI=0805fde0 EDI=0805fdf1 EAX=12345678 ECX=00000011 EFL=0000 +stosw ESI=0805fde0 EDI=0805fdf2 EAX=12345678 ECX=00000011 EFL=0000 +stosl ESI=0805fde0 EDI=0805fdf4 EAX=12345678 ECX=00000011 EFL=0000 +stosb ESI=0805fde0 EDI=0805fdef EAX=12345678 ECX=00000011 EFL=0000 +stosw ESI=0805fde0 EDI=0805fdee EAX=12345678 ECX=00000011 EFL=0000 +stosl ESI=0805fde0 EDI=0805fdec EAX=12345678 ECX=00000011 EFL=0000 +rep stosb ESI=0805fde0 EDI=0805fe01 EAX=12345678 ECX=00000000 EFL=0000 +rep stosw ESI=0805fde0 EDI=0805fe12 EAX=12345678 ECX=00000000 EFL=0000 +rep stosl ESI=0805fde0 EDI=0805fe34 EAX=12345678 ECX=00000000 EFL=0000 +rep stosb ESI=0805fde0 EDI=0805fddf EAX=12345678 ECX=00000000 EFL=0000 +rep stosw ESI=0805fde0 EDI=0805fdce EAX=12345678 ECX=00000000 EFL=0000 +rep stosl ESI=0805fde0 EDI=0805fdac EAX=12345678 ECX=00000000 EFL=0000 +lodsb ESI=0805fde1 EDI=0805fdf0 EAX=12345678 ECX=00000011 EFL=0000 +lodsw ESI=0805fde2 EDI=0805fdf0 EAX=12345678 
ECX=00000011 EFL=0000 +lodsl ESI=0805fde4 EDI=0805fdf0 EAX=12345678 ECX=00000011 EFL=0000 +lodsb ESI=0805fddf EDI=0805fdf0 EAX=12345678 ECX=00000011 EFL=0000 +lodsw ESI=0805fdde EDI=0805fdf0 EAX=12345678 ECX=00000011 EFL=0000 +lodsl ESI=0805fddc EDI=0805fdf0 EAX=12345678 ECX=00000011 EFL=0000 +rep lodsb ESI=0805fdf1 EDI=0805fdf0 EAX=12345678 ECX=00000000 EFL=0000 +rep lodsw ESI=0805fe02 EDI=0805fdf0 EAX=12345678 ECX=00000000 EFL=0000 +rep lodsl ESI=0805fe24 EDI=0805fdf0 EAX=12345678 ECX=00000000 EFL=0000 +rep lodsb ESI=0805fdcf EDI=0805fdf0 EAX=12345678 ECX=00000000 EFL=0000 +rep lodsw ESI=0805fdbe EDI=0805fdf0 EAX=12345678 ECX=00000000 EFL=0000 +rep lodsl ESI=0805fd9c EDI=0805fdf0 EAX=19181716 ECX=00000000 EFL=0000 +movsb ESI=0805fde1 EDI=0805fdf1 EAX=12345678 ECX=00000011 EFL=0000 +movsw ESI=0805fde2 EDI=0805fdf2 EAX=12345678 ECX=00000011 EFL=0000 +movsl ESI=0805fde4 EDI=0805fdf4 EAX=12345678 ECX=00000011 EFL=0000 +movsb ESI=0805fddf EDI=0805fdef EAX=12345678 ECX=00000011 EFL=0000 +movsw ESI=0805fdde EDI=0805fdee EAX=12345678 ECX=00000011 EFL=0000 +movsl ESI=0805fddc EDI=0805fdec EAX=12345678 ECX=00000011 EFL=0000 +rep movsb ESI=0805fdf1 EDI=0805fe01 EAX=12345678 ECX=00000000 EFL=0000 +rep movsw ESI=0805fe02 EDI=0805fe12 EAX=12345678 ECX=00000000 EFL=0000 +rep movsl ESI=0805fe24 EDI=0805fe34 EAX=12345678 ECX=00000000 EFL=0000 +rep movsb ESI=0805fdcf EDI=0805fddf EAX=12345678 ECX=00000000 EFL=0000 +rep movsw ESI=0805fdbe EDI=0805fdce EAX=12345678 ECX=00000000 EFL=0000 +rep movsl ESI=0805fd9c EDI=0805fdac EAX=12345678 ECX=00000000 EFL=0000 +lodsb ESI=0805fde1 EDI=0805fdf0 EAX=12345678 ECX=00000011 EFL=0000 +lodsw ESI=0805fde2 EDI=0805fdf0 EAX=12345678 ECX=00000011 EFL=0000 +lodsl ESI=0805fde4 EDI=0805fdf0 EAX=12345678 ECX=00000011 EFL=0000 +lodsb ESI=0805fddf EDI=0805fdf0 EAX=12345678 ECX=00000011 EFL=0000 +lodsw ESI=0805fdde EDI=0805fdf0 EAX=12345678 ECX=00000011 EFL=0000 +lodsl ESI=0805fddc EDI=0805fdf0 EAX=12345678 ECX=00000011 EFL=0000 +scasb ESI=0805fde0 
EDI=0805fdf1 EAX=12345678 ECX=00000011 EFL=0044 +scasw ESI=0805fde0 EDI=0805fdf2 EAX=12345678 ECX=00000011 EFL=0044 +scasl ESI=0805fde0 EDI=0805fdf4 EAX=12345678 ECX=00000011 EFL=0044 +scasb ESI=0805fde0 EDI=0805fdef EAX=12345678 ECX=00000011 EFL=0044 +scasw ESI=0805fde0 EDI=0805fdee EAX=12345678 ECX=00000011 EFL=0044 +scasl ESI=0805fde0 EDI=0805fdec EAX=12345678 ECX=00000011 EFL=0044 +repz scasb ESI=0805fde0 EDI=0805fdf2 EAX=12345678 ECX=0000000f EFL=0004 +repz scasw ESI=0805fde0 EDI=0805fdf4 EAX=12345678 ECX=0000000f EFL=0004 +repz scasl ESI=0805fde0 EDI=0805fe34 EAX=12345678 ECX=00000000 EFL=0044 +repz scasb ESI=0805fde0 EDI=0805fdee EAX=12345678 ECX=0000000f EFL=0004 +repz scasw ESI=0805fde0 EDI=0805fdec EAX=12345678 ECX=0000000f EFL=0004 +repz scasl ESI=0805fde0 EDI=0805fdb8 EAX=12345678 ECX=00000003 EFL=0085 +repnz scasb ESI=0805fde0 EDI=0805fdf1 EAX=12345678 ECX=00000010 EFL=0044 +repnz scasw ESI=0805fde0 EDI=0805fdf2 EAX=12345678 ECX=00000010 EFL=0044 +repnz scasl ESI=0805fde0 EDI=0805fdf4 EAX=12345678 ECX=00000010 EFL=0044 +repnz scasb ESI=0805fde0 EDI=0805fdef EAX=12345678 ECX=00000010 EFL=0044 +repnz scasw ESI=0805fde0 EDI=0805fdee EAX=12345678 ECX=00000010 EFL=0044 +repnz scasl ESI=0805fde0 EDI=0805fdec EAX=12345678 ECX=00000010 EFL=0044 +cmpsb ESI=0805fde1 EDI=0805fdf1 EAX=12345678 ECX=00000011 EFL=0044 +cmpsw ESI=0805fde2 EDI=0805fdf2 EAX=12345678 ECX=00000011 EFL=0044 +cmpsl ESI=0805fde4 EDI=0805fdf4 EAX=12345678 ECX=00000011 EFL=0044 +cmpsb ESI=0805fddf EDI=0805fdef EAX=12345678 ECX=00000011 EFL=0044 +cmpsw ESI=0805fdde EDI=0805fdee EAX=12345678 ECX=00000011 EFL=0044 +cmpsl ESI=0805fddc EDI=0805fdec EAX=12345678 ECX=00000011 EFL=0044 +repz cmpsb ESI=0805fdf1 EDI=0805fe01 EAX=12345678 ECX=00000000 EFL=0044 +repz cmpsw ESI=0805fe02 EDI=0805fe12 EAX=12345678 ECX=00000000 EFL=0044 +repz cmpsl ESI=0805fe24 EDI=0805fe34 EAX=12345678 ECX=00000000 EFL=0044 +repz cmpsb ESI=0805fdcf EDI=0805fddf EAX=12345678 ECX=00000000 EFL=0044 +repz cmpsw ESI=0805fdbe 
EDI=0805fdce EAX=12345678 ECX=00000000 EFL=0044 +repz cmpsl ESI=0805fdb8 EDI=0805fdc8 EAX=12345678 ECX=00000007 EFL=0014 +repnz cmpsb ESI=0805fde1 EDI=0805fdf1 EAX=12345678 ECX=00000010 EFL=0044 +repnz cmpsw ESI=0805fde2 EDI=0805fdf2 EAX=12345678 ECX=00000010 EFL=0044 +repnz cmpsl ESI=0805fde4 EDI=0805fdf4 EAX=12345678 ECX=00000010 EFL=0044 +repnz cmpsb ESI=0805fddf EDI=0805fdef EAX=12345678 ECX=00000010 EFL=0044 +repnz cmpsw ESI=0805fdde EDI=0805fdee EAX=12345678 ECX=00000010 EFL=0044 +repnz cmpsl ESI=0805fddc EDI=0805fdec EAX=12345678 ECX=00000010 EFL=0044 lea 0x4000 = 00004000 lea (%%eax) = 00000001 lea (%%ebx) = 00000002 @@ -4352,3 +4352,20 @@ cwde A=8234a6f8 R=ffffa6f8 cwd A=8234a6f8 R=8234a6f8:8345ffff cdq A=8234a6f8 R=8234a6f8:ffffffff bswapl : A=12345678 R=78563412 +punpcklbw: a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=1f4529677c23cdc65898ba69d748ab73 +punpcklbw: a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=0f00767c25625ac2080854542727f8f8 +pcmpeqb : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=00000000000000000000000000000000 +pcmpeqb : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=0000ffff0000ff0000000000ffffffff +pcmpeqd : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=00000000000000000000000000000000 +pcmpeqd : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=000000000000000000000000ffffffff +pxor : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=9da342043da9b9aa5a4e5f0bc0d39fd8 +pxor : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=e1280000092e00170f0a479800000000 +pextrw : r=0000944a +pmovmskb : r=00009918 +pshufd : a=dc515cff944a58ec456723c698694873 ib=78 r=456723c6dc515cff944a58ec98694873 +pshufd : a=231be9e8cde7438d007c62c2085427f8 ib=78 r=007c62c2231be9e8cde7438d085427f8 +pshuflw : a=dc515cff944a58ec456723c698694873 ib=78 
r=dc515cff944a58ec9869456723c64873 +pshuflw : a=231be9e8cde7438d007c62c2085427f8 ib=78 r=231be9e8cde7438d0854007c62c227f8 +cvtsd2ss : a=231be9e8cde7438d007c62c2085427f8 r=231be9e8cde7438d007c62c200000000 +cvttsd2si: a=231be9e8cde7438d007c62c2085427f8 r=00000000 +cvtsi2sd : a=085427f8 r=231be9e8cde7438d41a0a84ff0000000 diff --git a/tests/e2e/qemu/qemu-test.c b/tests/e2e/qemu/qemu-test.c index 5fd57fc1bc..afca74cbfb 100644 --- a/tests/e2e/qemu/qemu-test.c +++ b/tests/e2e/qemu/qemu-test.c @@ -37,7 +37,7 @@ //#define LINUX_VM86_IOPL_FIX //#define TEST_P4_FLAGS #ifdef __SSE__ -//#define TEST_SSE +#define TEST_SSE #define TEST_CMOV 1 #define TEST_FCOMI 1 #else @@ -2396,267 +2396,268 @@ void test_sse(void) XMMReg r, a, b; int i; - MMX_OP2(punpcklbw); - MMX_OP2(punpcklwd); - MMX_OP2(punpckldq); - MMX_OP2(packsswb); - MMX_OP2(pcmpgtb); - MMX_OP2(pcmpgtw); - MMX_OP2(pcmpgtd); - MMX_OP2(packuswb); - MMX_OP2(punpckhbw); - MMX_OP2(punpckhwd); - MMX_OP2(punpckhdq); - MMX_OP2(packssdw); - MMX_OP2(pcmpeqb); - MMX_OP2(pcmpeqw); - MMX_OP2(pcmpeqd); - - MMX_OP2(paddq); - MMX_OP2(pmullw); - MMX_OP2(psubusb); - MMX_OP2(psubusw); - MMX_OP2(pminub); - MMX_OP2(pand); - MMX_OP2(paddusb); - MMX_OP2(paddusw); - MMX_OP2(pmaxub); - MMX_OP2(pandn); - - MMX_OP2(pmulhuw); - MMX_OP2(pmulhw); - - MMX_OP2(psubsb); - MMX_OP2(psubsw); - MMX_OP2(pminsw); - MMX_OP2(por); - MMX_OP2(paddsb); - MMX_OP2(paddsw); - MMX_OP2(pmaxsw); - MMX_OP2(pxor); - MMX_OP2(pmuludq); - MMX_OP2(pmaddwd); - MMX_OP2(psadbw); - MMX_OP2(psubb); - MMX_OP2(psubw); - MMX_OP2(psubd); - MMX_OP2(psubq); - MMX_OP2(paddb); - MMX_OP2(paddw); - MMX_OP2(paddd); - - MMX_OP2(pavgb); - MMX_OP2(pavgw); - - asm volatile ("pinsrw $1, %1, %0" : "=y" (r.q[0]) : "r" (0x12345678)); - printf("%-9s: r=" FMT64X "\n", "pinsrw", r.q[0]); - - asm volatile ("pinsrw $5, %1, %0" : "=x" (r.dq) : "r" (0x12345678)); - printf("%-9s: r=" FMT64X "" FMT64X "\n", "pinsrw", r.q[1], r.q[0]); + // NOTE: when the MMX op is implemented, just change SSE_OP2 to MMX_OP2, 
which tests both + SSE_OP2(punpcklbw); + // MMX_OP2(punpcklwd); + // MMX_OP2(punpckldq); + // MMX_OP2(packsswb); + // MMX_OP2(pcmpgtb); + // MMX_OP2(pcmpgtw); + // MMX_OP2(pcmpgtd); + // MMX_OP2(packuswb); + // MMX_OP2(punpckhbw); + // MMX_OP2(punpckhwd); + // MMX_OP2(punpckhdq); + // MMX_OP2(packssdw); + SSE_OP2(pcmpeqb); + // MMX_OP2(pcmpeqw); + SSE_OP2(pcmpeqd); + + // MMX_OP2(paddq); + // MMX_OP2(pmullw); + // MMX_OP2(psubusb); + // MMX_OP2(psubusw); + // MMX_OP2(pminub); + // MMX_OP2(pand); + // MMX_OP2(paddusb); + // MMX_OP2(paddusw); + // MMX_OP2(pmaxub); + // MMX_OP2(pandn); + + // MMX_OP2(pmulhuw); + // MMX_OP2(pmulhw); + + // MMX_OP2(psubsb); + // MMX_OP2(psubsw); + // MMX_OP2(pminsw); + // MMX_OP2(por); + // MMX_OP2(paddsb); + // MMX_OP2(paddsw); + // MMX_OP2(pmaxsw); + SSE_OP2(pxor); + // MMX_OP2(pmuludq); + // MMX_OP2(pmaddwd); + // MMX_OP2(psadbw); + // MMX_OP2(psubb); + // MMX_OP2(psubw); + // MMX_OP2(psubd); + // MMX_OP2(psubq); + // MMX_OP2(paddb); + // MMX_OP2(paddw); + // MMX_OP2(paddd); + + // MMX_OP2(pavgb); + // MMX_OP2(pavgw); + + // asm volatile ("pinsrw $1, %1, %0" : "=y" (r.q[0]) : "r" (0x12345678)); + // printf("%-9s: r=" FMT64X "\n", "pinsrw", r.q[0]); + + // asm volatile ("pinsrw $5, %1, %0" : "=x" (r.dq) : "r" (0x12345678)); + // printf("%-9s: r=" FMT64X "" FMT64X "\n", "pinsrw", r.q[1], r.q[0]); a.q[0] = test_values[0][0]; a.q[1] = test_values[0][1]; - asm volatile ("pextrw $1, %1, %0" : "=r" (r.l[0]) : "y" (a.q[0])); - printf("%-9s: r=%08x\n", "pextrw", r.l[0]); + // asm volatile ("pextrw $1, %1, %0" : "=r" (r.l[0]) : "y" (a.q[0])); + // printf("%-9s: r=%08x\n", "pextrw", r.l[0]); asm volatile ("pextrw $5, %1, %0" : "=r" (r.l[0]) : "x" (a.dq)); printf("%-9s: r=%08x\n", "pextrw", r.l[0]); - asm volatile ("pmovmskb %1, %0" : "=r" (r.l[0]) : "y" (a.q[0])); - printf("%-9s: r=%08x\n", "pmovmskb", r.l[0]); + // asm volatile ("pmovmskb %1, %0" : "=r" (r.l[0]) : "y" (a.q[0])); + // printf("%-9s: r=%08x\n", "pmovmskb", r.l[0]); asm volatile 
("pmovmskb %1, %0" : "=r" (r.l[0]) : "x" (a.dq)); printf("%-9s: r=%08x\n", "pmovmskb", r.l[0]); - { - r.q[0] = -1; - r.q[1] = -1; - - a.q[0] = test_values[0][0]; - a.q[1] = test_values[0][1]; - b.q[0] = test_values[1][0]; - b.q[1] = test_values[1][1]; - asm volatile("maskmovq %1, %0" : - : "y" (a.q[0]), "y" (b.q[0]), "D" (&r) - : "memory"); - printf("%-9s: r=" FMT64X " a=" FMT64X " b=" FMT64X "\n", - "maskmov", - r.q[0], - a.q[0], - b.q[0]); - asm volatile("maskmovdqu %1, %0" : - : "x" (a.dq), "x" (b.dq), "D" (&r) - : "memory"); - printf("%-9s: r=" FMT64X "" FMT64X " a=" FMT64X "" FMT64X " b=" FMT64X "" FMT64X "\n", - "maskmov", - r.q[1], r.q[0], - a.q[1], a.q[0], - b.q[1], b.q[0]); - } + // { + // r.q[0] = -1; + // r.q[1] = -1; + + // a.q[0] = test_values[0][0]; + // a.q[1] = test_values[0][1]; + // b.q[0] = test_values[1][0]; + // b.q[1] = test_values[1][1]; + // asm volatile("maskmovq %1, %0" : + // : "y" (a.q[0]), "y" (b.q[0]), "D" (&r) + // : "memory"); + // printf("%-9s: r=" FMT64X " a=" FMT64X " b=" FMT64X "\n", + // "maskmov", + // r.q[0], + // a.q[0], + // b.q[0]); + // asm volatile("maskmovdqu %1, %0" : + // : "x" (a.dq), "x" (b.dq), "D" (&r) + // : "memory"); + // printf("%-9s: r=" FMT64X "" FMT64X " a=" FMT64X "" FMT64X " b=" FMT64X "" FMT64X "\n", + // "maskmov", + // r.q[1], r.q[0], + // a.q[1], a.q[0], + // b.q[1], b.q[0]); + // } asm volatile ("emms"); - SSE_OP2(punpcklqdq); - SSE_OP2(punpckhqdq); - SSE_OP2(andps); - SSE_OP2(andpd); - SSE_OP2(andnps); - SSE_OP2(andnpd); - SSE_OP2(orps); - SSE_OP2(orpd); - SSE_OP2(xorps); - SSE_OP2(xorpd); - - SSE_OP2(unpcklps); - SSE_OP2(unpcklpd); - SSE_OP2(unpckhps); - SSE_OP2(unpckhpd); - - SHUF_OP(shufps, 0x78); - SHUF_OP(shufpd, 0x02); + // SSE_OP2(punpcklqdq); + // SSE_OP2(punpckhqdq); + // SSE_OP2(andps); + // SSE_OP2(andpd); + // SSE_OP2(andnps); + // SSE_OP2(andnpd); + // SSE_OP2(orps); + // SSE_OP2(orpd); + // SSE_OP2(xorps); + // SSE_OP2(xorpd); + + // SSE_OP2(unpcklps); + // SSE_OP2(unpcklpd); + // 
SSE_OP2(unpckhps); + // SSE_OP2(unpckhpd); + + // SHUF_OP(shufps, 0x78); + // SHUF_OP(shufpd, 0x02); PSHUF_OP(pshufd, 0x78); PSHUF_OP(pshuflw, 0x78); - PSHUF_OP(pshufhw, 0x78); - - SHIFT_OP(psrlw, 7); - SHIFT_OP(psrlw, 16); - SHIFT_OP(psraw, 7); - SHIFT_OP(psraw, 16); - SHIFT_OP(psllw, 7); - SHIFT_OP(psllw, 16); - - SHIFT_OP(psrld, 7); - SHIFT_OP(psrld, 32); - SHIFT_OP(psrad, 7); - SHIFT_OP(psrad, 32); - SHIFT_OP(pslld, 7); - SHIFT_OP(pslld, 32); - - SHIFT_OP(psrlq, 7); - SHIFT_OP(psrlq, 32); - SHIFT_OP(psllq, 7); - SHIFT_OP(psllq, 32); - - SHIFT_IM(psrldq, 16); - SHIFT_IM(psrldq, 7); - SHIFT_IM(pslldq, 16); - SHIFT_IM(pslldq, 7); - - MOVMSK(movmskps); - MOVMSK(movmskpd); + // PSHUF_OP(pshufhw, 0x78); + + // SHIFT_OP(psrlw, 7); + // SHIFT_OP(psrlw, 16); + // SHIFT_OP(psraw, 7); + // SHIFT_OP(psraw, 16); + // SHIFT_OP(psllw, 7); + // SHIFT_OP(psllw, 16); + + // SHIFT_OP(psrld, 7); + // SHIFT_OP(psrld, 32); + // SHIFT_OP(psrad, 7); + // SHIFT_OP(psrad, 32); + // SHIFT_OP(pslld, 7); + // SHIFT_OP(pslld, 32); + + // SHIFT_OP(psrlq, 7); + // SHIFT_OP(psrlq, 32); + // SHIFT_OP(psllq, 7); + // SHIFT_OP(psllq, 32); + + // SHIFT_IM(psrldq, 16); + // SHIFT_IM(psrldq, 7); + // SHIFT_IM(pslldq, 16); + // SHIFT_IM(pslldq, 7); + + // MOVMSK(movmskps); + // MOVMSK(movmskpd); /* FPU specific ops */ - { - uint32_t mxcsr; - asm volatile("stmxcsr %0" : "=m" (mxcsr)); - printf("mxcsr=%08x\n", mxcsr & 0x1f80); - asm volatile("ldmxcsr %0" : : "m" (mxcsr)); - } - - test_sse_comi(2, -1); - test_sse_comi(2, 2); - test_sse_comi(2, 3); - test_sse_comi(2, q_nan.d); - test_sse_comi(q_nan.d, -1); - - for(i = 0; i < 2; i++) { - a.s[0] = 2.7; - a.s[1] = 3.4; - a.s[2] = 4; - a.s[3] = -6.3; - b.s[0] = 45.7; - b.s[1] = 353.4; - b.s[2] = 4; - b.s[3] = 56.3; - if (i == 1) { - a.s[0] = q_nan.d; - b.s[3] = q_nan.d; - } - - SSE_OPS(add); - SSE_OPS(mul); - SSE_OPS(sub); - SSE_OPS(min); - SSE_OPS(div); - SSE_OPS(max); - SSE_OPS(sqrt); - SSE_OPS(cmpeq); - SSE_OPS(cmplt); - SSE_OPS(cmple); - 
SSE_OPS(cmpunord); - SSE_OPS(cmpneq); - SSE_OPS(cmpnlt); - SSE_OPS(cmpnle); - SSE_OPS(cmpord); - - - a.d[0] = 2.7; - a.d[1] = -3.4; - b.d[0] = 45.7; - b.d[1] = -53.4; - if (i == 1) { - a.d[0] = q_nan.d; - b.d[1] = q_nan.d; - } - SSE_OPD(add); - SSE_OPD(mul); - SSE_OPD(sub); - SSE_OPD(min); - SSE_OPD(div); - SSE_OPD(max); - SSE_OPD(sqrt); - SSE_OPD(cmpeq); - SSE_OPD(cmplt); - SSE_OPD(cmple); - SSE_OPD(cmpunord); - SSE_OPD(cmpneq); - SSE_OPD(cmpnlt); - SSE_OPD(cmpnle); - SSE_OPD(cmpord); - } + // { + // uint32_t mxcsr; + // asm volatile("stmxcsr %0" : "=m" (mxcsr)); + // printf("mxcsr=%08x\n", mxcsr & 0x1f80); + // asm volatile("ldmxcsr %0" : : "m" (mxcsr)); + // } + + // test_sse_comi(2, -1); + // test_sse_comi(2, 2); + // test_sse_comi(2, 3); + // test_sse_comi(2, q_nan.d); + // test_sse_comi(q_nan.d, -1); + + // for(i = 0; i < 2; i++) { + // a.s[0] = 2.7; + // a.s[1] = 3.4; + // a.s[2] = 4; + // a.s[3] = -6.3; + // b.s[0] = 45.7; + // b.s[1] = 353.4; + // b.s[2] = 4; + // b.s[3] = 56.3; + // if (i == 1) { + // a.s[0] = q_nan.d; + // b.s[3] = q_nan.d; + // } + + // SSE_OPS(add); + // SSE_OPS(mul); + // SSE_OPS(sub); + // SSE_OPS(min); + // SSE_OPS(div); + // SSE_OPS(max); + // SSE_OPS(sqrt); + // SSE_OPS(cmpeq); + // SSE_OPS(cmplt); + // SSE_OPS(cmple); + // SSE_OPS(cmpunord); + // SSE_OPS(cmpneq); + // SSE_OPS(cmpnlt); + // SSE_OPS(cmpnle); + // SSE_OPS(cmpord); + + + // a.d[0] = 2.7; + // a.d[1] = -3.4; + // b.d[0] = 45.7; + // b.d[1] = -53.4; + // if (i == 1) { + // a.d[0] = q_nan.d; + // b.d[1] = q_nan.d; + // } + // SSE_OPD(add); + // SSE_OPD(mul); + // SSE_OPD(sub); + // SSE_OPD(min); + // SSE_OPD(div); + // SSE_OPD(max); + // SSE_OPD(sqrt); + // SSE_OPD(cmpeq); + // SSE_OPD(cmplt); + // SSE_OPD(cmple); + // SSE_OPD(cmpunord); + // SSE_OPD(cmpneq); + // SSE_OPD(cmpnlt); + // SSE_OPD(cmpnle); + // SSE_OPD(cmpord); + // } /* float to float/int */ - a.s[0] = 2.7; - a.s[1] = 3.4; - a.s[2] = 4; - a.s[3] = -6.3; - CVT_OP_XMM(cvtps2pd); - CVT_OP_XMM(cvtss2sd); - 
CVT_OP_XMM2MMX(cvtps2pi); - CVT_OP_XMM2MMX(cvttps2pi); - CVT_OP_XMM2REG(cvtss2si); - CVT_OP_XMM2REG(cvttss2si); - CVT_OP_XMM(cvtps2dq); - CVT_OP_XMM(cvttps2dq); - - a.d[0] = 2.6; - a.d[1] = -3.4; - CVT_OP_XMM(cvtpd2ps); + // a.s[0] = 2.7; + // a.s[1] = 3.4; + // a.s[2] = 4; + // a.s[3] = -6.3; + // CVT_OP_XMM(cvtps2pd); + // CVT_OP_XMM(cvtss2sd); + // CVT_OP_XMM2MMX(cvtps2pi); + // CVT_OP_XMM2MMX(cvttps2pi); + // CVT_OP_XMM2REG(cvtss2si); + // CVT_OP_XMM2REG(cvttss2si); + // CVT_OP_XMM(cvtps2dq); + // CVT_OP_XMM(cvttps2dq); + + // a.d[0] = 2.6; + // a.d[1] = -3.4; + // CVT_OP_XMM(cvtpd2ps); CVT_OP_XMM(cvtsd2ss); - CVT_OP_XMM2MMX(cvtpd2pi); - CVT_OP_XMM2MMX(cvttpd2pi); - CVT_OP_XMM2REG(cvtsd2si); + // CVT_OP_XMM2MMX(cvtpd2pi); + // CVT_OP_XMM2MMX(cvttpd2pi); + // CVT_OP_XMM2REG(cvtsd2si); CVT_OP_XMM2REG(cvttsd2si); - CVT_OP_XMM(cvtpd2dq); - CVT_OP_XMM(cvttpd2dq); + // CVT_OP_XMM(cvtpd2dq); + // CVT_OP_XMM(cvttpd2dq); /* sse/mmx moves */ - CVT_OP_XMM2MMX(movdq2q); - CVT_OP_MMX2XMM(movq2dq); + // CVT_OP_XMM2MMX(movdq2q); + // CVT_OP_MMX2XMM(movq2dq); /* int to float */ - a.l[0] = -6; - a.l[1] = 2; - a.l[2] = 100; - a.l[3] = -60000; - CVT_OP_MMX2XMM(cvtpi2ps); - CVT_OP_MMX2XMM(cvtpi2pd); - CVT_OP_REG2XMM(cvtsi2ss); + // a.l[0] = -6; + // a.l[1] = 2; + // a.l[2] = 100; + // a.l[3] = -60000; + // CVT_OP_MMX2XMM(cvtpi2ps); + // CVT_OP_MMX2XMM(cvtpi2pd); + // CVT_OP_REG2XMM(cvtsi2ss); CVT_OP_REG2XMM(cvtsi2sd); - CVT_OP_XMM(cvtdq2ps); - CVT_OP_XMM(cvtdq2pd); + // CVT_OP_XMM(cvtdq2ps); + // CVT_OP_XMM(cvtdq2pd); /* XXX: test PNI insns */ #if 0 @@ -2759,7 +2760,7 @@ int main(int argc, char **argv) test_conv(); #ifdef TEST_SSE test_sse(); - test_fxsave(); + //test_fxsave(); #endif return 0; } diff --git a/tests/e2e/qemu/test.sh b/tests/e2e/qemu/test.sh index 823d771002..f4582efcfc 100755 --- a/tests/e2e/qemu/test.sh +++ b/tests/e2e/qemu/test.sh @@ -1,4 +1,5 @@ #!/bin/sh # -no-pie because the test contains non-position-independent inline asm -gcc qemu-test.c -no-pie -o 
qemu-test +# silence a few warnings that I can't be bothered to fix +gcc qemu-test.c -o qemu-test -msse2 -no-pie -Wno-attributes -Wno-format ./qemu-test