diff --git a/emu/decode.h b/emu/decode.h index 9e59cbbbf6..ca7f8aaec7 100644 --- a/emu/decode.h +++ b/emu/decode.h @@ -292,7 +292,7 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) { case 0x70: TRACEI("pshufd xmm:modrm, xmm, imm8"); READMODRM; READIMM8; V_OP_IMM(shuffle_d, xmm_modrm_val, xmm_modrm_reg,128); break; - case 0x73: READMODRM; + case 0x73: READMODRM_NOMEM; switch (modrm.opcode) { case 0x02: TRACEI("psrlq imm, xmm"); READIMM8; V_OP(imm_shiftr_q, imm, xmm_modrm_reg, 128); break; @@ -339,6 +339,8 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) { READMODRM; V_OP(xor, xmm_modrm_val, xmm_modrm_reg,128); break; case 0xf3: TRACEI("psllq xmm:modrm, xmm"); READMODRM; V_OP(shiftl_q, xmm_modrm_val, xmm_modrm_reg,128); break; + case 0xf4: TRACEI("pmuludq xmm:modrm, xmm"); + READMODRM; V_OP(mulu_dq, xmm_modrm_val, xmm_modrm_reg,128); break; case 0xfb: TRACEI("psubq xmm:modrm, xmm"); READMODRM; V_OP(sub_q, xmm_modrm_val, xmm_modrm_reg,128); break; case 0xfc: TRACEI("paddb xmm:modrm, xmm"); @@ -363,10 +365,39 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) { case 0x57: TRACEI("xorps xmm:modrm, xmm"); READMODRM; V_OP(xor, xmm_modrm_val, xmm_modrm_reg,128); break; - case 0x6f: TRACEI("movq modrm, mm"); - READMODRM; VMOV(mm_modrm_val, mm_modrm_reg, 64); break; - case 0x7f: TRACEI("movq mm, modrm"); - READMODRM; VMOV(mm_modrm_reg, mm_modrm_val, 64); break; + case 0x62: TRACEI("punpckldq mm:modrm, mm"); + READMODRM; V_OP(unpack_dq, mm_modrm_val, mm_modrm_reg,64); break; + + case 0x6e: TRACEI("movd modrm, mm"); + READMODRM; VMOV(modrm_val, mm_modrm_reg,32); break; + case 0x6f: TRACEI("movq mm:modrm, mm"); + READMODRM; VMOV(mm_modrm_val, mm_modrm_reg,64); break; + + case 0x73: READMODRM; + switch (modrm.opcode) { + case 2: TRACEI("psrlq imm, mm"); + READIMM8; V_OP(imm_shiftr_q, imm, mm_modrm_reg, 64); break; + case 6: TRACEI("psllq imm, mm"); + READIMM8; V_OP(imm_shiftl_q, imm, mm_modrm_reg, 64); break; + default: UNDEFINED; + } + break; + + case 0x7e: TRACEI("movd mm, modrm"); + READMODRM; VMOV(mm_modrm_reg, modrm_val,32); break; + case 0x7f: TRACEI("movq mm, mm:modrm"); + READMODRM_MEM; VMOV(mm_modrm_reg, mm_modrm_val,64); break; + + case 0xd4: TRACEI("paddq mm:modrm, mm"); + READMODRM; V_OP(add_q, mm_modrm_val, mm_modrm_reg,64); break; + case 0xdb: TRACEI("pand mm:modrm, mm"); + READMODRM; V_OP(and, mm_modrm_val, mm_modrm_reg,64); break; + + case 0xef: TRACEI("pxor mm:modrm, mm"); + READMODRM; V_OP(xor, mm_modrm_val, mm_modrm_reg,64); break; + + case 0xf4: TRACEI("pmuludq mm:modrm, mm"); + READMODRM; V_OP(mulu_dq, mm_modrm_val, mm_modrm_reg,64); break; #endif default: TRACEI("undefined"); diff --git a/emu/vec.c b/emu/vec.c index c1852a3784..b198217343 100644 --- a/emu/vec.c +++ b/emu/vec.c @@ -18,6 +18,7 @@ VEC_ZERO_COPY(128, 128) VEC_ZERO_COPY(128, 64) VEC_ZERO_COPY(128, 32) VEC_ZERO_COPY(64, 64) +VEC_ZERO_COPY(64, 32) VEC_ZERO_COPY(32, 32) void vec_merge32(NO_CPU, const void *src, void *dst) { @@ -38,6 +39,12 @@ void vec_imm_shiftl_q128(NO_CPU, const uint8_t amount, union xmm_reg *dst) { dst->qw[1] <<= amount; } } +void vec_imm_shiftl_q64(NO_CPU, const uint8_t amount, union mm_reg *dst) { + if (amount > 63) + dst->qw = 0; + else + dst->qw <<= amount; +} void vec_imm_shiftr_q128(NO_CPU, const uint8_t amount, union xmm_reg *dst) { if (amount > 63) { @@ -47,6 +54,12 @@ void vec_imm_shiftr_q128(NO_CPU, const uint8_t amount, union xmm_reg *dst) { dst->qw[1] >>= amount; } } +void vec_imm_shiftr_q64(NO_CPU, const uint8_t amount, union mm_reg *dst) { + if (amount > 63) + dst->qw = 0; + else + dst->qw >>= amount; +} void vec_shiftl_q128(NO_CPU, union xmm_reg *amount, union xmm_reg *dst) { uint64_t amount_qw = amount->qw[0]; @@ -78,15 +91,33 @@ void vec_add_q128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) { dst->qw[0] += src->qw[0]; dst->qw[1] += src->qw[1]; } +void vec_add_q64(NO_CPU, union mm_reg *src, union mm_reg *dst) { + dst->qw += src->qw; +} void vec_sub_q128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) { dst->qw[0] -= src->qw[0]; dst->qw[1] -= src->qw[1]; } +void vec_mulu_dq128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) { + dst->qw[0] = (uint64_t) src->u32[0] * dst->u32[0]; + dst->qw[1] = (uint64_t) src->u32[2] * dst->u32[2]; +} +void vec_mulu_dq64(NO_CPU, union mm_reg *src, union mm_reg *dst) { + dst->qw = (uint64_t) src->dw[0] * dst->dw[0]; +} + void vec_and128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) { dst->qw[0] &= src->qw[0]; dst->qw[1] &= src->qw[1]; } +void vec_and64(NO_CPU, union mm_reg *src, union mm_reg *dst) { + dst->qw &= src->qw; +} +void vec_andn128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) { + dst->qw[0] = ~dst->qw[0] & src->qw[0]; + dst->qw[1] = ~dst->qw[1] & src->qw[1]; +} void vec_or128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) { dst->qw[0] |= src->qw[0]; dst->qw[1] |= src->qw[1]; @@ -95,9 +126,8 @@ void vec_xor128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) { dst->qw[0] ^= src->qw[0]; dst->qw[1] ^= src->qw[1]; } -void vec_andn128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) { - dst->qw[0] = ~dst->qw[0] & src->qw[0]; - dst->qw[1] = ~dst->qw[1] & src->qw[1]; +void vec_xor64(NO_CPU, union mm_reg *src, union mm_reg *dst) { + dst->qw ^= src->qw; } void vec_min_ub128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) { @@ -123,12 +153,12 @@ void vec_single_fcmp64(NO_CPU, const double *src, union xmm_reg *dst, uint8_t ty } void vec_single_fadd64(NO_CPU, const double *src, double *dst) { *dst += *src; } -void vec_single_fmul64(NO_CPU, const double *src, double *dst) { *dst *= *src; } -void vec_single_fsub64(NO_CPU, const double *src, double *dst) { *dst -= *src; } -void vec_single_fdiv64(NO_CPU, const double *src, double *dst) { *dst /= *src; } void vec_single_fadd32(NO_CPU, const float *src, float *dst) { *dst += *src; } +void vec_single_fmul64(NO_CPU, const double *src, double *dst) { *dst *= *src; } void vec_single_fmul32(NO_CPU, const float *src, float *dst) { *dst *= *src; } +void vec_single_fsub64(NO_CPU, const double *src, double *dst) { *dst -= *src; } void vec_single_fsub32(NO_CPU, const float *src, float *dst) { *dst -= *src; } +void vec_single_fdiv64(NO_CPU, const double *src, double *dst) { *dst /= *src; } void vec_single_fdiv32(NO_CPU, const float *src, float *dst) { *dst /= *src; } void vec_single_fmax64(NO_CPU, const double *src, double *dst) { @@ -160,10 +190,17 @@ void vec_single_ucomi64(struct cpu_state *cpu, const double *src, const double * cpu->sf_res = 0; } -// TODO float edge cases e.g. nan +// come to the dark side of macros +#define _ISNAN_int32_t(x) false +#define _ISNAN_float(x) isnan(x) +#define _ISNAN_double(x) isnan(x) +#define _ISNAN(x, t) _ISNAN_##t(x) #define VEC_CVT(name, src_t, dst_t) \ void vec_cvt##name(NO_CPU, const src_t *src, dst_t *dst) { \ - *dst = *src; \ + if (_ISNAN(*src, src_t)) \ + *dst = INT32_MIN; \ + else \ + *dst = *src; \ } VEC_CVT(si2sd32, int32_t, double) VEC_CVT(tsd2si64, double, int32_t) @@ -183,6 +220,9 @@ void vec_unpack_dq128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) { dst->u32[2] = dst->u32[1]; dst->u32[1] = src->u32[0]; } +void vec_unpack_dq64(NO_CPU, const union mm_reg *src, union mm_reg *dst) { + dst->dw[1] = src->dw[0]; +} void vec_unpack_qdq128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) { dst->qw[1] = src->qw[0]; } diff --git a/emu/vec.h b/emu/vec.h index 9f504f5dd6..f12743a6cd 100644 --- a/emu/vec.h +++ b/emu/vec.h @@ -11,6 +11,7 @@ void vec_zero128_copy128(NO_CPU, const void *src, void *dst); void vec_zero128_copy64(NO_CPU, const void *src, void *dst); void vec_zero128_copy32(NO_CPU, const void *src, void *dst); void vec_zero64_copy64(NO_CPU, const void *src, void *dst); +void vec_zero64_copy32(NO_CPU, const void *src, void *dst); void vec_zero32_copy32(NO_CPU, const void *src, void *dst); // "merge" means don't zero the register before writing to it void vec_merge32(NO_CPU, const void *src, void *dst); @@ -18,27 +19,34 @@ void vec_merge64(NO_CPU, const void *src, void *dst); void vec_merge128(NO_CPU, const void *src, void *dst); void vec_imm_shiftl_q128(NO_CPU, const uint8_t amount, union xmm_reg *dst); +void vec_imm_shiftl_q64(NO_CPU, const uint8_t amount, union mm_reg *dst); void vec_imm_shiftr_q128(NO_CPU, const uint8_t amount, union xmm_reg *dst); +void vec_imm_shiftr_q64(NO_CPU, const uint8_t amount, union mm_reg *dst); void vec_shiftl_q128(NO_CPU, union xmm_reg *amount, union xmm_reg *dst); void vec_shiftr_q128(NO_CPU, union xmm_reg *amount, union xmm_reg *dst); void vec_add_b128(NO_CPU, union xmm_reg *src, union xmm_reg *dst); void vec_add_q128(NO_CPU, union xmm_reg *src, union xmm_reg *dst); +void vec_add_q64(NO_CPU, union mm_reg *src, union mm_reg *dst); void vec_sub_q128(NO_CPU, union xmm_reg *src, union xmm_reg *dst); +void vec_mulu_dq128(NO_CPU, union xmm_reg *src, union xmm_reg *dst); +void vec_mulu_dq64(NO_CPU, union mm_reg *src, union mm_reg *dst); + void vec_and128(NO_CPU, union xmm_reg *src, union xmm_reg *dst); +void vec_and64(NO_CPU, union mm_reg *src, union mm_reg *dst); void vec_andn128(NO_CPU, union xmm_reg *src, union xmm_reg *dst); void vec_or128(NO_CPU, union xmm_reg *src, union xmm_reg *dst); void vec_xor128(NO_CPU, union xmm_reg *src, union xmm_reg *dst); +void vec_xor64(NO_CPU, union mm_reg *src, union mm_reg *dst); void vec_min_ub128(NO_CPU, union xmm_reg *src, union xmm_reg *dst); - void vec_single_fadd64(NO_CPU, const double *src, double *dst); -void vec_single_fmul64(NO_CPU, const double *src, double *dst); -void vec_single_fsub64(NO_CPU, const double *src, double *dst); -void vec_single_fdiv64(NO_CPU, const double *src, double *dst); void vec_single_fadd32(NO_CPU, const float *src, float *dst); +void vec_single_fmul64(NO_CPU, const double *src, double *dst); void vec_single_fmul32(NO_CPU, const float *src, float *dst); +void vec_single_fsub64(NO_CPU, const double *src, double *dst); void vec_single_fsub32(NO_CPU, const float *src, float *dst); +void vec_single_fdiv64(NO_CPU, const double *src, double *dst); void vec_single_fdiv32(NO_CPU, const float *src, float *dst); void vec_single_fmax64(NO_CPU, const double *src, double *dst); @@ -57,6 +65,7 @@ void vec_cvtss2sd32(NO_CPU, const float *src, double *dst); // TODO organize void vec_unpack_bw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst); void vec_unpack_dq128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst); +void vec_unpack_dq64(NO_CPU, const union mm_reg *src, union mm_reg *dst); void vec_unpack_qdq128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst); void vec_shuffle_lw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst, uint8_t encoding); void vec_shuffle_d128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst, uint8_t encoding); diff --git a/jit/gen.c b/jit/gen.c index e96378b682..768a364cd5 100644 --- a/jit/gen.c +++ b/jit/gen.c @@ -491,7 +491,7 @@ static inline bool gen_vec(enum arg src, enum arg dst, void (*helper)(), gadget_ g(vec_helper_imm); GEN(helper); // This is rm_opcode instead of opcode because PSRLQ is weird like that - GEN(((uint16_t) imm) | (CPU_OFFSET(xmm[modrm->rm_opcode]) << 16)); + GEN(((uint16_t) imm) | (cpu_reg_offset(reg, modrm->rm_opcode) << 16)); break; default: die("unimplemented vecarg"); diff --git a/tests/e2e/qemu/expected.txt b/tests/e2e/qemu/expected.txt index 5241225264..eebdea9347 100644 --- a/tests/e2e/qemu/expected.txt +++ b/tests/e2e/qemu/expected.txt @@ -4360,16 +4360,26 @@ pcmpeqb : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab pcmpeqb : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=0000ffff0000ff0000000000ffffffff pcmpeqd : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=00000000000000000000000000000000 pcmpeqd : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=000000000000000000000000ffffffff +paddq : a=456723c698694873 b=1f297ccd58bad7ab r=6490a093f124201e +paddq : a=007c62c2085427f8 b=0f76255a085427f8 r=0ff2881c10a84ff0 paddq : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=1e437bfb3e2e3a326490a093f124201e paddq : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=e54fd3d192b087270ff2881c10a84ff0 pminub : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=41511efb944a58461f2923c658694873 pminub : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=231be9e8c4c9438d0076255a085427f8 +pand : a=456723c698694873 b=1f297ccd58bad7ab r=052120c418284023 +pand : a=007c62c2085427f8 b=0f76255a085427f8 r=00742042085427f8 pand : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=40501cfb80424044052120c418284023 pand : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=0213e9e8c4c1438800742042085427f8 por : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=ddf35effbdebf9ee5f6f7fcfd8fbdffb por : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=e33be9e8cdef439f0f7e67da085427f8 +pxor : a=456723c698694873 b=1f297ccd58bad7ab r=5a4e5f0bc0d39fd8 +pxor : a=007c62c2085427f8 b=0f76255a085427f8 r=0f0a479800000000 pxor : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=9da342043da9b9aa5a4e5f0bc0d39fd8 pxor : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=e1280000092e00170f0a479800000000 +pmuludq : a=456723c698694873 b=1f297ccd58bad7ab r=34d36dcc65b9f9d1 +pmuludq : a=007c62c2085427f8 b=0f76255a085427f8 r=00455e29c0fd8040 +pmuludq : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=6269151e89bfbc8834d36dcc65b9f9d1 +pmuludq : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=9e46f0ab618189d200455e29c0fd8040 psubq : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=9a5f3e03ea6677a6263da6f93fae70c8 psubq : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=60e80000091dfff3f1063d6800000000 paddb : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=1d437afa3d2d393264909f93f0231f1e @@ -4397,29 +4407,45 @@ pshufd : a=231be9e8cde7438d007c62c2085427f8 ib=78 r=007c62c2231be9e8cde7438d08 pshuflw : a=dc515cff944a58ec456723c698694873 ib=78 r=dc515cff944a58ec9869456723c64873 pshuflw : a=231be9e8cde7438d007c62c2085427f8 ib=78 r=231be9e8cde7438d0854007c62c227f8 psrlq : a=dc515cff944a58ec456723c698694873 ib=07 r=01b8a2b9ff2894b1008ace478d30d290 +psrlq : a=456723c698694873 ib=07 r=008ace478d30d290 psrlq : a=231be9e8cde7438d007c62c2085427f8 ib=07 r=004637d3d19bce870000f8c58410a84f +psrlq : a=007c62c2085427f8 ib=07 r=0000f8c58410a84f psrlq : a=dc515cff944a58ec456723c698694873 b=00000000000000000000000000000007 r=01b8a2b9ff2894b1008ace478d30d290 psrlq : a=231be9e8cde7438d007c62c2085427f8 b=00000000000000000000000000000007 r=004637d3d19bce870000f8c58410a84f psrlq : a=dc515cff944a58ec456723c698694873 ib=20 r=00000000dc515cff00000000456723c6 +psrlq : a=456723c698694873 ib=20 r=00000000456723c6 psrlq : a=231be9e8cde7438d007c62c2085427f8 ib=20 r=00000000231be9e800000000007c62c2 +psrlq : a=007c62c2085427f8 ib=20 r=00000000007c62c2 psrlq : a=dc515cff944a58ec456723c698694873 b=00000000000000000000000000000020 r=00000000dc515cff00000000456723c6 psrlq : a=231be9e8cde7438d007c62c2085427f8 b=00000000000000000000000000000020 r=00000000231be9e800000000007c62c2 psllq : a=dc515cff944a58ec456723c698694873 ib=07 r=28ae7fca252c7600b391e34c34a43980 +psllq : a=456723c698694873 ib=07 r=b391e34c34a43980 psllq : a=231be9e8cde7438d007c62c2085427f8 ib=07 r=8df4f466f3a1c6803e3161042a13fc00 +psllq : a=007c62c2085427f8 ib=07 r=3e3161042a13fc00 psllq : a=dc515cff944a58ec456723c698694873 b=00000000000000000000000000000007 r=28ae7fca252c7600b391e34c34a43980 psllq : a=231be9e8cde7438d007c62c2085427f8 b=00000000000000000000000000000007 r=8df4f466f3a1c6803e3161042a13fc00 psllq : a=dc515cff944a58ec456723c698694873 ib=20 r=944a58ec000000009869487300000000 +psllq : a=456723c698694873 ib=20 r=9869487300000000 psllq : a=231be9e8cde7438d007c62c2085427f8 ib=20 r=cde7438d00000000085427f800000000 +psllq : a=007c62c2085427f8 ib=20 r=085427f800000000 psllq : a=dc515cff944a58ec456723c698694873 b=00000000000000000000000000000020 r=944a58ec000000009869487300000000 psllq : a=231be9e8cde7438d007c62c2085427f8 b=00000000000000000000000000000020 r=cde7438d00000000085427f800000000 psrlq : a=dc515cff944a58ec456723c698694873 ib=10 r=0000dc515cff944a0000456723c69869 +psrlq : a=456723c698694873 ib=10 r=0000456723c69869 psrlq : a=231be9e8cde7438d007c62c2085427f8 ib=10 r=0000231be9e8cde70000007c62c20854 +psrlq : a=007c62c2085427f8 ib=10 r=0000007c62c20854 psrlq : a=dc515cff944a58ec456723c698694873 ib=07 r=01b8a2b9ff2894b1008ace478d30d290 +psrlq : a=456723c698694873 ib=07 r=008ace478d30d290 psrlq : a=231be9e8cde7438d007c62c2085427f8 ib=07 r=004637d3d19bce870000f8c58410a84f +psrlq : a=007c62c2085427f8 ib=07 r=0000f8c58410a84f psllq : a=dc515cff944a58ec456723c698694873 ib=10 r=5cff944a58ec000023c6986948730000 +psllq : a=456723c698694873 ib=10 r=23c6986948730000 psllq : a=231be9e8cde7438d007c62c2085427f8 ib=10 r=e9e8cde7438d000062c2085427f80000 +psllq : a=007c62c2085427f8 ib=10 r=62c2085427f80000 psllq : a=dc515cff944a58ec456723c698694873 ib=07 r=28ae7fca252c7600b391e34c34a43980 +psllq : a=456723c698694873 ib=07 r=b391e34c34a43980 psllq : a=231be9e8cde7438d007c62c2085427f8 ib=07 r=8df4f466f3a1c6803e3161042a13fc00 +psllq : a=007c62c2085427f8 ib=07 r=3e3161042a13fc00 movmskpd : a=dc515cff944a58ec456723c698694873 r=00000002 movmskpd : a=231be9e8cde7438d007c62c2085427f8 r=00000000 ucomiss : a=2.000000 b=-1.000000 cc=0000 diff --git a/tests/e2e/qemu/qemu-test.c b/tests/e2e/qemu/qemu-test.c index 62c20a7722..4771c2981a 100644 --- a/tests/e2e/qemu/qemu-test.c +++ b/tests/e2e/qemu/qemu-test.c @@ -2205,6 +2205,13 @@ static uint64_t __attribute__((aligned(16))) test_values[4][2] = { a.q[1], a.q[0],\ ib,\ r.q[1], r.q[0]);\ + a.q[0] = test_values[2*i][0];\ + asm volatile (#op " $" #ib ", %0" : "=y" (r.q[0]) : "0" (a.q[0]));\ + printf("%-9s: a=" FMT64X " ib=%02x r=" FMT64X "\n",\ + #op,\ + a.q[0],\ + ib,\ + r.q[0]);\ }\ } @@ -2419,12 +2426,12 @@ void test_sse(void) // MMX_OP2(pcmpeqw); SSE_OP2(pcmpeqd); - SSE_OP2(paddq); + MMX_OP2(paddq); // MMX_OP2(pmullw); // MMX_OP2(psubusb); // MMX_OP2(psubusw); SSE_OP2(pminub); - SSE_OP2(pand); + MMX_OP2(pand); // MMX_OP2(paddusb); // MMX_OP2(paddusw); // MMX_OP2(pmaxub); @@ -2440,8 +2447,8 @@ void test_sse(void) // MMX_OP2(paddsb); // MMX_OP2(paddsw); // MMX_OP2(pmaxsw); - SSE_OP2(pxor); - // MMX_OP2(pmuludq); + MMX_OP2(pxor); + MMX_OP2(pmuludq); // MMX_OP2(pmaddwd); // MMX_OP2(psadbw); // MMX_OP2(psubb); @@ -2558,6 +2565,7 @@ void test_sse(void) MOVMSK(movmskpd); /* FPU specific ops */ + asm volatile ("emms"); // { // uint32_t mxcsr;