Skip to content

Commit

Permalink
Refactor vector instructions again
Browse files Browse the repository at this point in the history
Argument order is sane again!
  • Loading branch information
tbodt committed May 16, 2020
1 parent 3795b28 commit f63b888
Show file tree
Hide file tree
Showing 6 changed files with 170 additions and 157 deletions.
33 changes: 18 additions & 15 deletions emu/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
case 0x18 ... 0x1f: TRACEI("nop modrm\t"); READMODRM; break;

case 0x28: TRACEI("movaps xmm:modrm, xmm");
READMODRM; VLOAD(xmm_modrm_val, xmm_modrm_reg,128); break;
READMODRM; VMOV(xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x29: TRACEI("movaps xmm, xmm:modrm");
READMODRM; VSTORE(xmm_modrm_reg, xmm_modrm_val,128); break;
READMODRM; VMOV(xmm_modrm_reg, xmm_modrm_val,128); break;

case 0x2e: TRACEI("ucomiss xmm, xmm:modrm");
READMODRM; VCOMPARE(xmm_modrm_val, xmm_modrm_reg,32);
Expand Down Expand Up @@ -267,11 +267,11 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {

#if OP_SIZE == 16
case 0x6e: TRACEI("movd modrm, xmm");
// TODO: REX.W = 1 might be needed later
READMODRM; VZLOAD(xmm_modrm_val, xmm_modrm_reg,32); break;
// TODO: this is supposed to use general registers!
READMODRM; VMOV(xmm_modrm_val, xmm_modrm_reg,32); break;

case 0x6f: TRACEI("movdqa xmm:modrm, xmm");
READMODRM; VLOAD(xmm_modrm_val, xmm_modrm_reg,128); break;
READMODRM; VMOV(xmm_modrm_val, xmm_modrm_reg,128); break;

case 0x73: READMODRM;
switch (modrm.opcode) {
Expand All @@ -282,19 +282,22 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
break;

case 0x7e: TRACEI("movd xmm, modrm");
// TODO: REX.W = 1 might be needed later
READMODRM; VSTORE(xmm_modrm_reg, xmm_modrm_val,32); break;
// TODO: this is supposed to use general registers!
READMODRM; VMOV(xmm_modrm_reg, xmm_modrm_val,32); break;

case 0x7f: TRACEI("movdqa xmm, xmm:modrm");
READMODRM; VSTORE(xmm_modrm_reg, xmm_modrm_val,128); break;
READMODRM; VMOV(xmm_modrm_reg, xmm_modrm_val,128); break;

case 0xd6: TRACEI("movq xmm, xmm:modrm");
READMODRM; VMOV(xmm_modrm_reg, xmm_modrm_val,64); break;

case 0xef: TRACEI("pxor xmm:modrm xmm");
READMODRM; VXOR(xmm_modrm_val, xmm_modrm_reg,128); break;
#else
case 0x6f: TRACEI("movq modrm, mm");
READMODRM; VLOAD(mm_modrm_val, mm_modrm_reg, 64); break;
READMODRM; VMOV(mm_modrm_val, mm_modrm_reg, 64); break;
case 0x7f: TRACEI("movq mm, modrm");
READMODRM; VSTORE(mm_modrm_reg, mm_modrm_val, 64); break;
READMODRM; VMOV(mm_modrm_reg, mm_modrm_val, 64); break;
#endif

default: TRACEI("undefined");
Expand Down Expand Up @@ -871,9 +874,9 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READINSN;
switch (insn) {
case 0x10: TRACEI("movsd xmm:modrm, xmm");
READMODRM; VLOAD_PADMEM(xmm_modrm_val, xmm_modrm_reg,64); break;
READMODRM; VMOV_MERGE_REG(xmm_modrm_val, xmm_modrm_reg,64); break;
case 0x11: TRACEI("movsd xmm, xmm:modrm");
READMODRM; VSTORE(xmm_modrm_reg, xmm_modrm_val,64); break;
READMODRM; VMOV_MERGE_REG(xmm_modrm_reg, xmm_modrm_val,64); break;

case 0x58: TRACEI("addsd xmm:modrm, xmm");
READMODRM; VS_FMATH(add, xmm_modrm_val, xmm_modrm_reg,64); break;
Expand Down Expand Up @@ -904,14 +907,14 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READINSN;
switch (insn) {
case 0x10: TRACEI("movss xmm:modrm, xmm");
READMODRM; VLOAD_PADMEM(xmm_modrm_val, xmm_modrm_reg,32);
READMODRM; VMOV_MERGE_REG(xmm_modrm_val, xmm_modrm_reg,32);
break;
case 0x11: TRACEI("movss xmm, xmm:modrm");
READMODRM; VSTORE(xmm_modrm_reg, xmm_modrm_val,32);
READMODRM; VMOV_MERGE_REG(xmm_modrm_reg, xmm_modrm_val,32);
break;

case 0x7e: TRACEI("movq xmm:modrm, xmm");
READMODRM; VZLOAD(xmm_modrm_val, xmm_modrm_reg,64);
READMODRM; VMOV(xmm_modrm_val, xmm_modrm_reg,64);
break;

case 0x18 ... 0x1f: TRACEI("repz nop modrm\t"); READMODRM; break;
Expand Down
57 changes: 22 additions & 35 deletions emu/vec.c
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
#include <math.h>
#include <string.h>

#include "emu/vec.h"
#include "emu/cpu.h"

/////////////////////////////////////////////
// See header file for the confusing thing //
// that is argument ordering in this file //
/////////////////////////////////////////////

void vec_compare32(struct cpu_state *cpu, float *f2, float *f1) {
if (isnan(*f1) || isnan(*f2)) {
cpu->zf = 1;
Expand Down Expand Up @@ -36,60 +32,51 @@ void vec_compare32(struct cpu_state *cpu, float *f2, float *f1) {
cpu->pf_res = 0;
}

void vec_load32(struct cpu_state *UNUSED(cpu), const void *src, void *dst) {
memcpy(dst, src, 4);
}
void vec_load64(struct cpu_state *UNUSED(cpu), const void *src, void *dst) {
memcpy(dst, src, 8);
}
void vec_load128(struct cpu_state *UNUSED(cpu), const void *src, void *dst) {
memcpy(dst, src, 16);
}

// Clear a vector register by writing zero to both of its qw lanes.
static inline void zero_xmm(union xmm_reg *xmm) {
    for (int lane = 0; lane < 2; lane++) {
        xmm->qw[lane] = 0;
    }
}
#define ZLOAD(sz) \
void vec_zload##sz(struct cpu_state *cpu, const union xmm_reg *src, union xmm_reg *dst) { \
zero_xmm(dst); \
vec_load##sz(cpu, src, dst); \
}
ZLOAD(32)
ZLOAD(64)
ZLOAD(128)
#undef ZLOAD

void vec_store32(struct cpu_state *UNUSED(cpu), void *dst, void *src) {
// Defines vec_zero<zero>_copy<copy>(cpu, src, dst). After the call, the first
// copy/8 bytes of dst hold the first copy/8 bytes of src, and the remaining
// bytes up to zero/8 are cleared. Requires zero >= copy (both in bits).
//
// src and dst can point at the same register when an instruction names one
// register for both operands (e.g. movq xmm0, xmm0). Clearing dst before
// reading src would then destroy the data being copied, and memcpy on
// overlapping objects is undefined. So the payload is moved first with
// memmove and only the tail beyond it is zeroed.
#define VEC_ZERO_COPY(zero, copy) \
    void vec_zero##zero##_copy##copy(NO_CPU, const void *src, void *dst) { \
        memmove(dst, src, (copy)/8); \
        memset((char *) dst + (copy)/8, 0, ((zero) - (copy))/8); \
    }
VEC_ZERO_COPY(128, 128)
VEC_ZERO_COPY(128, 64)
VEC_ZERO_COPY(128, 32)
VEC_ZERO_COPY(64, 64)

// Copy 4 bytes from src into the start of dst; the rest of dst is untouched.
// memmove is used instead of memcpy because src and dst may be the same
// register, and memcpy on overlapping objects is undefined behavior.
void vec_merge32(NO_CPU, const void *src, void *dst) {
    memmove(dst, src, 4);
}
void vec_store64(struct cpu_state *UNUSED(cpu), void *dst, void *src) {
// Copy 8 bytes from src into the start of dst; the rest of dst is untouched.
// memmove is used instead of memcpy because src and dst may be the same
// register, and memcpy on overlapping objects is undefined behavior.
void vec_merge64(NO_CPU, const void *src, void *dst) {
    memmove(dst, src, 8);
}
void vec_store128(struct cpu_state *UNUSED(cpu), void *dst, void *src) {
// Copy 16 bytes from src into dst.
// memmove is used instead of memcpy because src and dst may be the same
// register, and memcpy on overlapping objects is undefined behavior.
void vec_merge128(NO_CPU, const void *src, void *dst) {
    memmove(dst, src, 16);
}

void vec_imm_shiftr64(struct cpu_state *UNUSED(cpu), const uint8_t amount, union xmm_reg *src) {
void vec_imm_shiftr64(NO_CPU, const uint8_t amount, union xmm_reg *dst) {
if (amount > 63) {
zero_xmm(src);
zero_xmm(dst);
} else {
src->qw[0] >>= amount;
src->qw[1] >>= amount;
dst->qw[0] >>= amount;
dst->qw[1] >>= amount;
}
}

void vec_xor128(struct cpu_state *UNUSED(cpu), union xmm_reg *src, union xmm_reg *dst) {
// XOR src into dst in place, one qw lane at a time (two lanes).
void vec_xor128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
    for (int lane = 0; lane < 2; lane++) {
        dst->qw[lane] ^= src->qw[lane];
    }
}

void vec_fadds64(struct cpu_state *UNUSED(cpu), const double *src, double *dst) {
// Scalar double add: *dst becomes *dst + *src.
void vec_fadds64(NO_CPU, const double *src, double *dst) {
    *dst = *dst + *src;
}
void vec_fmuls64(struct cpu_state *UNUSED(cpu), const double *src, double *dst) {
// Scalar double multiply: *dst becomes *dst * *src.
void vec_fmuls64(NO_CPU, const double *src, double *dst) {
    *dst = *dst * *src;
}
void vec_fsubs64(struct cpu_state *UNUSED(cpu), const double *src, double *dst) {
// Scalar double subtract: *dst becomes *dst - *src.
void vec_fsubs64(NO_CPU, const double *src, double *dst) {
    *dst = *dst - *src;
}
61 changes: 21 additions & 40 deletions emu/vec.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,45 +3,26 @@

#include "emu/cpu.h"

void vec_compare32(struct cpu_state *UNUSED(cpu), float *f2, float *f1);

/**
* Argument ordering swaps back and forth because laziness has taken
* precedence over actual quality. To minimize gadget complicatedness,
* the second argument is always an XMM. If either arg is memory, the
* first one is.
*
* Corresponding with jit/gen.c:
* =============================
* - If v(...) is being used, the first argument is source.
* - If v_write(...) is being used, the first argument is being written to.
* Because the first argument is the operand that might be memory.
*
* jit/gen method | arg order
* ----------------|------------
* v() | const a, b
* v_write() | a, const b
*/

void vec_load32(struct cpu_state *UNUSED(cpu), const union xmm_reg *src, union xmm_reg *dst);
void vec_load64(struct cpu_state *UNUSED(cpu), const union xmm_reg *src, union xmm_reg *dst);
void vec_load128(struct cpu_state *UNUSED(cpu), const union xmm_reg *src, union xmm_reg *dst);

// Zeroes out the destination before loading.
// Used in some instructions like movss when the src is memory.
void vec_zload32(struct cpu_state *UNUSED(cpu), const union xmm_reg *src, union xmm_reg *dst);
void vec_zload64(struct cpu_state *UNUSED(cpu), const union xmm_reg *src, union xmm_reg *dst);
void vec_zload128(struct cpu_state *UNUSED(cpu), const union xmm_reg *src, union xmm_reg *dst);

void vec_store32(struct cpu_state *UNUSED(cpu), union xmm_reg *src, const union xmm_reg *dst);
void vec_store64(struct cpu_state *UNUSED(cpu), union xmm_reg *src, const union xmm_reg *dst);
void vec_store128(struct cpu_state *UNUSED(cpu), union xmm_reg *src, const union xmm_reg *dst);

void vec_imm_shiftr64(struct cpu_state *UNUSED(cpu), const uint8_t amount, union xmm_reg *src);
void vec_xor128(struct cpu_state *cpu, union xmm_reg *src, union xmm_reg *dst);

void vec_fadds64(struct cpu_state *cpu, const double *src, double *dst);
void vec_fmuls64(struct cpu_state *cpu, const double *src, double *dst);
void vec_fsubs64(struct cpu_state *cpu, const double *src, double *dst);
#define NO_CPU struct cpu_state *UNUSED(cpu)
void vec_compare32(NO_CPU, float *f2, float *f1);

// arguments are in src, dst order

// vec_zeroA_copyB: the first B bits of dst are loaded from src, and the
// remaining bits of dst up to A are cleared. A and B are sizes in bits.
void vec_zero128_copy128(NO_CPU, const void *src, void *dst);
void vec_zero128_copy64(NO_CPU, const void *src, void *dst);
void vec_zero128_copy32(NO_CPU, const void *src, void *dst);
void vec_zero64_copy64(NO_CPU, const void *src, void *dst);

// "merge" means don't zero the register before writing to it
void vec_merge32(NO_CPU, const void *src, void *dst);
void vec_merge64(NO_CPU, const void *src, void *dst);
void vec_merge128(NO_CPU, const void *src, void *dst);

void vec_imm_shiftr64(NO_CPU, const uint8_t amount, union xmm_reg *dst);
void vec_xor128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);

void vec_fadds64(NO_CPU, const double *src, double *dst);
void vec_fmuls64(NO_CPU, const double *src, double *dst);
void vec_fsubs64(NO_CPU, const double *src, double *dst);

#endif
42 changes: 28 additions & 14 deletions jit/gadgets-aarch64/misc.S
Original file line number Diff line number Diff line change
Expand Up @@ -196,27 +196,41 @@ do_helper 2
save_c
mov x0, _cpu

# r/m argument, first
.ifin(\rm, reg)
ldrb w1, [_ip, 9]
# the argument order should be a consistent src, dst
.ifc \rm,reg
# src
ldrb w1, [_ip, 8]
add x1, x0, x1
.endifin
.ifin(\rm, read,write)
# dst
ldrb w2, [_ip, 9]
add x2, x0, x2
.endif
.ifc \rm,read
# src
mov x1, _xaddr
.endifin
.ifin(\rm, imm)
ldrh w1, [_ip, 9]
.endifin
# dst
ldrb w2, [_ip, 16]
add x2, x0, x2
.endif
.ifc \rm,write
# src
ldrb w1, [_ip, 16]
add x1, x0, x1
# dst
mov x2, _xaddr
.endif
.ifc \rm,imm
# src
ldrh w1, [_ip, 8]
# dst
ldrb w2, [_ip, 10]
add x2, x0, x2
.endif

# reg argument, second
.ifin(\rm, read,write)
ldr x2, [_ip, 16]
add x2, x0, x2
ldr x8, [_ip, 8]
.endifin
.ifin(\rm, reg,imm)
ldr x2, [_ip, 8]
add x2, x0, x2
ldr x8, [_ip]
.endifin
blr x8
Expand Down
42 changes: 28 additions & 14 deletions jit/gadgets-x86_64/misc.S
Original file line number Diff line number Diff line change
Expand Up @@ -147,27 +147,41 @@ do_helper 2
movq %_cpu, %rdi
xorq %r14, %r14

# r/m argument, first
.ifin(\rm, reg)
movb 9(%_ip), %r14b
# the argument order should be a consistent src, dst
.ifc \rm,reg
# src
movb 8(%_ip), %r14b
leaq (%_cpu,%r14), %rsi
.endifin
.ifin(\rm, read,write)
# dst
movb 9(%_ip), %r14b
leaq (%_cpu,%r14), %rdx
.endif
.ifc \rm,read
# src
movq %_addrq, %rsi
.endifin
.ifin(\rm, imm)
movb 9(%_ip), %sil
.endifin

# reg argument, second
.ifin(\rm, read,write)
# dst
movb 16(%_ip), %r14b
leaq (%_cpu,%r14), %rdx
.endif
.ifc \rm,write
# src
movb 16(%_ip), %r14b
leaq (%_cpu,%r14), %rsi
# dst
movq %_addrq, %rdx
.endif
.ifc \rm,imm
# src
movw 8(%_ip), %si
# dst
movb 10(%_ip), %r14b
leaq (%_cpu,%r14), %rdx
.endif

.ifin(\rm, read,write)
callq *8(%_ip)
.endifin
.ifin(\rm, reg,imm)
movb 8(%_ip), %r14b
leaq (%_cpu,%r14), %rdx
callq *(%_ip)
.endifin

Expand Down
Loading

0 comments on commit f63b888

Please sign in to comment.