Skip to content

Commit

Permalink
Add enough SSE2 instructions for go hello world
Browse files Browse the repository at this point in the history
Go binaries built outside of iSH are usually built using SSE2, so some
support is needed to run them. ish-app#698
  • Loading branch information
tbodt committed May 16, 2020
1 parent f7d9d82 commit c5b512d
Show file tree
Hide file tree
Showing 7 changed files with 79 additions and 40 deletions.
42 changes: 32 additions & 10 deletions emu/cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,24 @@ union mm_reg {
// One SSE (xmm) register. All members overlay the same 16 bytes (checked by
// the static_assert below), so a value can be written through one view and
// read back through another.
union xmm_reg {
// two qword_t lanes
qword_t qw[2];
// four dword_t lanes
dword_t dw[4];
// four float lanes
float f32[4];
// two double lanes; the scalar-double helpers operate on f64[0]
double f64[2];
// TODO more forms
};
// Compile-time layout checks: the register unions must be exactly 16 and 8
// bytes, i.e. no member or padding may widen them.
static_assert(sizeof(union xmm_reg) == 16, "xmm_reg size");
static_assert(sizeof(union mm_reg) == 8, "mm_reg size");

struct cpu_state {
struct mem *mem;
struct jit *jit;

// general registers
// assumes little endian (as does literally everything)
#define _REG(n) \
union { \
dword_t e##n; \
word_t n; \
};
}
#define _REGX(n) \
union { \
dword_t e##n##x; \
Expand All @@ -40,16 +45,21 @@ struct cpu_state {
byte_t n##l; \
byte_t n##h; \
}; \
};
}

_REGX(a);
_REGX(b);
_REGX(c);
_REGX(d);
_REG(si);
_REG(di);
_REG(bp);
_REG(sp);
union {
struct {
_REGX(a);
_REGX(c);
_REGX(d);
_REGX(b);
_REG(sp);
_REG(bp);
_REG(si);
_REG(di);
};
dword_t regs[8];
};
// NOTE(review): the helper macros defined above are spelled _REGX and _REG
// (leading underscore), so these two directives do not appear to undefine
// them — confirm whether `#undef _REGX` / `#undef _REG` was intended.
#undef REGX
#undef REG

Expand Down Expand Up @@ -153,6 +163,18 @@ struct cpu_state {
dword_t trapno;
};

// Byte offset of a field (or array element) inside struct cpu_state.
#define CPU_OFFSET(field) offsetof(struct cpu_state, field)

// The named general registers and regs[] alias the same storage. These checks
// pin the order of that aliasing: regs[0..7] must be
// eax, ecx, edx, ebx, esp, ebp, esi, edi, so a register can be selected by
// index through regs[] and land on the intended named field.
static_assert(CPU_OFFSET(eax) == CPU_OFFSET(regs[0]), "register order");
static_assert(CPU_OFFSET(ecx) == CPU_OFFSET(regs[1]), "register order");
static_assert(CPU_OFFSET(edx) == CPU_OFFSET(regs[2]), "register order");
static_assert(CPU_OFFSET(ebx) == CPU_OFFSET(regs[3]), "register order");
static_assert(CPU_OFFSET(esp) == CPU_OFFSET(regs[4]), "register order");
static_assert(CPU_OFFSET(ebp) == CPU_OFFSET(regs[5]), "register order");
static_assert(CPU_OFFSET(esi) == CPU_OFFSET(regs[6]), "register order");
static_assert(CPU_OFFSET(edi) == CPU_OFFSET(regs[7]), "register order");
// Every offset into cpu_state has to fit in 16 bits; per the message, the
// vector gadgets are what impose this limit.
static_assert(sizeof(struct cpu_state) < UINT16_MAX, "cpu struct is too big for vector gadgets");

// flags
#define ZF (cpu->zf_res ? cpu->res == 0 : cpu->zf)
#define SF (cpu->sf_res ? (int32_t) cpu->res < 0 : cpu->sf)
Expand Down
17 changes: 12 additions & 5 deletions emu/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; CMOVN(LE, modrm_val, modrm_reg,oz); break;

case 0x57: TRACEI("xorps xmm, xmm:modrm");
READMODRM; VXOR(xmm_modrm_val, xmm_modrm_reg,128);
READMODRM; V_OP(xor, xmm_modrm_val, xmm_modrm_reg,128);
break;

case 0x77: TRACEI("emms (ignored because there is no mmx)"); break;
Expand Down Expand Up @@ -292,7 +292,7 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; VMOV(xmm_modrm_reg, xmm_modrm_val,64); break;

case 0xef: TRACEI("pxor xmm:modrm xmm");
READMODRM; VXOR(xmm_modrm_val, xmm_modrm_reg,128); break;
READMODRM; V_OP(xor, xmm_modrm_val, xmm_modrm_reg,128); break;
#else
case 0x6f: TRACEI("movq modrm, mm");
READMODRM; VMOV(mm_modrm_val, mm_modrm_reg, 64); break;
Expand Down Expand Up @@ -878,12 +878,19 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
case 0x11: TRACEI("movsd xmm, xmm:modrm");
READMODRM; VMOV_MERGE_REG(xmm_modrm_reg, xmm_modrm_val,64); break;

case 0x2a: TRACEI("cvtsi2sd modrm, xmm");
READMODRM; V_OP(cvtsi2sd, modrm_val, xmm_modrm_reg,32); break;
case 0x2c: TRACEI("cvtsd2si reg, xmm:modrm");
READMODRM; V_OP(cvtsd2si, xmm_modrm_val, modrm_reg,32); break;

case 0x58: TRACEI("addsd xmm:modrm, xmm");
READMODRM; VS_FMATH(add, xmm_modrm_val, xmm_modrm_reg,64); break;
READMODRM; V_OP(fadds, xmm_modrm_val, xmm_modrm_reg,64); break;
case 0x59: TRACEI("mulsd xmm:modrm, xmm");
READMODRM; VS_FMATH(mul, xmm_modrm_val, xmm_modrm_reg,64); break;
READMODRM; V_OP(fmuls, xmm_modrm_val, xmm_modrm_reg,64); break;
case 0x5c: TRACEI("subsd xmm:modrm, xmm");
READMODRM; VS_FMATH(sub, xmm_modrm_val, xmm_modrm_reg,64); break;
READMODRM; V_OP(fsubs, xmm_modrm_val, xmm_modrm_reg,64); break;
case 0x5e: TRACEI("divsd xmm:modrm, xmm");
READMODRM; V_OP(fdivs, xmm_modrm_val, xmm_modrm_reg,64); break;

case 0x18 ... 0x1f: TRACEI("rep nop modrm\t"); READMODRM; break;
default: TRACE("undefined"); UNDEFINED;
Expand Down
10 changes: 10 additions & 0 deletions emu/vec.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,13 @@ void vec_fmuls64(NO_CPU, const double *src, double *dst) {
// Scalar double-precision subtract: replaces *dst with (*dst - *src).
// src: the subtrahend (read only); dst: the minuend, updated in place.
void vec_fsubs64(NO_CPU, const double *src, double *dst) {
    const double subtrahend = *src;
    *dst = *dst - subtrahend;
}
// Scalar double-precision divide: replaces *dst with (*dst / *src).
// src: the divisor (read only); dst: the dividend, updated in place.
void vec_fdivs64(NO_CPU, const double *src, double *dst) {
    const double divisor = *src;
    *dst = *dst / divisor;
}

// cvtsi2sd: convert a 32-bit integer to double and store it in the low lane
// (f64[0]) of dst. The upper lane of dst is left untouched.
//
// src: the 32-bit integer operand. The parameter type is unsigned, but the
//      instruction defines its source as a *signed* doubleword, so the bits
//      are reinterpreted as int32_t before converting. Without this, e.g. -1
//      would be converted as 4294967295.0 instead of -1.0.
// dst: the destination xmm register.
void vec_cvtsi2sd32(NO_CPU, const uint32_t *src, union xmm_reg *dst) {
    dst->f64[0] = (int32_t) *src;
}
// Convert the low double lane (f64[0]) of src to a 32-bit signed integer,
// truncating toward zero, and store its bit pattern in *dst.
//
// Converting a negative or out-of-range double directly to uint32_t is
// undefined behaviour in C, so the value is converted through int32_t. Inputs
// that do not fit in int32_t (including NaN and infinities) produce
// 0x80000000, the x86 "integer indefinite" result.
//
// NOTE(review): truncation matches the original cast. The decoder in this
// change routes opcode 0x2c here, which looks like the truncating form
// (cvttsd2si) despite the helper's name — confirm if a rounding variant is
// ever wired up to this helper.
void vec_cvtsd2si32(NO_CPU, const union xmm_reg *src, uint32_t *dst) {
    const double value = src->f64[0];
    // Written as a negated range test so that NaN (for which every
    // comparison is false) takes the indefinite path too.
    if (!(value > -2147483649.0 && value < 2147483648.0)) {
        *dst = UINT32_C(0x80000000);
        return;
    }
    *dst = (uint32_t) (int32_t) value;
}
4 changes: 4 additions & 0 deletions emu/vec.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,9 @@ void vec_xor128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
// Scalar double-precision arithmetic helpers. Each takes (src, dst) and
// updates *dst in place: vec.c defines fsubs as *dst -= *src and fdivs as
// *dst /= *src. fadds and fmuls presumably follow the same pattern — confirm
// against their definitions in vec.c.
void vec_fadds64(NO_CPU, const double *src, double *dst);
void vec_fmuls64(NO_CPU, const double *src, double *dst);
void vec_fsubs64(NO_CPU, const double *src, double *dst);
void vec_fdivs64(NO_CPU, const double *src, double *dst);

// Conversions between a 32-bit integer and the low double lane (f64[0]) of an
// xmm register: cvtsi2sd writes dst->f64[0] from *src, cvtsd2si writes *dst
// from src->f64[0].
void vec_cvtsi2sd32(NO_CPU, const uint32_t *src, union xmm_reg *dst);
void vec_cvtsd2si32(NO_CPU, const union xmm_reg *src, uint32_t *dst);

#endif
10 changes: 5 additions & 5 deletions jit/gadgets-aarch64/misc.S
Original file line number Diff line number Diff line change
Expand Up @@ -199,22 +199,22 @@ do_helper 2
# the argument order should be a consistent src, dst
.ifc \rm,reg
# src
ldrb w1, [_ip, 8]
ldrh w1, [_ip, 8]
add x1, x0, x1
# dst
ldrb w2, [_ip, 9]
ldrh w2, [_ip, 10]
add x2, x0, x2
.endif
.ifc \rm,read
# src
mov x1, _xaddr
# dst
ldrb w2, [_ip, 16]
ldrh w2, [_ip, 16]
add x2, x0, x2
.endif
.ifc \rm,write
# src
ldrb w1, [_ip, 16]
ldrh w1, [_ip, 16]
add x1, x0, x1
# dst
mov x2, _xaddr
Expand All @@ -223,7 +223,7 @@ do_helper 2
# src
ldrh w1, [_ip, 8]
# dst
ldrb w2, [_ip, 10]
ldrh w2, [_ip, 10]
add x2, x0, x2
.endif

Expand Down
10 changes: 5 additions & 5 deletions jit/gadgets-x86_64/misc.S
Original file line number Diff line number Diff line change
Expand Up @@ -150,22 +150,22 @@ do_helper 2
# the argument order should be a consistent src, dst
.ifc \rm,reg
# src
movb 8(%_ip), %r14b
movw 8(%_ip), %r14w
leaq (%_cpu,%r14), %rsi
# dst
movb 9(%_ip), %r14b
movw 10(%_ip), %r14w
leaq (%_cpu,%r14), %rdx
.endif
.ifc \rm,read
# src
movq %_addrq, %rsi
# dst
movb 16(%_ip), %r14b
movw 16(%_ip), %r14w
leaq (%_cpu,%r14), %rdx
.endif
.ifc \rm,write
# src
movb 16(%_ip), %r14b
movw 16(%_ip), %r14w
leaq (%_cpu,%r14), %rsi
# dst
movq %_addrq, %rdx
Expand All @@ -174,7 +174,7 @@ do_helper 2
# src
movw 8(%_ip), %si
# dst
movb 10(%_ip), %r14b
movw 10(%_ip), %r14w
leaq (%_cpu,%r14), %rdx
.endif

Expand Down
26 changes: 11 additions & 15 deletions jit/gen.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <assert.h>
#include <stdint.h>
#include "jit/gen.h"
#include "emu/modrm.h"
#include "emu/cpuid.h"
Expand Down Expand Up @@ -427,22 +428,17 @@ void helper_rdtsc(struct cpu_state *cpu);

// vector

// The offset will eventually exceed 1 byte. When that happens the gadgets will
// need to be updated, and then the assert can be removed.
#define CPU_OFFSET(field) ({ \
assert(offsetof(struct cpu_state, field) < 256); \
offsetof(struct cpu_state, field); \
})

// Reports whether an operand kind is one of the three "modrm value" kinds
// (general, mm or xmm); every other kind yields false.
static inline bool could_be_memory(enum arg arg) {
    switch (arg) {
        case arg_modrm_val:
        case arg_mm_modrm_val:
        case arg_xmm_modrm_val:
            return true;
        default:
            return false;
    }
}

static inline uint8_t cpu_reg_offset(enum arg arg, int index) {
static inline uint16_t cpu_reg_offset(enum arg arg, int index) {
if (arg == arg_xmm_modrm_reg || arg == arg_xmm_modrm_val)
return CPU_OFFSET(xmm[index]);
if (arg == arg_mm_modrm_reg || arg == arg_xmm_modrm_val)
if (arg == arg_mm_modrm_reg || arg == arg_mm_modrm_val)
return CPU_OFFSET(mm[index]);
if (arg == arg_modrm_reg || arg == arg_modrm_val)
return CPU_OFFSET(regs[index]);
return 0;
}

Expand All @@ -451,8 +447,8 @@ static inline bool gen_vec(enum arg src, enum arg dst, void (*helper)(), gadget_
enum arg rm = rm_is_src ? src : dst;
enum arg reg = rm_is_src ? dst : src;

uint8_t reg_offset = cpu_reg_offset(reg, modrm->opcode);
uint8_t rm_reg_offset = cpu_reg_offset(rm, modrm->rm_opcode);
uint16_t reg_offset = cpu_reg_offset(reg, modrm->opcode);
uint16_t rm_reg_offset = cpu_reg_offset(rm, modrm->rm_opcode);
assert(reg_offset != 0);

if (could_be_memory(rm) && modrm->type != modrm_reg)
Expand All @@ -461,14 +457,15 @@ static inline bool gen_vec(enum arg src, enum arg dst, void (*helper)(), gadget_
switch (rm) {
case arg_xmm_modrm_val:
case arg_mm_modrm_val:
case arg_modrm_val:
assert(rm_reg_offset != 0);
g(vec_helper_reg);
GEN(helper);
// low 16 bits are the src offset, high 16 bits are the dst offset
if (rm_is_src)
GEN(rm_reg_offset | (reg_offset << 8));
GEN(rm_reg_offset | (reg_offset << 16));
else
GEN(reg_offset | (rm_reg_offset << 8));
GEN(reg_offset | (rm_reg_offset << 16));
break;

case arg_mem:
Expand Down Expand Up @@ -523,8 +520,7 @@ static inline bool gen_vec(enum arg src, enum arg dst, void (*helper)(), gadget_

#define VCOMPARE(src, dst,z) v(compare, src, dst,z)
#define VSHIFTR_IMM(src, dst, z) v_imm(imm_shiftr, src, dst,z)
#define VXOR(src, dst,z) v(xor, src, dst,z)
#define VS_FMATH(op, src, dst,z) v(f##op##s, src, dst,z)
#define V_OP(op, src, dst, z) v(op, src, dst, z)

#define DECODER_RET int
#define DECODER_NAME gen_step
Expand Down

0 comments on commit c5b512d

Please sign in to comment.