Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Shrinkflation #133

Merged
merged 5 commits into from
Feb 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion app/Project.xcconfig
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include "iSH.xcconfig"

MARKETING_VERSION = 1.2.4
MARKETING_VERSION = 1.3

ENABLE_BITCODE = NO // no idea why
PRODUCT_BUNDLE_IDENTIFIER = $(ROOT_BUNDLE_IDENTIFIER)
Expand Down
2 changes: 1 addition & 1 deletion deps/aports/main/x86/index.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
APKINDEX-v3.14-2023-02-12.tar.gz
APKINDEX-v3.14-2023-02-16.tar.gz
18 changes: 9 additions & 9 deletions emu/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -289,13 +289,13 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; V_OP(fmovmask_d, xmm_modrm_val, modrm_reg,128); break;

case 0x54: TRACEI("andpd xmm:modrm, xmm");
READMODRM; V_OP(and, xmm_modrm_val, xmm_modrm_reg,128); break;
READMODRM; V_OP(and_dq, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x55: TRACEI("andnpd xmm:modrm, xmm");
READMODRM; V_OP(andn, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x56: TRACEI("orpd xmm:modrm, xmm");
READMODRM; V_OP(or, xmm_modrm_val, xmm_modrm_reg,128); break;
READMODRM; V_OP(or_dq, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x57: TRACEI("xorpd xmm:modrm, xmm");
READMODRM; V_OP(xor, xmm_modrm_val, xmm_modrm_reg,128); break;
READMODRM; V_OP(xor_dq, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x58: TRACEI("addpd xmm:modrm, xmm");
READMODRM; V_OP(add_p, xmm_modrm_val, xmm_modrm_reg,64); break;
case 0x59: TRACEI("mulpd xmm:modrm, xmm");
Expand Down Expand Up @@ -413,7 +413,7 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
case 0xda: TRACEI("pminub xmm:modrm, xmm");
READMODRM; V_OP(min_ub, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xdb: TRACEI("pand xmm:modrm, xmm");
READMODRM; V_OP(and, xmm_modrm_val, xmm_modrm_reg,128); break;
READMODRM; V_OP(and_dq, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xdc: TRACEI("paddusb xmm:modrm, xmm");
READMODRM; V_OP(addus_b, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xdd: TRACEI("paddusw xmm:modrm, xmm");
Expand Down Expand Up @@ -445,15 +445,15 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
case 0xea: TRACEI("pminsw xmm:modrm, xmm");
READMODRM; V_OP(mins_w, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xeb: TRACEI("por xmm:modrm, xmm");
READMODRM; V_OP(or, xmm_modrm_val, xmm_modrm_reg,128); break;
READMODRM; V_OP(or_dq, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xec: TRACEI("paddsb xmm:modrm, xmm");
READMODRM; V_OP(addss_b, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xed: TRACEI("paddsw xmm:modrm, xmm");
READMODRM; V_OP(addss_w, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xee: TRACEI("pmaxsw xmm:modrm, xmm");
READMODRM; V_OP(maxs_w, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xef: TRACEI("pxor xmm:modrm, xmm");
READMODRM; V_OP(xor, xmm_modrm_val, xmm_modrm_reg,128); break;
READMODRM; V_OP(xor_dq, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xf1: TRACEI("psllw xmm:modrm, xmm");
READMODRM; V_OP(shiftl_w, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xf2: TRACEI("pslld xmm:modrm, xmm");
Expand Down Expand Up @@ -503,13 +503,13 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; V_OP(single_ucomi, xmm_modrm_val, xmm_modrm_reg,32); break;

case 0x54: TRACEI("andps xmm:modrm, xmm");
READMODRM; V_OP(and, xmm_modrm_val, xmm_modrm_reg,128); break;
READMODRM; V_OP(and_dq, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x55: TRACEI("andnps xmm:modrm, xmm");
READMODRM; V_OP(andn, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x56: TRACEI("orps xmm:modrm, xmm");
READMODRM; V_OP(or, xmm_modrm_val, xmm_modrm_reg,128); break;
READMODRM; V_OP(or_dq, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x57: TRACEI("xorps xmm:modrm, xmm");
READMODRM; V_OP(xor, xmm_modrm_val, xmm_modrm_reg,128); break;
READMODRM; V_OP(xor_dq, xmm_modrm_val, xmm_modrm_reg,128); break;

case 0x58: TRACEI("addps xmm:modrm, xmm");
READMODRM; V_OP(add_p, xmm_modrm_val, xmm_modrm_reg,32); break;
Expand Down
139 changes: 34 additions & 105 deletions emu/mmx.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,41 @@ union vec {

/*
 * Defines vec_<name>_<suffix>64(): for every u<size> lane, performs
 * `dst lane op= src lane`.
 *
 * Both operands are copied into local `union vec` values through their .qw
 * member, the lanes are combined, and the result is stored back to dst->qw
 * in a single assignment.
 */
#define VEC_MMX_OP(name, suffix, op, size) \
    void vec_##name##_##suffix##64(NO_CPU, const union mm_reg *src, union mm_reg *dst) { \
        union vec s = { .qw = src->qw }, d = { .qw = dst->qw }; \
        for (unsigned i = 0; i < array_size(s.u##size); i++) \
            d.u##size[i] op##= s.u##size[i]; \
        dst->qw = d.qw; \
    }

/*
 * Defines vec_compare<sgn>_<suffix>64(): lane-wise comparison.
 *
 * Each u<size> lane of dst and src is cast to <usgn>int<size>_t and compared
 * with `relop`; the dst lane becomes all ones (~0) when the comparison holds
 * and 0 otherwise. `usgn` is either empty (signed compare) or `u` (unsigned
 * compare); `sgn` only affects the generated function name.
 */
#define _VEC_MMX_CMP(sgn, usgn, suffix, relop, size) \
    void vec_compare##sgn##_##suffix##64(NO_CPU, const union mm_reg *src, union mm_reg *dst) { \
        union vec s = { .qw = src->qw }, d = { .qw = dst->qw }; \
        for (unsigned i = 0; i < array_size(s.u##size); i++) \
            d.u##size[i] = (usgn##int##size##_t)d.u##size[i] relop (usgn##int##size##_t)s.u##size[i] ? ~0 : 0;\
        dst->qw = d.qw; \
    }

/*
 * Shared body for the shift functions. Expects `amount` and `dst` to be in
 * scope at the expansion site.
 *
 * A count of `size` or more clears the whole register; otherwise every
 * u<size> lane of *dst is shifted with `op` by `amount`.
 */
#define _SHIFT(op, size) \
    do { \
        if (unlikely(amount >= (size))) { \
            dst->qw = 0; \
            break; \
        } \
        union vec lanes = { .qw = dst->qw }; \
        for (unsigned lane = 0; lane < array_size(lanes.u##size); lane++) \
            lanes.u##size[lane] op##= amount; \
        dst->qw = lanes.qw; \
    } while (0)

/*
 * Defines the two entry points for one shift direction and lane width:
 *   vec_shift<dir>_<suffix>64()     - count taken from src->qw
 *   vec_imm_shift<dir>_<suffix>64() - count passed as an 8-bit immediate
 * Both delegate to _SHIFT, which picks up `amount` and `dst` by name.
 *
 * The register form keeps the count as a full 64-bit value. Narrowing it to
 * uint8_t would turn e.g. a count of 256 into 0 and leave the register
 * unchanged, whereas any count above size-1 must clear it.
 */
#define VEC_MMX_SHIFT(dir, suffix, op, size) \
    void vec_shift##dir##_##suffix##64(NO_CPU, const union mm_reg *src, union mm_reg *dst) { \
        const uint64_t amount = src->qw; \
        _SHIFT(op, size); \
    } \
    void vec_imm_shift##dir##_##suffix##64(NO_CPU, const uint8_t amount, union mm_reg *dst) { \
        _SHIFT(op, size); \
    }

/* Wraps _VEC_MMX_CMP with an empty name infix and the `u` type prefix, so the
 * generated vec_compare_<suffix>64() compares lanes as uint<size>_t. */
#define VEC_MMX_CMPD(suffix, relop, size) \
_VEC_MMX_CMP(, u, suffix, relop, size)
#define VEC_MMX_CMPS(suffix, relop, size) \
Expand Down Expand Up @@ -62,105 +83,13 @@ VEC_MMX_CMPS(gtb, >, 8)
/* Greater-than lane compares for 16- and 32-bit lanes, generated through
 * VEC_MMX_CMPS. NOTE(review): that macro's definition is not fully visible
 * here; presumably it is the signed counterpart of VEC_MMX_CMPD - confirm. */
VEC_MMX_CMPS(gtw, >, 16)
VEC_MMX_CMPS(gtd, >, 32)

/* Left-shifts every u16 lane of *dst by the count held in src->qw.
 * A count of 16 or more clears the whole register. */
void vec_shiftl_w64(NO_CPU, const union mm_reg *src, union mm_reg *dst) {
    if (src->qw >= 16) {
        dst->qw = 0;
        return;
    }
    union vec lanes = { .qw = dst->qw };
    for (unsigned lane = 0; lane < array_size(lanes.u16); lane++)
        lanes.u16[lane] = lanes.u16[lane] << src->qw;
    dst->qw = lanes.qw;
}
/* Left-shifts both dw lanes of *dst by the count held in src->qw.
 * A count above 31 clears the whole register. */
void vec_shiftl_d64(NO_CPU, const union mm_reg *src, union mm_reg *dst) {
    /* Read the count once: if src and dst refer to the same register, writing
     * dw[0] would otherwise change the count used for dw[1]. */
    const uint64_t count = src->qw;
    if (count > 31) {
        dst->qw = 0;
        return;
    }
    dst->dw[0] <<= count;
    dst->dw[1] <<= count;
}
/* Left-shifts the whole 64-bit value of *dst by the count held in src->qw.
 * A count of 64 or more yields zero. */
void vec_shiftl_q64(NO_CPU, const union mm_reg *src, union mm_reg *dst) {
    if (src->qw >= 64) {
        dst->qw = 0;
        return;
    }
    dst->qw = dst->qw << src->qw;
}

/* Right-shifts every u16 lane of *dst by the count held in src->qw.
 * A count of 16 or more clears the whole register. */
void vec_shiftr_w64(NO_CPU, const union mm_reg *src, union mm_reg *dst) {
    if (src->qw >= 16) {
        dst->qw = 0;
        return;
    }
    union vec lanes = { .qw = dst->qw };
    for (unsigned lane = 0; lane < array_size(lanes.u16); lane++)
        lanes.u16[lane] = lanes.u16[lane] >> src->qw;
    dst->qw = lanes.qw;
}
/* Right-shifts both dw lanes of *dst by the count held in src->qw.
 * A count above 31 clears the whole register. */
void vec_shiftr_d64(NO_CPU, const union mm_reg *src, union mm_reg *dst) {
    /* Read the count once: if src and dst refer to the same register, writing
     * dw[0] would otherwise change the count used for dw[1]. */
    const uint64_t count = src->qw;
    if (count > 31) {
        dst->qw = 0;
        return;
    }
    dst->dw[0] >>= count;
    dst->dw[1] >>= count;
}
/* Right-shifts the whole 64-bit value of *dst by the count held in src->qw.
 * A count of 64 or more yields zero. */
void vec_shiftr_q64(NO_CPU, const union mm_reg *src, union mm_reg *dst) {
    if (src->qw >= 64) {
        dst->qw = 0;
        return;
    }
    dst->qw = dst->qw >> src->qw;
}

/* Left-shifts every u16 lane of *dst by the immediate `amount`.
 * An amount of 16 or more clears the whole register. */
void vec_imm_shiftl_w64(NO_CPU, const uint8_t amount, union mm_reg *dst) {
    if (amount >= 16) {
        dst->qw = 0;
        return;
    }
    union vec lanes = { .qw = dst->qw };
    for (unsigned lane = 0; lane < array_size(lanes.u16); lane++)
        lanes.u16[lane] = lanes.u16[lane] << amount;
    dst->qw = lanes.qw;
}
/* Left-shifts both dw lanes of *dst by the immediate `amount`.
 * An amount of 32 or more clears the whole register. */
void vec_imm_shiftl_d64(NO_CPU, const uint8_t amount, union mm_reg *dst) {
    if (amount >= 32) {
        dst->qw = 0;
        return;
    }
    for (unsigned half = 0; half < 2; half++)
        dst->dw[half] <<= amount;
}
/* Left-shifts the whole 64-bit value of *dst by the immediate `amount`.
 * An amount of 64 or more yields zero. */
void vec_imm_shiftl_q64(NO_CPU, const uint8_t amount, union mm_reg *dst) {
    if (amount >= 64) {
        dst->qw = 0;
        return;
    }
    dst->qw = dst->qw << amount;
}
/* Generate vec_shiftr_{w,d,q}64 and vec_imm_shiftr_{w,d,q}64: `>>=` applied
 * to the u16, u32 and u64 lanes respectively. */
VEC_MMX_SHIFT(r, w, >>, 16)
VEC_MMX_SHIFT(r, d, >>, 32)
VEC_MMX_SHIFT(r, q, >>, 64)

/* Right-shifts every u16 lane of *dst by the immediate `amount`.
 * An amount of 16 or more clears the whole register. */
void vec_imm_shiftr_w64(NO_CPU, const uint8_t amount, union mm_reg *dst) {
    if (amount >= 16) {
        dst->qw = 0;
        return;
    }
    union vec lanes = { .qw = dst->qw };
    for (unsigned lane = 0; lane < array_size(lanes.u16); lane++)
        lanes.u16[lane] = lanes.u16[lane] >> amount;
    dst->qw = lanes.qw;
}
/* Right-shifts both dw lanes of *dst by the immediate `amount`.
 * An amount of 32 or more clears the whole register. */
void vec_imm_shiftr_d64(NO_CPU, const uint8_t amount, union mm_reg *dst) {
    if (amount >= 32) {
        dst->qw = 0;
        return;
    }
    for (unsigned half = 0; half < 2; half++)
        dst->dw[half] >>= amount;
}
/* Right-shifts the whole 64-bit value of *dst by the immediate `amount`.
 * An amount of 64 or more yields zero. */
void vec_imm_shiftr_q64(NO_CPU, const uint8_t amount, union mm_reg *dst) {
    if (amount >= 64) {
        dst->qw = 0;
        return;
    }
    dst->qw = dst->qw >> amount;
}
/* Generate vec_shiftl_{w,d,q}64 and vec_imm_shiftl_{w,d,q}64: `<<=` applied
 * to the u16, u32 and u64 lanes respectively. */
VEC_MMX_SHIFT(l, w, <<, 16)
VEC_MMX_SHIFT(l, d, <<, 32)
VEC_MMX_SHIFT(l, q, <<, 64)

void vec_shiftrs_w64(NO_CPU, const union mm_reg *src, union mm_reg *dst) {
union vec d = { .qw = dst->qw };
Expand Down Expand Up @@ -206,15 +135,15 @@ void vec_imm_shiftrs_d64(NO_CPU, const uint8_t amount, union mm_reg *dst) {
}

/* For each of the four u16 lanes, multiplies the dst and src lanes as signed
 * 16-bit values and stores the upper 16 bits of the product in the dst lane. */
void vec_mulu64(NO_CPU, const union mm_reg *src, union mm_reg *dst) {
    union vec s = { .qw = src->qw }, d = { .qw = dst->qw };
    for (unsigned i = 0; i < 4; i++) {
        /* The int product is converted to uint32_t so the right shift below
         * operates on an unsigned value. */
        uint32_t res = ((int16_t)d.u16[i] * (int16_t)s.u16[i]);
        d.u16[i] = ((res >> 16) & 0xffff);
    }
    dst->qw = d.qw;
}
void vec_mull64(NO_CPU, const union mm_reg *src, union mm_reg *dst) {
union vec s = { .qw = src->qw}, d = { .qw = dst->qw };
union vec s = { .qw = src->qw }, d = { .qw = dst->qw };
for (int i = 0; i < 4; i++) {
d.u16[i] = (uint16_t)(d.u16[i] * s.u16[i]);
}
Expand All @@ -229,14 +158,14 @@ void vec_unpackl_dq64(NO_CPU, const union mm_reg *src, union mm_reg *dst) {
}

/* Rearranges the four u16 lanes of src into dst. Lane i of the result is the
 * src lane selected by bits [2i+1:2i] of `encoding`. */
void vec_shuffle_w64(NO_CPU, const union mm_reg *src, union mm_reg *dst, uint8_t encoding) {
    union vec s = { .qw = src->qw }, d = { .qw = dst->qw };
    for (unsigned i = 0; i < 4; i++)
        d.u16[i] = s.u16[(encoding >> (2 * i)) % 4];
    dst->qw = d.qw;
}

void vec_movmask_b64(NO_CPU, const union mm_reg *src, uint32_t *dst) {
union vec s = { .qw = src->qw};
union vec s = { .qw = src->qw };
*dst = 0;
for (unsigned i = 0; i < array_size(s.u8); i++) {
if (s.u8[i] & (1 << 7))
Expand All @@ -245,7 +174,7 @@ void vec_movmask_b64(NO_CPU, const union mm_reg *src, uint32_t *dst) {
}

/* Replaces one u16 lane of *dst with the low 16 bits of *src. The lane is
 * chosen by `index` modulo 4; the other lanes are left unchanged. */
void vec_insert_w64(NO_CPU, const uint32_t *src, union mm_reg *dst, uint8_t index) {
    union vec d = { .qw = dst->qw };
    d.u16[index % 4] = (uint16_t)*src;
    dst->qw = d.qw;
}
Loading