Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Mono] Enable the supported V128 SIMD intrinsics on Arm64 across all codegen engines #84289

Merged
merged 6 commits into from
Apr 12, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Fix create*
  • Loading branch information
fanyang-mono committed Apr 11, 2023
commit 31283700d3a8b13090d16dcc6fe047f60a05e31c
12 changes: 10 additions & 2 deletions src/mono/mono/mini/cpu-arm64.mdesc
Original file line number Diff line number Diff line change
Expand Up @@ -521,8 +521,16 @@ expand_i4: dest:x src1:i len:4
expand_i8: dest:x src1:i len:4
expand_r4: dest:x src1:f len:4
expand_r8: dest:x src1:f len:4
create_scalar: dest:x src1:i len:12
create_scalar_unsafe: dest:x src1:i len:4
insert_i1: dest:x src1:i len:4
insert_i2: dest:x src1:i len:4
insert_i4: dest:x src1:i len:4
insert_i8: dest:x src1:i len:4
insert_r4: dest:x src1:f len:4
insert_r8: dest:x src1:f len:4
create_scalar_int: dest:x src1:i len:8
create_scalar_float: dest:x src1:f len:12
create_scalar_unsafe_int: dest:x src1:i len:4
create_scalar_unsafe_float: dest:x src1:f len:4

generic_class_init: src1:a len:44 clob:c
gc_safe_point: src1:i len:12 clob:c
Expand Down
58 changes: 43 additions & 15 deletions src/mono/mono/mini/mini-arm64.c
Original file line number Diff line number Diff line change
Expand Up @@ -3821,13 +3821,25 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
case OP_INSERT_I1:
case OP_INSERT_I2:
case OP_INSERT_I4:
case OP_INSERT_I8:
case OP_INSERT_R4:
case OP_INSERT_R8: {
case OP_INSERT_I8: {
const int t = get_type_size_macro (ins->inst_c1);
arm_neon_ins_g(code, t, dreg, sreg1, ins->inst_c0);
break;
}
case OP_INSERT_R4:
case OP_INSERT_R8: {
int t;
switch (ins->inst_c1) {
case MONO_TYPE_R4:
t = SIZE_4;
break;
case MONO_TYPE_R8:
t = SIZE_8;
break;
}
arm_neon_ins_e(code, t, dreg, sreg1, ins->inst_c0, 0);
break;
}
case OP_ARM64_XADDV: {
switch (ins->inst_c0) {
case INTRINS_AARCH64_ADV_SIMD_FADDV:
Expand All @@ -3854,8 +3866,14 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
}
break;
}
case OP_CREATE_SCALAR: {
int t = get_type_size_macro (ins->inst_c1);
case OP_CREATE_SCALAR_INT: {
const int t = get_type_size_macro (ins->inst_c1);
arm_neon_eor_16b (code, dreg, dreg, dreg);
arm_neon_ins_g(code, t, dreg, sreg1, 0);
break;
}
case OP_CREATE_SCALAR_FLOAT: {
int t;
switch (ins->inst_c1) {
case MONO_TYPE_R4:
t = SIZE_4;
Expand All @@ -3864,22 +3882,32 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
t = SIZE_8;
break;
}
if (is_type_float_macro (ins->inst_c1)) {
// ins expects an integer register
arm_fmov_double_to_rx(code, NEON_TMP_REG, sreg1);
arm_neon_eor_16b (code, dreg, dreg, dreg);
arm_neon_ins_g(code, t, dreg, NEON_TMP_REG, 0);
} else {
arm_neon_eor_16b (code, dreg, dreg, dreg);
arm_neon_ins_g(code, t, dreg, sreg1, 0);
}
// Use a temp register for zero op, as sreg1 and dreg share the same register here
arm_neon_eor_16b (code, NEON_TMP_REG, NEON_TMP_REG, NEON_TMP_REG);
fanyang-mono marked this conversation as resolved.
Show resolved Hide resolved
arm_neon_ins_e(code, t, NEON_TMP_REG, sreg1, 0, 0);
arm_neon_mov (code, dreg, NEON_TMP_REG);
break;
}
case OP_CREATE_SCALAR_UNSAFE: {
case OP_CREATE_SCALAR_UNSAFE_INT: {
const int t = get_type_size_macro (ins->inst_c1);
arm_neon_ins_g(code, t, dreg, sreg1, 0);
break;
}
case OP_CREATE_SCALAR_UNSAFE_FLOAT: {
if (dreg != sreg1) {
int t;
switch (ins->inst_c1) {
fanyang-mono marked this conversation as resolved.
Show resolved Hide resolved
case MONO_TYPE_R4:
t = SIZE_4;
break;
case MONO_TYPE_R8:
t = SIZE_8;
break;
}
arm_neon_ins_e(code, t, dreg, sreg1, 0, 0);
}
break;
}
// Enable this when adding support for Narrow and enable support for Create at the same time
// case OP_XCONCAT:
// arm_neon_ext_16b(code, dreg, sreg1, sreg2, 8);
Expand Down
5 changes: 5 additions & 0 deletions src/mono/mono/mini/mini-ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -1168,6 +1168,11 @@ MINI_OP3(OP_MULX_HL64, "mulxhl64", LREG, LREG, LREG, LREG)
MINI_OP(OP_CREATE_SCALAR_UNSAFE, "create_scalar_unsafe", XREG, XREG, NONE)
MINI_OP(OP_CREATE_SCALAR, "create_scalar", XREG, XREG, NONE)

MINI_OP(OP_CREATE_SCALAR_UNSAFE_INT, "create_scalar_unsafe_int", XREG, IREG, NONE)
MINI_OP(OP_CREATE_SCALAR_UNSAFE_FLOAT, "create_scalar_unsafe_float", XREG, FREG, NONE)
MINI_OP(OP_CREATE_SCALAR_INT, "create_scalar_int", XREG, IREG, NONE)
MINI_OP(OP_CREATE_SCALAR_FLOAT, "create_scalar_float", XREG, FREG, NONE)

MINI_OP(OP_XMOVE, "xmove", XREG, XREG, NONE)
MINI_OP(OP_XZERO, "xzero", XREG, NONE, NONE)
MINI_OP(OP_XONES, "xones", XREG, NONE, NONE)
Expand Down
32 changes: 25 additions & 7 deletions src/mono/mono/mini/simd-intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -843,9 +843,9 @@ type_to_expand_op (MonoTypeEnum type)
}

static int
type_to_insert_op (MonoType *type)
type_to_insert_op (MonoTypeEnum type)
{
switch (type->type) {
switch (type) {
case MONO_TYPE_I1:
case MONO_TYPE_U1:
return OP_INSERT_I1;
Expand Down Expand Up @@ -992,14 +992,15 @@ emit_hardware_intrinsics (
static MonoInst *
emit_vector_create_elementwise (
MonoCompile *cfg, MonoMethodSignature *fsig, MonoType *vtype,
MonoType *etype, MonoInst **args)
MonoTypeEnum type, MonoInst **args)
{
int op = type_to_insert_op (etype);
int op = type_to_insert_op (type);
MonoClass *vklass = mono_class_from_mono_type_internal (vtype);
MonoInst *ins = emit_xzero (cfg, vklass);
for (int i = 0; i < fsig->param_count; ++i) {
ins = emit_simd_ins (cfg, vklass, op, ins->dreg, args [i]->dreg);
ins->inst_c0 = i;
ins->inst_c1 = type;
}
return ins;
}
Expand Down Expand Up @@ -1433,20 +1434,37 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
} else if (is_create_from_half_vectors_overload (fsig))
return emit_simd_ins (cfg, klass, OP_XCONCAT, args [0]->dreg, args [1]->dreg);
else if (is_elementwise_create_overload (fsig, etype))
return emit_vector_create_elementwise (cfg, fsig, fsig->ret, etype, args);
return emit_vector_create_elementwise (cfg, fsig, fsig->ret, arg0_type, args);
break;
}
case SN_CreateScalar: {
MonoType *etype = get_vector_t_elem_type (fsig->ret);
if (!MONO_TYPE_IS_VECTOR_PRIMITIVE (etype))
return NULL;
return emit_simd_ins_for_sig (cfg, klass, OP_CREATE_SCALAR, -1, arg0_type, fsig, args);
if (COMPILE_LLVM (cfg))
return emit_simd_ins_for_sig (cfg, klass, OP_CREATE_SCALAR, -1, arg0_type, fsig, args);
else {
if (type_enum_is_float (arg0_type)) {
return emit_simd_ins_for_sig (cfg, klass, OP_CREATE_SCALAR_FLOAT, -1, arg0_type, fsig, args);
} else {
return emit_simd_ins_for_sig (cfg, klass, OP_CREATE_SCALAR_INT, -1, arg0_type, fsig, args);
}
}

}
case SN_CreateScalarUnsafe: {
MonoType *etype = get_vector_t_elem_type (fsig->ret);
if (!MONO_TYPE_IS_VECTOR_PRIMITIVE (etype))
return NULL;
return emit_simd_ins_for_sig (cfg, klass, OP_CREATE_SCALAR_UNSAFE, -1, arg0_type, fsig, args);
if (COMPILE_LLVM (cfg))
return emit_simd_ins_for_sig (cfg, klass, OP_CREATE_SCALAR_UNSAFE, -1, arg0_type, fsig, args);
else {
if (type_enum_is_float (arg0_type)) {
return emit_simd_ins_for_sig (cfg, klass, OP_CREATE_SCALAR_UNSAFE_FLOAT, -1, arg0_type, fsig, args);
} else {
return emit_simd_ins_for_sig (cfg, klass, OP_CREATE_SCALAR_UNSAFE_INT, -1, arg0_type, fsig, args);
}
}
}
case SN_Dot: {
if (!is_element_type_primitive (fsig->params [0]))
Expand Down