Skip to content

Commit

Permalink
Add f16 inline ASM support for 32-bit ARM
Browse files Browse the repository at this point in the history
  • Loading branch information
beetrees committed Jun 21, 2024
1 parent 12b33d3 commit 753fb07
Show file tree
Hide file tree
Showing 3 changed files with 365 additions and 183 deletions.
39 changes: 39 additions & 0 deletions compiler/rustc_codegen_llvm/src/asm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1020,6 +1020,19 @@ fn llvm_fixup_input<'ll, 'tcx>(
value
}
}
(
InlineAsmRegClass::Arm(
ArmInlineAsmRegClass::dreg
| ArmInlineAsmRegClass::dreg_low8
| ArmInlineAsmRegClass::dreg_low16
| ArmInlineAsmRegClass::qreg
| ArmInlineAsmRegClass::qreg_low4
| ArmInlineAsmRegClass::qreg_low8,
),
Abi::Vector { element, count: count @ (4 | 8) },
) if element.primitive() == Primitive::Float(Float::F16) => {
bx.bitcast(value, bx.type_vector(bx.type_i16(), count))
}
(InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => {
match s.primitive() {
// MIPS only supports register-length arithmetics.
Expand Down Expand Up @@ -1130,6 +1143,19 @@ fn llvm_fixup_output<'ll, 'tcx>(
value
}
}
(
InlineAsmRegClass::Arm(
ArmInlineAsmRegClass::dreg
| ArmInlineAsmRegClass::dreg_low8
| ArmInlineAsmRegClass::dreg_low16
| ArmInlineAsmRegClass::qreg
| ArmInlineAsmRegClass::qreg_low4
| ArmInlineAsmRegClass::qreg_low8,
),
Abi::Vector { element, count: count @ (4 | 8) },
) if element.primitive() == Primitive::Float(Float::F16) => {
bx.bitcast(value, bx.type_vector(bx.type_f16(), count))
}
(InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => {
match s.primitive() {
// MIPS only supports register-length arithmetics.
Expand Down Expand Up @@ -1233,6 +1259,19 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
layout.llvm_type(cx)
}
}
(
InlineAsmRegClass::Arm(
ArmInlineAsmRegClass::dreg
| ArmInlineAsmRegClass::dreg_low8
| ArmInlineAsmRegClass::dreg_low16
| ArmInlineAsmRegClass::qreg
| ArmInlineAsmRegClass::qreg_low4
| ArmInlineAsmRegClass::qreg_low8,
),
Abi::Vector { element, count: count @ (4 | 8) },
) if element.primitive() == Primitive::Float(Float::F16) => {
cx.type_vector(cx.type_i16(), count)
}
(InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => {
match s.primitive() {
// MIPS only supports register-length arithmetics.
Expand Down
12 changes: 7 additions & 5 deletions compiler/rustc_target/src/asm/arm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,16 +47,18 @@ impl ArmInlineAsmRegClass {
_arch: InlineAsmArch,
) -> &'static [(InlineAsmType, Option<Symbol>)] {
match self {
Self::reg => types! { _: I8, I16, I32, F32; },
Self::sreg | Self::sreg_low16 => types! { vfp2: I32, F32; },
Self::reg => types! { _: I8, I16, I32, F16, F32; },
Self::sreg | Self::sreg_low16 => types! { vfp2: I32, F16, F32; },
Self::dreg_low16 | Self::dreg_low8 => types! {
vfp2: I64, F64, VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF32(2);
vfp2: I64, F64;
neon: VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF16(4), VecF32(2);
},
Self::dreg => types! {
d32: I64, F64, VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF32(2);
d32: I64, F64;
neon: VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF16(4), VecF32(2);
},
Self::qreg | Self::qreg_low8 | Self::qreg_low4 => types! {
neon: VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4);
neon: VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4);
},
}
}
Expand Down
Loading

0 comments on commit 753fb07

Please sign in to comment.