Skip to content

Commit

Permalink
[LoongArch] Fix LASX vector_extract codegen
Browse files Browse the repository at this point in the history
Custom lowering `ISD::EXTRACT_VECTOR_ELT` with lasx.
  • Loading branch information
wangleiat committed Dec 29, 2023
1 parent d3ddb93 commit 47c88bc
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 57 deletions.
21 changes: 20 additions & 1 deletion llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UNDEF, VT, Legal);

setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);

setOperationAction(ISD::SETCC, VT, Legal);
Expand Down Expand Up @@ -406,6 +406,8 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
return lowerWRITE_REGISTER(Op, DAG);
case ISD::INSERT_VECTOR_ELT:
return lowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT:
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::BUILD_VECTOR:
return lowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE:
Expand Down Expand Up @@ -513,6 +515,23 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
return SDValue();
}

// Custom lowering hook for ISD::EXTRACT_VECTOR_ELT.
//
// Op is handed back unchanged when the index operand is a constant and
// either the vector's element type is one of i32/i64/f32/f64, or the constant
// index is below half of the vector's element count. In every other case
// (non-constant index, or a constant index in the upper half for the
// remaining element types) an empty SDValue is returned.
SDValue
LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDValue IndexOp = Op->getOperand(1);
  EVT SrcVT = Op->getOperand(0)->getValueType(0);
  EVT ScalarVT = SrcVT.getVectorElementType();
  unsigned HalfCount = SrcVT.getVectorNumElements() / 2;

  // A non-constant index is never kept as-is.
  if (!isa<ConstantSDNode>(IndexOp))
    return SDValue();

  // i32/i64/f32/f64 elements are kept for any constant index.
  bool IsWordOrDoubleElt = ScalarVT == MVT::i32 || ScalarVT == MVT::i64 ||
                           ScalarVT == MVT::f32 || ScalarVT == MVT::f64;
  if (IsWordOrDoubleElt)
    return Op;

  // Any other element type is kept only when the index is in the low half.
  if (cast<ConstantSDNode>(IndexOp)->getZExtValue() < HalfCount)
    return Op;

  return SDValue();
}

SDValue
LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/LoongArch/LoongArchISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ class LoongArchTargetLowering : public TargetLowering {
SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
Expand Down
40 changes: 8 additions & 32 deletions llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1590,38 +1590,14 @@ def : Pat<(i64 (vector_extract v32i8:$xj, uimm4:$imm)),
(VPICKVE2GR_B (EXTRACT_SUBREG v32i8:$xj, sub_128), uimm4:$imm)>;
def : Pat<(i64 (vector_extract v16i16:$xj, uimm3:$imm)),
(VPICKVE2GR_H (EXTRACT_SUBREG v16i16:$xj, sub_128), uimm3:$imm)>;
def : Pat<(i64 (vector_extract v8i32:$xj, uimm2:$imm)),
(VPICKVE2GR_W (EXTRACT_SUBREG v8i32:$xj, sub_128), uimm2:$imm)>;
def : Pat<(i64 (vector_extract v4i64:$xj, uimm1:$imm)),
(VPICKVE2GR_D (EXTRACT_SUBREG v4i64:$xj, sub_128), uimm1:$imm)>;
def : Pat<(f32 (vector_extract v8f32:$xj, uimm2:$imm)),
(f32 (EXTRACT_SUBREG (XVREPL128VEI_W v8f32:$xj, uimm2:$imm), sub_32))>;
def : Pat<(f64 (vector_extract v4f64:$xj, uimm1:$imm)),
(f64 (EXTRACT_SUBREG (XVREPL128VEI_D v4f64:$xj, uimm1:$imm), sub_64))>;

// Vector extraction with variable index.
def : Pat<(i64 (vector_extract v32i8:$xj, i64:$rk)),
(SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_B v32i8:$xj,
i64:$rk),
sub_32)),
GPR), (i64 24))>;
def : Pat<(i64 (vector_extract v16i16:$xj, i64:$rk)),
(SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_H v16i16:$xj,
i64:$rk),
sub_32)),
GPR), (i64 16))>;
def : Pat<(i64 (vector_extract v8i32:$xj, i64:$rk)),
(COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_W v8i32:$xj, i64:$rk),
sub_32)),
GPR)>;
def : Pat<(i64 (vector_extract v4i64:$xj, i64:$rk)),
(COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (XVREPLVE_D v4i64:$xj, i64:$rk),
sub_64)),
GPR)>;
def : Pat<(f32 (vector_extract v8f32:$xj, i64:$rk)),
(f32 (EXTRACT_SUBREG (XVREPLVE_W v8f32:$xj, i64:$rk), sub_32))>;
def : Pat<(f64 (vector_extract v4f64:$xj, i64:$rk)),
(f64 (EXTRACT_SUBREG (XVREPLVE_D v4f64:$xj, i64:$rk), sub_64))>;
// Constant-index extraction of 32-bit and 64-bit elements from a 256-bit
// vector. The index immediate (uimm3 for 8 x 32-bit, uimm2 for 4 x 64-bit)
// is passed straight to XVPICKVE2GR_{W,D}, which takes the full $xj operand.
def : Pat<(i64 (vector_extract v8i32:$xj, uimm3:$imm)),
(XVPICKVE2GR_W v8i32:$xj, uimm3:$imm)>;
def : Pat<(i64 (vector_extract v4i64:$xj, uimm2:$imm)),
(XVPICKVE2GR_D v4i64:$xj, uimm2:$imm)>;
// Floating-point elements use the same pick instruction; its result is then
// wrapped in MOVGR2FR_{W,D} to produce the f32/f64 value.
def : Pat<(f32 (vector_extract v8f32:$xj, uimm3:$imm)),
(MOVGR2FR_W (XVPICKVE2GR_W v8f32:$xj, uimm3:$imm))>;
def : Pat<(f64 (vector_extract v4f64:$xj, uimm2:$imm)),
(MOVGR2FR_D (XVPICKVE2GR_D v4f64:$xj, uimm2:$imm))>;

// vselect
def : Pat<(v32i8 (vselect LASX256:$xj, LASX256:$xd,
Expand Down
108 changes: 84 additions & 24 deletions llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ define void @extract_8xi32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_8xi32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1
; CHECK-NEXT: st.w $a0, $a1, 0
; CHECK-NEXT: ret
%v = load volatile <8 x i32>, ptr %src
Expand All @@ -44,7 +44,7 @@ define void @extract_4xi64(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_4xi64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
; CHECK-NEXT: st.d $a0, $a1, 0
; CHECK-NEXT: ret
%v = load volatile <4 x i64>, ptr %src
Expand All @@ -57,8 +57,8 @@ define void @extract_8xfloat(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_8xfloat:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: ori $a0, $zero, 7
; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a0
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7
; CHECK-NEXT: movgr2fr.w $fa0, $a0
; CHECK-NEXT: fst.s $fa0, $a1, 0
; CHECK-NEXT: ret
%v = load volatile <8 x float>, ptr %src
Expand All @@ -71,8 +71,8 @@ define void @extract_4xdouble(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_4xdouble:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: ori $a0, $zero, 3
; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a0
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
; CHECK-NEXT: movgr2fr.d $fa0, $a0
; CHECK-NEXT: fst.d $fa0, $a1, 0
; CHECK-NEXT: ret
%v = load volatile <4 x double>, ptr %src
Expand All @@ -84,12 +84,21 @@ define void @extract_4xdouble(ptr %src, ptr %dst) nounwind {
define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_32xi8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0
; CHECK-NEXT: addi.d $sp, $sp, -64
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
; CHECK-NEXT: addi.d $fp, $sp, 64
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvreplve.b $xr0, $xr0, $a2
; CHECK-NEXT: movfr2gr.s $a0, $fa0
; CHECK-NEXT: srai.w $a0, $a0, 24
; CHECK-NEXT: xvst $xr0, $sp, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 0
; CHECK-NEXT: ld.b $a0, $a0, 0
; CHECK-NEXT: st.b $a0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -64
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 64
; CHECK-NEXT: ret
%v = load volatile <32 x i8>, ptr %src
%e = extractelement <32 x i8> %v, i32 %idx
Expand All @@ -100,12 +109,21 @@ define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_16xi16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0
; CHECK-NEXT: addi.d $sp, $sp, -64
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
; CHECK-NEXT: addi.d $fp, $sp, 64
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvreplve.h $xr0, $xr0, $a2
; CHECK-NEXT: movfr2gr.s $a0, $fa0
; CHECK-NEXT: srai.w $a0, $a0, 16
; CHECK-NEXT: xvst $xr0, $sp, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 1
; CHECK-NEXT: ld.h $a0, $a0, 0
; CHECK-NEXT: st.h $a0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -64
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 64
; CHECK-NEXT: ret
%v = load volatile <16 x i16>, ptr %src
%e = extractelement <16 x i16> %v, i32 %idx
Expand All @@ -116,11 +134,21 @@ define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_8xi32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0
; CHECK-NEXT: addi.d $sp, $sp, -64
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
; CHECK-NEXT: addi.d $fp, $sp, 64
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a2
; CHECK-NEXT: movfr2gr.s $a0, $fa0
; CHECK-NEXT: xvst $xr0, $sp, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: st.w $a0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -64
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 64
; CHECK-NEXT: ret
%v = load volatile <8 x i32>, ptr %src
%e = extractelement <8 x i32> %v, i32 %idx
Expand All @@ -131,11 +159,21 @@ define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_4xi64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0
; CHECK-NEXT: addi.d $sp, $sp, -64
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
; CHECK-NEXT: addi.d $fp, $sp, 64
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a2
; CHECK-NEXT: movfr2gr.d $a0, $fa0
; CHECK-NEXT: xvst $xr0, $sp, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3
; CHECK-NEXT: ld.d $a0, $a0, 0
; CHECK-NEXT: st.d $a0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -64
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 64
; CHECK-NEXT: ret
%v = load volatile <4 x i64>, ptr %src
%e = extractelement <4 x i64> %v, i32 %idx
Expand All @@ -146,10 +184,21 @@ define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_8xfloat_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0
; CHECK-NEXT: addi.d $sp, $sp, -64
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
; CHECK-NEXT: addi.d $fp, $sp, 64
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a2
; CHECK-NEXT: xvst $xr0, $sp, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2
; CHECK-NEXT: fld.s $fa0, $a0, 0
; CHECK-NEXT: fst.s $fa0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -64
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 64
; CHECK-NEXT: ret
%v = load volatile <8 x float>, ptr %src
%e = extractelement <8 x float> %v, i32 %idx
Expand All @@ -160,10 +209,21 @@ define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
define void @extract_4xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_4xdouble_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0
; CHECK-NEXT: addi.d $sp, $sp, -64
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
; CHECK-NEXT: addi.d $fp, $sp, 64
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a2
; CHECK-NEXT: xvst $xr0, $sp, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3
; CHECK-NEXT: fld.d $fa0, $a0, 0
; CHECK-NEXT: fst.d $fa0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -64
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 64
; CHECK-NEXT: ret
%v = load volatile <4 x double>, ptr %src
%e = extractelement <4 x double> %v, i32 %idx
Expand Down

0 comments on commit 47c88bc

Please sign in to comment.