1 | //===-- RISCVISelLowering.h - RISC-V DAG Lowering Interface -----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the interfaces that RISC-V uses to lower LLVM code into a |
10 | // selection DAG. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H |
15 | #define LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H |
16 | |
17 | #include "RISCV.h" |
18 | #include "llvm/CodeGen/CallingConvLower.h" |
19 | #include "llvm/CodeGen/SelectionDAG.h" |
20 | #include "llvm/CodeGen/TargetLowering.h" |
21 | #include <optional> |
22 | |
23 | namespace llvm { |
24 | class InstructionCost; |
25 | class RISCVSubtarget; |
26 | struct RISCVRegisterInfo; |
27 | class RVVArgDispatcher; |
28 | |
namespace RISCVISD {
// clang-format off
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  RET_GLUE,
  SRET_GLUE,
  MRET_GLUE,
  CALL,
  /// Select with condition operator - This selects between a true value and
  /// a false value (ops #3 and #4) based on the boolean result of comparing
  /// the lhs and rhs (ops #0 and #1) of a conditional expression with the
  /// condition code in op #2, a XLenVT constant from the ISD::CondCode enum.
  /// The lhs and rhs are XLenVT integers. The true and false values can be
  /// integer or floating point.
  SELECT_CC,
  BR_CC,
  BuildPairF64,
  SplitF64,
  TAIL,

  // Add the Lo 12 bits from an address. Selected to ADDI.
  ADD_LO,
  // Get the Hi 20 bits from an address. Selected to LUI.
  HI,

  // Represents an AUIPC+ADDI pair. Selected to PseudoLLA.
  LLA,

  // Selected as PseudoAddTPRel. Used to emit a TP-relative relocation.
  ADD_TPREL,

  // Multiply high for a signed x unsigned operand pair.
  MULHSU,

  // Represents (ADD (SHL a, b), c) with the arguments appearing in the order
  // a, b, c. 'b' must be a constant. Maps to sh1add/sh2add/sh3add with zba
  // or addsl with XTheadBa.
  SHL_ADD,

  // RV64I shifts, directly matching the semantics of the named RISC-V
  // instructions.
  SLLW,
  SRAW,
  SRLW,
  // 32-bit operations from RV64M that can't be simply matched with a pattern
  // at instruction selection time. These have undefined behavior for division
  // by 0 or overflow (divw) like their target independent counterparts.
  DIVW,
  DIVUW,
  REMUW,
  // RV64IB rotates, directly matching the semantics of the named RISC-V
  // instructions.
  ROLW,
  RORW,
  // RV64IZbb bit counting instructions directly matching the semantics of the
  // named RISC-V instructions.
  CLZW,
  CTZW,

  // RV64IZbb absolute value for i32. Expanded to (max (negw X), X) during isel.
  ABSW,

  // FPR<->GPR transfer operations when the FPR is smaller than XLEN, needed as
  // XLEN is the only legal integer width.
  //
  // FMV_H_X matches the semantics of the FMV.H.X.
  // FMV_X_ANYEXTH is similar to FMV.X.H but has an any-extended result.
  // FMV_X_SIGNEXTH is similar to FMV.X.H and has a sign-extended result.
  // FMV_W_X_RV64 matches the semantics of the FMV.W.X.
  // FMV_X_ANYEXTW_RV64 is similar to FMV.X.W but has an any-extended result.
  //
  // This is a more convenient semantic for producing dagcombines that remove
  // unnecessary GPR->FPR->GPR moves.
  FMV_H_X,
  FMV_X_ANYEXTH,
  FMV_X_SIGNEXTH,
  FMV_W_X_RV64,
  FMV_X_ANYEXTW_RV64,
  // FP to XLen int conversions. Corresponds to fcvt.l(u).s/d/h on RV64 and
  // fcvt.w(u).s/d/h on RV32. Unlike FP_TO_S/UINT these saturate out of
  // range inputs. These are used for FP_TO_S/UINT_SAT lowering. Rounding mode
  // is passed as a TargetConstant operand using the RISCVFPRndMode enum.
  FCVT_X,
  FCVT_XU,
  // FP to 32 bit int conversions for RV64. These are used to keep track of the
  // result being sign extended to 64 bit. These saturate out of range inputs.
  // Used for FP_TO_S/UINT and FP_TO_S/UINT_SAT lowering. Rounding mode
  // is passed as a TargetConstant operand using the RISCVFPRndMode enum.
  FCVT_W_RV64,
  FCVT_WU_RV64,

  FP_ROUND_BF16,
  FP_EXTEND_BF16,

  // Rounds an FP value to its corresponding integer in the same FP format.
  // First operand is the value to round, the second operand is the largest
  // integer that can be represented exactly in the FP format. This will be
  // expanded into multiple instructions and basic blocks with a custom
  // inserter.
  FROUND,

  FCLASS,

  // Floating point fmax and fmin matching the RISC-V instruction semantics.
  FMAX, FMIN,

  // A read of the 64-bit counter CSR on a 32-bit target (returns (Lo, Hi)).
  // It takes a chain operand and another two target constant operands (the
  // CSR numbers of the low and high parts of the counter).
  READ_COUNTER_WIDE,

  // brev8, orc.b, zip, and unzip from Zbb and Zbkb. All operands are i32 or
  // XLenVT.
  BREV8,
  ORC_B,
  ZIP,
  UNZIP,

  // Scalar cryptography
  CLMUL, CLMULH, CLMULR,
  SHA256SIG0, SHA256SIG1, SHA256SUM0, SHA256SUM1,
  SM4KS, SM4ED,
  SM3P0, SM3P1,

  // May-Be-Operations
  MOPR, MOPRR,

  // Vector Extension
  FIRST_VL_VECTOR_OP,
  // VMV_V_V_VL matches the semantics of vmv.v.v but includes an extra operand
  // for the VL value to be used for the operation. The first operand is
  // passthru operand.
  VMV_V_V_VL = FIRST_VL_VECTOR_OP,
  // VMV_V_X_VL matches the semantics of vmv.v.x but includes an extra operand
  // for the VL value to be used for the operation. The first operand is
  // passthru operand.
  VMV_V_X_VL,
  // VFMV_V_F_VL matches the semantics of vfmv.v.f but includes an extra operand
  // for the VL value to be used for the operation. The first operand is
  // passthru operand.
  VFMV_V_F_VL,
  // VMV_X_S matches the semantics of vmv.x.s. The result is always XLenVT sign
  // extended from the vector element size.
  VMV_X_S,
  // VMV_S_X_VL matches the semantics of vmv.s.x. It carries a VL operand.
  VMV_S_X_VL,
  // VFMV_S_F_VL matches the semantics of vfmv.s.f. It carries a VL operand.
  VFMV_S_F_VL,
  // Splats a 64-bit value that has been split into two i32 parts. This is
  // expanded late to two scalar stores and a stride 0 vector load.
  // The first operand is passthru operand.
  SPLAT_VECTOR_SPLIT_I64_VL,
  // Truncates a RVV integer vector by one power-of-two. Carries both an extra
  // mask and VL operand.
  TRUNCATE_VECTOR_VL,
  // Matches the semantics of vslideup/vslidedown. The first operand is the
  // pass-thru operand, the second is the source vector, the third is the XLenVT
  // index (either constant or non-constant), the fourth is the mask, the fifth
  // is the VL and the sixth is the policy.
  VSLIDEUP_VL,
  VSLIDEDOWN_VL,
  // Matches the semantics of vslide1up/vslide1down. The first operand is
  // passthru operand, the second is source vector, third is the XLenVT scalar
  // value. The fourth and fifth operands are the mask and VL operands.
  VSLIDE1UP_VL,
  VSLIDE1DOWN_VL,
  // Matches the semantics of vfslide1up/vfslide1down. The first operand is
  // passthru operand, the second is source vector, third is a scalar value
  // whose type matches the element type of the vectors. The fourth and fifth
  // operands are the mask and VL operands.
  VFSLIDE1UP_VL,
  VFSLIDE1DOWN_VL,
  // Matches the semantics of the vid.v instruction, with a mask and VL
  // operand.
  VID_VL,
  // Matches the semantics of the vfncvt.rod instruction (convert double-width
  // float to single-width float, rounding towards odd). Takes a double-width
  // float vector and produces a single-width float vector. Also has a mask and
  // VL operand.
  VFNCVT_ROD_VL,
  // These nodes match the semantics of the corresponding RVV vector reduction
  // instructions. They produce a vector result which is the reduction
  // performed over the second vector operand plus the first element of the
  // third vector operand. The first operand is the pass-thru operand. The
  // second operand is an unconstrained vector type, and the result, first, and
  // third operand's types are expected to be the corresponding full-width
  // LMUL=1 type for the second operand:
  //   nxv8i8 = vecreduce_add nxv8i8, nxv32i8, nxv8i8
  //   nxv2i32 = vecreduce_add nxv2i32, nxv8i32, nxv2i32
  // The difference in types does introduce extra vsetvli instructions but
  // similarly it reduces the number of registers consumed per reduction.
  // Also has a mask and VL operand.
  VECREDUCE_ADD_VL,
  VECREDUCE_UMAX_VL,
  VECREDUCE_SMAX_VL,
  VECREDUCE_UMIN_VL,
  VECREDUCE_SMIN_VL,
  VECREDUCE_AND_VL,
  VECREDUCE_OR_VL,
  VECREDUCE_XOR_VL,
  VECREDUCE_FADD_VL,
  VECREDUCE_SEQ_FADD_VL,
  VECREDUCE_FMIN_VL,
  VECREDUCE_FMAX_VL,

  // Vector binary ops with a merge as a third operand, a mask as a fourth
  // operand, and VL as a fifth operand.
  ADD_VL,
  AND_VL,
  MUL_VL,
  OR_VL,
  SDIV_VL,
  SHL_VL,
  SREM_VL,
  SRA_VL,
  SRL_VL,
  ROTL_VL,
  ROTR_VL,
  SUB_VL,
  UDIV_VL,
  UREM_VL,
  XOR_VL,
  SMIN_VL,
  SMAX_VL,
  UMIN_VL,
  UMAX_VL,

  BITREVERSE_VL,
  BSWAP_VL,
  CTLZ_VL,
  CTTZ_VL,
  CTPOP_VL,

  SADDSAT_VL,
  UADDSAT_VL,
  SSUBSAT_VL,
  USUBSAT_VL,

  // Averaging adds of unsigned integers.
  AVGFLOORU_VL,
  // Rounding averaging adds of unsigned integers.
  AVGCEILU_VL,

  MULHS_VL,
  MULHU_VL,
  FADD_VL,
  FSUB_VL,
  FMUL_VL,
  FDIV_VL,
  VFMIN_VL,
  VFMAX_VL,

  // Vector unary ops with a mask as a second operand and VL as a third operand.
  FNEG_VL,
  FABS_VL,
  FSQRT_VL,
  FCLASS_VL,
  FCOPYSIGN_VL, // Has a merge operand
  VFCVT_RTZ_X_F_VL,
  VFCVT_RTZ_XU_F_VL,
  VFCVT_X_F_VL,
  VFCVT_XU_F_VL,
  VFROUND_NOEXCEPT_VL,
  VFCVT_RM_X_F_VL,  // Has a rounding mode operand.
  VFCVT_RM_XU_F_VL, // Has a rounding mode operand.
  SINT_TO_FP_VL,
  UINT_TO_FP_VL,
  VFCVT_RM_F_X_VL,  // Has a rounding mode operand.
  VFCVT_RM_F_XU_VL, // Has a rounding mode operand.
  FP_ROUND_VL,
  FP_EXTEND_VL,

  // Vector FMA ops with a mask as a fourth operand and VL as a fifth operand.
  VFMADD_VL,
  VFNMADD_VL,
  VFMSUB_VL,
  VFNMSUB_VL,

  // Vector widening FMA ops with a mask as a fourth operand and VL as a fifth
  // operand.
  VFWMADD_VL,
  VFWNMADD_VL,
  VFWMSUB_VL,
  VFWNMSUB_VL,

  // Widening instructions with a merge value a third operand, a mask as a
  // fourth operand, and VL as a fifth operand.
  VWMUL_VL,
  VWMULU_VL,
  VWMULSU_VL,
  VWADD_VL,
  VWADDU_VL,
  VWSUB_VL,
  VWSUBU_VL,
  VWADD_W_VL,
  VWADDU_W_VL,
  VWSUB_W_VL,
  VWSUBU_W_VL,
  VWSLL_VL,

  VFWMUL_VL,
  VFWADD_VL,
  VFWSUB_VL,
  VFWADD_W_VL,
  VFWSUB_W_VL,

  // Widening ternary operations with a mask as the fourth operand and VL as the
  // fifth operand.
  VWMACC_VL,
  VWMACCU_VL,
  VWMACCSU_VL,

  // Narrowing logical shift right.
  // Operands are (source, shift, passthru, mask, vl)
  VNSRL_VL,

  // Vector compare producing a mask. Fourth operand is input mask. Fifth
  // operand is VL.
  SETCC_VL,

  // General vmerge node with mask, true, false, passthru, and vl operands.
  // Tail agnostic vselect can be implemented by setting passthru to undef.
  VMERGE_VL,

  // Mask binary operators.
  VMAND_VL,
  VMOR_VL,
  VMXOR_VL,

  // Set mask vector to all zeros or ones.
  VMCLR_VL,
  VMSET_VL,

  // Matches the semantics of vrgather.vx and vrgather.vv with extra operands
  // for passthru and VL. Operands are (src, index, mask, passthru, vl).
  VRGATHER_VX_VL,
  VRGATHER_VV_VL,
  VRGATHEREI16_VV_VL,

  // Vector sign/zero extend with additional mask & VL operands.
  VSEXT_VL,
  VZEXT_VL,

  // vcpop.m with additional mask and VL operands.
  VCPOP_VL,

  // vfirst.m with additional mask and VL operands.
  VFIRST_VL,

  LAST_VL_VECTOR_OP = VFIRST_VL,

  // Read VLENB CSR
  READ_VLENB,
  // Reads value of CSR.
  // The first operand is a chain pointer. The second specifies address of the
  // required CSR. Two results are produced, the read value and the new chain
  // pointer.
  READ_CSR,
  // Write value to CSR.
  // The first operand is a chain pointer, the second specifies address of the
  // required CSR and the third is the value to write. The result is the new
  // chain pointer.
  WRITE_CSR,
  // Read and write value of CSR.
  // The first operand is a chain pointer, the second specifies address of the
  // required CSR and the third is the value to write. Two results are produced,
  // the value read before the modification and the new chain pointer.
  SWAP_CSR,

  // Branchless select operations, matching the semantics of the instructions
  // defined in Zicond or XVentanaCondOps.
  CZERO_EQZ, // vt.maskc for XVentanaCondOps.
  CZERO_NEZ, // vt.maskcn for XVentanaCondOps.

  // FP to 32 bit int conversions for RV64. These are used to keep track of the
  // result being sign extended to 64 bit. These saturate out of range inputs.
  STRICT_FCVT_W_RV64 = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCVT_WU_RV64,
  STRICT_FADD_VL,
  STRICT_FSUB_VL,
  STRICT_FMUL_VL,
  STRICT_FDIV_VL,
  STRICT_FSQRT_VL,
  STRICT_VFMADD_VL,
  STRICT_VFNMADD_VL,
  STRICT_VFMSUB_VL,
  STRICT_VFNMSUB_VL,
  STRICT_FP_ROUND_VL,
  STRICT_FP_EXTEND_VL,
  STRICT_VFNCVT_ROD_VL,
  STRICT_SINT_TO_FP_VL,
  STRICT_UINT_TO_FP_VL,
  STRICT_VFCVT_RM_X_F_VL,
  STRICT_VFCVT_RTZ_X_F_VL,
  STRICT_VFCVT_RTZ_XU_F_VL,
  STRICT_FSETCC_VL,
  STRICT_FSETCCS_VL,
  STRICT_VFROUND_NOEXCEPT_VL,
  LAST_RISCV_STRICTFP_OPCODE = STRICT_VFROUND_NOEXCEPT_VL,

  SF_VC_XV_SE,
  SF_VC_IV_SE,
  SF_VC_VV_SE,
  SF_VC_FV_SE,
  SF_VC_XVV_SE,
  SF_VC_IVV_SE,
  SF_VC_VVV_SE,
  SF_VC_FVV_SE,
  SF_VC_XVW_SE,
  SF_VC_IVW_SE,
  SF_VC_VVW_SE,
  SF_VC_FVW_SE,
  SF_VC_V_X_SE,
  SF_VC_V_I_SE,
  SF_VC_V_XV_SE,
  SF_VC_V_IV_SE,
  SF_VC_V_VV_SE,
  SF_VC_V_FV_SE,
  SF_VC_V_XVV_SE,
  SF_VC_V_IVV_SE,
  SF_VC_V_VVV_SE,
  SF_VC_V_FVV_SE,
  SF_VC_V_XVW_SE,
  SF_VC_V_IVW_SE,
  SF_VC_V_VVW_SE,
  SF_VC_V_FVW_SE,

  // WARNING: Do not add anything in the end unless you want the node to
  // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
  // opcodes will be thought as target memory ops!

  TH_LWD = ISD::FIRST_TARGET_MEMORY_OPCODE,
  TH_LWUD,
  TH_LDD,
  TH_SWD,
  TH_SDD,
};
// clang-format on
} // namespace RISCVISD
468 | |
469 | class RISCVTargetLowering : public TargetLowering { |
470 | const RISCVSubtarget &Subtarget; |
471 | |
472 | public: |
473 | explicit RISCVTargetLowering(const TargetMachine &TM, |
474 | const RISCVSubtarget &STI); |
475 | |
  /// Return the subtarget this lowering object was constructed for.
  const RISCVSubtarget &getSubtarget() const { return Subtarget; }
477 | |
478 | bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, |
479 | MachineFunction &MF, |
480 | unsigned Intrinsic) const override; |
481 | bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, |
482 | unsigned AS, |
483 | Instruction *I = nullptr) const override; |
484 | bool isLegalICmpImmediate(int64_t Imm) const override; |
485 | bool isLegalAddImmediate(int64_t Imm) const override; |
486 | bool isTruncateFree(Type *SrcTy, Type *DstTy) const override; |
487 | bool isTruncateFree(EVT SrcVT, EVT DstVT) const override; |
488 | bool isZExtFree(SDValue Val, EVT VT2) const override; |
489 | bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override; |
490 | bool signExtendConstant(const ConstantInt *CI) const override; |
491 | bool isCheapToSpeculateCttz(Type *Ty) const override; |
492 | bool isCheapToSpeculateCtlz(Type *Ty) const override; |
493 | bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; |
494 | bool hasAndNotCompare(SDValue Y) const override; |
495 | bool hasBitTest(SDValue X, SDValue Y) const override; |
496 | bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( |
497 | SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, |
498 | unsigned OldShiftOpcode, unsigned NewShiftOpcode, |
499 | SelectionDAG &DAG) const override; |
500 | /// Return true if the (vector) instruction I will be lowered to an instruction |
501 | /// with a scalar splat operand for the given Operand number. |
502 | bool canSplatOperand(Instruction *I, int Operand) const; |
503 | /// Return true if a vector instruction will lower to a target instruction |
504 | /// able to splat the given operand. |
505 | bool canSplatOperand(unsigned Opcode, int Operand) const; |
506 | bool shouldSinkOperands(Instruction *I, |
507 | SmallVectorImpl<Use *> &Ops) const override; |
508 | bool shouldScalarizeBinop(SDValue VecOp) const override; |
509 | bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; |
510 | std::pair<int, bool> getLegalZfaFPImm(const APFloat &Imm, EVT VT) const; |
511 | bool isFPImmLegal(const APFloat &Imm, EVT VT, |
512 | bool ForCodeSize) const override; |
513 | bool (EVT ResVT, EVT SrcVT, |
514 | unsigned Index) const override; |
515 | |
516 | bool isIntDivCheap(EVT VT, AttributeList Attr) const override; |
517 | |
518 | bool preferScalarizeSplat(SDNode *N) const override; |
519 | |
  /// Legalize f16 by soft promotion (operate in a wider FP type and carry the
  /// value in an integer register) rather than as a native legal type.
  bool softPromoteHalfType() const override { return true; }
521 | |
522 | /// Return the register type for a given MVT, ensuring vectors are treated |
523 | /// as a series of gpr sized integers. |
524 | MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, |
525 | EVT VT) const override; |
526 | |
527 | /// Return the number of registers for a given MVT, ensuring vectors are |
528 | /// treated as a series of gpr sized integers. |
529 | unsigned getNumRegistersForCallingConv(LLVMContext &Context, |
530 | CallingConv::ID CC, |
531 | EVT VT) const override; |
532 | |
533 | unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, |
534 | CallingConv::ID CC, EVT VT, |
535 | EVT &IntermediateVT, |
536 | unsigned &NumIntermediates, |
537 | MVT &RegisterVT) const override; |
538 | |
539 | bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, |
540 | EVT VT) const override; |
541 | |
542 | /// Return true if the given shuffle mask can be codegen'd directly, or if it |
543 | /// should be stack expanded. |
544 | bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override; |
545 | |
546 | bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override { |
547 | // If the pair to store is a mixture of float and int values, we will |
548 | // save two bitwise instructions and one float-to-int instruction and |
549 | // increase one store instruction. There is potentially a more |
550 | // significant benefit because it avoids the float->int domain switch |
551 | // for input value. So It is more likely a win. |
552 | if ((LTy.isFloatingPoint() && HTy.isInteger()) || |
553 | (LTy.isInteger() && HTy.isFloatingPoint())) |
554 | return true; |
555 | // If the pair only contains int values, we will save two bitwise |
556 | // instructions and increase one store instruction (costing one more |
557 | // store buffer). Since the benefit is more blurred we leave such a pair |
558 | // out until we get testcase to prove it is a win. |
559 | return false; |
560 | } |
561 | |
562 | bool |
563 | shouldExpandBuildVectorWithShuffles(EVT VT, |
564 | unsigned DefinedValues) const override; |
565 | |
566 | /// Return the cost of LMUL for linear operations. |
567 | InstructionCost getLMULCost(MVT VT) const; |
568 | |
569 | InstructionCost getVRGatherVVCost(MVT VT) const; |
570 | InstructionCost getVRGatherVICost(MVT VT) const; |
571 | InstructionCost getVSlideVXCost(MVT VT) const; |
572 | InstructionCost getVSlideVICost(MVT VT) const; |
573 | |
574 | // Provide custom lowering hooks for some operations. |
575 | SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; |
576 | void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, |
577 | SelectionDAG &DAG) const override; |
578 | |
579 | SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; |
580 | |
581 | bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, |
582 | const APInt &DemandedElts, |
583 | TargetLoweringOpt &TLO) const override; |
584 | |
585 | void computeKnownBitsForTargetNode(const SDValue Op, |
586 | KnownBits &Known, |
587 | const APInt &DemandedElts, |
588 | const SelectionDAG &DAG, |
589 | unsigned Depth) const override; |
590 | unsigned ComputeNumSignBitsForTargetNode(SDValue Op, |
591 | const APInt &DemandedElts, |
592 | const SelectionDAG &DAG, |
593 | unsigned Depth) const override; |
594 | |
595 | bool canCreateUndefOrPoisonForTargetNode(SDValue Op, |
596 | const APInt &DemandedElts, |
597 | const SelectionDAG &DAG, |
598 | bool PoisonOnly, bool ConsiderFlags, |
599 | unsigned Depth) const override; |
600 | |
601 | const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override; |
602 | |
603 | // This method returns the name of a target specific DAG node. |
604 | const char *getTargetNodeName(unsigned Opcode) const override; |
605 | |
606 | MachineMemOperand::Flags |
607 | getTargetMMOFlags(const Instruction &I) const override; |
608 | |
609 | MachineMemOperand::Flags |
610 | getTargetMMOFlags(const MemSDNode &Node) const override; |
611 | |
612 | bool |
613 | areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, |
614 | const MemSDNode &NodeY) const override; |
615 | |
616 | ConstraintType getConstraintType(StringRef Constraint) const override; |
617 | |
618 | InlineAsm::ConstraintCode |
619 | getInlineAsmMemConstraint(StringRef ConstraintCode) const override; |
620 | |
621 | std::pair<unsigned, const TargetRegisterClass *> |
622 | getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, |
623 | StringRef Constraint, MVT VT) const override; |
624 | |
625 | void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, |
626 | std::vector<SDValue> &Ops, |
627 | SelectionDAG &DAG) const override; |
628 | |
629 | MachineBasicBlock * |
630 | EmitInstrWithCustomInserter(MachineInstr &MI, |
631 | MachineBasicBlock *BB) const override; |
632 | |
633 | void AdjustInstrPostInstrSelection(MachineInstr &MI, |
634 | SDNode *Node) const override; |
635 | |
636 | EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, |
637 | EVT VT) const override; |
638 | |
639 | bool shouldFormOverflowOp(unsigned Opcode, EVT VT, |
640 | bool MathUsed) const override { |
641 | if (VT == MVT::i8 || VT == MVT::i16) |
642 | return false; |
643 | |
644 | return TargetLowering::shouldFormOverflowOp(Opcode, VT, MathUsed); |
645 | } |
646 | |
647 | bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, |
648 | unsigned AddrSpace) const override { |
649 | // If we can replace 4 or more scalar stores, there will be a reduction |
650 | // in instructions even after we add a vector constant load. |
651 | return NumElem >= 4; |
652 | } |
653 | |
  /// Prefer combining scalar-integer setcc results with bitwise and/or over
  /// branchy control flow.
  bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
    return VT.isScalarInteger();
  }
  /// Selects between constants are always worth lowering as arithmetic.
  bool convertSelectOfConstantsToMath(EVT VT) const override { return true; }
658 | |
659 | bool isCtpopFast(EVT VT) const override; |
660 | |
661 | unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override; |
662 | |
  /// Prefer comparisons against zero for branches (RISC-V has beqz/bnez).
  bool preferZeroCompareBranch() const override { return true; }
664 | |
665 | bool shouldInsertFencesForAtomic(const Instruction *I) const override { |
666 | return isa<LoadInst>(Val: I) || isa<StoreInst>(Val: I); |
667 | } |
668 | Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, |
669 | AtomicOrdering Ord) const override; |
670 | Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, |
671 | AtomicOrdering Ord) const override; |
672 | |
673 | bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, |
674 | EVT VT) const override; |
675 | |
  /// Extend sub-XLen atomic results with sign extension; word-sized AMOs on
  /// RV64 produce sign-extended results.
  ISD::NodeType getExtendForAtomicOps() const override {
    return ISD::SIGN_EXTEND;
  }
679 | |
680 | ISD::NodeType getExtendForAtomicCmpSwapArg() const override; |
681 | |
682 | bool shouldTransformSignedTruncationCheck(EVT XVT, |
683 | unsigned KeptBits) const override; |
684 | |
685 | TargetLowering::ShiftLegalizationStrategy |
686 | preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, |
687 | unsigned ExpansionFactor) const override { |
688 | if (DAG.getMachineFunction().getFunction().hasMinSize()) |
689 | return ShiftLegalizationStrategy::LowerToLibcall; |
690 | return TargetLowering::preferredShiftLegalizationStrategy(DAG, N, |
691 | ExpansionFactor); |
692 | } |
693 | |
694 | bool isDesirableToCommuteWithShift(const SDNode *N, |
695 | CombineLevel Level) const override; |
696 | |
697 | /// If a physical register, this returns the register that receives the |
698 | /// exception address on entry to an EH pad. |
699 | Register |
700 | getExceptionPointerRegister(const Constant *PersonalityFn) const override; |
701 | |
702 | /// If a physical register, this returns the register that receives the |
703 | /// exception typeid on entry to a landing pad. |
704 | Register |
705 | getExceptionSelectorRegister(const Constant *PersonalityFn) const override; |
706 | |
707 | bool shouldExtendTypeInLibCall(EVT Type) const override; |
708 | bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override; |
709 | |
710 | /// Returns the register with the specified architectural or ABI name. This |
711 | /// method is necessary to lower the llvm.read_register.* and |
712 | /// llvm.write_register.* intrinsics. Allocatable registers must be reserved |
713 | /// with the clang -ffixed-xX flag for access to be allowed. |
714 | Register getRegisterByName(const char *RegName, LLT VT, |
715 | const MachineFunction &MF) const override; |
716 | |
717 | // Lower incoming arguments, copy physregs into vregs |
718 | SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, |
719 | bool IsVarArg, |
720 | const SmallVectorImpl<ISD::InputArg> &Ins, |
721 | const SDLoc &DL, SelectionDAG &DAG, |
722 | SmallVectorImpl<SDValue> &InVals) const override; |
723 | bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, |
724 | bool IsVarArg, |
725 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
726 | LLVMContext &Context) const override; |
727 | SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, |
728 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
729 | const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, |
730 | SelectionDAG &DAG) const override; |
731 | SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, |
732 | SmallVectorImpl<SDValue> &InVals) const override; |
733 | |
734 | bool shouldConvertConstantLoadToIntImm(const APInt &Imm, |
735 | Type *Ty) const override; |
736 | bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; |
737 | bool mayBeEmittedAsTailCall(const CallInst *CI) const override; |
  /// Allow CodeGenPrepare to split large GEP offsets.
  bool shouldConsiderGEPOffsetSplit() const override { return true; }
739 | |
740 | bool decomposeMulByConstant(LLVMContext &Context, EVT VT, |
741 | SDValue C) const override; |
742 | |
743 | bool isMulAddWithConstProfitable(SDValue AddNode, |
744 | SDValue ConstNode) const override; |
745 | |
746 | TargetLowering::AtomicExpansionKind |
747 | shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; |
748 | Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, |
749 | Value *AlignedAddr, Value *Incr, |
750 | Value *Mask, Value *ShiftAmt, |
751 | AtomicOrdering Ord) const override; |
752 | TargetLowering::AtomicExpansionKind |
753 | shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override; |
754 | Value *emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, |
755 | AtomicCmpXchgInst *CI, |
756 | Value *AlignedAddr, Value *CmpVal, |
757 | Value *NewVal, Value *Mask, |
758 | AtomicOrdering Ord) const override; |
759 | |
760 | /// Returns true if the target allows unaligned memory accesses of the |
761 | /// specified type. |
762 | bool allowsMisalignedMemoryAccesses( |
763 | EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1), |
764 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
765 | unsigned *Fast = nullptr) const override; |
766 | |
767 | EVT getOptimalMemOpType(const MemOp &Op, |
768 | const AttributeList &FuncAttributes) const override; |
769 | |
770 | bool splitValueIntoRegisterParts( |
771 | SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, |
772 | unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) |
773 | const override; |
774 | |
775 | SDValue joinRegisterPartsIntoValue( |
776 | SelectionDAG & DAG, const SDLoc &DL, const SDValue *Parts, |
777 | unsigned NumParts, MVT PartVT, EVT ValueVT, |
778 | std::optional<CallingConv::ID> CC) const override; |
779 | |
780 | // Return the value of VLMax for the given vector type (i.e. SEW and LMUL) |
781 | SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const; |
782 | |
783 | static RISCVII::VLMUL getLMUL(MVT VT); |
784 | inline static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, |
785 | unsigned MinSize) { |
786 | // Original equation: |
787 | // VLMAX = (VectorBits / EltSize) * LMUL |
788 | // where LMUL = MinSize / RISCV::RVVBitsPerBlock |
789 | // The following equations have been reordered to prevent loss of precision |
790 | // when calculating fractional LMUL. |
791 | return ((VectorBits / EltSize) * MinSize) / RISCV::RVVBitsPerBlock; |
792 | } |
793 | |
  // Return inclusive (low, high) bounds on the value of VLMAX for the
  // given scalable container type given known bounds on VLEN.
  static std::pair<unsigned, unsigned>
  computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget);

  /// Returns the register class ID for a vector register group of LMul.
  static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul);
  /// Returns the subregister index for the Index'th part of a vector of
  /// type VT.
  static unsigned getSubregIndexByMVT(MVT VT, unsigned Index);
  /// Returns the register class ID used to hold vectors of type VT.
  static unsigned getRegClassIDForVecVT(MVT VT);
802 | static std::pair<unsigned, unsigned> |
803 | (MVT VecVT, MVT SubVecVT, |
804 | unsigned , |
805 | const RISCVRegisterInfo *TRI); |
  /// Returns the scalable container type used to lower the fixed-length
  /// vector type VT with RVV instructions.
  MVT getContainerForFixedLengthVector(MVT VT) const;

  /// Returns true if a sign/zero extension of a gather/scatter index can
  /// be removed.
  bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;

  /// Returns true if ScalarTy is a legal element type for RVV vectors.
  bool isLegalElementTypeForRVV(EVT ScalarTy) const;

  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

  unsigned getJumpTableEncoding() const override;

  const MCExpr *LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                                          const MachineBasicBlock *MBB,
                                          unsigned uid,
                                          MCContext &Ctx) const override;

  bool isVScaleKnownToBeAPowerOfTwo() const override;

  // Helper presumably shared by the pre-/post-indexed hooks below: matches
  // Op as a base+offset address and reports the addressing mode in AM —
  // confirm against the definition in the .cpp.
  bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
                              ISD::MemIndexedMode &AM, SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;

  bool isLegalScaleForGatherScatter(uint64_t Scale,
                                    uint64_t ElemSize) const override {
    // Scaled addressing not supported on indexed load/stores
    return Scale == 1;
  }
837 | |
  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  /// Returns whether or not generating an interleaved load/store intrinsic
  /// for this type will be legal.
  bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
                                    Align Alignment, unsigned AddrSpace,
                                    const DataLayout &) const;

  /// Return true if a stride load store of the given result type and
  /// alignment is legal.
  bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const;

  /// Maximum factor accepted by the interleaved-access lowering hooks.
  unsigned getMaxSupportedInterleaveFactor() const override { return 8; }

  /// Returns true if GlobalISel should fall back to SelectionDAG for Inst.
  bool fallBackToDAGISel(const Instruction &Inst) const override;
855 | |
  /// Lowers an interleaved load (wide load LI feeding the de-interleaving
  /// Shuffles) with the given interleave Factor; returns true on success.
  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;

  /// Lowers an interleaved store (interleaving shuffle SVI feeding the wide
  /// store SI); returns true on success.
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  /// Lowers a deinterleave intrinsic II fed by load LI; returns true on
  /// success.
  bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II,
                                        LoadInst *LI) const override;

  /// Lowers an interleave intrinsic II feeding store SI; returns true on
  /// success.
  bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
                                       StoreInst *SI) const override;

  /// KCFI (kernel control-flow integrity) check bundles are supported.
  bool supportKCFIBundles() const override { return true; }

  /// Emits the machine-level KCFI check for the instruction at MBBI.
  MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
                              MachineBasicBlock::instr_iterator &MBBI,
                              const TargetInstrInfo *TII) const override;
875 | |
  /// RISCVCCAssignFn - This target-specific function extends the default
  /// CCValAssign with additional information used to lower RISC-V calling
  /// conventions. (Implemented by CC_RISCV / CC_RISCV_FastCC in the RISCV
  /// namespace below.)
  typedef bool RISCVCCAssignFn(const DataLayout &DL, RISCVABI::ABI,
                               unsigned ValNo, MVT ValVT, MVT LocVT,
                               CCValAssign::LocInfo LocInfo,
                               ISD::ArgFlagsTy ArgFlags, CCState &State,
                               bool IsFixed, bool IsRet, Type *OrigTy,
                               const RISCVTargetLowering &TLI,
                               RVVArgDispatcher &RVVDispatcher);
886 | |
private:
  /// Analyzes incoming formal arguments (Ins) into CCInfo using the assign
  /// function Fn; IsRet is set when analyzing return values.
  void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
                        const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
                        RISCVCCAssignFn Fn) const;
  /// Analyzes outgoing arguments/returns (Outs) into CCInfo using the
  /// assign function Fn; CLI, if non-null, provides call-lowering context.
  void analyzeOutputArgs(MachineFunction &MF, CCState &CCInfo,
                         const SmallVectorImpl<ISD::OutputArg> &Outs,
                         bool IsRet, CallLoweringInfo *CLI,
                         RISCVCCAssignFn Fn) const;
895 | |
  // Address materialization helpers; the TLS variants cover the static,
  // dynamic and descriptor TLS models.
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true,
                  bool IsExternWeak = false) const;
  SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
                           bool UseGOT) const;
  SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
  SDValue getTLSDescAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;

  // Custom lowerings for individual ISD opcodes. Each takes the node to
  // lower (Op) and returns the replacement DAG value.
  SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
  SDValue lowerSPLAT_VECTOR_PARTS(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorMaskSplat(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
                             int64_t ExtTrueVal) const;
  SDValue lowerVectorMaskTruncLike(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorTruncLike(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
924 | SDValue (SDValue Op, SelectionDAG &DAG) const; |
  // Intrinsic and (VP) reduction lowerings.
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  // IsVP selects between plain and VP (vector-predicated) mask reductions.
  SDValue lowerVectorMaskVecReduction(SDValue Op, SelectionDAG &DAG,
                                      bool IsVP) const;
  SDValue lowerFPVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
934 | SDValue (SDValue Op, SelectionDAG &DAG) const; |
  // More per-opcode custom lowerings, including the lower*ToRVV family that
  // converts fixed-length vector operations to scalable RVV equivalents and
  // the lowerVP* family for vector-predicated (VP) operations.
  SDValue lowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSTEP_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_REVERSE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue lowerMaskedGather(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerMaskedScatter(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorSetccToRVV(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorSelectToRVV(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerUnsignedAvgFloor(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPSpliceExperimental(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPReverseExperimental(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPStridedLoad(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPStridedStore(SDValue Op, SelectionDAG &DAG) const;
  // ExtendOpc selects the extension opcode (sign/zero) to use.
  SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG,
                                            unsigned ExtendOpc) const;
  SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerStrictFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerVectorStrictFSetcc(SDValue Op, SelectionDAG &DAG) const;

  // Expansion of RVV loads/stores whose alignment is not supported.
  SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
  SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;
978 | |
  /// Returns true if the call described by CLI may be lowered as a tail
  /// call given the analyzed argument locations in ArgLocs.
  bool isEligibleForTailCallOptimization(
      CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
      const SmallVector<CCValAssign, 16> &ArgLocs) const;

  /// Generate error diagnostics if any register used by CC has been marked
  /// reserved.
  void validateCCReservedRegs(
      const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
      MachineFunction &MF) const;

  /// Returns true if fixed-length vector type VT should be lowered using
  /// scalable RVV registers.
  bool useRVVForFixedLengthVectorVT(MVT VT) const;

  /// Type used for the explicit vector length (EVL) operand of VP nodes.
  MVT getVPExplicitVectorLengthTy() const override;

  bool shouldExpandGetVectorLength(EVT TripCountVT, unsigned VF,
                                   bool IsScalable) const override;

  bool shouldExpandCttzElements(EVT VT) const override;

  /// RVV code generation for fixed length vectors does not lower all
  /// BUILD_VECTORs. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR can be custom lowered for all legal vector types,
  /// this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;

  /// Disable normalizing
  /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
  /// select(N0|N1, X, Y) => select(N0, select(N1, X, Y, Y))
  /// RISC-V doesn't have flags so it's better to perform the and/or in a GPR.
  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override {
    return false;
  }

  /// For available scheduling models FDIV + two independent FMULs are much
  /// faster than two FDIVs.
  unsigned combineRepeatedFPDivisors() const override;

  /// Builds the DAG for a signed division by a power-of-two Divisor,
  /// recording created nodes in Created.
  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;

  bool shouldFoldSelectWithSingleBitTest(EVT VT,
                                         const APInt &AndMask) const override;

  /// Minimum number of entries before a switch is lowered to a jump table.
  unsigned getMinimumJumpTableEntries() const override;
1025 | }; |
1026 | |
/// As per the spec, the rules for passing vector arguments are as follows:
///
/// 1. For the first vector mask argument, use v0 to pass it.
/// 2. For vector data arguments or rest vector mask arguments, starting from
/// the v8 register, if a vector register group between v8-v23 that has not been
/// allocated can be found and the first register number is a multiple of LMUL,
/// then allocate this vector register group to the argument and mark these
/// registers as allocated. Otherwise, pass it by reference; it is replaced in
/// the argument list with its address.
/// 3. For tuple vector data arguments, starting from the v8 register, if
/// NFIELDS consecutive vector register groups between v8-v23 that have not been
/// allocated can be found and the first register number is a multiple of LMUL,
/// then allocate these vector register groups to the argument and mark these
/// registers as allocated. Otherwise, pass it by reference; it is replaced in
/// the argument list with its address.
class RVVArgDispatcher {
public:
  // Number of argument vector registers (v8-v23).
  static constexpr unsigned NumArgVRs = 16;

  /// Per-argument information used to assign vector registers.
  struct RVVArgInfo {
    unsigned NF;              // Number of fields (NFIELDS) for tuple types.
    MVT VT;
    bool FirstVMask = false;  // True for the first vector mask argument.
  };

  /// Gathers argument info from ArgList and precomputes the physical
  /// register assignment.
  template <typename Arg>
  RVVArgDispatcher(const MachineFunction *MF, const RISCVTargetLowering *TLI,
                   ArrayRef<Arg> ArgList)
      : MF(MF), TLI(TLI) {
    constructArgInfos(ArgList);
    compute();
  }

  RVVArgDispatcher() = default;

  /// Returns the next assigned physical vector register.
  MCPhysReg getNextPhysReg();

private:
  SmallVector<RVVArgInfo, 4> RVVArgInfos;
  SmallVector<MCPhysReg, 4> AllocatedPhysRegs;

  const MachineFunction *MF = nullptr;
  const RISCVTargetLowering *TLI = nullptr;

  // Presumably the next index into AllocatedPhysRegs consumed by
  // getNextPhysReg() — confirm against the .cpp.
  unsigned CurIdx = 0;

  template <typename Arg> void constructArgInfos(ArrayRef<Arg> Ret);
  void compute();
  void allocatePhysReg(unsigned NF = 1, unsigned LMul = 1,
                       unsigned StartReg = 0);
};
1078 | |
namespace RISCV {

/// RISCVCCAssignFn implementing the standard RISC-V calling conventions.
bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
              MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
              ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
              bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
              RVVArgDispatcher &RVVDispatcher);

/// RISCVCCAssignFn for the fastcc calling convention.
bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
                     MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
                     ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
                     bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
                     RVVArgDispatcher &RVVDispatcher);

/// CCAssignFn for the GHC (Glasgow Haskell Compiler) calling convention.
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
                  CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                  CCState &State);

/// Returns the GPRs used for argument passing under the given ABI.
ArrayRef<MCPhysReg> getArgGPRs(const RISCVABI::ABI ABI);

} // end namespace RISCV
1100 | |
namespace RISCVVIntrinsicsTable {

/// Searchable-table entry for an RVV intrinsic, recording which operand
/// (if any) is the scalar operand and which carries the VL.
struct RISCVVIntrinsicInfo {
  unsigned IntrinsicID;
  uint8_t ScalarOperand;  // Operand index of the scalar, or 0xF for none.
  uint8_t VLOperand;      // Operand index of the VL, or 0x1F for none.
  bool hasScalarOperand() const {
    // 0xF is not valid. See NoScalarOperand in IntrinsicsRISCV.td.
    return ScalarOperand != 0xF;
  }
  bool hasVLOperand() const {
    // 0x1F is not valid. See NoVLOperand in IntrinsicsRISCV.td.
    return VLOperand != 0x1F;
  }
};

using namespace RISCV;

// Declarations for the TableGen-generated intrinsic search table.
#define GET_RISCVVIntrinsicsTable_DECL
#include "RISCVGenSearchableTables.inc"
#undef GET_RISCVVIntrinsicsTable_DECL

} // end namespace RISCVVIntrinsicsTable
1124 | |
1125 | } // end namespace llvm |
1126 | |
1127 | #endif |
1128 | |