1 | //===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines an instruction selector for the RISC-V target. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "RISCVISelDAGToDAG.h" |
14 | #include "MCTargetDesc/RISCVBaseInfo.h" |
15 | #include "MCTargetDesc/RISCVMCTargetDesc.h" |
16 | #include "MCTargetDesc/RISCVMatInt.h" |
17 | #include "RISCVISelLowering.h" |
18 | #include "RISCVMachineFunctionInfo.h" |
19 | #include "llvm/CodeGen/MachineFrameInfo.h" |
20 | #include "llvm/IR/IntrinsicsRISCV.h" |
21 | #include "llvm/Support/Alignment.h" |
22 | #include "llvm/Support/Debug.h" |
23 | #include "llvm/Support/MathExtras.h" |
24 | #include "llvm/Support/raw_ostream.h" |
25 | |
26 | using namespace llvm; |
27 | |
28 | #define DEBUG_TYPE "riscv-isel" |
29 | #define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection" |
30 | |
static cl::opt<bool> UsePseudoMovImm(
    "riscv-use-rematerializable-movimm", cl::Hidden,
    cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
             "constant materialization"),
    cl::init(false));
36 | |
37 | namespace llvm::RISCV { |
38 | #define GET_RISCVVSSEGTable_IMPL |
39 | #define GET_RISCVVLSEGTable_IMPL |
40 | #define GET_RISCVVLXSEGTable_IMPL |
41 | #define GET_RISCVVSXSEGTable_IMPL |
42 | #define GET_RISCVVLETable_IMPL |
43 | #define GET_RISCVVSETable_IMPL |
44 | #define GET_RISCVVLXTable_IMPL |
45 | #define GET_RISCVVSXTable_IMPL |
46 | #include "RISCVGenSearchableTables.inc" |
47 | } // namespace llvm::RISCV |
48 | |
49 | void RISCVDAGToDAGISel::PreprocessISelDAG() { |
50 | SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); |
51 | |
52 | bool MadeChange = false; |
53 | while (Position != CurDAG->allnodes_begin()) { |
54 | SDNode *N = &*--Position; |
55 | if (N->use_empty()) |
56 | continue; |
57 | |
58 | SDValue Result; |
59 | switch (N->getOpcode()) { |
60 | case ISD::SPLAT_VECTOR: { |
61 | // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point |
62 | // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden. |
63 | MVT VT = N->getSimpleValueType(ResNo: 0); |
64 | unsigned Opc = |
65 | VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL; |
66 | SDLoc DL(N); |
      SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
68 | SDValue Src = N->getOperand(Num: 0); |
69 | if (VT.isInteger()) |
70 | Src = CurDAG->getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget->getXLenVT(), |
71 | Operand: N->getOperand(Num: 0)); |
72 | Result = CurDAG->getNode(Opcode: Opc, DL, VT, N1: CurDAG->getUNDEF(VT), N2: Src, N3: VL); |
73 | break; |
74 | } |
75 | case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: { |
76 | // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector |
77 | // load. Done after lowering and combining so that we have a chance to |
78 | // optimize this to VMV_V_X_VL when the upper bits aren't needed. |
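      // Note: the vlse intrinsic built below is given X0 as its stride
      // operand, i.e. a stride of zero, so every element reloads the same
      // 8-byte stack slot, broadcasting the {Lo, Hi} pair across the vector.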
      assert(N->getNumOperands() == 4 && "Unexpected number of operands");
80 | MVT VT = N->getSimpleValueType(ResNo: 0); |
81 | SDValue Passthru = N->getOperand(Num: 0); |
82 | SDValue Lo = N->getOperand(Num: 1); |
83 | SDValue Hi = N->getOperand(Num: 2); |
84 | SDValue VL = N->getOperand(Num: 3); |
      assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
             Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
             "Unexpected VTs!");
88 | MachineFunction &MF = CurDAG->getMachineFunction(); |
89 | SDLoc DL(N); |
90 | |
91 | // Create temporary stack for each expanding node. |
92 | SDValue StackSlot = |
93 | CurDAG->CreateStackTemporary(Bytes: TypeSize::getFixed(ExactSize: 8), Alignment: Align(8)); |
94 | int FI = cast<FrameIndexSDNode>(Val: StackSlot.getNode())->getIndex(); |
95 | MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); |
96 | |
97 | SDValue Chain = CurDAG->getEntryNode(); |
98 | Lo = CurDAG->getStore(Chain, dl: DL, Val: Lo, Ptr: StackSlot, PtrInfo: MPI, Alignment: Align(8)); |
99 | |
100 | SDValue OffsetSlot = |
101 | CurDAG->getMemBasePlusOffset(Base: StackSlot, Offset: TypeSize::getFixed(ExactSize: 4), DL); |
102 | Hi = CurDAG->getStore(Chain, dl: DL, Val: Hi, Ptr: OffsetSlot, PtrInfo: MPI.getWithOffset(O: 4), |
103 | Alignment: Align(8)); |
104 | |
105 | Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); |
106 | |
107 | SDVTList VTs = CurDAG->getVTList({VT, MVT::Other}); |
108 | SDValue IntID = |
109 | CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64); |
110 | SDValue Ops[] = {Chain, |
111 | IntID, |
112 | Passthru, |
113 | StackSlot, |
114 | CurDAG->getRegister(RISCV::X0, MVT::i64), |
115 | VL}; |
116 | |
117 | Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, |
118 | MVT::i64, MPI, Align(8), |
119 | MachineMemOperand::MOLoad); |
120 | break; |
121 | } |
122 | } |
123 | |
124 | if (Result) { |
      LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
      LLVM_DEBUG(N->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\nNew: ");
      LLVM_DEBUG(Result->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\n");
130 | |
131 | CurDAG->ReplaceAllUsesOfValueWith(From: SDValue(N, 0), To: Result); |
132 | MadeChange = true; |
133 | } |
134 | } |
135 | |
136 | if (MadeChange) |
137 | CurDAG->RemoveDeadNodes(); |
138 | } |
139 | |
140 | void RISCVDAGToDAGISel::PostprocessISelDAG() { |
141 | HandleSDNode Dummy(CurDAG->getRoot()); |
142 | SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); |
143 | |
144 | bool MadeChange = false; |
145 | while (Position != CurDAG->allnodes_begin()) { |
146 | SDNode *N = &*--Position; |
147 | // Skip dead nodes and any non-machine opcodes. |
148 | if (N->use_empty() || !N->isMachineOpcode()) |
149 | continue; |
150 | |
151 | MadeChange |= doPeepholeSExtW(Node: N); |
152 | |
153 | // FIXME: This is here only because the VMerge transform doesn't |
154 | // know how to handle masked true inputs. Once that has been moved |
155 | // to post-ISEL, this can be deleted as well. |
156 | MadeChange |= doPeepholeMaskedRVV(Node: cast<MachineSDNode>(Val: N)); |
157 | } |
158 | |
159 | CurDAG->setRoot(Dummy.getValue()); |
160 | |
161 | MadeChange |= doPeepholeMergeVVMFold(); |
162 | |
163 | // After we're done with everything else, convert IMPLICIT_DEF |
164 | // passthru operands to NoRegister. This is required to workaround |
165 | // an optimization deficiency in MachineCSE. This really should |
166 | // be merged back into each of the patterns (i.e. there's no good |
167 | // reason not to go directly to NoReg), but is being done this way |
168 | // to allow easy backporting. |
169 | MadeChange |= doPeepholeNoRegPassThru(); |
170 | |
171 | if (MadeChange) |
172 | CurDAG->RemoveDeadNodes(); |
173 | } |
174 | |
175 | static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, |
176 | RISCVMatInt::InstSeq &Seq) { |
  SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
178 | for (const RISCVMatInt::Inst &Inst : Seq) { |
179 | SDValue SDImm = CurDAG->getTargetConstant(Val: Inst.getImm(), DL, VT); |
180 | SDNode *Result = nullptr; |
181 | switch (Inst.getOpndKind()) { |
182 | case RISCVMatInt::Imm: |
183 | Result = CurDAG->getMachineNode(Opcode: Inst.getOpcode(), dl: DL, VT, Op1: SDImm); |
184 | break; |
185 | case RISCVMatInt::RegX0: |
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
                                      CurDAG->getRegister(RISCV::X0, VT));
188 | break; |
189 | case RISCVMatInt::RegReg: |
190 | Result = CurDAG->getMachineNode(Opcode: Inst.getOpcode(), dl: DL, VT, Op1: SrcReg, Op2: SrcReg); |
191 | break; |
192 | case RISCVMatInt::RegImm: |
193 | Result = CurDAG->getMachineNode(Opcode: Inst.getOpcode(), dl: DL, VT, Op1: SrcReg, Op2: SDImm); |
194 | break; |
195 | } |
196 | |
197 | // Only the first instruction has X0 as its source. |
198 | SrcReg = SDValue(Result, 0); |
199 | } |
200 | |
201 | return SrcReg; |
202 | } |
203 | |
204 | static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, |
205 | int64_t Imm, const RISCVSubtarget &Subtarget) { |
206 | RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget); |
207 | |
208 | // Use a rematerializable pseudo instruction for short sequences if enabled. |
209 | if (Seq.size() == 2 && UsePseudoMovImm) |
    return SDValue(
        CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
                               CurDAG->getTargetConstant(Imm, DL, VT)),
        0);
214 | |
215 | // See if we can create this constant as (ADD (SLLI X, C), X) where X is at |
216 | // worst an LUI+ADDIW. This will require an extra register, but avoids a |
217 | // constant pool. |
218 | // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where |
  // low and high 32 bits are the same and bits 31 and 63 are set.
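  // For example (illustrative): 0x1234567812345678 == (0x12345678 << 32) +
  // 0x12345678, so LUI+ADDIW materializes 0x12345678 and SLLI+ADD finish the
  // job in four instructions instead of a longer shift-and-add chain.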
220 | if (Seq.size() > 3) { |
221 | unsigned ShiftAmt, AddOpc; |
222 | RISCVMatInt::InstSeq SeqLo = |
223 | RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc); |
224 | if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) { |
225 | SDValue Lo = selectImmSeq(CurDAG, DL, VT, Seq&: SeqLo); |
226 | |
227 | SDValue SLLI = SDValue( |
228 | CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo, |
229 | CurDAG->getTargetConstant(ShiftAmt, DL, VT)), |
230 | 0); |
231 | return SDValue(CurDAG->getMachineNode(Opcode: AddOpc, dl: DL, VT, Op1: Lo, Op2: SLLI), 0); |
232 | } |
233 | } |
234 | |
235 | // Otherwise, use the original sequence. |
236 | return selectImmSeq(CurDAG, DL, VT, Seq); |
237 | } |
238 | |
239 | static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, |
240 | unsigned NF, RISCVII::VLMUL LMUL) { |
241 | static const unsigned M1TupleRegClassIDs[] = { |
242 | RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID, |
243 | RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID, |
244 | RISCV::VRN8M1RegClassID}; |
245 | static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID, |
246 | RISCV::VRN3M2RegClassID, |
247 | RISCV::VRN4M2RegClassID}; |
248 | |
249 | assert(Regs.size() >= 2 && Regs.size() <= 8); |
250 | |
251 | unsigned RegClassID; |
252 | unsigned SubReg0; |
253 | switch (LMUL) { |
254 | default: |
    llvm_unreachable("Invalid LMUL.");
256 | case RISCVII::VLMUL::LMUL_F8: |
257 | case RISCVII::VLMUL::LMUL_F4: |
258 | case RISCVII::VLMUL::LMUL_F2: |
259 | case RISCVII::VLMUL::LMUL_1: |
    static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
                  "Unexpected subreg numbering");
262 | SubReg0 = RISCV::sub_vrm1_0; |
263 | RegClassID = M1TupleRegClassIDs[NF - 2]; |
264 | break; |
265 | case RISCVII::VLMUL::LMUL_2: |
    static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
                  "Unexpected subreg numbering");
268 | SubReg0 = RISCV::sub_vrm2_0; |
269 | RegClassID = M2TupleRegClassIDs[NF - 2]; |
270 | break; |
271 | case RISCVII::VLMUL::LMUL_4: |
    static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
                  "Unexpected subreg numbering");
274 | SubReg0 = RISCV::sub_vrm4_0; |
275 | RegClassID = RISCV::VRN2M4RegClassID; |
276 | break; |
277 | } |
278 | |
279 | SDLoc DL(Regs[0]); |
280 | SmallVector<SDValue, 8> Ops; |
281 | |
282 | Ops.push_back(Elt: CurDAG.getTargetConstant(RegClassID, DL, MVT::i32)); |
283 | |
284 | for (unsigned I = 0; I < Regs.size(); ++I) { |
285 | Ops.push_back(Elt: Regs[I]); |
286 | Ops.push_back(Elt: CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32)); |
287 | } |
288 | SDNode *N = |
289 | CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); |
290 | return SDValue(N, 0); |
291 | } |
292 | |
293 | void RISCVDAGToDAGISel::addVectorLoadStoreOperands( |
294 | SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp, |
295 | bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands, |
296 | bool IsLoad, MVT *IndexVT) { |
297 | SDValue Chain = Node->getOperand(Num: 0); |
298 | SDValue Glue; |
299 | |
300 | Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Base pointer. |
301 | |
302 | if (IsStridedOrIndexed) { |
303 | Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Index. |
304 | if (IndexVT) |
305 | *IndexVT = Operands.back()->getSimpleValueType(ResNo: 0); |
306 | } |
307 | |
308 | if (IsMasked) { |
309 | // Mask needs to be copied to V0. |
310 | SDValue Mask = Node->getOperand(Num: CurOp++); |
311 | Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue()); |
312 | Glue = Chain.getValue(R: 1); |
    Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
314 | } |
315 | SDValue VL; |
316 | selectVLOp(N: Node->getOperand(Num: CurOp++), VL); |
317 | Operands.push_back(Elt: VL); |
318 | |
319 | MVT XLenVT = Subtarget->getXLenVT(); |
320 | SDValue SEWOp = CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: XLenVT); |
321 | Operands.push_back(Elt: SEWOp); |
322 | |
323 | // At the IR layer, all the masked load intrinsics have policy operands, |
324 | // none of the others do. All have passthru operands. For our pseudos, |
325 | // all loads have policy operands. |
326 | if (IsLoad) { |
327 | uint64_t Policy = RISCVII::MASK_AGNOSTIC; |
328 | if (IsMasked) |
329 | Policy = Node->getConstantOperandVal(Num: CurOp++); |
330 | SDValue PolicyOp = CurDAG->getTargetConstant(Val: Policy, DL, VT: XLenVT); |
331 | Operands.push_back(Elt: PolicyOp); |
332 | } |
333 | |
334 | Operands.push_back(Elt: Chain); // Chain. |
335 | if (Glue) |
336 | Operands.push_back(Elt: Glue); |
337 | } |
338 | |
339 | void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked, |
340 | bool IsStrided) { |
341 | SDLoc DL(Node); |
342 | unsigned NF = Node->getNumValues() - 1; |
343 | MVT VT = Node->getSimpleValueType(ResNo: 0); |
344 | unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits()); |
345 | RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); |
346 | |
347 | unsigned CurOp = 2; |
348 | SmallVector<SDValue, 8> Operands; |
349 | |
350 | SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, |
351 | Node->op_begin() + CurOp + NF); |
352 | SDValue Merge = createTuple(CurDAG&: *CurDAG, Regs, NF, LMUL); |
353 | Operands.push_back(Elt: Merge); |
354 | CurOp += NF; |
355 | |
356 | addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided, |
357 | Operands, /*IsLoad=*/true); |
358 | |
359 | const RISCV::VLSEGPseudo *P = |
360 | RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW, |
361 | static_cast<unsigned>(LMUL)); |
362 | MachineSDNode *Load = |
363 | CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); |
364 | |
365 | if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node)) |
366 | CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {MemOp->getMemOperand()}); |
367 | |
368 | SDValue SuperReg = SDValue(Load, 0); |
369 | for (unsigned I = 0; I < NF; ++I) { |
370 | unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, Index: I); |
371 | ReplaceUses(F: SDValue(Node, I), |
372 | T: CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx, DL, VT, Operand: SuperReg)); |
373 | } |
374 | |
375 | ReplaceUses(F: SDValue(Node, NF), T: SDValue(Load, 1)); |
376 | CurDAG->RemoveDeadNode(N: Node); |
377 | } |
378 | |
379 | void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) { |
380 | SDLoc DL(Node); |
381 | unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain. |
382 | MVT VT = Node->getSimpleValueType(ResNo: 0); |
383 | MVT XLenVT = Subtarget->getXLenVT(); |
384 | unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits()); |
385 | RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); |
386 | |
387 | unsigned CurOp = 2; |
388 | SmallVector<SDValue, 7> Operands; |
389 | |
390 | SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, |
391 | Node->op_begin() + CurOp + NF); |
392 | SDValue MaskedOff = createTuple(CurDAG&: *CurDAG, Regs, NF, LMUL); |
393 | Operands.push_back(Elt: MaskedOff); |
394 | CurOp += NF; |
395 | |
396 | addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, |
397 | /*IsStridedOrIndexed*/ false, Operands, |
398 | /*IsLoad=*/true); |
399 | |
400 | const RISCV::VLSEGPseudo *P = |
401 | RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true, |
402 | Log2SEW, static_cast<unsigned>(LMUL)); |
403 | MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, |
404 | XLenVT, MVT::Other, Operands); |
405 | |
406 | if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node)) |
407 | CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {MemOp->getMemOperand()}); |
408 | |
409 | SDValue SuperReg = SDValue(Load, 0); |
410 | for (unsigned I = 0; I < NF; ++I) { |
411 | unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, Index: I); |
412 | ReplaceUses(F: SDValue(Node, I), |
413 | T: CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx, DL, VT, Operand: SuperReg)); |
414 | } |
415 | |
416 | ReplaceUses(F: SDValue(Node, NF), T: SDValue(Load, 1)); // VL |
417 | ReplaceUses(F: SDValue(Node, NF + 1), T: SDValue(Load, 2)); // Chain |
418 | CurDAG->RemoveDeadNode(N: Node); |
419 | } |
420 | |
421 | void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked, |
422 | bool IsOrdered) { |
423 | SDLoc DL(Node); |
424 | unsigned NF = Node->getNumValues() - 1; |
425 | MVT VT = Node->getSimpleValueType(ResNo: 0); |
426 | unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits()); |
427 | RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); |
428 | |
429 | unsigned CurOp = 2; |
430 | SmallVector<SDValue, 8> Operands; |
431 | |
432 | SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, |
433 | Node->op_begin() + CurOp + NF); |
434 | SDValue MaskedOff = createTuple(CurDAG&: *CurDAG, Regs, NF, LMUL); |
435 | Operands.push_back(Elt: MaskedOff); |
436 | CurOp += NF; |
437 | |
438 | MVT IndexVT; |
439 | addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, |
440 | /*IsStridedOrIndexed*/ true, Operands, |
441 | /*IsLoad=*/true, IndexVT: &IndexVT); |
442 | |
  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Element count mismatch");
445 | |
446 | RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT); |
447 | unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits()); |
448 | if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { |
    report_fatal_error("The V extension does not support EEW=64 for index "
                       "values when XLEN=32");
451 | } |
452 | const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo( |
453 | NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), |
454 | static_cast<unsigned>(IndexLMUL)); |
455 | MachineSDNode *Load = |
456 | CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); |
457 | |
458 | if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node)) |
459 | CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {MemOp->getMemOperand()}); |
460 | |
461 | SDValue SuperReg = SDValue(Load, 0); |
462 | for (unsigned I = 0; I < NF; ++I) { |
463 | unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, Index: I); |
464 | ReplaceUses(F: SDValue(Node, I), |
465 | T: CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx, DL, VT, Operand: SuperReg)); |
466 | } |
467 | |
468 | ReplaceUses(F: SDValue(Node, NF), T: SDValue(Load, 1)); |
469 | CurDAG->RemoveDeadNode(N: Node); |
470 | } |
471 | |
472 | void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked, |
473 | bool IsStrided) { |
474 | SDLoc DL(Node); |
475 | unsigned NF = Node->getNumOperands() - 4; |
476 | if (IsStrided) |
477 | NF--; |
478 | if (IsMasked) |
479 | NF--; |
480 | MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0); |
481 | unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits()); |
482 | RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); |
483 | SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); |
484 | SDValue StoreVal = createTuple(CurDAG&: *CurDAG, Regs, NF, LMUL); |
485 | |
486 | SmallVector<SDValue, 8> Operands; |
487 | Operands.push_back(Elt: StoreVal); |
488 | unsigned CurOp = 2 + NF; |
489 | |
490 | addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided, |
491 | Operands); |
492 | |
493 | const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo( |
494 | NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); |
495 | MachineSDNode *Store = |
496 | CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT: Node->getValueType(ResNo: 0), Ops: Operands); |
497 | |
498 | if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node)) |
499 | CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {MemOp->getMemOperand()}); |
500 | |
501 | ReplaceNode(F: Node, T: Store); |
502 | } |
503 | |
504 | void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked, |
505 | bool IsOrdered) { |
506 | SDLoc DL(Node); |
507 | unsigned NF = Node->getNumOperands() - 5; |
508 | if (IsMasked) |
509 | --NF; |
510 | MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0); |
511 | unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits()); |
512 | RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); |
513 | SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); |
514 | SDValue StoreVal = createTuple(CurDAG&: *CurDAG, Regs, NF, LMUL); |
515 | |
516 | SmallVector<SDValue, 8> Operands; |
517 | Operands.push_back(Elt: StoreVal); |
518 | unsigned CurOp = 2 + NF; |
519 | |
520 | MVT IndexVT; |
521 | addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, |
522 | /*IsStridedOrIndexed*/ true, Operands, |
523 | /*IsLoad=*/false, IndexVT: &IndexVT); |
524 | |
  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Element count mismatch");
527 | |
528 | RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT); |
529 | unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits()); |
530 | if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { |
    report_fatal_error("The V extension does not support EEW=64 for index "
                       "values when XLEN=32");
533 | } |
534 | const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo( |
535 | NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), |
536 | static_cast<unsigned>(IndexLMUL)); |
537 | MachineSDNode *Store = |
538 | CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT: Node->getValueType(ResNo: 0), Ops: Operands); |
539 | |
540 | if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node)) |
541 | CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {MemOp->getMemOperand()}); |
542 | |
543 | ReplaceNode(F: Node, T: Store); |
544 | } |
545 | |
546 | void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) { |
547 | if (!Subtarget->hasVInstructions()) |
548 | return; |
549 | |
  assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
551 | |
552 | SDLoc DL(Node); |
553 | MVT XLenVT = Subtarget->getXLenVT(); |
554 | |
555 | unsigned IntNo = Node->getConstantOperandVal(Num: 0); |
556 | |
  assert((IntNo == Intrinsic::riscv_vsetvli ||
          IntNo == Intrinsic::riscv_vsetvlimax) &&
         "Unexpected vsetvli intrinsic");
560 | |
561 | bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax; |
562 | unsigned Offset = (VLMax ? 1 : 2); |
563 | |
  assert(Node->getNumOperands() == Offset + 2 &&
         "Unexpected number of operands");
566 | |
567 | unsigned SEW = |
568 | RISCVVType::decodeVSEW(VSEW: Node->getConstantOperandVal(Num: Offset) & 0x7); |
569 | RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>( |
570 | Node->getConstantOperandVal(Num: Offset + 1) & 0x7); |
571 | |
572 | unsigned VTypeI = RISCVVType::encodeVTYPE(VLMUL: VLMul, SEW, /*TailAgnostic*/ true, |
573 | /*MaskAgnostic*/ true); |
574 | SDValue VTypeIOp = CurDAG->getTargetConstant(Val: VTypeI, DL, VT: XLenVT); |
575 | |
576 | SDValue VLOperand; |
577 | unsigned Opcode = RISCV::PseudoVSETVLI; |
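  // If the exact VLEN is known and the constant AVL equals VLMAX
  // (VLEN divided by the SEW/LMUL ratio), treat the request as VLMax so
  // PseudoVSETVLIX0 can be used below.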
578 | if (auto *C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1))) { |
579 | if (auto VLEN = Subtarget->getRealVLen()) |
580 | if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue()) |
581 | VLMax = true; |
582 | } |
583 | if (VLMax || isAllOnesConstant(V: Node->getOperand(Num: 1))) { |
584 | VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT); |
585 | Opcode = RISCV::PseudoVSETVLIX0; |
586 | } else { |
587 | VLOperand = Node->getOperand(Num: 1); |
588 | |
589 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: VLOperand)) { |
590 | uint64_t AVL = C->getZExtValue(); |
591 | if (isUInt<5>(x: AVL)) { |
592 | SDValue VLImm = CurDAG->getTargetConstant(Val: AVL, DL, VT: XLenVT); |
593 | ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, |
594 | XLenVT, VLImm, VTypeIOp)); |
595 | return; |
596 | } |
597 | } |
598 | } |
599 | |
600 | ReplaceNode(F: Node, |
601 | T: CurDAG->getMachineNode(Opcode, dl: DL, VT: XLenVT, Op1: VLOperand, Op2: VTypeIOp)); |
602 | } |
603 | |
604 | bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) { |
605 | MVT VT = Node->getSimpleValueType(ResNo: 0); |
606 | unsigned Opcode = Node->getOpcode(); |
  assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
         "Unexpected opcode");
609 | SDLoc DL(Node); |
610 | |
611 | // For operations of the form (x << C1) op C2, check if we can use |
612 | // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1. |
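  // For example (illustrative): ((x << 16) | 0x550000) cannot use ORI
  // directly, but rewriting it as (slli (ori x, 0x55), 16) can.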
613 | SDValue N0 = Node->getOperand(Num: 0); |
614 | SDValue N1 = Node->getOperand(Num: 1); |
615 | |
616 | ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Val&: N1); |
617 | if (!Cst) |
618 | return false; |
619 | |
620 | int64_t Val = Cst->getSExtValue(); |
621 | |
622 | // Check if immediate can already use ANDI/ORI/XORI. |
623 | if (isInt<12>(x: Val)) |
624 | return false; |
625 | |
626 | SDValue Shift = N0; |
627 | |
628 | // If Val is simm32 and we have a sext_inreg from i32, then the binop |
629 | // produces at least 33 sign bits. We can peek through the sext_inreg and use |
630 | // a SLLIW at the end. |
631 | bool SignExt = false; |
632 | if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG && |
633 | N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) { |
634 | SignExt = true; |
635 | Shift = N0.getOperand(i: 0); |
636 | } |
637 | |
638 | if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse()) |
639 | return false; |
640 | |
641 | ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Val: Shift.getOperand(i: 1)); |
642 | if (!ShlCst) |
643 | return false; |
644 | |
645 | uint64_t ShAmt = ShlCst->getZExtValue(); |
646 | |
647 | // Make sure that we don't change the operation by removing bits. |
  // This only matters for OR and XOR; AND is unaffected.
649 | uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(N: ShAmt); |
650 | if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0) |
651 | return false; |
652 | |
653 | int64_t ShiftedVal = Val >> ShAmt; |
654 | if (!isInt<12>(x: ShiftedVal)) |
655 | return false; |
656 | |
657 | // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW. |
658 | if (SignExt && ShAmt >= 32) |
659 | return false; |
660 | |
661 | // Ok, we can reorder to get a smaller immediate. |
662 | unsigned BinOpc; |
663 | switch (Opcode) { |
  default: llvm_unreachable("Unexpected opcode");
665 | case ISD::AND: BinOpc = RISCV::ANDI; break; |
666 | case ISD::OR: BinOpc = RISCV::ORI; break; |
667 | case ISD::XOR: BinOpc = RISCV::XORI; break; |
668 | } |
669 | |
670 | unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI; |
671 | |
672 | SDNode *BinOp = |
673 | CurDAG->getMachineNode(Opcode: BinOpc, dl: DL, VT, Op1: Shift.getOperand(i: 0), |
674 | Op2: CurDAG->getTargetConstant(Val: ShiftedVal, DL, VT)); |
675 | SDNode *SLLI = |
676 | CurDAG->getMachineNode(Opcode: ShOpc, dl: DL, VT, Op1: SDValue(BinOp, 0), |
677 | Op2: CurDAG->getTargetConstant(Val: ShAmt, DL, VT)); |
678 | ReplaceNode(F: Node, T: SLLI); |
679 | return true; |
680 | } |
681 | |
bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
683 | // Only supported with XTHeadBb at the moment. |
684 | if (!Subtarget->hasVendorXTHeadBb()) |
685 | return false; |
686 | |
687 | auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1)); |
688 | if (!N1C) |
689 | return false; |
690 | |
691 | SDValue N0 = Node->getOperand(Num: 0); |
692 | if (!N0.hasOneUse()) |
693 | return false; |
694 | |
  auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
                             MVT VT) {
697 | return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0), |
698 | CurDAG->getTargetConstant(Msb, DL, VT), |
699 | CurDAG->getTargetConstant(Lsb, DL, VT)); |
700 | }; |
701 | |
702 | SDLoc DL(Node); |
703 | MVT VT = Node->getSimpleValueType(ResNo: 0); |
704 | const unsigned RightShAmt = N1C->getZExtValue(); |
705 | |
706 | // Transform (sra (shl X, C1) C2) with C1 < C2 |
707 | // -> (TH.EXT X, msb, lsb) |
708 | if (N0.getOpcode() == ISD::SHL) { |
709 | auto *N01C = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1)); |
710 | if (!N01C) |
711 | return false; |
712 | |
713 | const unsigned LeftShAmt = N01C->getZExtValue(); |
714 | // Make sure that this is a bitfield extraction (i.e., the shift-right |
    // amount cannot be less than the left-shift).
716 | if (LeftShAmt > RightShAmt) |
717 | return false; |
718 | |
719 | const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt; |
720 | const unsigned Msb = MsbPlusOne - 1; |
721 | const unsigned Lsb = RightShAmt - LeftShAmt; |
722 | |
723 | SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT); |
724 | ReplaceNode(F: Node, T: TH_EXT); |
725 | return true; |
726 | } |
727 | |
728 | // Transform (sra (sext_inreg X, _), C) -> |
729 | // (TH.EXT X, msb, lsb) |
730 | if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) { |
731 | unsigned ExtSize = |
732 | cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT().getSizeInBits(); |
733 | |
734 | // ExtSize of 32 should use sraiw via tablegen pattern. |
735 | if (ExtSize == 32) |
736 | return false; |
737 | |
738 | const unsigned Msb = ExtSize - 1; |
739 | const unsigned Lsb = RightShAmt; |
740 | |
741 | SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT); |
742 | ReplaceNode(F: Node, T: TH_EXT); |
743 | return true; |
744 | } |
745 | |
746 | return false; |
747 | } |
748 | |
749 | bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) { |
750 | // Target does not support indexed loads. |
751 | if (!Subtarget->hasVendorXTHeadMemIdx()) |
752 | return false; |
753 | |
754 | LoadSDNode *Ld = cast<LoadSDNode>(Val: Node); |
755 | ISD::MemIndexedMode AM = Ld->getAddressingMode(); |
756 | if (AM == ISD::UNINDEXED) |
757 | return false; |
758 | |
759 | const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: Ld->getOffset()); |
760 | if (!C) |
761 | return false; |
762 | |
763 | EVT LoadVT = Ld->getMemoryVT(); |
  assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
         "Unexpected addressing mode");
766 | bool IsPre = AM == ISD::PRE_INC; |
767 | bool IsPost = AM == ISD::POST_INC; |
768 | int64_t Offset = C->getSExtValue(); |
769 | |
770 | // The constants that can be encoded in the THeadMemIdx instructions |
771 | // are of the form (sign_extend(imm5) << imm2). |
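  // For example (illustrative): an offset of 96 encodes as 12 << 3
  // (isInt<5>(12) and 96 % 8 == 0), while 97 fails at every shift amount
  // and is rejected below.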
772 | int64_t Shift; |
773 | for (Shift = 0; Shift < 4; Shift++) |
774 | if (isInt<5>(x: Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) |
775 | break; |
776 | |
777 | // Constant cannot be encoded. |
778 | if (Shift == 4) |
779 | return false; |
780 | |
781 | bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD); |
782 | unsigned Opcode; |
783 | if (LoadVT == MVT::i8 && IsPre) |
784 | Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB; |
785 | else if (LoadVT == MVT::i8 && IsPost) |
786 | Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA; |
787 | else if (LoadVT == MVT::i16 && IsPre) |
788 | Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB; |
789 | else if (LoadVT == MVT::i16 && IsPost) |
790 | Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA; |
791 | else if (LoadVT == MVT::i32 && IsPre) |
792 | Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB; |
793 | else if (LoadVT == MVT::i32 && IsPost) |
794 | Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA; |
795 | else if (LoadVT == MVT::i64 && IsPre) |
796 | Opcode = RISCV::TH_LDIB; |
797 | else if (LoadVT == MVT::i64 && IsPost) |
798 | Opcode = RISCV::TH_LDIA; |
799 | else |
800 | return false; |
801 | |
802 | EVT Ty = Ld->getOffset().getValueType(); |
803 | SDValue Ops[] = {Ld->getBasePtr(), |
804 | CurDAG->getTargetConstant(Val: Offset >> Shift, DL: SDLoc(Node), VT: Ty), |
805 | CurDAG->getTargetConstant(Val: Shift, DL: SDLoc(Node), VT: Ty), |
806 | Ld->getChain()}; |
807 | SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0), |
808 | Ld->getValueType(1), MVT::Other, Ops); |
809 | |
810 | MachineMemOperand *MemOp = cast<MemSDNode>(Val: Node)->getMemOperand(); |
811 | CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: New), NewMemRefs: {MemOp}); |
812 | |
813 | ReplaceNode(F: Node, T: New); |
814 | |
815 | return true; |
816 | } |
817 | |
818 | void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) { |
819 | if (!Subtarget->hasVInstructions()) |
820 | return; |
821 | |
  assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
823 | |
824 | SDLoc DL(Node); |
825 | unsigned IntNo = Node->getConstantOperandVal(Num: 1); |
826 | |
  assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
          IntNo == Intrinsic::riscv_sf_vc_i_se) &&
         "Unexpected SiFive VCIX intrinsic");
830 | |
831 | // imm, imm, imm, simm5/scalar, sew, log2lmul, vl |
832 | unsigned Log2SEW = Log2_32(Value: Node->getConstantOperandVal(Num: 6)); |
833 | SDValue SEWOp = |
834 | CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: Subtarget->getXLenVT()); |
835 | SmallVector<SDValue, 8> Operands = {Node->getOperand(Num: 2), Node->getOperand(Num: 3), |
836 | Node->getOperand(Num: 4), Node->getOperand(Num: 5), |
837 | Node->getOperand(Num: 8), SEWOp, |
838 | Node->getOperand(Num: 0)}; |
839 | |
840 | unsigned Opcode; |
841 | auto *LMulSDNode = cast<ConstantSDNode>(Val: Node->getOperand(Num: 7)); |
842 | switch (LMulSDNode->getSExtValue()) { |
843 | case 5: |
844 | Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF8 |
845 | : RISCV::PseudoVC_I_SE_MF8; |
846 | break; |
847 | case 6: |
848 | Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF4 |
849 | : RISCV::PseudoVC_I_SE_MF4; |
850 | break; |
851 | case 7: |
852 | Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF2 |
853 | : RISCV::PseudoVC_I_SE_MF2; |
854 | break; |
855 | case 0: |
856 | Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M1 |
857 | : RISCV::PseudoVC_I_SE_M1; |
858 | break; |
859 | case 1: |
860 | Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M2 |
861 | : RISCV::PseudoVC_I_SE_M2; |
862 | break; |
863 | case 2: |
864 | Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M4 |
865 | : RISCV::PseudoVC_I_SE_M4; |
866 | break; |
867 | case 3: |
868 | Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M8 |
869 | : RISCV::PseudoVC_I_SE_M8; |
870 | break; |
871 | } |
872 | |
873 | ReplaceNode(F: Node, T: CurDAG->getMachineNode( |
874 | Opcode, dl: DL, VT: Node->getSimpleValueType(ResNo: 0), Ops: Operands)); |
875 | } |
876 | |
877 | void RISCVDAGToDAGISel::Select(SDNode *Node) { |
878 | // If we have a custom node, we have already selected. |
879 | if (Node->isMachineOpcode()) { |
    LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
881 | Node->setNodeId(-1); |
882 | return; |
883 | } |
884 | |
885 | // Instruction Selection not handled by the auto-generated tablegen selection |
886 | // should be handled here. |
887 | unsigned Opcode = Node->getOpcode(); |
888 | MVT XLenVT = Subtarget->getXLenVT(); |
889 | SDLoc DL(Node); |
890 | MVT VT = Node->getSimpleValueType(ResNo: 0); |
891 | |
892 | bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs(); |
893 | |
894 | switch (Opcode) { |
895 | case ISD::Constant: { |
    assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
897 | auto *ConstNode = cast<ConstantSDNode>(Val: Node); |
898 | if (ConstNode->isZero()) { |
899 | SDValue New = |
900 | CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT); |
901 | ReplaceNode(F: Node, T: New.getNode()); |
902 | return; |
903 | } |
904 | int64_t Imm = ConstNode->getSExtValue(); |
905 | // If the upper XLen-16 bits are not used, try to convert this to a simm12 |
906 | // by sign extending bit 15. |
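    // For example (illustrative): 0xFFF0 sign-extends from bit 15 to -16,
    // which fits in a single ADDI.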
907 | if (isUInt<16>(x: Imm) && isInt<12>(x: SignExtend64<16>(x: Imm)) && |
908 | hasAllHUsers(Node)) |
909 | Imm = SignExtend64<16>(x: Imm); |
910 | // If the upper 32-bits are not used try to convert this into a simm32 by |
911 | // sign extending bit 32. |
912 | if (!isInt<32>(x: Imm) && isUInt<32>(x: Imm) && hasAllWUsers(Node)) |
913 | Imm = SignExtend64<32>(x: Imm); |
914 | |
915 | ReplaceNode(F: Node, T: selectImm(CurDAG, DL, VT, Imm, Subtarget: *Subtarget).getNode()); |
916 | return; |
917 | } |
918 | case ISD::ConstantFP: { |
919 | const APFloat &APF = cast<ConstantFPSDNode>(Val: Node)->getValueAPF(); |
920 | auto [FPImm, NeedsFNeg] = |
921 | static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(Imm: APF, |
922 | VT); |
923 | if (FPImm >= 0) { |
924 | unsigned Opc; |
925 | unsigned FNegOpc; |
926 | switch (VT.SimpleTy) { |
927 | default: |
        llvm_unreachable("Unexpected size");
929 | case MVT::f16: |
930 | Opc = RISCV::FLI_H; |
931 | FNegOpc = RISCV::FSGNJN_H; |
932 | break; |
933 | case MVT::f32: |
934 | Opc = RISCV::FLI_S; |
935 | FNegOpc = RISCV::FSGNJN_S; |
936 | break; |
937 | case MVT::f64: |
938 | Opc = RISCV::FLI_D; |
939 | FNegOpc = RISCV::FSGNJN_D; |
940 | break; |
941 | } |
942 | SDNode *Res = CurDAG->getMachineNode( |
943 | Opcode: Opc, dl: DL, VT, Op1: CurDAG->getTargetConstant(Val: FPImm, DL, VT: XLenVT)); |
944 | if (NeedsFNeg) |
945 | Res = CurDAG->getMachineNode(Opcode: FNegOpc, dl: DL, VT, Op1: SDValue(Res, 0), |
946 | Op2: SDValue(Res, 0)); |
947 | |
948 | ReplaceNode(F: Node, T: Res); |
949 | return; |
950 | } |
951 | |
952 | bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64; |
953 | SDValue Imm; |
954 | // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will |
955 | // create an integer immediate. |
956 | if (APF.isPosZero() || NegZeroF64) |
957 | Imm = CurDAG->getRegister(RISCV::X0, XLenVT); |
958 | else |
959 | Imm = selectImm(CurDAG, DL, VT: XLenVT, Imm: APF.bitcastToAPInt().getSExtValue(), |
960 | Subtarget: *Subtarget); |
961 | |
962 | bool HasZdinx = Subtarget->hasStdExtZdinx(); |
963 | bool Is64Bit = Subtarget->is64Bit(); |
964 | unsigned Opc; |
965 | switch (VT.SimpleTy) { |
966 | default: |
      llvm_unreachable("Unexpected size");
968 | case MVT::bf16: |
969 | assert(Subtarget->hasStdExtZfbfmin()); |
970 | Opc = RISCV::FMV_H_X; |
971 | break; |
972 | case MVT::f16: |
973 | Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X; |
974 | break; |
975 | case MVT::f32: |
976 | Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X; |
977 | break; |
978 | case MVT::f64: |
979 | // For RV32, we can't move from a GPR, we need to convert instead. This |
980 | // should only happen for +0.0 and -0.0. |
      assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
982 | if (Is64Bit) |
983 | Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X; |
984 | else |
985 | Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W; |
986 | break; |
987 | } |
988 | |
989 | SDNode *Res; |
990 | if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W) |
991 | Res = CurDAG->getMachineNode( |
992 | Opcode: Opc, dl: DL, VT, Op1: Imm, |
993 | Op2: CurDAG->getTargetConstant(Val: RISCVFPRndMode::RNE, DL, VT: XLenVT)); |
994 | else |
995 | Res = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: Imm); |
996 | |
997 | // For f64 -0.0, we need to insert a fneg.d idiom. |
998 | if (NegZeroF64) { |
999 | Opc = RISCV::FSGNJN_D; |
1000 | if (HasZdinx) |
1001 | Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X; |
1002 | Res = |
1003 | CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: SDValue(Res, 0), Op2: SDValue(Res, 0)); |
1004 | } |
1005 | |
1006 | ReplaceNode(F: Node, T: Res); |
1007 | return; |
1008 | } |
1009 | case RISCVISD::BuildPairF64: { |
1010 | if (!Subtarget->hasStdExtZdinx()) |
1011 | break; |
1012 | |
    assert(!Subtarget->is64Bit() && "Unexpected subtarget");
1014 | |
1015 | SDValue Ops[] = { |
1016 | CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), |
1017 | Node->getOperand(0), |
1018 | CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), |
1019 | Node->getOperand(1), |
1020 | CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)}; |
1021 | |
1022 | SDNode *N = |
1023 | CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::f64, Ops); |
1024 | ReplaceNode(F: Node, T: N); |
1025 | return; |
1026 | } |
1027 | case RISCVISD::SplitF64: { |
1028 | if (Subtarget->hasStdExtZdinx()) { |
      assert(!Subtarget->is64Bit() && "Unexpected subtarget");
1030 | |
1031 | if (!SDValue(Node, 0).use_empty()) { |
1032 | SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL, VT, |
1033 | Node->getOperand(0)); |
1034 | ReplaceUses(F: SDValue(Node, 0), T: Lo); |
1035 | } |
1036 | |
1037 | if (!SDValue(Node, 1).use_empty()) { |
1038 | SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL, VT, |
1039 | Node->getOperand(0)); |
1040 | ReplaceUses(F: SDValue(Node, 1), T: Hi); |
1041 | } |
1042 | |
1043 | CurDAG->RemoveDeadNode(N: Node); |
1044 | return; |
1045 | } |
1046 | |
1047 | if (!Subtarget->hasStdExtZfa()) |
1048 | break; |
    assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
           "Unexpected subtarget");
1051 | |
1052 | // With Zfa, lower to fmv.x.w and fmvh.x.d. |
1053 | if (!SDValue(Node, 0).use_empty()) { |
1054 | SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT, |
1055 | Node->getOperand(0)); |
1056 | ReplaceUses(F: SDValue(Node, 0), T: SDValue(Lo, 0)); |
1057 | } |
1058 | if (!SDValue(Node, 1).use_empty()) { |
1059 | SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT, |
1060 | Node->getOperand(0)); |
1061 | ReplaceUses(F: SDValue(Node, 1), T: SDValue(Hi, 0)); |
1062 | } |
1063 | |
1064 | CurDAG->RemoveDeadNode(N: Node); |
1065 | return; |
1066 | } |
1067 | case ISD::SHL: { |
1068 | auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1)); |
1069 | if (!N1C) |
1070 | break; |
1071 | SDValue N0 = Node->getOperand(Num: 0); |
1072 | if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() || |
1073 | !isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) |
1074 | break; |
1075 | unsigned ShAmt = N1C->getZExtValue(); |
1076 | uint64_t Mask = N0.getConstantOperandVal(i: 1); |
1077 | |
1078 | // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has |
1079 | // 32 leading zeros and C3 trailing zeros. |
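    // For example (RV64, illustrative): (shl (and X, 0xFFFFFFF0), 2)
    // -> (slli (srliw X, 4), 6); the SRLIW performs the masking for free.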
1080 | if (ShAmt <= 32 && isShiftedMask_64(Value: Mask)) { |
1081 | unsigned XLen = Subtarget->getXLen(); |
1082 | unsigned LeadingZeros = XLen - llvm::bit_width(Value: Mask); |
1083 | unsigned TrailingZeros = llvm::countr_zero(Val: Mask); |
1084 | if (TrailingZeros > 0 && LeadingZeros == 32) { |
1085 | SDNode *SRLIW = CurDAG->getMachineNode( |
1086 | RISCV::SRLIW, DL, VT, N0->getOperand(0), |
1087 | CurDAG->getTargetConstant(TrailingZeros, DL, VT)); |
1088 | SDNode *SLLI = CurDAG->getMachineNode( |
1089 | RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), |
1090 | CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT)); |
1091 | ReplaceNode(F: Node, T: SLLI); |
1092 | return; |
1093 | } |
1094 | } |
1095 | break; |
1096 | } |
1097 | case ISD::SRL: { |
1098 | auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1)); |
1099 | if (!N1C) |
1100 | break; |
1101 | SDValue N0 = Node->getOperand(Num: 0); |
1102 | if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) |
1103 | break; |
1104 | unsigned ShAmt = N1C->getZExtValue(); |
1105 | uint64_t Mask = N0.getConstantOperandVal(i: 1); |
1106 | |
1107 | // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has |
1108 | // 32 leading zeros and C3 trailing zeros. |
1109 | if (isShiftedMask_64(Value: Mask) && N0.hasOneUse()) { |
1110 | unsigned XLen = Subtarget->getXLen(); |
1111 | unsigned LeadingZeros = XLen - llvm::bit_width(Value: Mask); |
1112 | unsigned TrailingZeros = llvm::countr_zero(Val: Mask); |
1113 | if (LeadingZeros == 32 && TrailingZeros > ShAmt) { |
1114 | SDNode *SRLIW = CurDAG->getMachineNode( |
1115 | RISCV::SRLIW, DL, VT, N0->getOperand(0), |
1116 | CurDAG->getTargetConstant(TrailingZeros, DL, VT)); |
1117 | SDNode *SLLI = CurDAG->getMachineNode( |
1118 | RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), |
1119 | CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT)); |
1120 | ReplaceNode(F: Node, T: SLLI); |
1121 | return; |
1122 | } |
1123 | } |
1124 | |
    // Optimize (srl (and X, C2), C) ->
    //          (srli (slli X, (XLen-C3)), (XLen-C3) + C)
1127 | // Where C2 is a mask with C3 trailing ones. |
1128 | // Taking into account that the C2 may have had lower bits unset by |
1129 | // SimplifyDemandedBits. This avoids materializing the C2 immediate. |
1130 | // This pattern occurs when type legalizing right shifts for types with |
1131 | // less than XLen bits. |
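    // For example (RV64, illustrative): (srl (and X, 0xFFFF), 8)
    // -> (srli (slli X, 48), 56), extracting bits 15..8 without an ANDI.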
1132 | Mask |= maskTrailingOnes<uint64_t>(N: ShAmt); |
1133 | if (!isMask_64(Value: Mask)) |
1134 | break; |
1135 | unsigned TrailingOnes = llvm::countr_one(Value: Mask); |
1136 | if (ShAmt >= TrailingOnes) |
1137 | break; |
1138 | // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64. |
1139 | if (TrailingOnes == 32) { |
1140 | SDNode *SRLI = CurDAG->getMachineNode( |
1141 | Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT, |
1142 | N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT)); |
1143 | ReplaceNode(F: Node, T: SRLI); |
1144 | return; |
1145 | } |
1146 | |
1147 | // Only do the remaining transforms if the AND has one use. |
1148 | if (!N0.hasOneUse()) |
1149 | break; |
1150 | |
1151 | // If C2 is (1 << ShAmt) use bexti or th.tst if possible. |
1152 | if (HasBitTest && ShAmt + 1 == TrailingOnes) { |
1153 | SDNode *BEXTI = CurDAG->getMachineNode( |
1154 | Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT, |
1155 | N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT)); |
1156 | ReplaceNode(F: Node, T: BEXTI); |
1157 | return; |
1158 | } |
1159 | |
1160 | unsigned LShAmt = Subtarget->getXLen() - TrailingOnes; |
1161 | SDNode *SLLI = |
1162 | CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0), |
1163 | CurDAG->getTargetConstant(LShAmt, DL, VT)); |
1164 | SDNode *SRLI = CurDAG->getMachineNode( |
1165 | RISCV::SRLI, DL, VT, SDValue(SLLI, 0), |
1166 | CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT)); |
1167 | ReplaceNode(F: Node, T: SRLI); |
1168 | return; |
1169 | } |
1170 | case ISD::SRA: { |
1171 | if (trySignedBitfieldExtract(Node)) |
1172 | return; |
1173 | |
    // Optimize (sra (sext_inreg X, i16), C) ->
    //          (srai (slli X, (XLen-16)), (XLen-16) + C)
    // And (sra (sext_inreg X, i8), C) ->
    //          (srai (slli X, (XLen-8)), (XLen-8) + C)
1178 | // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal. |
1179 | // This transform matches the code we get without Zbb. The shifts are more |
1180 | // compressible, and this can help expose CSE opportunities in the sdiv by |
1181 | // constant optimization. |
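    // For example (RV64, illustrative): (sra (sext_inreg X, i8), 2)
    // -> (srai (slli X, 56), 58).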
1182 | auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1)); |
1183 | if (!N1C) |
1184 | break; |
1185 | SDValue N0 = Node->getOperand(Num: 0); |
1186 | if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse()) |
1187 | break; |
1188 | unsigned ShAmt = N1C->getZExtValue(); |
1189 | unsigned ExtSize = |
1190 | cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT().getSizeInBits(); |
1191 | // ExtSize of 32 should use sraiw via tablegen pattern. |
1192 | if (ExtSize >= 32 || ShAmt >= ExtSize) |
1193 | break; |
1194 | unsigned LShAmt = Subtarget->getXLen() - ExtSize; |
1195 | SDNode *SLLI = |
1196 | CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0), |
1197 | CurDAG->getTargetConstant(LShAmt, DL, VT)); |
1198 | SDNode *SRAI = CurDAG->getMachineNode( |
1199 | RISCV::SRAI, DL, VT, SDValue(SLLI, 0), |
1200 | CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT)); |
1201 | ReplaceNode(F: Node, T: SRAI); |
1202 | return; |
1203 | } |
1204 | case ISD::OR: |
1205 | case ISD::XOR: |
1206 | if (tryShrinkShlLogicImm(Node)) |
1207 | return; |
1208 | |
1209 | break; |
1210 | case ISD::AND: { |
1211 | auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1)); |
1212 | if (!N1C) |
1213 | break; |
1214 | uint64_t C1 = N1C->getZExtValue(); |
1215 | const bool isC1Mask = isMask_64(Value: C1); |
1216 | const bool isC1ANDI = isInt<12>(x: C1); |
1217 | |
1218 | SDValue N0 = Node->getOperand(Num: 0); |
1219 | |
    auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
                                          SDValue X, unsigned Msb,
                                          unsigned Lsb) {
1223 | if (!Subtarget->hasVendorXTHeadBb()) |
1224 | return false; |
1225 | |
1226 | SDNode *TH_EXTU = CurDAG->getMachineNode( |
1227 | RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT), |
1228 | CurDAG->getTargetConstant(Lsb, DL, VT)); |
1229 | ReplaceNode(F: Node, T: TH_EXTU); |
1230 | return true; |
1231 | }; |
1232 | |
1233 | bool LeftShift = N0.getOpcode() == ISD::SHL; |
1234 | if (LeftShift || N0.getOpcode() == ISD::SRL) { |
1235 | auto *C = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1)); |
1236 | if (!C) |
1237 | break; |
1238 | unsigned C2 = C->getZExtValue(); |
1239 | unsigned XLen = Subtarget->getXLen(); |
      assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1241 | |
1242 | // Keep track of whether this is a c.andi. If we can't use c.andi, the |
1243 | // shift pair might offer more compression opportunities. |
1244 | // TODO: We could check for C extension here, but we don't have many lit |
1245 | // tests with the C extension enabled so not checking gets better |
1246 | // coverage. |
      // TODO: What if ANDI is faster than the shift?
1248 | bool IsCANDI = isInt<6>(x: N1C->getSExtValue()); |
1249 | |
1250 | // Clear irrelevant bits in the mask. |
1251 | if (LeftShift) |
1252 | C1 &= maskTrailingZeros<uint64_t>(N: C2); |
1253 | else |
1254 | C1 &= maskTrailingOnes<uint64_t>(N: XLen - C2); |
1255 | |
1256 | // Some transforms should only be done if the shift has a single use or |
1257 | // the AND would become (srli (slli X, 32), 32) |
1258 | bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF); |
1259 | |
1260 | SDValue X = N0.getOperand(i: 0); |
1261 | |
1262 | // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask |
1263 | // with c3 leading zeros. |
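      // For example (RV64, illustrative): (and (srl x, 4), 0xFF)
      // -> (srli (slli x, 52), 56) when no SRLIW or bitfield-extract form
      // applies.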
1264 | if (!LeftShift && isC1Mask) { |
1265 | unsigned Leading = XLen - llvm::bit_width(Value: C1); |
1266 | if (C2 < Leading) { |
1267 | // If the number of leading zeros is C2+32 this can be SRLIW. |
1268 | if (C2 + 32 == Leading) { |
1269 | SDNode *SRLIW = CurDAG->getMachineNode( |
1270 | RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT)); |
1271 | ReplaceNode(F: Node, T: SRLIW); |
1272 | return; |
1273 | } |
1274 | |
1275 | // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32) |
1276 | // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1. |
1277 | // |
          // This pattern occurs when (i32 (srl (sra X, 31), c3 - 32)) is type
          // legalized and goes through DAG combine.
1280 | if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() && |
1281 | X.getOpcode() == ISD::SIGN_EXTEND_INREG && |
1282 | cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) { |
1283 | SDNode *SRAIW = |
1284 | CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0), |
1285 | CurDAG->getTargetConstant(31, DL, VT)); |
1286 | SDNode *SRLIW = CurDAG->getMachineNode( |
1287 | RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0), |
1288 | CurDAG->getTargetConstant(Leading - 32, DL, VT)); |
1289 | ReplaceNode(F: Node, T: SRLIW); |
1290 | return; |
1291 | } |
1292 | |
1293 | // Try to use an unsigned bitfield extract (e.g., th.extu) if |
1294 | // available. |
1295 | // Transform (and (srl x, C2), C1) |
1296 | // -> (<bfextract> x, msb, lsb) |
1297 | // |
1298 | // Make sure to keep this below the SRLIW cases, as we always want to |
1299 | // prefer the more common instruction. |
1300 | const unsigned Msb = llvm::bit_width(Value: C1) + C2 - 1; |
1301 | const unsigned Lsb = C2; |
1302 | if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb)) |
1303 | return; |
1304 | |
1305 | // (srli (slli x, c3-c2), c3). |
1306 | // Skip if we could use (zext.w (sraiw X, C2)). |
1307 | bool Skip = Subtarget->hasStdExtZba() && Leading == 32 && |
1308 | X.getOpcode() == ISD::SIGN_EXTEND_INREG && |
1309 | cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32; |
1310 | // Also Skip if we can use bexti or th.tst. |
1311 | Skip |= HasBitTest && Leading == XLen - 1; |
1312 | if (OneUseOrZExtW && !Skip) { |
1313 | SDNode *SLLI = CurDAG->getMachineNode( |
1314 | RISCV::SLLI, DL, VT, X, |
1315 | CurDAG->getTargetConstant(Leading - C2, DL, VT)); |
1316 | SDNode *SRLI = CurDAG->getMachineNode( |
1317 | RISCV::SRLI, DL, VT, SDValue(SLLI, 0), |
1318 | CurDAG->getTargetConstant(Leading, DL, VT)); |
1319 | ReplaceNode(F: Node, T: SRLI); |
1320 | return; |
1321 | } |
1322 | } |
1323 | } |
1324 | |
      // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a
      // mask shifted by c2 bits with c3 leading zeros.
1327 | if (LeftShift && isShiftedMask_64(Value: C1)) { |
1328 | unsigned Leading = XLen - llvm::bit_width(Value: C1); |
1329 | |
1330 | if (C2 + Leading < XLen && |
1331 | C1 == (maskTrailingOnes<uint64_t>(N: XLen - (C2 + Leading)) << C2)) { |
1332 | // Use slli.uw when possible. |
1333 | if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) { |
1334 | SDNode *SLLI_UW = |
1335 | CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X, |
1336 | CurDAG->getTargetConstant(C2, DL, VT)); |
1337 | ReplaceNode(F: Node, T: SLLI_UW); |
1338 | return; |
1339 | } |
1340 | |
          // (srli (slli x, c2+c3), c3)
1342 | if (OneUseOrZExtW && !IsCANDI) { |
1343 | SDNode *SLLI = CurDAG->getMachineNode( |
1344 | RISCV::SLLI, DL, VT, X, |
1345 | CurDAG->getTargetConstant(C2 + Leading, DL, VT)); |
1346 | SDNode *SRLI = CurDAG->getMachineNode( |
1347 | RISCV::SRLI, DL, VT, SDValue(SLLI, 0), |
1348 | CurDAG->getTargetConstant(Leading, DL, VT)); |
1349 | ReplaceNode(F: Node, T: SRLI); |
1350 | return; |
1351 | } |
1352 | } |
1353 | } |
1354 | |
1355 | // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a |
1356 | // shifted mask with c2 leading zeros and c3 trailing zeros. |
1357 | if (!LeftShift && isShiftedMask_64(Value: C1)) { |
1358 | unsigned Leading = XLen - llvm::bit_width(Value: C1); |
1359 | unsigned Trailing = llvm::countr_zero(Val: C1); |
1360 | if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW && |
1361 | !IsCANDI) { |
1362 | unsigned SrliOpc = RISCV::SRLI; |
1363 | // If the input is zexti32 we should use SRLIW. |
1364 | if (X.getOpcode() == ISD::AND && |
1365 | isa<ConstantSDNode>(Val: X.getOperand(i: 1)) && |
1366 | X.getConstantOperandVal(i: 1) == UINT64_C(0xFFFFFFFF)) { |
1367 | SrliOpc = RISCV::SRLIW; |
1368 | X = X.getOperand(i: 0); |
1369 | } |
1370 | SDNode *SRLI = CurDAG->getMachineNode( |
1371 | Opcode: SrliOpc, dl: DL, VT, Op1: X, |
1372 | Op2: CurDAG->getTargetConstant(Val: C2 + Trailing, DL, VT)); |
1373 | SDNode *SLLI = CurDAG->getMachineNode( |
1374 | RISCV::SLLI, DL, VT, SDValue(SRLI, 0), |
1375 | CurDAG->getTargetConstant(Trailing, DL, VT)); |
1376 | ReplaceNode(F: Node, T: SLLI); |
1377 | return; |
1378 | } |
1379 | // If the leading zero count is C2+32, we can use SRLIW instead of SRLI. |
1380 | if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 && |
1381 | OneUseOrZExtW && !IsCANDI) { |
1382 | SDNode *SRLIW = CurDAG->getMachineNode( |
1383 | RISCV::SRLIW, DL, VT, X, |
1384 | CurDAG->getTargetConstant(C2 + Trailing, DL, VT)); |
1385 | SDNode *SLLI = CurDAG->getMachineNode( |
1386 | RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), |
1387 | CurDAG->getTargetConstant(Trailing, DL, VT)); |
1388 | ReplaceNode(F: Node, T: SLLI); |
1389 | return; |
1390 | } |
1391 | } |
1392 | |
1393 | // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a |
1394 | // shifted mask with no leading zeros and c3 trailing zeros. |
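// For example, on RV64: (and (shl x, 2), 0xFFFFFFFFFFFFFF00) has no leading
// and 8 trailing zeros, and becomes (slli (srli x, 6), 8).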
1395 | if (LeftShift && isShiftedMask_64(Value: C1)) { |
1396 | unsigned Leading = XLen - llvm::bit_width(Value: C1); |
1397 | unsigned Trailing = llvm::countr_zero(Val: C1); |
1398 | if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) { |
1399 | SDNode *SRLI = CurDAG->getMachineNode( |
1400 | RISCV::SRLI, DL, VT, X, |
1401 | CurDAG->getTargetConstant(Trailing - C2, DL, VT)); |
1402 | SDNode *SLLI = CurDAG->getMachineNode( |
1403 | RISCV::SLLI, DL, VT, SDValue(SRLI, 0), |
1404 | CurDAG->getTargetConstant(Trailing, DL, VT)); |
1405 | ReplaceNode(F: Node, T: SLLI); |
1406 | return; |
1407 | } |
1408 | // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI. |
1409 | if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) { |
1410 | SDNode *SRLIW = CurDAG->getMachineNode( |
1411 | RISCV::SRLIW, DL, VT, X, |
1412 | CurDAG->getTargetConstant(Trailing - C2, DL, VT)); |
1413 | SDNode *SLLI = CurDAG->getMachineNode( |
1414 | RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), |
1415 | CurDAG->getTargetConstant(Trailing, DL, VT)); |
1416 | ReplaceNode(F: Node, T: SLLI); |
1417 | return; |
1418 | } |
1419 | } |
1420 | } |
1421 | |
1422 | // If C1 masks off the upper bits only (but can't be formed as an |
1423 | // ANDI), use an unsigned bitfield extract (e.g., th.extu), if |
1424 | // available. |
1425 | // Transform (and x, C1) |
1426 | // -> (<bfextract> x, msb, lsb) |
1427 | if (isC1Mask && !isC1ANDI) { |
1428 | const unsigned Msb = llvm::bit_width(Value: C1) - 1; |
1429 | if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0)) |
1430 | return; |
1431 | } |
1432 | |
1433 | if (tryShrinkShlLogicImm(Node)) |
1434 | return; |
1435 | |
1436 | break; |
1437 | } |
1438 | case ISD::MUL: { |
1439 | // Special case for calculating (mul (and X, C2), C1) where the full product |
1440 | // fits in XLen bits. We can shift X left by the number of leading zeros in |
1441 | // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final |
1442 | // product has XLen trailing zeros, putting it in the output of MULHU. This |
1443 | // can avoid materializing a constant in a register for C2. |
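// For example, on RV64 with C2 = 0xFF (56 leading zeros) and C1 = 5,
// (mul (and X, 0xFF), 5) becomes (mulhu (slli X, 56), 5 << 8), since
// ((X << 56) * (5 << 8)) >> 64 == (X & 0xFF) * 5.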
1444 | |
1445 | // RHS should be a constant. |
1446 | auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1)); |
1447 | if (!N1C || !N1C->hasOneUse()) |
1448 | break; |
1449 | |
1450 | // LHS should be an AND with constant. |
1451 | SDValue N0 = Node->getOperand(Num: 0); |
1452 | if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) |
1453 | break; |
1454 | |
1455 | uint64_t C2 = N0.getConstantOperandVal(i: 1); |
1456 | |
1457 | // Constant should be a mask. |
1458 | if (!isMask_64(Value: C2)) |
1459 | break; |
1460 | |
// If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
// multiple users or the constant is a simm12. This prevents inserting a shift
// and still having uses of the AND/ZEXT. Shifting a simm12 will likely make
// the constant more costly to materialize. Otherwise, using a SLLI might
// allow it to be compressed.
1466 | bool IsANDIOrZExt = |
1467 | isInt<12>(x: C2) || |
1468 | (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb()); |
1469 | // With XTHeadBb, we can use TH.EXTU. |
1470 | IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb(); |
1471 | if (IsANDIOrZExt && (isInt<12>(x: N1C->getSExtValue()) || !N0.hasOneUse())) |
1472 | break; |
1473 | // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or |
1474 | // the constant is a simm32. |
1475 | bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba(); |
1476 | // With XTHeadBb, we can use TH.EXTU. |
1477 | IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb(); |
1478 | if (IsZExtW && (isInt<32>(x: N1C->getSExtValue()) || !N0.hasOneUse())) |
1479 | break; |
1480 | |
1481 | // We need to shift left the AND input and C1 by a total of XLen bits. |
1482 | |
1483 | // How far left do we need to shift the AND input? |
1484 | unsigned XLen = Subtarget->getXLen(); |
1485 | unsigned LeadingZeros = XLen - llvm::bit_width(Value: C2); |
1486 | |
1487 | // The constant gets shifted by the remaining amount unless that would |
1488 | // shift bits out. |
1489 | uint64_t C1 = N1C->getZExtValue(); |
1490 | unsigned ConstantShift = XLen - LeadingZeros; |
1491 | if (ConstantShift > (XLen - llvm::bit_width(Value: C1))) |
1492 | break; |
1493 | |
1494 | uint64_t ShiftedC1 = C1 << ConstantShift; |
// If this is RV32, we need to sign extend the constant.
1496 | if (XLen == 32) |
1497 | ShiftedC1 = SignExtend64<32>(x: ShiftedC1); |
1498 | |
1499 | // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))). |
1500 | SDNode *Imm = selectImm(CurDAG, DL, VT, Imm: ShiftedC1, Subtarget: *Subtarget).getNode(); |
1501 | SDNode *SLLI = |
1502 | CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0), |
1503 | CurDAG->getTargetConstant(LeadingZeros, DL, VT)); |
1504 | SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT, |
1505 | SDValue(SLLI, 0), SDValue(Imm, 0)); |
1506 | ReplaceNode(F: Node, T: MULHU); |
1507 | return; |
1508 | } |
1509 | case ISD::LOAD: { |
1510 | if (tryIndexedLoad(Node)) |
1511 | return; |
1512 | break; |
1513 | } |
1514 | case ISD::INTRINSIC_WO_CHAIN: { |
1515 | unsigned IntNo = Node->getConstantOperandVal(Num: 0); |
1516 | switch (IntNo) { |
1517 | // By default we do not custom select any intrinsic. |
1518 | default: |
1519 | break; |
1520 | case Intrinsic::riscv_vmsgeu: |
1521 | case Intrinsic::riscv_vmsge: { |
1522 | SDValue Src1 = Node->getOperand(Num: 1); |
1523 | SDValue Src2 = Node->getOperand(Num: 2); |
1524 | bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu; |
1525 | bool IsCmpUnsignedZero = false; |
1526 | // Only custom select scalar second operand. |
1527 | if (Src2.getValueType() != XLenVT) |
1528 | break; |
1529 | // Small constants are handled with patterns. |
1530 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Src2)) { |
1531 | int64_t CVal = C->getSExtValue(); |
1532 | if (CVal >= -15 && CVal <= 16) { |
1533 | if (!IsUnsigned || CVal != 0) |
1534 | break; |
1535 | IsCmpUnsignedZero = true; |
1536 | } |
1537 | } |
1538 | MVT Src1VT = Src1.getSimpleValueType(); |
1539 | unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode; |
1540 | switch (RISCVTargetLowering::getLMUL(VT: Src1VT)) { |
1541 | default: |
1542 | llvm_unreachable("Unexpected LMUL!" ); |
1543 | #define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \ |
1544 | case RISCVII::VLMUL::lmulenum: \ |
1545 | VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ |
1546 | : RISCV::PseudoVMSLT_VX_##suffix; \ |
1547 | VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \ |
1548 | VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \ |
1549 | break; |
1550 | CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1) |
1551 | CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2) |
1552 | CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4) |
1553 | CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8) |
1554 | CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16) |
1555 | CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32) |
1556 | CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64) |
1557 | #undef CASE_VMSLT_VMNAND_VMSET_OPCODES |
1558 | } |
1559 | SDValue SEW = CurDAG->getTargetConstant( |
1560 | Val: Log2_32(Value: Src1VT.getScalarSizeInBits()), DL, VT: XLenVT); |
1561 | SDValue VL; |
1562 | selectVLOp(N: Node->getOperand(Num: 3), VL); |
1563 | |
1564 | // If vmsgeu with 0 immediate, expand it to vmset. |
1565 | if (IsCmpUnsignedZero) { |
1566 | ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMSetOpcode, dl: DL, VT, Op1: VL, Op2: SEW)); |
1567 | return; |
1568 | } |
1569 | |
1570 | // Expand to |
1571 | // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd |
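// vmnand with identical operands computes NOT, so this implements
// vmsge{u}(va, x) == NOT vmslt{u}(va, x).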
1572 | SDValue Cmp = SDValue( |
1573 | CurDAG->getMachineNode(Opcode: VMSLTOpcode, dl: DL, VT, Ops: {Src1, Src2, VL, SEW}), |
1574 | 0); |
1575 | ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMNANDOpcode, dl: DL, VT, |
1576 | Ops: {Cmp, Cmp, VL, SEW})); |
1577 | return; |
1578 | } |
1579 | case Intrinsic::riscv_vmsgeu_mask: |
1580 | case Intrinsic::riscv_vmsge_mask: { |
1581 | SDValue Src1 = Node->getOperand(Num: 2); |
1582 | SDValue Src2 = Node->getOperand(Num: 3); |
1583 | bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask; |
1584 | bool IsCmpUnsignedZero = false; |
1585 | // Only custom select scalar second operand. |
1586 | if (Src2.getValueType() != XLenVT) |
1587 | break; |
1588 | // Small constants are handled with patterns. |
1589 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Src2)) { |
1590 | int64_t CVal = C->getSExtValue(); |
1591 | if (CVal >= -15 && CVal <= 16) { |
1592 | if (!IsUnsigned || CVal != 0) |
1593 | break; |
1594 | IsCmpUnsignedZero = true; |
1595 | } |
1596 | } |
1597 | MVT Src1VT = Src1.getSimpleValueType(); |
1598 | unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode, |
1599 | VMOROpcode; |
1600 | switch (RISCVTargetLowering::getLMUL(VT: Src1VT)) { |
1601 | default: |
1602 | llvm_unreachable("Unexpected LMUL!" ); |
1603 | #define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \ |
1604 | case RISCVII::VLMUL::lmulenum: \ |
1605 | VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ |
1606 | : RISCV::PseudoVMSLT_VX_##suffix; \ |
1607 | VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \ |
1608 | : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \ |
1609 | break; |
1610 | CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1) |
1611 | CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2) |
1612 | CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4) |
1613 | CASE_VMSLT_OPCODES(LMUL_1, M1, B8) |
1614 | CASE_VMSLT_OPCODES(LMUL_2, M2, B16) |
1615 | CASE_VMSLT_OPCODES(LMUL_4, M4, B32) |
1616 | CASE_VMSLT_OPCODES(LMUL_8, M8, B64) |
1617 | #undef CASE_VMSLT_OPCODES |
1618 | } |
1619 | // Mask operations use the LMUL from the mask type. |
1620 | switch (RISCVTargetLowering::getLMUL(VT)) { |
1621 | default: |
1622 | llvm_unreachable("Unexpected LMUL!" ); |
1623 | #define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \ |
1624 | case RISCVII::VLMUL::lmulenum: \ |
1625 | VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \ |
1626 | VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \ |
1627 | VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \ |
1628 | break; |
1629 | CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8) |
1630 | CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4) |
1631 | CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2) |
1632 | CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1) |
1633 | CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2) |
1634 | CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4) |
1635 | CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8) |
1636 | #undef CASE_VMXOR_VMANDN_VMOR_OPCODES |
1637 | } |
1638 | SDValue SEW = CurDAG->getTargetConstant( |
1639 | Val: Log2_32(Value: Src1VT.getScalarSizeInBits()), DL, VT: XLenVT); |
1640 | SDValue MaskSEW = CurDAG->getTargetConstant(Val: 0, DL, VT: XLenVT); |
1641 | SDValue VL; |
1642 | selectVLOp(N: Node->getOperand(Num: 5), VL); |
1643 | SDValue MaskedOff = Node->getOperand(Num: 1); |
1644 | SDValue Mask = Node->getOperand(Num: 4); |
1645 | |
1646 | // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff. |
1647 | if (IsCmpUnsignedZero) { |
1648 | // We don't need vmor if the MaskedOff and the Mask are the same |
1649 | // value. |
1650 | if (Mask == MaskedOff) { |
1651 | ReplaceUses(F: Node, T: Mask.getNode()); |
1652 | return; |
1653 | } |
1654 | ReplaceNode(F: Node, |
1655 | T: CurDAG->getMachineNode(Opcode: VMOROpcode, dl: DL, VT, |
1656 | Ops: {Mask, MaskedOff, VL, MaskSEW})); |
1657 | return; |
1658 | } |
1659 | |
1660 | // If the MaskedOff value and the Mask are the same value use |
1661 | // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt |
1662 | // This avoids needing to copy v0 to vd before starting the next sequence. |
1663 | if (Mask == MaskedOff) { |
1664 | SDValue Cmp = SDValue( |
1665 | CurDAG->getMachineNode(Opcode: VMSLTOpcode, dl: DL, VT, Ops: {Src1, Src2, VL, SEW}), |
1666 | 0); |
1667 | ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMANDNOpcode, dl: DL, VT, |
1668 | Ops: {Mask, Cmp, VL, MaskSEW})); |
1669 | return; |
1670 | } |
1671 | |
1672 | // Mask needs to be copied to V0. |
1673 | SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, |
1674 | RISCV::V0, Mask, SDValue()); |
1675 | SDValue Glue = Chain.getValue(R: 1); |
1676 | SDValue V0 = CurDAG->getRegister(RISCV::V0, VT); |
1677 | |
1678 | // Otherwise use |
1679 | // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0 |
1680 | // The result is mask undisturbed. |
1681 | // We use the same instructions to emulate mask agnostic behavior, because |
1682 | // the agnostic result can be either undisturbed or all 1. |
1683 | SDValue Cmp = SDValue( |
1684 | CurDAG->getMachineNode(Opcode: VMSLTMaskOpcode, dl: DL, VT, |
1685 | Ops: {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}), |
1686 | 0); |
1687 | // vmxor.mm vd, vd, v0 is used to update active value. |
1688 | ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMXOROpcode, dl: DL, VT, |
1689 | Ops: {Cmp, Mask, VL, MaskSEW})); |
1690 | return; |
1691 | } |
1692 | case Intrinsic::riscv_vsetvli: |
1693 | case Intrinsic::riscv_vsetvlimax: |
1694 | return selectVSETVLI(Node); |
1695 | } |
1696 | break; |
1697 | } |
1698 | case ISD::INTRINSIC_W_CHAIN: { |
1699 | unsigned IntNo = Node->getConstantOperandVal(Num: 1); |
1700 | switch (IntNo) { |
1701 | // By default we do not custom select any intrinsic. |
1702 | default: |
1703 | break; |
1704 | case Intrinsic::riscv_vlseg2: |
1705 | case Intrinsic::riscv_vlseg3: |
1706 | case Intrinsic::riscv_vlseg4: |
1707 | case Intrinsic::riscv_vlseg5: |
1708 | case Intrinsic::riscv_vlseg6: |
1709 | case Intrinsic::riscv_vlseg7: |
1710 | case Intrinsic::riscv_vlseg8: { |
1711 | selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); |
1712 | return; |
1713 | } |
1714 | case Intrinsic::riscv_vlseg2_mask: |
1715 | case Intrinsic::riscv_vlseg3_mask: |
1716 | case Intrinsic::riscv_vlseg4_mask: |
1717 | case Intrinsic::riscv_vlseg5_mask: |
1718 | case Intrinsic::riscv_vlseg6_mask: |
1719 | case Intrinsic::riscv_vlseg7_mask: |
1720 | case Intrinsic::riscv_vlseg8_mask: { |
1721 | selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); |
1722 | return; |
1723 | } |
1724 | case Intrinsic::riscv_vlsseg2: |
1725 | case Intrinsic::riscv_vlsseg3: |
1726 | case Intrinsic::riscv_vlsseg4: |
1727 | case Intrinsic::riscv_vlsseg5: |
1728 | case Intrinsic::riscv_vlsseg6: |
1729 | case Intrinsic::riscv_vlsseg7: |
1730 | case Intrinsic::riscv_vlsseg8: { |
1731 | selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); |
1732 | return; |
1733 | } |
1734 | case Intrinsic::riscv_vlsseg2_mask: |
1735 | case Intrinsic::riscv_vlsseg3_mask: |
1736 | case Intrinsic::riscv_vlsseg4_mask: |
1737 | case Intrinsic::riscv_vlsseg5_mask: |
1738 | case Intrinsic::riscv_vlsseg6_mask: |
1739 | case Intrinsic::riscv_vlsseg7_mask: |
1740 | case Intrinsic::riscv_vlsseg8_mask: { |
1741 | selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); |
1742 | return; |
1743 | } |
1744 | case Intrinsic::riscv_vloxseg2: |
1745 | case Intrinsic::riscv_vloxseg3: |
1746 | case Intrinsic::riscv_vloxseg4: |
1747 | case Intrinsic::riscv_vloxseg5: |
1748 | case Intrinsic::riscv_vloxseg6: |
1749 | case Intrinsic::riscv_vloxseg7: |
1750 | case Intrinsic::riscv_vloxseg8: |
1751 | selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); |
1752 | return; |
1753 | case Intrinsic::riscv_vluxseg2: |
1754 | case Intrinsic::riscv_vluxseg3: |
1755 | case Intrinsic::riscv_vluxseg4: |
1756 | case Intrinsic::riscv_vluxseg5: |
1757 | case Intrinsic::riscv_vluxseg6: |
1758 | case Intrinsic::riscv_vluxseg7: |
1759 | case Intrinsic::riscv_vluxseg8: |
1760 | selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); |
1761 | return; |
1762 | case Intrinsic::riscv_vloxseg2_mask: |
1763 | case Intrinsic::riscv_vloxseg3_mask: |
1764 | case Intrinsic::riscv_vloxseg4_mask: |
1765 | case Intrinsic::riscv_vloxseg5_mask: |
1766 | case Intrinsic::riscv_vloxseg6_mask: |
1767 | case Intrinsic::riscv_vloxseg7_mask: |
1768 | case Intrinsic::riscv_vloxseg8_mask: |
1769 | selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); |
1770 | return; |
1771 | case Intrinsic::riscv_vluxseg2_mask: |
1772 | case Intrinsic::riscv_vluxseg3_mask: |
1773 | case Intrinsic::riscv_vluxseg4_mask: |
1774 | case Intrinsic::riscv_vluxseg5_mask: |
1775 | case Intrinsic::riscv_vluxseg6_mask: |
1776 | case Intrinsic::riscv_vluxseg7_mask: |
1777 | case Intrinsic::riscv_vluxseg8_mask: |
1778 | selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); |
1779 | return; |
1780 | case Intrinsic::riscv_vlseg8ff: |
1781 | case Intrinsic::riscv_vlseg7ff: |
1782 | case Intrinsic::riscv_vlseg6ff: |
1783 | case Intrinsic::riscv_vlseg5ff: |
1784 | case Intrinsic::riscv_vlseg4ff: |
1785 | case Intrinsic::riscv_vlseg3ff: |
1786 | case Intrinsic::riscv_vlseg2ff: { |
1787 | selectVLSEGFF(Node, /*IsMasked*/ false); |
1788 | return; |
1789 | } |
1790 | case Intrinsic::riscv_vlseg8ff_mask: |
1791 | case Intrinsic::riscv_vlseg7ff_mask: |
1792 | case Intrinsic::riscv_vlseg6ff_mask: |
1793 | case Intrinsic::riscv_vlseg5ff_mask: |
1794 | case Intrinsic::riscv_vlseg4ff_mask: |
1795 | case Intrinsic::riscv_vlseg3ff_mask: |
1796 | case Intrinsic::riscv_vlseg2ff_mask: { |
1797 | selectVLSEGFF(Node, /*IsMasked*/ true); |
1798 | return; |
1799 | } |
1800 | case Intrinsic::riscv_vloxei: |
1801 | case Intrinsic::riscv_vloxei_mask: |
1802 | case Intrinsic::riscv_vluxei: |
1803 | case Intrinsic::riscv_vluxei_mask: { |
1804 | bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask || |
1805 | IntNo == Intrinsic::riscv_vluxei_mask; |
1806 | bool IsOrdered = IntNo == Intrinsic::riscv_vloxei || |
1807 | IntNo == Intrinsic::riscv_vloxei_mask; |
1808 | |
1809 | MVT VT = Node->getSimpleValueType(ResNo: 0); |
1810 | unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits()); |
1811 | |
1812 | unsigned CurOp = 2; |
1813 | SmallVector<SDValue, 8> Operands; |
1814 | Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); |
1815 | |
1816 | MVT IndexVT; |
1817 | addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, |
1818 | /*IsStridedOrIndexed*/ true, Operands, |
1819 | /*IsLoad=*/true, IndexVT: &IndexVT); |
1820 | |
1821 | assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && |
1822 | "Element count mismatch" ); |
1823 | |
1824 | RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); |
1825 | RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT); |
1826 | unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits()); |
1827 | if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { |
1828 | report_fatal_error(reason: "The V extension does not support EEW=64 for index " |
1829 | "values when XLEN=32" ); |
1830 | } |
1831 | const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo( |
1832 | IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), |
1833 | static_cast<unsigned>(IndexLMUL)); |
1834 | MachineSDNode *Load = |
1835 | CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands); |
1836 | |
1837 | if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node)) |
1838 | CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {MemOp->getMemOperand()}); |
1839 | |
1840 | ReplaceNode(F: Node, T: Load); |
1841 | return; |
1842 | } |
1843 | case Intrinsic::riscv_vlm: |
1844 | case Intrinsic::riscv_vle: |
1845 | case Intrinsic::riscv_vle_mask: |
1846 | case Intrinsic::riscv_vlse: |
1847 | case Intrinsic::riscv_vlse_mask: { |
1848 | bool IsMasked = IntNo == Intrinsic::riscv_vle_mask || |
1849 | IntNo == Intrinsic::riscv_vlse_mask; |
1850 | bool IsStrided = |
1851 | IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask; |
1852 | |
1853 | MVT VT = Node->getSimpleValueType(ResNo: 0); |
1854 | unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits()); |
1855 | |
// The riscv_vlm intrinsic is always tail agnostic and has no passthru
// operand at the IR level. In pseudos, it has both policy and
// passthru operands. The passthru operand is needed to track the
// "tail undefined" state, and the policy is there just for
// consistency - it will always be "don't care" for the
// unmasked form.
1862 | bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm; |
1863 | unsigned CurOp = 2; |
1864 | SmallVector<SDValue, 8> Operands; |
1865 | if (HasPassthruOperand) |
1866 | Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); |
1867 | else { |
1868 | // We eagerly lower to implicit_def (instead of undef), as we |
1869 | // otherwise fail to select nodes such as: nxv1i1 = undef |
1870 | SDNode *Passthru = |
1871 | CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT); |
1872 | Operands.push_back(Elt: SDValue(Passthru, 0)); |
1873 | } |
1874 | addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided, |
1875 | Operands, /*IsLoad=*/true); |
1876 | |
1877 | RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); |
1878 | const RISCV::VLEPseudo *P = |
1879 | RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW, |
1880 | static_cast<unsigned>(LMUL)); |
1881 | MachineSDNode *Load = |
1882 | CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands); |
1883 | |
1884 | if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node)) |
1885 | CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {MemOp->getMemOperand()}); |
1886 | |
1887 | ReplaceNode(F: Node, T: Load); |
1888 | return; |
1889 | } |
1890 | case Intrinsic::riscv_vleff: |
1891 | case Intrinsic::riscv_vleff_mask: { |
1892 | bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask; |
1893 | |
1894 | MVT VT = Node->getSimpleValueType(ResNo: 0); |
1895 | unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits()); |
1896 | |
1897 | unsigned CurOp = 2; |
1898 | SmallVector<SDValue, 7> Operands; |
1899 | Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); |
1900 | addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, |
1901 | /*IsStridedOrIndexed*/ false, Operands, |
1902 | /*IsLoad=*/true); |
1903 | |
1904 | RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); |
1905 | const RISCV::VLEPseudo *P = |
1906 | RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true, |
1907 | Log2SEW, static_cast<unsigned>(LMUL)); |
1908 | MachineSDNode *Load = CurDAG->getMachineNode( |
1909 | Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands); |
1910 | if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node)) |
1911 | CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {MemOp->getMemOperand()}); |
1912 | |
1913 | ReplaceNode(F: Node, T: Load); |
1914 | return; |
1915 | } |
1916 | } |
1917 | break; |
1918 | } |
1919 | case ISD::INTRINSIC_VOID: { |
1920 | unsigned IntNo = Node->getConstantOperandVal(Num: 1); |
1921 | switch (IntNo) { |
1922 | case Intrinsic::riscv_vsseg2: |
1923 | case Intrinsic::riscv_vsseg3: |
1924 | case Intrinsic::riscv_vsseg4: |
1925 | case Intrinsic::riscv_vsseg5: |
1926 | case Intrinsic::riscv_vsseg6: |
1927 | case Intrinsic::riscv_vsseg7: |
1928 | case Intrinsic::riscv_vsseg8: { |
1929 | selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); |
1930 | return; |
1931 | } |
1932 | case Intrinsic::riscv_vsseg2_mask: |
1933 | case Intrinsic::riscv_vsseg3_mask: |
1934 | case Intrinsic::riscv_vsseg4_mask: |
1935 | case Intrinsic::riscv_vsseg5_mask: |
1936 | case Intrinsic::riscv_vsseg6_mask: |
1937 | case Intrinsic::riscv_vsseg7_mask: |
1938 | case Intrinsic::riscv_vsseg8_mask: { |
1939 | selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); |
1940 | return; |
1941 | } |
1942 | case Intrinsic::riscv_vssseg2: |
1943 | case Intrinsic::riscv_vssseg3: |
1944 | case Intrinsic::riscv_vssseg4: |
1945 | case Intrinsic::riscv_vssseg5: |
1946 | case Intrinsic::riscv_vssseg6: |
1947 | case Intrinsic::riscv_vssseg7: |
1948 | case Intrinsic::riscv_vssseg8: { |
1949 | selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); |
1950 | return; |
1951 | } |
1952 | case Intrinsic::riscv_vssseg2_mask: |
1953 | case Intrinsic::riscv_vssseg3_mask: |
1954 | case Intrinsic::riscv_vssseg4_mask: |
1955 | case Intrinsic::riscv_vssseg5_mask: |
1956 | case Intrinsic::riscv_vssseg6_mask: |
1957 | case Intrinsic::riscv_vssseg7_mask: |
1958 | case Intrinsic::riscv_vssseg8_mask: { |
1959 | selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); |
1960 | return; |
1961 | } |
1962 | case Intrinsic::riscv_vsoxseg2: |
1963 | case Intrinsic::riscv_vsoxseg3: |
1964 | case Intrinsic::riscv_vsoxseg4: |
1965 | case Intrinsic::riscv_vsoxseg5: |
1966 | case Intrinsic::riscv_vsoxseg6: |
1967 | case Intrinsic::riscv_vsoxseg7: |
1968 | case Intrinsic::riscv_vsoxseg8: |
1969 | selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); |
1970 | return; |
1971 | case Intrinsic::riscv_vsuxseg2: |
1972 | case Intrinsic::riscv_vsuxseg3: |
1973 | case Intrinsic::riscv_vsuxseg4: |
1974 | case Intrinsic::riscv_vsuxseg5: |
1975 | case Intrinsic::riscv_vsuxseg6: |
1976 | case Intrinsic::riscv_vsuxseg7: |
1977 | case Intrinsic::riscv_vsuxseg8: |
1978 | selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); |
1979 | return; |
1980 | case Intrinsic::riscv_vsoxseg2_mask: |
1981 | case Intrinsic::riscv_vsoxseg3_mask: |
1982 | case Intrinsic::riscv_vsoxseg4_mask: |
1983 | case Intrinsic::riscv_vsoxseg5_mask: |
1984 | case Intrinsic::riscv_vsoxseg6_mask: |
1985 | case Intrinsic::riscv_vsoxseg7_mask: |
1986 | case Intrinsic::riscv_vsoxseg8_mask: |
1987 | selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); |
1988 | return; |
1989 | case Intrinsic::riscv_vsuxseg2_mask: |
1990 | case Intrinsic::riscv_vsuxseg3_mask: |
1991 | case Intrinsic::riscv_vsuxseg4_mask: |
1992 | case Intrinsic::riscv_vsuxseg5_mask: |
1993 | case Intrinsic::riscv_vsuxseg6_mask: |
1994 | case Intrinsic::riscv_vsuxseg7_mask: |
1995 | case Intrinsic::riscv_vsuxseg8_mask: |
1996 | selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); |
1997 | return; |
1998 | case Intrinsic::riscv_vsoxei: |
1999 | case Intrinsic::riscv_vsoxei_mask: |
2000 | case Intrinsic::riscv_vsuxei: |
2001 | case Intrinsic::riscv_vsuxei_mask: { |
2002 | bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask || |
2003 | IntNo == Intrinsic::riscv_vsuxei_mask; |
2004 | bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei || |
2005 | IntNo == Intrinsic::riscv_vsoxei_mask; |
2006 | |
2007 | MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0); |
2008 | unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits()); |
2009 | |
2010 | unsigned CurOp = 2; |
2011 | SmallVector<SDValue, 8> Operands; |
2012 | Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Store value. |
2013 | |
2014 | MVT IndexVT; |
2015 | addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, |
2016 | /*IsStridedOrIndexed*/ true, Operands, |
2017 | /*IsLoad=*/false, IndexVT: &IndexVT); |
2018 | |
2019 | assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && |
2020 | "Element count mismatch" ); |
2021 | |
2022 | RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); |
2023 | RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT); |
2024 | unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits()); |
2025 | if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { |
2026 | report_fatal_error(reason: "The V extension does not support EEW=64 for index " |
2027 | "values when XLEN=32" ); |
2028 | } |
2029 | const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo( |
2030 | IsMasked, IsOrdered, IndexLog2EEW, |
2031 | static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL)); |
2032 | MachineSDNode *Store = |
2033 | CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands); |
2034 | |
2035 | if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node)) |
2036 | CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {MemOp->getMemOperand()}); |
2037 | |
2038 | ReplaceNode(F: Node, T: Store); |
2039 | return; |
2040 | } |
2041 | case Intrinsic::riscv_vsm: |
2042 | case Intrinsic::riscv_vse: |
2043 | case Intrinsic::riscv_vse_mask: |
2044 | case Intrinsic::riscv_vsse: |
2045 | case Intrinsic::riscv_vsse_mask: { |
2046 | bool IsMasked = IntNo == Intrinsic::riscv_vse_mask || |
2047 | IntNo == Intrinsic::riscv_vsse_mask; |
2048 | bool IsStrided = |
2049 | IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask; |
2050 | |
2051 | MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0); |
2052 | unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits()); |
2053 | |
2054 | unsigned CurOp = 2; |
2055 | SmallVector<SDValue, 8> Operands; |
2056 | Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Store value. |
2057 | |
2058 | addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided, |
2059 | Operands); |
2060 | |
2061 | RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); |
2062 | const RISCV::VSEPseudo *P = RISCV::getVSEPseudo( |
2063 | IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); |
2064 | MachineSDNode *Store = |
2065 | CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands); |
2066 | if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node)) |
2067 | CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {MemOp->getMemOperand()}); |
2068 | |
2069 | ReplaceNode(F: Node, T: Store); |
2070 | return; |
2071 | } |
2072 | case Intrinsic::riscv_sf_vc_x_se: |
2073 | case Intrinsic::riscv_sf_vc_i_se: |
2074 | selectSF_VC_X_SE(Node); |
2075 | return; |
2076 | } |
2077 | break; |
2078 | } |
2079 | case ISD::BITCAST: { |
2080 | MVT SrcVT = Node->getOperand(Num: 0).getSimpleValueType(); |
2081 | // Just drop bitcasts between vectors if both are fixed or both are |
2082 | // scalable. |
2083 | if ((VT.isScalableVector() && SrcVT.isScalableVector()) || |
2084 | (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) { |
2085 | ReplaceUses(F: SDValue(Node, 0), T: Node->getOperand(Num: 0)); |
2086 | CurDAG->RemoveDeadNode(N: Node); |
2087 | return; |
2088 | } |
2089 | break; |
2090 | } |
2091 | case ISD::INSERT_SUBVECTOR: { |
2092 | SDValue V = Node->getOperand(Num: 0); |
2093 | SDValue SubV = Node->getOperand(Num: 1); |
2094 | SDLoc DL(SubV); |
2095 | auto Idx = Node->getConstantOperandVal(Num: 2); |
2096 | MVT SubVecVT = SubV.getSimpleValueType(); |
2097 | |
2098 | const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); |
2099 | MVT SubVecContainerVT = SubVecVT; |
2100 | // Establish the correct scalable-vector types for any fixed-length type. |
2101 | if (SubVecVT.isFixedLengthVector()) { |
2102 | assert(Idx == 0 && V.isUndef()); |
2103 | SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT: SubVecVT); |
2104 | } |
2105 | MVT ContainerVT = VT; |
2106 | if (VT.isFixedLengthVector()) |
2107 | ContainerVT = TLI.getContainerForFixedLengthVector(VT); |
2108 | |
2109 | const auto *TRI = Subtarget->getRegisterInfo(); |
2110 | unsigned SubRegIdx; |
2111 | std::tie(args&: SubRegIdx, args&: Idx) = |
2112 | RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( |
2113 | VecVT: ContainerVT, SubVecVT: SubVecContainerVT, InsertExtractIdx: Idx, TRI); |
2114 | |
2115 | // If the Idx hasn't been completely eliminated then this is a subvector |
2116 | // insert which doesn't naturally align to a vector register. These must |
2117 | // be handled using instructions to manipulate the vector registers. |
2118 | if (Idx != 0) |
2119 | break; |
2120 | |
2121 | RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(VT: SubVecContainerVT); |
2122 | [[maybe_unused]] bool IsSubVecPartReg = |
2123 | SubVecLMUL == RISCVII::VLMUL::LMUL_F2 || |
2124 | SubVecLMUL == RISCVII::VLMUL::LMUL_F4 || |
2125 | SubVecLMUL == RISCVII::VLMUL::LMUL_F8; |
2126 | assert((!IsSubVecPartReg || V.isUndef()) && |
2127 | "Expecting lowering to have created legal INSERT_SUBVECTORs when " |
2128 | "the subvector is smaller than a full-sized register" ); |
2129 | |
2130 | // If we haven't set a SubRegIdx, then we must be going between |
2131 | // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy. |
2132 | if (SubRegIdx == RISCV::NoSubRegister) { |
2133 | unsigned InRegClassID = |
2134 | RISCVTargetLowering::getRegClassIDForVecVT(VT: ContainerVT); |
2135 | assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == |
2136 | InRegClassID && |
2137 | "Unexpected subvector extraction" ); |
2138 | SDValue RC = CurDAG->getTargetConstant(Val: InRegClassID, DL, VT: XLenVT); |
2139 | SDNode *NewNode = CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS, |
2140 | dl: DL, VT, Op1: SubV, Op2: RC); |
2141 | ReplaceNode(F: Node, T: NewNode); |
2142 | return; |
2143 | } |
2144 | |
2145 | SDValue Insert = CurDAG->getTargetInsertSubreg(SRIdx: SubRegIdx, DL, VT, Operand: V, Subreg: SubV); |
2146 | ReplaceNode(F: Node, T: Insert.getNode()); |
2147 | return; |
2148 | } |
2149 | case ISD::EXTRACT_SUBVECTOR: { |
2150 | SDValue V = Node->getOperand(Num: 0); |
2151 | auto Idx = Node->getConstantOperandVal(Num: 1); |
2152 | MVT InVT = V.getSimpleValueType(); |
2153 | SDLoc DL(V); |
2154 | |
2155 | const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); |
2156 | MVT SubVecContainerVT = VT; |
2157 | // Establish the correct scalable-vector types for any fixed-length type. |
2158 | if (VT.isFixedLengthVector()) { |
2159 | assert(Idx == 0); |
2160 | SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT); |
2161 | } |
2162 | if (InVT.isFixedLengthVector()) |
2163 | InVT = TLI.getContainerForFixedLengthVector(VT: InVT); |
2164 | |
2165 | const auto *TRI = Subtarget->getRegisterInfo(); |
2166 | unsigned SubRegIdx; |
2167 | std::tie(args&: SubRegIdx, args&: Idx) = |
2168 | RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( |
2169 | VecVT: InVT, SubVecVT: SubVecContainerVT, InsertExtractIdx: Idx, TRI); |
2170 | |
2171 | // If the Idx hasn't been completely eliminated then this is a subvector |
2172 | // extract which doesn't naturally align to a vector register. These must |
2173 | // be handled using instructions to manipulate the vector registers. |
2174 | if (Idx != 0) |
2175 | break; |
2176 | |
2177 | // If we haven't set a SubRegIdx, then we must be going between |
2178 | // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy. |
2179 | if (SubRegIdx == RISCV::NoSubRegister) { |
2180 | unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT: InVT); |
2181 | assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == |
2182 | InRegClassID && |
2183 | "Unexpected subvector extraction" ); |
2184 | SDValue RC = CurDAG->getTargetConstant(Val: InRegClassID, DL, VT: XLenVT); |
2185 | SDNode *NewNode = |
2186 | CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS, dl: DL, VT, Op1: V, Op2: RC); |
2187 | ReplaceNode(F: Node, T: NewNode); |
2188 | return; |
2189 | } |
2190 | |
SDValue Extract = CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx, DL, VT, Operand: V);
2192 | ReplaceNode(F: Node, T: Extract.getNode()); |
2193 | return; |
2194 | } |
2195 | case RISCVISD::VMV_S_X_VL: |
2196 | case RISCVISD::VFMV_S_F_VL: |
2197 | case RISCVISD::VMV_V_X_VL: |
2198 | case RISCVISD::VFMV_V_F_VL: { |
2199 | // Try to match splat of a scalar load to a strided load with stride of x0. |
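// For example, (vmv.v.x undef, (load p), vl) can become a vlse from p with
// stride x0: the scalar is loaded once and broadcast to every element.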
2200 | bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL || |
2201 | Node->getOpcode() == RISCVISD::VFMV_S_F_VL; |
2202 | if (!Node->getOperand(Num: 0).isUndef()) |
2203 | break; |
2204 | SDValue Src = Node->getOperand(Num: 1); |
2205 | auto *Ld = dyn_cast<LoadSDNode>(Val&: Src); |
// We can't fold an indexed load: its second (address update) output
// has uses, so the load node can't be removed.
2208 | if (!Ld || Ld->isIndexed()) |
2209 | break; |
2210 | EVT MemVT = Ld->getMemoryVT(); |
2211 | // The memory VT should be the same size as the element type. |
2212 | if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize()) |
2213 | break; |
2214 | if (!IsProfitableToFold(N: Src, U: Node, Root: Node) || |
2215 | !IsLegalToFold(N: Src, U: Node, Root: Node, OptLevel: TM.getOptLevel())) |
2216 | break; |
2217 | |
2218 | SDValue VL; |
2219 | if (IsScalarMove) { |
2220 | // We could deal with more VL if we update the VSETVLI insert pass to |
2221 | // avoid introducing more VSETVLI. |
2222 | if (!isOneConstant(V: Node->getOperand(Num: 2))) |
2223 | break; |
2224 | selectVLOp(N: Node->getOperand(Num: 2), VL); |
2225 | } else |
2226 | selectVLOp(N: Node->getOperand(Num: 2), VL); |
2227 | |
2228 | unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits()); |
2229 | SDValue SEW = CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: XLenVT); |
2230 | |
2231 | // If VL=1, then we don't need to do a strided load and can just do a |
2232 | // regular load. |
2233 | bool IsStrided = !isOneConstant(V: VL); |
2234 | |
2235 | // Only do a strided load if we have optimized zero-stride vector load. |
2236 | if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad()) |
2237 | break; |
2238 | |
2239 | SmallVector<SDValue> Operands = { |
2240 | SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT), 0), |
2241 | Ld->getBasePtr()}; |
2242 | if (IsStrided) |
2243 | Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT)); |
2244 | uint64_t Policy = RISCVII::MASK_AGNOSTIC | RISCVII::TAIL_AGNOSTIC; |
2245 | SDValue PolicyOp = CurDAG->getTargetConstant(Val: Policy, DL, VT: XLenVT); |
2246 | Operands.append(IL: {VL, SEW, PolicyOp, Ld->getChain()}); |
2247 | |
2248 | RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); |
2249 | const RISCV::VLEPseudo *P = RISCV::getVLEPseudo( |
2250 | /*IsMasked*/ false, IsStrided, /*FF*/ false, |
2251 | Log2SEW, static_cast<unsigned>(LMUL)); |
2252 | MachineSDNode *Load = |
2253 | CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands); |
2254 | // Update the chain. |
2255 | ReplaceUses(F: Src.getValue(R: 1), T: SDValue(Load, 1)); |
2256 | // Record the mem-refs |
2257 | CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {Ld->getMemOperand()}); |
2258 | // Replace the splat with the vlse. |
2259 | ReplaceNode(F: Node, T: Load); |
2260 | return; |
2261 | } |
2262 | case ISD::PREFETCH: |
2263 | unsigned Locality = Node->getConstantOperandVal(Num: 3); |
2264 | if (Locality > 2) |
2265 | break; |
2266 | |
2267 | if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Val: Node)) { |
2268 | MachineMemOperand *MMO = LoadStoreMem->getMemOperand(); |
2269 | MMO->setFlags(MachineMemOperand::MONonTemporal); |
2270 | |
2271 | int NontemporalLevel = 0; |
2272 | switch (Locality) { |
2273 | case 0: |
2274 | NontemporalLevel = 3; // NTL.ALL |
2275 | break; |
2276 | case 1: |
2277 | NontemporalLevel = 1; // NTL.PALL |
2278 | break; |
2279 | case 2: |
2280 | NontemporalLevel = 0; // NTL.P1 |
2281 | break; |
2282 | default: |
2283 | llvm_unreachable("unexpected locality value." ); |
2284 | } |
2285 | |
2286 | if (NontemporalLevel & 0b1) |
2287 | MMO->setFlags(MONontemporalBit0); |
2288 | if (NontemporalLevel & 0b10) |
2289 | MMO->setFlags(MONontemporalBit1); |
2290 | } |
2291 | break; |
2292 | } |
2293 | |
2294 | // Select the default instruction. |
2295 | SelectCode(Node); |
2296 | } |
2297 | |
2298 | bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand( |
2299 | const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, |
2300 | std::vector<SDValue> &OutOps) { |
2301 | // Always produce a register and immediate operand, as expected by |
2302 | // RISCVAsmPrinter::PrintAsmMemoryOperand. |
2303 | switch (ConstraintID) { |
2304 | case InlineAsm::ConstraintCode::o: |
2305 | case InlineAsm::ConstraintCode::m: { |
2306 | SDValue Op0, Op1; |
2307 | [[maybe_unused]] bool Found = SelectAddrRegImm(Addr: Op, Base&: Op0, Offset&: Op1); |
2308 | assert(Found && "SelectAddrRegImm should always succeed" ); |
2309 | OutOps.push_back(x: Op0); |
2310 | OutOps.push_back(x: Op1); |
2311 | return false; |
2312 | } |
2313 | case InlineAsm::ConstraintCode::A: |
2314 | OutOps.push_back(x: Op); |
2315 | OutOps.push_back( |
2316 | x: CurDAG->getTargetConstant(Val: 0, DL: SDLoc(Op), VT: Subtarget->getXLenVT())); |
2317 | return false; |
2318 | default: |
2319 | report_fatal_error(reason: "Unexpected asm memory constraint " + |
2320 | InlineAsm::getMemConstraintName(C: ConstraintID)); |
2321 | } |
2322 | |
2323 | return true; |
2324 | } |
2325 | |
2326 | bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base, |
2327 | SDValue &Offset) { |
2328 | if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val&: Addr)) { |
2329 | Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), VT: Subtarget->getXLenVT()); |
2330 | Offset = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(Addr), VT: Subtarget->getXLenVT()); |
2331 | return true; |
2332 | } |
2333 | |
2334 | return false; |
2335 | } |
2336 | |
2337 | // Select a frame index and an optional immediate offset from an ADD or OR. |
2338 | bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base, |
2339 | SDValue &Offset) { |
2340 | if (SelectAddrFrameIndex(Addr, Base, Offset)) |
2341 | return true; |
2342 | |
2343 | if (!CurDAG->isBaseWithConstantOffset(Op: Addr)) |
2344 | return false; |
2345 | |
2346 | if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val: Addr.getOperand(i: 0))) { |
2347 | int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue(); |
2348 | if (isInt<12>(x: CVal)) { |
2349 | Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), |
2350 | VT: Subtarget->getXLenVT()); |
2351 | Offset = CurDAG->getTargetConstant(Val: CVal, DL: SDLoc(Addr), |
2352 | VT: Subtarget->getXLenVT()); |
2353 | return true; |
2354 | } |
2355 | } |
2356 | |
2357 | return false; |
2358 | } |
2359 | |
2360 | // Fold constant addresses. |
2361 | static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, |
2362 | const MVT VT, const RISCVSubtarget *Subtarget, |
2363 | SDValue Addr, SDValue &Base, SDValue &Offset, |
2364 | bool IsPrefetch = false) { |
2365 | if (!isa<ConstantSDNode>(Val: Addr)) |
2366 | return false; |
2367 | |
2368 | int64_t CVal = cast<ConstantSDNode>(Val&: Addr)->getSExtValue(); |
2369 | |
2370 | // If the constant is a simm12, we can fold the whole constant and use X0 as |
2371 | // the base. If the constant can be materialized with LUI+simm12, use LUI as |
2372 | // the base. We can't use generateInstSeq because it favors LUI+ADDIW. |
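// For example, CVal = 0x12345678 splits into Lo12 = 0x678 and Hi20 =
// 0x12345, giving (LUI 0x12345) as the base with offset 0x678.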
2373 | int64_t Lo12 = SignExtend64<12>(x: CVal); |
2374 | int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12; |
2375 | if (!Subtarget->is64Bit() || isInt<32>(x: Hi)) { |
2376 | if (IsPrefetch && (Lo12 & 0b11111) != 0) |
2377 | return false; |
2378 | |
2379 | if (Hi) { |
2380 | int64_t Hi20 = (Hi >> 12) & 0xfffff; |
2381 | Base = SDValue( |
2382 | CurDAG->getMachineNode(RISCV::LUI, DL, VT, |
2383 | CurDAG->getTargetConstant(Hi20, DL, VT)), |
2384 | 0); |
2385 | } else { |
2386 | Base = CurDAG->getRegister(RISCV::X0, VT); |
2387 | } |
2388 | Offset = CurDAG->getTargetConstant(Val: Lo12, DL, VT); |
2389 | return true; |
2390 | } |
2391 | |
2392 | // Ask how constant materialization would handle this constant. |
2393 | RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget); |
2394 | |
2395 | // If the last instruction would be an ADDI, we can fold its immediate and |
2396 | // emit the rest of the sequence as the base. |
2397 | if (Seq.back().getOpcode() != RISCV::ADDI) |
2398 | return false; |
2399 | Lo12 = Seq.back().getImm(); |
2400 | if (IsPrefetch && (Lo12 & 0b11111) != 0) |
2401 | return false; |
2402 | |
2403 | // Drop the last instruction. |
2404 | Seq.pop_back(); |
2405 | assert(!Seq.empty() && "Expected more instructions in sequence" ); |
2406 | |
2407 | Base = selectImmSeq(CurDAG, DL, VT, Seq); |
2408 | Offset = CurDAG->getTargetConstant(Val: Lo12, DL, VT); |
2409 | return true; |
2410 | } |
2411 | |
2412 | // Is this ADD instruction only used as the base pointer of scalar loads and |
2413 | // stores? |
2414 | static bool isWorthFoldingAdd(SDValue Add) { |
2415 | for (auto *Use : Add->uses()) { |
2416 | if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE && |
2417 | Use->getOpcode() != ISD::ATOMIC_LOAD && |
2418 | Use->getOpcode() != ISD::ATOMIC_STORE) |
2419 | return false; |
2420 | EVT VT = cast<MemSDNode>(Val: Use)->getMemoryVT(); |
2421 | if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 && |
2422 | VT != MVT::f64) |
2423 | return false; |
2424 | // Don't allow stores of the value. It must be used as the address. |
2425 | if (Use->getOpcode() == ISD::STORE && |
2426 | cast<StoreSDNode>(Val: Use)->getValue() == Add) |
2427 | return false; |
2428 | if (Use->getOpcode() == ISD::ATOMIC_STORE && |
2429 | cast<AtomicSDNode>(Val: Use)->getVal() == Add) |
2430 | return false; |
2431 | } |
2432 | |
2433 | return true; |
2434 | } |
2435 | |
2436 | bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr, |
2437 | unsigned MaxShiftAmount, |
2438 | SDValue &Base, SDValue &Index, |
2439 | SDValue &Scale) { |
2440 | EVT VT = Addr.getSimpleValueType(); |
2441 | auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index, |
2442 | SDValue &Shift) { |
2443 | uint64_t ShiftAmt = 0; |
2444 | Index = N; |
2445 | |
2446 | if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(Val: N.getOperand(i: 1))) { |
2447 | // Only match shifts by a value in range [0, MaxShiftAmount]. |
2448 | if (N.getConstantOperandVal(i: 1) <= MaxShiftAmount) { |
2449 | Index = N.getOperand(i: 0); |
2450 | ShiftAmt = N.getConstantOperandVal(i: 1); |
2451 | } |
2452 | } |
2453 | |
2454 | Shift = CurDAG->getTargetConstant(Val: ShiftAmt, DL: SDLoc(N), VT); |
2455 | return ShiftAmt != 0; |
2456 | }; |
2457 | |
2458 | if (Addr.getOpcode() == ISD::ADD) { |
2459 | if (auto *C1 = dyn_cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))) { |
2460 | SDValue AddrB = Addr.getOperand(i: 0); |
2461 | if (AddrB.getOpcode() == ISD::ADD && |
2462 | UnwrapShl(AddrB.getOperand(i: 0), Index, Scale) && |
2463 | !isa<ConstantSDNode>(Val: AddrB.getOperand(i: 1)) && |
2464 | isInt<12>(x: C1->getSExtValue())) { |
2465 | // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) |
2466 | SDValue C1Val = |
2467 | CurDAG->getTargetConstant(Val: C1->getZExtValue(), DL: SDLoc(Addr), VT); |
2468 | Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT, |
2469 | AddrB.getOperand(1), C1Val), |
2470 | 0); |
2471 | return true; |
2472 | } |
2473 | } else if (UnwrapShl(Addr.getOperand(i: 0), Index, Scale)) { |
2474 | Base = Addr.getOperand(i: 1); |
2475 | return true; |
2476 | } else { |
2477 | UnwrapShl(Addr.getOperand(i: 1), Index, Scale); |
2478 | Base = Addr.getOperand(i: 0); |
2479 | return true; |
2480 | } |
2481 | } else if (UnwrapShl(Addr, Index, Scale)) { |
2482 | EVT VT = Addr.getValueType(); |
2483 | Base = CurDAG->getRegister(RISCV::X0, VT); |
2484 | return true; |
2485 | } |
2486 | |
2487 | return false; |
2488 | } |
2489 | |
2490 | bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, |
2491 | SDValue &Offset, bool IsINX) { |
2492 | if (SelectAddrFrameIndex(Addr, Base, Offset)) |
2493 | return true; |
2494 | |
2495 | SDLoc DL(Addr); |
2496 | MVT VT = Addr.getSimpleValueType(); |
2497 | |
2498 | if (Addr.getOpcode() == RISCVISD::ADD_LO) { |
2499 | Base = Addr.getOperand(i: 0); |
2500 | Offset = Addr.getOperand(i: 1); |
2501 | return true; |
2502 | } |
2503 | |
2504 | int64_t RV32ZdinxRange = IsINX ? 4 : 0; |
2505 | if (CurDAG->isBaseWithConstantOffset(Op: Addr)) { |
2506 | int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue(); |
2507 | if (isInt<12>(x: CVal) && isInt<12>(x: CVal + RV32ZdinxRange)) { |
2508 | Base = Addr.getOperand(i: 0); |
2509 | if (Base.getOpcode() == RISCVISD::ADD_LO) { |
2510 | SDValue LoOperand = Base.getOperand(i: 1); |
2511 | if (auto *GA = dyn_cast<GlobalAddressSDNode>(Val&: LoOperand)) { |
2512 | // If the Lo in (ADD_LO hi, lo) is a global variable's address |
2513 | // (its low part, really), then we can rely on the alignment of that |
2514 | // variable to provide a margin of safety before low part can overflow |
2515 | // the 12 bits of the load/store offset. Check if CVal falls within |
2516 | // that margin; if so (low part + CVal) can't overflow. |
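// For example, for an 8-byte-aligned global the low part is a multiple of
// 8, so adding a CVal in [0, 7] cannot carry out of the low 12 bits.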
2517 | const DataLayout &DL = CurDAG->getDataLayout(); |
2518 | Align Alignment = commonAlignment( |
2519 | A: GA->getGlobal()->getPointerAlignment(DL), Offset: GA->getOffset()); |
2520 | if (CVal == 0 || Alignment > CVal) { |
2521 | int64_t CombinedOffset = CVal + GA->getOffset(); |
2522 | Base = Base.getOperand(i: 0); |
2523 | Offset = CurDAG->getTargetGlobalAddress( |
2524 | GV: GA->getGlobal(), DL: SDLoc(LoOperand), VT: LoOperand.getValueType(), |
2525 | offset: CombinedOffset, TargetFlags: GA->getTargetFlags()); |
2526 | return true; |
2527 | } |
2528 | } |
2529 | } |
2530 | |
2531 | if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val&: Base)) |
2532 | Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), VT); |
2533 | Offset = CurDAG->getTargetConstant(Val: CVal, DL, VT); |
2534 | return true; |
2535 | } |
2536 | } |
2537 | |
2538 | // Handle ADD with large immediates. |
2539 | if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Val: Addr.getOperand(i: 1))) { |
2540 | int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue(); |
2541 | assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) && |
2542 | "simm12 not already handled?" ); |
2543 | |
2544 | // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use |
2545 | // an ADDI for part of the offset and fold the rest into the load/store. |
2546 | // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td. |
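// For example, CVal = 3000 becomes (ADDI base, 2047) with a folded offset
// of 953.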
2547 | if (isInt<12>(x: CVal / 2) && isInt<12>(x: CVal - CVal / 2)) { |
2548 | int64_t Adj = CVal < 0 ? -2048 : 2047; |
2549 | Base = SDValue( |
2550 | CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0), |
2551 | CurDAG->getTargetConstant(Adj, DL, VT)), |
2552 | 0); |
2553 | Offset = CurDAG->getTargetConstant(Val: CVal - Adj, DL, VT); |
2554 | return true; |
2555 | } |
2556 | |
2557 | // For larger immediates, we might be able to save one instruction from |
2558 | // constant materialization by folding the Lo12 bits of the immediate into |
2559 | // the address. We should only do this if the ADD is only used by loads and |
2560 | // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled |
2561 | // separately with the full materialized immediate creating extra |
2562 | // instructions. |
2563 | if (isWorthFoldingAdd(Add: Addr) && |
2564 | selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr: Addr.getOperand(i: 1), Base, |
2565 | Offset)) { |
2566 | // Insert an ADD instruction with the materialized Hi52 bits. |
2567 | Base = SDValue( |
2568 | CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base), |
2569 | 0); |
2570 | return true; |
2571 | } |
2572 | } |
2573 | |
2574 | if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset)) |
2575 | return true; |
2576 | |
2577 | Base = Addr; |
2578 | Offset = CurDAG->getTargetConstant(Val: 0, DL, VT); |
2579 | return true; |
2580 | } |
2581 | |
/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
/// Offset should be all zeros.
2584 | bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, |
2585 | SDValue &Offset) { |
2586 | if (SelectAddrFrameIndex(Addr, Base, Offset)) |
2587 | return true; |
2588 | |
2589 | SDLoc DL(Addr); |
2590 | MVT VT = Addr.getSimpleValueType(); |
2591 | |
2592 | if (CurDAG->isBaseWithConstantOffset(Op: Addr)) { |
2593 | int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue(); |
2594 | if (isInt<12>(x: CVal)) { |
2595 | Base = Addr.getOperand(i: 0); |
2596 | |
2597 | // Early-out if not a valid offset. |
2598 | if ((CVal & 0b11111) != 0) { |
2599 | Base = Addr; |
2600 | Offset = CurDAG->getTargetConstant(Val: 0, DL, VT); |
2601 | return true; |
2602 | } |
2603 | |
2604 | if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val&: Base)) |
2605 | Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), VT); |
2606 | Offset = CurDAG->getTargetConstant(Val: CVal, DL, VT); |
2607 | return true; |
2608 | } |
2609 | } |
2610 | |
2611 | // Handle ADD with large immediates. |
2612 | if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Val: Addr.getOperand(i: 1))) { |
2613 | int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue(); |
assert(!isInt<12>(CVal) && "simm12 not already handled?");
2616 | |
// Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
// one instruction by folding an adjustment (-2048 or 2016) into the address.
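// For example, CVal = 2100 becomes (ADDI base, 84) with offset 2016, whose
// low 5 bits are zero as required.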
2619 | if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) { |
2620 | int64_t Adj = CVal < 0 ? -2048 : 2016; |
2621 | int64_t AdjustedOffset = CVal - Adj; |
2622 | Base = SDValue(CurDAG->getMachineNode( |
2623 | RISCV::ADDI, DL, VT, Addr.getOperand(0), |
2624 | CurDAG->getTargetConstant(AdjustedOffset, DL, VT)), |
2625 | 0); |
2626 | Offset = CurDAG->getTargetConstant(Val: Adj, DL, VT); |
2627 | return true; |
2628 | } |
2629 | |
2630 | if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr: Addr.getOperand(i: 1), Base, |
2631 | Offset, IsPrefetch: true)) { |
2632 | // Insert an ADD instruction with the materialized Hi52 bits. |
2633 | Base = SDValue( |
2634 | CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base), |
2635 | 0); |
2636 | return true; |
2637 | } |
2638 | } |
2639 | |
2640 | if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, IsPrefetch: true)) |
2641 | return true; |
2642 | |
2643 | Base = Addr; |
2644 | Offset = CurDAG->getTargetConstant(Val: 0, DL, VT); |
2645 | return true; |
2646 | } |
2647 | |
2648 | bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, |
2649 | SDValue &ShAmt) { |
2650 | ShAmt = N; |
2651 | |
2652 | // Peek through zext. |
2653 | if (ShAmt->getOpcode() == ISD::ZERO_EXTEND) |
2654 | ShAmt = ShAmt.getOperand(i: 0); |
2655 | |
2656 | // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift |
2657 | // amount. If there is an AND on the shift amount, we can bypass it if it |
2658 | // doesn't affect any of those bits. |
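// For example, with ShiftWidth == 64 only the low 6 bits matter, so
// (srl x, (and y, 0x3f)) can shift directly by y.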
2659 | if (ShAmt.getOpcode() == ISD::AND && |
2660 | isa<ConstantSDNode>(Val: ShAmt.getOperand(i: 1))) { |
2661 | const APInt &AndMask = ShAmt.getConstantOperandAPInt(i: 1); |
2662 | |
2663 | // Since the max shift amount is a power of 2 we can subtract 1 to make a |
2664 | // mask that covers the bits needed to represent all shift amounts. |
2665 | assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!" ); |
2666 | APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1); |
2667 | |
2668 | if (ShMask.isSubsetOf(RHS: AndMask)) { |
2669 | ShAmt = ShAmt.getOperand(i: 0); |
2670 | } else { |
2671 | // SimplifyDemandedBits may have optimized the mask so try restoring any |
2672 | // bits that are known zero. |
2673 | KnownBits Known = CurDAG->computeKnownBits(Op: ShAmt.getOperand(i: 0)); |
2674 | if (!ShMask.isSubsetOf(RHS: AndMask | Known.Zero)) |
2675 | return true; |
2676 | ShAmt = ShAmt.getOperand(i: 0); |
2677 | } |
2678 | } |
2679 | |
2680 | if (ShAmt.getOpcode() == ISD::ADD && |
2681 | isa<ConstantSDNode>(Val: ShAmt.getOperand(i: 1))) { |
2682 | uint64_t Imm = ShAmt.getConstantOperandVal(i: 1); |
2683 | // If we are shifting by X+N where N == 0 mod Size, then just shift by X |
2684 | // to avoid the ADD. |
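// For example, (srl x, (add y, 64)) on RV64 can shift by y directly.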
2685 | if (Imm != 0 && Imm % ShiftWidth == 0) { |
2686 | ShAmt = ShAmt.getOperand(i: 0); |
2687 | return true; |
2688 | } |
2689 | } else if (ShAmt.getOpcode() == ISD::SUB && |
2690 | isa<ConstantSDNode>(Val: ShAmt.getOperand(i: 0))) { |
2691 | uint64_t Imm = ShAmt.getConstantOperandVal(i: 0); |
2692 | // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to |
2693 | // generate a NEG instead of a SUB of a constant. |
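// For example, (sll x, (sub 64, y)) on RV64 becomes (sll x, (neg y)).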
2694 | if (Imm != 0 && Imm % ShiftWidth == 0) { |
2695 | SDLoc DL(ShAmt); |
2696 | EVT VT = ShAmt.getValueType(); |
2697 | SDValue Zero = CurDAG->getRegister(RISCV::X0, VT); |
2698 | unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB; |
2699 | MachineSDNode *Neg = CurDAG->getMachineNode(Opcode: NegOpc, dl: DL, VT, Op1: Zero, |
2700 | Op2: ShAmt.getOperand(i: 1)); |
2701 | ShAmt = SDValue(Neg, 0); |
2702 | return true; |
2703 | } |
2704 | // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X |
2705 | // to generate a NOT instead of a SUB of a constant. |
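// For example, (sll x, (sub 63, y)) on RV64 becomes (sll x, (not y)),
// since ~y == 63 - y (mod 64).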
2706 | if (Imm % ShiftWidth == ShiftWidth - 1) { |
2707 | SDLoc DL(ShAmt); |
2708 | EVT VT = ShAmt.getValueType(); |
2709 | MachineSDNode *Not = |
2710 | CurDAG->getMachineNode(RISCV::XORI, DL, VT, ShAmt.getOperand(1), |
2711 | CurDAG->getTargetConstant(-1, DL, VT)); |
2712 | ShAmt = SDValue(Not, 0); |
2713 | return true; |
2714 | } |
2715 | } |
2716 | |
2717 | return true; |
2718 | } |
2719 | |
2720 | /// RISC-V doesn't have general instructions for integer setne/seteq, but we can |
2721 | /// check for equality with 0. This function emits instructions that convert the |
2722 | /// seteq/setne into something that can be compared with 0. |
2723 | /// \p ExpectedCCVal indicates the condition code to attempt to match (e.g. |
2724 | /// ISD::SETNE). |
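/// For example (an illustrative sketch): with ExpectedCCVal == ISD::SETNE,
/// (setne X, 5) can be converted to (ADDI X, -5), which is zero iff X == 5,
/// so the caller can then test the returned value against zero.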
2725 | bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal, |
2726 | SDValue &Val) { |
2727 | assert(ISD::isIntEqualitySetCC(ExpectedCCVal) && |
2728 | "Unexpected condition code!" ); |
2729 | |
2730 | // We're looking for a setcc. |
2731 | if (N->getOpcode() != ISD::SETCC) |
2732 | return false; |
2733 | |
2734 | // Must be an equality comparison. |
2735 | ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get(); |
2736 | if (CCVal != ExpectedCCVal) |
2737 | return false; |
2738 | |
2739 | SDValue LHS = N->getOperand(Num: 0); |
2740 | SDValue RHS = N->getOperand(Num: 1); |
2741 | |
2742 | if (!LHS.getValueType().isScalarInteger()) |
2743 | return false; |
2744 | |
  // If the RHS is 0, we don't need any extra instructions; return the LHS.
2746 | if (isNullConstant(V: RHS)) { |
2747 | Val = LHS; |
2748 | return true; |
2749 | } |
2750 | |
2751 | SDLoc DL(N); |
2752 | |
2753 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: RHS)) { |
2754 | int64_t CVal = C->getSExtValue(); |
2755 | // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and |
2756 | // non-zero otherwise. |
2757 | if (CVal == -2048) { |
2758 | Val = |
2759 | SDValue(CurDAG->getMachineNode( |
2760 | RISCV::XORI, DL, N->getValueType(0), LHS, |
2761 | CurDAG->getTargetConstant(CVal, DL, N->getValueType(0))), |
2762 | 0); |
2763 | return true; |
2764 | } |
2765 | // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the |
2766 | // LHS is equal to the RHS and non-zero otherwise. |
2767 | if (isInt<12>(x: CVal) || CVal == 2048) { |
2768 | Val = |
2769 | SDValue(CurDAG->getMachineNode( |
2770 | RISCV::ADDI, DL, N->getValueType(0), LHS, |
2771 | CurDAG->getTargetConstant(-CVal, DL, N->getValueType(0))), |
2772 | 0); |
2773 | return true; |
2774 | } |
2775 | } |
2776 | |
2777 | // If nothing else we can XOR the LHS and RHS to produce zero if they are |
2778 | // equal and a non-zero value if they aren't. |
2779 | Val = SDValue( |
2780 | CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0); |
2781 | return true; |
2782 | } |
2783 | |
2784 | bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) { |
2785 | if (N.getOpcode() == ISD::SIGN_EXTEND_INREG && |
2786 | cast<VTSDNode>(Val: N.getOperand(i: 1))->getVT().getSizeInBits() == Bits) { |
2787 | Val = N.getOperand(i: 0); |
2788 | return true; |
2789 | } |
2790 | |
2791 | auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) { |
2792 | if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(Val: N.getOperand(i: 1))) |
2793 | return N; |
2794 | |
2795 | SDValue N0 = N.getOperand(i: 0); |
2796 | if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) && |
2797 | N.getConstantOperandVal(i: 1) == ShiftAmt && |
2798 | N0.getConstantOperandVal(i: 1) == ShiftAmt) |
2799 | return N0.getOperand(i: 0); |
2800 | |
2801 | return N; |
2802 | }; |
2803 | |
2804 | MVT VT = N.getSimpleValueType(); |
2805 | if (CurDAG->ComputeNumSignBits(Op: N) > (VT.getSizeInBits() - Bits)) { |
2806 | Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits); |
2807 | return true; |
2808 | } |
2809 | |
2810 | return false; |
2811 | } |
2812 | |
2813 | bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) { |
2814 | if (N.getOpcode() == ISD::AND) { |
2815 | auto *C = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1)); |
2816 | if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(N: Bits)) { |
2817 | Val = N.getOperand(i: 0); |
2818 | return true; |
2819 | } |
2820 | } |
2821 | MVT VT = N.getSimpleValueType(); |
2822 | APInt Mask = APInt::getBitsSetFrom(numBits: VT.getSizeInBits(), loBit: Bits); |
2823 | if (CurDAG->MaskedValueIsZero(Op: N, Mask)) { |
2824 | Val = N; |
2825 | return true; |
2826 | } |
2827 | |
2828 | return false; |
2829 | } |
2830 | |
2831 | /// Look for various patterns that can be done with a SHL that can be folded |
2832 | /// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which |
2833 | /// SHXADD we are trying to match. |
2834 | bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt, |
2835 | SDValue &Val) { |
2836 | if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(Val: N.getOperand(i: 1))) { |
2837 | SDValue N0 = N.getOperand(i: 0); |
2838 | |
2839 | bool LeftShift = N0.getOpcode() == ISD::SHL; |
2840 | if ((LeftShift || N0.getOpcode() == ISD::SRL) && |
2841 | isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) { |
2842 | uint64_t Mask = N.getConstantOperandVal(i: 1); |
2843 | unsigned C2 = N0.getConstantOperandVal(i: 1); |
2844 | |
2845 | unsigned XLen = Subtarget->getXLen(); |
2846 | if (LeftShift) |
2847 | Mask &= maskTrailingZeros<uint64_t>(N: C2); |
2848 | else |
2849 | Mask &= maskTrailingOnes<uint64_t>(N: XLen - C2); |
2850 | |
      // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
      // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
      // followed by a SHXADD with c3 for the X amount.
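      // For example (an illustrative sketch, assuming XLen == 64 and
      // ShAmt == 2): c2 == 1 and c1 == 0xFFFFFFFFFFFFFFFC (no leading zeros,
      // two trailing zeros) give (SRLI y, 1), which the surrounding SH2ADD
      // pattern then shifts left by 2 and adds.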
2854 | if (isShiftedMask_64(Value: Mask)) { |
2855 | unsigned Leading = XLen - llvm::bit_width(Value: Mask); |
2856 | unsigned Trailing = llvm::countr_zero(Val: Mask); |
2857 | if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) { |
2858 | SDLoc DL(N); |
2859 | EVT VT = N.getValueType(); |
2860 | Val = SDValue(CurDAG->getMachineNode( |
2861 | RISCV::SRLI, DL, VT, N0.getOperand(0), |
2862 | CurDAG->getTargetConstant(Trailing - C2, DL, VT)), |
2863 | 0); |
2864 | return true; |
2865 | } |
          // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
          // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
          // followed by a SHXADD using c3 for the X amount.
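          // For example (an illustrative sketch, assuming ShAmt == 3):
          // c2 == 8 and c1 == 0xFFFFFFFFFFFFF8 (bits 3..55 set) give
          // (SRLI y, 11), which the surrounding SH3ADD pattern then shifts
          // left by 3 and adds.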
2869 | if (!LeftShift && Leading == C2 && Trailing == ShAmt) { |
2870 | SDLoc DL(N); |
2871 | EVT VT = N.getValueType(); |
2872 | Val = SDValue( |
2873 | CurDAG->getMachineNode( |
2874 | RISCV::SRLI, DL, VT, N0.getOperand(0), |
2875 | CurDAG->getTargetConstant(Leading + Trailing, DL, VT)), |
2876 | 0); |
2877 | return true; |
2878 | } |
2879 | } |
2880 | } |
2881 | } |
2882 | |
2883 | bool LeftShift = N.getOpcode() == ISD::SHL; |
2884 | if ((LeftShift || N.getOpcode() == ISD::SRL) && |
2885 | isa<ConstantSDNode>(Val: N.getOperand(i: 1))) { |
2886 | SDValue N0 = N.getOperand(i: 0); |
2887 | if (N0.getOpcode() == ISD::AND && N0.hasOneUse() && |
2888 | isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) { |
2889 | uint64_t Mask = N0.getConstantOperandVal(i: 1); |
2890 | if (isShiftedMask_64(Value: Mask)) { |
2891 | unsigned C1 = N.getConstantOperandVal(i: 1); |
2892 | unsigned XLen = Subtarget->getXLen(); |
2893 | unsigned Leading = XLen - llvm::bit_width(Value: Mask); |
2894 | unsigned Trailing = llvm::countr_zero(Val: Mask); |
2895 | // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and |
2896 | // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD. |
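        // For example (an illustrative sketch, assuming ShAmt == 3): C1 == 1
        // and Mask == 0xFFFFFFFC (32 leading zeros, two trailing zeros) give
        // (SRLIW X, 2), which the surrounding SH3ADD pattern then shifts left
        // by 3 and adds.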
2897 | if (LeftShift && Leading == 32 && Trailing > 0 && |
2898 | (Trailing + C1) == ShAmt) { |
2899 | SDLoc DL(N); |
2900 | EVT VT = N.getValueType(); |
2901 | Val = SDValue(CurDAG->getMachineNode( |
2902 | RISCV::SRLIW, DL, VT, N0.getOperand(0), |
2903 | CurDAG->getTargetConstant(Trailing, DL, VT)), |
2904 | 0); |
2905 | return true; |
2906 | } |
2907 | // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and |
2908 | // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD. |
2909 | if (!LeftShift && Leading == 32 && Trailing > C1 && |
2910 | (Trailing - C1) == ShAmt) { |
2911 | SDLoc DL(N); |
2912 | EVT VT = N.getValueType(); |
2913 | Val = SDValue(CurDAG->getMachineNode( |
2914 | RISCV::SRLIW, DL, VT, N0.getOperand(0), |
2915 | CurDAG->getTargetConstant(Trailing, DL, VT)), |
2916 | 0); |
2917 | return true; |
2918 | } |
2919 | } |
2920 | } |
2921 | } |
2922 | |
2923 | return false; |
2924 | } |
2925 | |
2926 | /// Look for various patterns that can be done with a SHL that can be folded |
2927 | /// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which |
2928 | /// SHXADD_UW we are trying to match. |
2929 | bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt, |
2930 | SDValue &Val) { |
2931 | if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(Val: N.getOperand(i: 1)) && |
2932 | N.hasOneUse()) { |
2933 | SDValue N0 = N.getOperand(i: 0); |
2934 | if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) && |
2935 | N0.hasOneUse()) { |
2936 | uint64_t Mask = N.getConstantOperandVal(i: 1); |
2937 | unsigned C2 = N0.getConstantOperandVal(i: 1); |
2938 | |
2939 | Mask &= maskTrailingZeros<uint64_t>(N: C2); |
2940 | |
2941 | // Look for (and (shl y, c2), c1) where c1 is a shifted mask with |
2942 | // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by |
2943 | // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount. |
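      // For example (an illustrative sketch, assuming ShAmt == 2): c2 == 4
      // and c1 == 0x3FFFFFFF0 (bits 4..33 set, i.e. 30 leading zeros) give
      // (SLLI y, 2); SH2ADD_UW then zero-extends the low 32 bits and shifts
      // them left by 2, recreating bits 4..33.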
2944 | if (isShiftedMask_64(Value: Mask)) { |
2945 | unsigned Leading = llvm::countl_zero(Val: Mask); |
2946 | unsigned Trailing = llvm::countr_zero(Val: Mask); |
2947 | if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) { |
2948 | SDLoc DL(N); |
2949 | EVT VT = N.getValueType(); |
2950 | Val = SDValue(CurDAG->getMachineNode( |
2951 | RISCV::SLLI, DL, VT, N0.getOperand(0), |
2952 | CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)), |
2953 | 0); |
2954 | return true; |
2955 | } |
2956 | } |
2957 | } |
2958 | } |
2959 | |
2960 | return false; |
2961 | } |
2962 | |
2963 | static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo, |
2964 | unsigned Bits, |
2965 | const TargetInstrInfo *TII) { |
2966 | unsigned MCOpcode = RISCV::getRVVMCOpcode(RVVPseudoOpcode: User->getMachineOpcode()); |
2967 | |
2968 | if (!MCOpcode) |
2969 | return false; |
2970 | |
2971 | const MCInstrDesc &MCID = TII->get(Opcode: User->getMachineOpcode()); |
2972 | const uint64_t TSFlags = MCID.TSFlags; |
2973 | if (!RISCVII::hasSEWOp(TSFlags)) |
2974 | return false; |
2975 | assert(RISCVII::hasVLOp(TSFlags)); |
2976 | |
2977 | bool HasGlueOp = User->getGluedNode() != nullptr; |
2978 | unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1; |
2979 | bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other; |
2980 | bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags); |
2981 | unsigned VLIdx = |
2982 | User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2; |
2983 | const unsigned Log2SEW = User->getConstantOperandVal(Num: VLIdx + 1); |
2984 | |
2985 | if (UserOpNo == VLIdx) |
2986 | return false; |
2987 | |
2988 | auto NumDemandedBits = |
2989 | RISCV::getVectorLowDemandedScalarBits(Opcode: MCOpcode, Log2SEW); |
2990 | return NumDemandedBits && Bits >= *NumDemandedBits; |
2991 | } |
2992 | |
2993 | // Return true if all users of this SDNode* only consume the lower \p Bits. |
2994 | // This can be used to form W instructions for add/sub/mul/shl even when the |
2995 | // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if |
2996 | // SimplifyDemandedBits has made it so some users see a sext_inreg and some |
2997 | // don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave |
2998 | // the add/sub/mul/shl to become non-W instructions. By checking the users we |
2999 | // may be able to use a W instruction and CSE with the other instruction if |
3000 | // this has happened. We could try to detect that the CSE opportunity exists |
3001 | // before doing this, but that would be more complicated. |
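// For example (an illustrative sketch): on RV64, if an (add X, Y) is consumed
// only as the stored value of an SW, only the low 32 bits are demanded, so an
// ADDW can be used and may CSE with an existing ADDW of the same operands.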
3002 | bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits, |
3003 | const unsigned Depth) const { |
3004 | assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB || |
3005 | Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL || |
3006 | Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND || |
3007 | Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR || |
3008 | Node->getOpcode() == ISD::SIGN_EXTEND_INREG || |
3009 | isa<ConstantSDNode>(Node) || Depth != 0) && |
3010 | "Unexpected opcode" ); |
3011 | |
3012 | if (Depth >= SelectionDAG::MaxRecursionDepth) |
3013 | return false; |
3014 | |
3015 | // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked |
3016 | // the VT. Ensure the type is scalar to avoid wasting time on vectors. |
3017 | if (Depth == 0 && !Node->getValueType(ResNo: 0).isScalarInteger()) |
3018 | return false; |
3019 | |
3020 | for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) { |
3021 | SDNode *User = *UI; |
3022 | // Users of this node should have already been instruction selected |
3023 | if (!User->isMachineOpcode()) |
3024 | return false; |
3025 | |
3026 | // TODO: Add more opcodes? |
3027 | switch (User->getMachineOpcode()) { |
3028 | default: |
3029 | if (vectorPseudoHasAllNBitUsers(User, UserOpNo: UI.getOperandNo(), Bits, TII)) |
3030 | break; |
3031 | return false; |
3032 | case RISCV::ADDW: |
3033 | case RISCV::ADDIW: |
3034 | case RISCV::SUBW: |
3035 | case RISCV::MULW: |
3036 | case RISCV::SLLW: |
3037 | case RISCV::SLLIW: |
3038 | case RISCV::SRAW: |
3039 | case RISCV::SRAIW: |
3040 | case RISCV::SRLW: |
3041 | case RISCV::SRLIW: |
3042 | case RISCV::DIVW: |
3043 | case RISCV::DIVUW: |
3044 | case RISCV::REMW: |
3045 | case RISCV::REMUW: |
3046 | case RISCV::ROLW: |
3047 | case RISCV::RORW: |
3048 | case RISCV::RORIW: |
3049 | case RISCV::CLZW: |
3050 | case RISCV::CTZW: |
3051 | case RISCV::CPOPW: |
3052 | case RISCV::SLLI_UW: |
3053 | case RISCV::FMV_W_X: |
3054 | case RISCV::FCVT_H_W: |
3055 | case RISCV::FCVT_H_WU: |
3056 | case RISCV::FCVT_S_W: |
3057 | case RISCV::FCVT_S_WU: |
3058 | case RISCV::FCVT_D_W: |
3059 | case RISCV::FCVT_D_WU: |
3060 | case RISCV::TH_REVW: |
3061 | case RISCV::TH_SRRIW: |
3062 | if (Bits < 32) |
3063 | return false; |
3064 | break; |
3065 | case RISCV::SLL: |
3066 | case RISCV::SRA: |
3067 | case RISCV::SRL: |
3068 | case RISCV::ROL: |
3069 | case RISCV::ROR: |
3070 | case RISCV::BSET: |
3071 | case RISCV::BCLR: |
3072 | case RISCV::BINV: |
    // Shift amount operands only use log2(XLen) bits.
3074 | if (UI.getOperandNo() != 1 || Bits < Log2_32(Value: Subtarget->getXLen())) |
3075 | return false; |
3076 | break; |
3077 | case RISCV::SLLI: |
3078 | // SLLI only uses the lower (XLen - ShAmt) bits. |
3079 | if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(Num: 1)) |
3080 | return false; |
3081 | break; |
3082 | case RISCV::ANDI: |
3083 | if (Bits >= (unsigned)llvm::bit_width(Value: User->getConstantOperandVal(Num: 1))) |
3084 | break; |
3085 | goto RecCheck; |
3086 | case RISCV::ORI: { |
3087 | uint64_t Imm = cast<ConstantSDNode>(Val: User->getOperand(Num: 1))->getSExtValue(); |
3088 | if (Bits >= (unsigned)llvm::bit_width<uint64_t>(Value: ~Imm)) |
3089 | break; |
3090 | [[fallthrough]]; |
3091 | } |
3092 | case RISCV::AND: |
3093 | case RISCV::OR: |
3094 | case RISCV::XOR: |
3095 | case RISCV::XORI: |
3096 | case RISCV::ANDN: |
3097 | case RISCV::ORN: |
3098 | case RISCV::XNOR: |
3099 | case RISCV::SH1ADD: |
3100 | case RISCV::SH2ADD: |
3101 | case RISCV::SH3ADD: |
3102 | RecCheck: |
3103 | if (hasAllNBitUsers(Node: User, Bits, Depth: Depth + 1)) |
3104 | break; |
3105 | return false; |
3106 | case RISCV::SRLI: { |
3107 | unsigned ShAmt = User->getConstantOperandVal(Num: 1); |
3108 | // If we are shifting right by less than Bits, and users don't demand any |
3109 | // bits that were shifted into [Bits-1:0], then we can consider this as an |
3110 | // N-Bit user. |
3111 | if (Bits > ShAmt && hasAllNBitUsers(Node: User, Bits: Bits - ShAmt, Depth: Depth + 1)) |
3112 | break; |
3113 | return false; |
3114 | } |
3115 | case RISCV::SEXT_B: |
3116 | case RISCV::PACKH: |
3117 | if (Bits < 8) |
3118 | return false; |
3119 | break; |
3120 | case RISCV::SEXT_H: |
3121 | case RISCV::FMV_H_X: |
3122 | case RISCV::ZEXT_H_RV32: |
3123 | case RISCV::ZEXT_H_RV64: |
3124 | case RISCV::PACKW: |
3125 | if (Bits < 16) |
3126 | return false; |
3127 | break; |
3128 | case RISCV::PACK: |
3129 | if (Bits < (Subtarget->getXLen() / 2)) |
3130 | return false; |
3131 | break; |
3132 | case RISCV::ADD_UW: |
3133 | case RISCV::SH1ADD_UW: |
3134 | case RISCV::SH2ADD_UW: |
3135 | case RISCV::SH3ADD_UW: |
3136 | // The first operand to add.uw/shXadd.uw is implicitly zero extended from |
3137 | // 32 bits. |
3138 | if (UI.getOperandNo() != 0 || Bits < 32) |
3139 | return false; |
3140 | break; |
3141 | case RISCV::SB: |
3142 | if (UI.getOperandNo() != 0 || Bits < 8) |
3143 | return false; |
3144 | break; |
3145 | case RISCV::SH: |
3146 | if (UI.getOperandNo() != 0 || Bits < 16) |
3147 | return false; |
3148 | break; |
3149 | case RISCV::SW: |
3150 | if (UI.getOperandNo() != 0 || Bits < 32) |
3151 | return false; |
3152 | break; |
3153 | } |
3154 | } |
3155 | |
3156 | return true; |
3157 | } |
3158 | |
3159 | // Select a constant that can be represented as (sign_extend(imm5) << imm2). |
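// For example (an illustrative sketch): -44 == -11 << 2, so Simm5 is -11 and
// Shl2 is 2; 100 cannot be encoded, since 100 >> 2 == 25 does not fit in a
// simm5 and 100 is not divisible by 1 << 3.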
3160 | bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5, |
3161 | SDValue &Shl2) { |
3162 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: N)) { |
3163 | int64_t Offset = C->getSExtValue(); |
3164 | int64_t Shift; |
3165 | for (Shift = 0; Shift < 4; Shift++) |
3166 | if (isInt<5>(x: Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) |
3167 | break; |
3168 | |
3169 | // Constant cannot be encoded. |
3170 | if (Shift == 4) |
3171 | return false; |
3172 | |
3173 | EVT Ty = N->getValueType(ResNo: 0); |
3174 | Simm5 = CurDAG->getTargetConstant(Val: Offset >> Shift, DL: SDLoc(N), VT: Ty); |
3175 | Shl2 = CurDAG->getTargetConstant(Val: Shift, DL: SDLoc(N), VT: Ty); |
3176 | return true; |
3177 | } |
3178 | |
3179 | return false; |
3180 | } |
3181 | |
3182 | // Select VL as a 5 bit immediate or a value that will become a register. This |
// allows us to choose between VSETIVLI or VSETVLI later.
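// For example (an illustrative sketch): a constant VL of 8 fits in a uimm5
// and can later become a VSETIVLI, while an all-ones constant or the X0
// register becomes the VLMaxSentinel immediate.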
3184 | bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) { |
3185 | auto *C = dyn_cast<ConstantSDNode>(Val&: N); |
3186 | if (C && isUInt<5>(x: C->getZExtValue())) { |
3187 | VL = CurDAG->getTargetConstant(Val: C->getZExtValue(), DL: SDLoc(N), |
3188 | VT: N->getValueType(ResNo: 0)); |
3189 | } else if (C && C->isAllOnes()) { |
3190 | // Treat all ones as VLMax. |
3191 | VL = CurDAG->getTargetConstant(Val: RISCV::VLMaxSentinel, DL: SDLoc(N), |
3192 | VT: N->getValueType(ResNo: 0)); |
3193 | } else if (isa<RegisterSDNode>(N) && |
3194 | cast<RegisterSDNode>(N)->getReg() == RISCV::X0) { |
3195 | // All our VL operands use an operand that allows GPRNoX0 or an immediate |
3196 | // as the register class. Convert X0 to a special immediate to pass the |
3197 | // MachineVerifier. This is recognized specially by the vsetvli insertion |
3198 | // pass. |
3199 | VL = CurDAG->getTargetConstant(Val: RISCV::VLMaxSentinel, DL: SDLoc(N), |
3200 | VT: N->getValueType(ResNo: 0)); |
3201 | } else { |
3202 | VL = N; |
3203 | } |
3204 | |
3205 | return true; |
3206 | } |
3207 | |
3208 | static SDValue findVSplat(SDValue N) { |
3209 | if (N.getOpcode() == ISD::INSERT_SUBVECTOR) { |
3210 | if (!N.getOperand(i: 0).isUndef()) |
3211 | return SDValue(); |
3212 | N = N.getOperand(i: 1); |
3213 | } |
3214 | SDValue Splat = N; |
3215 | if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL && |
3216 | Splat.getOpcode() != RISCVISD::VMV_S_X_VL) || |
3217 | !Splat.getOperand(i: 0).isUndef()) |
3218 | return SDValue(); |
3219 | assert(Splat.getNumOperands() == 3 && "Unexpected number of operands" ); |
3220 | return Splat; |
3221 | } |
3222 | |
3223 | bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) { |
3224 | SDValue Splat = findVSplat(N); |
3225 | if (!Splat) |
3226 | return false; |
3227 | |
3228 | SplatVal = Splat.getOperand(i: 1); |
3229 | return true; |
3230 | } |
3231 | |
3232 | static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal, |
3233 | SelectionDAG &DAG, |
3234 | const RISCVSubtarget &Subtarget, |
3235 | std::function<bool(int64_t)> ValidateImm) { |
3236 | SDValue Splat = findVSplat(N); |
3237 | if (!Splat || !isa<ConstantSDNode>(Val: Splat.getOperand(i: 1))) |
3238 | return false; |
3239 | |
3240 | const unsigned SplatEltSize = Splat.getScalarValueSizeInBits(); |
3241 | assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() && |
3242 | "Unexpected splat operand type" ); |
3243 | |
3244 | // The semantics of RISCVISD::VMV_V_X_VL is that when the operand |
3245 | // type is wider than the resulting vector element type: an implicit |
3246 | // truncation first takes place. Therefore, perform a manual |
3247 | // truncation/sign-extension in order to ignore any truncated bits and catch |
3248 | // any zero-extended immediate. |
3249 | // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first |
3250 | // sign-extending to (XLenVT -1). |
3251 | APInt SplatConst = Splat.getConstantOperandAPInt(i: 1).sextOrTrunc(width: SplatEltSize); |
3252 | |
3253 | int64_t SplatImm = SplatConst.getSExtValue(); |
3254 | |
3255 | if (!ValidateImm(SplatImm)) |
3256 | return false; |
3257 | |
3258 | SplatVal = DAG.getTargetConstant(Val: SplatImm, DL: SDLoc(N), VT: Subtarget.getXLenVT()); |
3259 | return true; |
3260 | } |
3261 | |
3262 | bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) { |
3263 | return selectVSplatImmHelper(N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget, |
3264 | ValidateImm: [](int64_t Imm) { return isInt<5>(x: Imm); }); |
3265 | } |
3266 | |
3267 | bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) { |
3268 | return selectVSplatImmHelper( |
3269 | N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget, |
3270 | ValidateImm: [](int64_t Imm) { return (isInt<5>(x: Imm) && Imm != -16) || Imm == 16; }); |
3271 | } |
3272 | |
3273 | bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N, |
3274 | SDValue &SplatVal) { |
3275 | return selectVSplatImmHelper( |
3276 | N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget, ValidateImm: [](int64_t Imm) { |
3277 | return Imm != 0 && ((isInt<5>(x: Imm) && Imm != -16) || Imm == 16); |
3278 | }); |
3279 | } |
3280 | |
3281 | bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits, |
3282 | SDValue &SplatVal) { |
3283 | return selectVSplatImmHelper( |
3284 | N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget, |
3285 | ValidateImm: [Bits](int64_t Imm) { return isUIntN(N: Bits, x: Imm); }); |
3286 | } |
3287 | |
3288 | bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) { |
3289 | auto IsExtOrTrunc = [](SDValue N) { |
3290 | switch (N->getOpcode()) { |
3291 | case ISD::SIGN_EXTEND: |
3292 | case ISD::ZERO_EXTEND: |
3293 | // There's no passthru on these _VL nodes so any VL/mask is ok, since any |
3294 | // inactive elements will be undef. |
3295 | case RISCVISD::TRUNCATE_VECTOR_VL: |
3296 | case RISCVISD::VSEXT_VL: |
3297 | case RISCVISD::VZEXT_VL: |
3298 | return true; |
3299 | default: |
3300 | return false; |
3301 | } |
3302 | }; |
3303 | |
3304 | // We can have multiple nested nodes, so unravel them all if needed. |
3305 | while (IsExtOrTrunc(N)) { |
3306 | if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8) |
3307 | return false; |
3308 | N = N->getOperand(Num: 0); |
3309 | } |
3310 | |
3311 | return selectVSplat(N, SplatVal); |
3312 | } |
3313 | |
3314 | bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) { |
3315 | ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Val: N.getNode()); |
3316 | if (!CFP) |
3317 | return false; |
3318 | const APFloat &APF = CFP->getValueAPF(); |
3319 | // td can handle +0.0 already. |
3320 | if (APF.isPosZero()) |
3321 | return false; |
3322 | |
3323 | MVT VT = CFP->getSimpleValueType(ResNo: 0); |
3324 | |
3325 | // Even if this FPImm requires an additional FNEG (i.e. the second element of |
3326 | // the returned pair is true) we still prefer FLI + FNEG over immediate |
3327 | // materialization as the latter might generate a longer instruction sequence. |
3328 | if (static_cast<const RISCVTargetLowering *>(TLI) |
3329 | ->getLegalZfaFPImm(Imm: APF, VT) |
3330 | .first >= 0) |
3331 | return false; |
3332 | |
3333 | MVT XLenVT = Subtarget->getXLenVT(); |
3334 | if (VT == MVT::f64 && !Subtarget->is64Bit()) { |
3335 | assert(APF.isNegZero() && "Unexpected constant." ); |
3336 | return false; |
3337 | } |
3338 | SDLoc DL(N); |
3339 | Imm = selectImm(CurDAG, DL, VT: XLenVT, Imm: APF.bitcastToAPInt().getSExtValue(), |
3340 | Subtarget: *Subtarget); |
3341 | return true; |
3342 | } |
3343 | |
3344 | bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width, |
3345 | SDValue &Imm) { |
3346 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: N)) { |
3347 | int64_t ImmVal = SignExtend64(X: C->getSExtValue(), B: Width); |
3348 | |
3349 | if (!isInt<5>(x: ImmVal)) |
3350 | return false; |
3351 | |
3352 | Imm = CurDAG->getTargetConstant(Val: ImmVal, DL: SDLoc(N), VT: Subtarget->getXLenVT()); |
3353 | return true; |
3354 | } |
3355 | |
3356 | return false; |
3357 | } |
3358 | |
3359 | // Try to remove sext.w if the input is a W instruction or can be made into |
3360 | // a W instruction cheaply. |
3361 | bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) { |
3362 | // Look for the sext.w pattern, addiw rd, rs1, 0. |
3363 | if (N->getMachineOpcode() != RISCV::ADDIW || |
3364 | !isNullConstant(N->getOperand(1))) |
3365 | return false; |
3366 | |
3367 | SDValue N0 = N->getOperand(Num: 0); |
3368 | if (!N0.isMachineOpcode()) |
3369 | return false; |
3370 | |
3371 | switch (N0.getMachineOpcode()) { |
3372 | default: |
3373 | break; |
3374 | case RISCV::ADD: |
3375 | case RISCV::ADDI: |
3376 | case RISCV::SUB: |
3377 | case RISCV::MUL: |
3378 | case RISCV::SLLI: { |
3379 | // Convert sext.w+add/sub/mul to their W instructions. This will create |
3380 | // a new independent instruction. This improves latency. |
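    // For example (an illustrative sketch):
    //   addiw rd, (add rs1, rs2), 0 -> addw rd, rs1, rs2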
3381 | unsigned Opc; |
3382 | switch (N0.getMachineOpcode()) { |
3383 | default: |
3384 | llvm_unreachable("Unexpected opcode!" ); |
3385 | case RISCV::ADD: Opc = RISCV::ADDW; break; |
3386 | case RISCV::ADDI: Opc = RISCV::ADDIW; break; |
3387 | case RISCV::SUB: Opc = RISCV::SUBW; break; |
3388 | case RISCV::MUL: Opc = RISCV::MULW; break; |
3389 | case RISCV::SLLI: Opc = RISCV::SLLIW; break; |
3390 | } |
3391 | |
3392 | SDValue N00 = N0.getOperand(i: 0); |
3393 | SDValue N01 = N0.getOperand(i: 1); |
3394 | |
3395 | // Shift amount needs to be uimm5. |
3396 | if (N0.getMachineOpcode() == RISCV::SLLI && |
3397 | !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue())) |
3398 | break; |
3399 | |
3400 | SDNode *Result = |
3401 | CurDAG->getMachineNode(Opcode: Opc, dl: SDLoc(N), VT: N->getValueType(ResNo: 0), |
3402 | Op1: N00, Op2: N01); |
3403 | ReplaceUses(F: N, T: Result); |
3404 | return true; |
3405 | } |
3406 | case RISCV::ADDW: |
3407 | case RISCV::ADDIW: |
3408 | case RISCV::SUBW: |
3409 | case RISCV::MULW: |
3410 | case RISCV::SLLIW: |
3411 | case RISCV::PACKW: |
3412 | case RISCV::TH_MULAW: |
3413 | case RISCV::TH_MULAH: |
3414 | case RISCV::TH_MULSW: |
3415 | case RISCV::TH_MULSH: |
3416 | if (N0.getValueType() == MVT::i32) |
3417 | break; |
3418 | |
    // Result is already sign extended; just remove the sext.w.
3420 | // NOTE: We only handle the nodes that are selected with hasAllWUsers. |
3421 | ReplaceUses(F: N, T: N0.getNode()); |
3422 | return true; |
3423 | } |
3424 | |
3425 | return false; |
3426 | } |
3427 | |
3428 | static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) { |
3429 | // Check that we're using V0 as a mask register. |
3430 | if (!isa<RegisterSDNode>(MaskOp) || |
3431 | cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0) |
3432 | return false; |
3433 | |
3434 | // The glued user defines V0. |
3435 | const auto *Glued = GlueOp.getNode(); |
3436 | |
3437 | if (!Glued || Glued->getOpcode() != ISD::CopyToReg) |
3438 | return false; |
3439 | |
3440 | // Check that we're defining V0 as a mask register. |
3441 | if (!isa<RegisterSDNode>(Glued->getOperand(1)) || |
3442 | cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0) |
3443 | return false; |
3444 | |
3445 | // Check the instruction defining V0; it needs to be a VMSET pseudo. |
3446 | SDValue MaskSetter = Glued->getOperand(Num: 2); |
3447 | |
3448 | // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came |
3449 | // from an extract_subvector or insert_subvector. |
3450 | if (MaskSetter->isMachineOpcode() && |
3451 | MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS) |
3452 | MaskSetter = MaskSetter->getOperand(Num: 0); |
3453 | |
3454 | const auto IsVMSet = [](unsigned Opc) { |
3455 | return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 || |
3456 | Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 || |
3457 | Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 || |
3458 | Opc == RISCV::PseudoVMSET_M_B8; |
3459 | }; |
3460 | |
3461 | // TODO: Check that the VMSET is the expected bitwidth? The pseudo has |
3462 | // undefined behaviour if it's the wrong bitwidth, so we could choose to |
3463 | // assume that it's all-ones? Same applies to its VL. |
3464 | return MaskSetter->isMachineOpcode() && |
3465 | IsVMSet(MaskSetter.getMachineOpcode()); |
3466 | } |
3467 | |
// Return true if we can prove that the mask of N is an all-ones mask.
3469 | static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) { |
3470 | return usesAllOnesMask(MaskOp: N->getOperand(Num: MaskOpIdx), |
3471 | GlueOp: N->getOperand(Num: N->getNumOperands() - 1)); |
3472 | } |
3473 | |
3474 | static bool isImplicitDef(SDValue V) { |
3475 | return V.isMachineOpcode() && |
3476 | V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF; |
3477 | } |
3478 | |
3479 | // Optimize masked RVV pseudo instructions with a known all-ones mask to their |
3480 | // corresponding "unmasked" pseudo versions. The mask we're interested in will |
3481 | // take the form of a V0 physical register operand, with a glued |
3482 | // register-setting instruction. |
3483 | bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) { |
3484 | const RISCV::RISCVMaskedPseudoInfo *I = |
3485 | RISCV::getMaskedPseudoInfo(N->getMachineOpcode()); |
3486 | if (!I) |
3487 | return false; |
3488 | |
3489 | unsigned MaskOpIdx = I->MaskOpIdx; |
3490 | if (!usesAllOnesMask(N, MaskOpIdx)) |
3491 | return false; |
3492 | |
3493 | // There are two classes of pseudos in the table - compares and |
3494 | // everything else. See the comment on RISCVMaskedPseudo for details. |
3495 | const unsigned Opc = I->UnmaskedPseudo; |
3496 | const MCInstrDesc &MCID = TII->get(Opcode: Opc); |
3497 | const bool UseTUPseudo = RISCVII::hasVecPolicyOp(TSFlags: MCID.TSFlags); |
3498 | #ifndef NDEBUG |
3499 | const MCInstrDesc &MaskedMCID = TII->get(Opcode: N->getMachineOpcode()); |
3500 | assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) == |
3501 | RISCVII::hasVecPolicyOp(MCID.TSFlags) && |
3502 | "Masked and unmasked pseudos are inconsistent" ); |
3503 | const bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(Desc: MCID); |
3504 | assert(UseTUPseudo == HasTiedDest && "Unexpected pseudo structure" ); |
3505 | #endif |
3506 | |
3507 | SmallVector<SDValue, 8> Ops; |
3508 | // Skip the merge operand at index 0 if !UseTUPseudo. |
3509 | for (unsigned I = !UseTUPseudo, E = N->getNumOperands(); I != E; I++) { |
3510 | // Skip the mask, and the Glue. |
3511 | SDValue Op = N->getOperand(Num: I); |
3512 | if (I == MaskOpIdx || Op.getValueType() == MVT::Glue) |
3513 | continue; |
3514 | Ops.push_back(Elt: Op); |
3515 | } |
3516 | |
3517 | // Transitively apply any node glued to our new node. |
3518 | const auto *Glued = N->getGluedNode(); |
3519 | if (auto *TGlued = Glued->getGluedNode()) |
3520 | Ops.push_back(Elt: SDValue(TGlued, TGlued->getNumValues() - 1)); |
3521 | |
3522 | MachineSDNode *Result = |
3523 | CurDAG->getMachineNode(Opcode: Opc, dl: SDLoc(N), VTs: N->getVTList(), Ops); |
3524 | |
3525 | if (!N->memoperands_empty()) |
3526 | CurDAG->setNodeMemRefs(N: Result, NewMemRefs: N->memoperands()); |
3527 | |
3528 | Result->setFlags(N->getFlags()); |
3529 | ReplaceUses(F: N, T: Result); |
3530 | |
3531 | return true; |
3532 | } |
3533 | |
3534 | static bool IsVMerge(SDNode *N) { |
3535 | return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM; |
3536 | } |
3537 | |
3538 | static bool IsVMv(SDNode *N) { |
3539 | return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMV_V_V; |
3540 | } |
3541 | |
3542 | static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) { |
3543 | switch (LMUL) { |
3544 | case RISCVII::LMUL_F8: |
3545 | return RISCV::PseudoVMSET_M_B1; |
3546 | case RISCVII::LMUL_F4: |
3547 | return RISCV::PseudoVMSET_M_B2; |
3548 | case RISCVII::LMUL_F2: |
3549 | return RISCV::PseudoVMSET_M_B4; |
3550 | case RISCVII::LMUL_1: |
3551 | return RISCV::PseudoVMSET_M_B8; |
3552 | case RISCVII::LMUL_2: |
3553 | return RISCV::PseudoVMSET_M_B16; |
3554 | case RISCVII::LMUL_4: |
3555 | return RISCV::PseudoVMSET_M_B32; |
3556 | case RISCVII::LMUL_8: |
3557 | return RISCV::PseudoVMSET_M_B64; |
3558 | case RISCVII::LMUL_RESERVED: |
3559 | llvm_unreachable("Unexpected LMUL" ); |
3560 | } |
3561 | llvm_unreachable("Unknown VLMUL enum" ); |
3562 | } |
3563 | |
3564 | // Try to fold away VMERGE_VVM instructions into their true operands: |
3565 | // |
3566 | // %true = PseudoVADD_VV ... |
3567 | // %x = PseudoVMERGE_VVM %false, %false, %true, %mask |
3568 | // -> |
3569 | // %x = PseudoVADD_VV_MASK %false, ..., %mask |
3570 | // |
3571 | // We can only fold if vmerge's merge operand, vmerge's false operand and |
3572 | // %true's merge operand (if it has one) are the same. This is because we have |
3573 | // to consolidate them into one merge operand in the result. |
3574 | // |
3575 | // If %true is masked, then we can use its mask instead of vmerge's if vmerge's |
3576 | // mask is all ones. |
3577 | // |
3578 | // We can also fold a VMV_V_V into its true operand, since it is equivalent to a |
3579 | // VMERGE_VVM with an all ones mask. |
3580 | // |
3581 | // The resulting VL is the minimum of the two VLs. |
3582 | // |
3583 | // The resulting policy is the effective policy the vmerge would have had, |
// i.e. whether or not its merge operand was implicit-def.
3585 | bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) { |
3586 | SDValue Merge, False, True, VL, Mask, Glue; |
3587 | // A vmv.v.v is equivalent to a vmerge with an all-ones mask. |
3588 | if (IsVMv(N)) { |
3589 | Merge = N->getOperand(Num: 0); |
3590 | False = N->getOperand(Num: 0); |
3591 | True = N->getOperand(Num: 1); |
3592 | VL = N->getOperand(Num: 2); |
3593 | // A vmv.v.v won't have a Mask or Glue, instead we'll construct an all-ones |
3594 | // mask later below. |
3595 | } else { |
3596 | assert(IsVMerge(N)); |
3597 | Merge = N->getOperand(Num: 0); |
3598 | False = N->getOperand(Num: 1); |
3599 | True = N->getOperand(Num: 2); |
3600 | Mask = N->getOperand(Num: 3); |
3601 | VL = N->getOperand(Num: 4); |
3602 | // We always have a glue node for the mask at v0. |
3603 | Glue = N->getOperand(Num: N->getNumOperands() - 1); |
3604 | } |
3605 | assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0); |
3606 | assert(!Glue || Glue.getValueType() == MVT::Glue); |
3607 | |
3608 | // We require that either merge and false are the same, or that merge |
3609 | // is undefined. |
3610 | if (Merge != False && !isImplicitDef(V: Merge)) |
3611 | return false; |
3612 | |
3613 | assert(True.getResNo() == 0 && |
3614 | "Expect True is the first output of an instruction." ); |
3615 | |
  // N must be the only user of True.
3617 | if (!True.hasOneUse()) |
3618 | return false; |
3619 | |
3620 | if (!True.isMachineOpcode()) |
3621 | return false; |
3622 | |
3623 | unsigned TrueOpc = True.getMachineOpcode(); |
3624 | const MCInstrDesc &TrueMCID = TII->get(Opcode: TrueOpc); |
3625 | uint64_t TrueTSFlags = TrueMCID.TSFlags; |
3626 | bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(Desc: TrueMCID); |
3627 | |
3628 | bool IsMasked = false; |
3629 | const RISCV::RISCVMaskedPseudoInfo *Info = |
3630 | RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc); |
3631 | if (!Info && HasTiedDest) { |
3632 | Info = RISCV::getMaskedPseudoInfo(TrueOpc); |
3633 | IsMasked = true; |
3634 | } |
3635 | |
3636 | if (!Info) |
3637 | return false; |
3638 | |
  // If Mask is not an all-ones mask, this transformation is illegal for some
  // operations whose results are affected by the mask, like viota.m.
3641 | if (Info->MaskAffectsResult && Mask && !usesAllOnesMask(MaskOp: Mask, GlueOp: Glue)) |
3642 | return false; |
3643 | |
3644 | // If True has a merge operand then it needs to be the same as vmerge's False, |
3645 | // since False will be used for the result's merge operand. |
3646 | if (HasTiedDest && !isImplicitDef(V: True->getOperand(Num: 0))) { |
3647 | // The vmerge instruction must be TU. |
3648 | // FIXME: This could be relaxed, but we need to handle the policy for the |
3649 | // resulting op correctly. |
3650 | if (isImplicitDef(V: Merge)) |
3651 | return false; |
3652 | SDValue MergeOpTrue = True->getOperand(Num: 0); |
3653 | if (False != MergeOpTrue) |
3654 | return false; |
3655 | } |
3656 | |
3657 | // If True is masked then the vmerge must have an all 1s mask, since we're |
3658 | // going to keep the mask from True. |
3659 | if (IsMasked) { |
3660 | assert(HasTiedDest && "Expected tied dest" ); |
3661 | // The vmerge instruction must be TU. |
3662 | if (isImplicitDef(V: Merge)) |
3663 | return false; |
3664 | // FIXME: Support mask agnostic True instruction which would have an |
3665 | // undef merge operand. |
3666 | if (Mask && !usesAllOnesMask(MaskOp: Mask, GlueOp: Glue)) |
3667 | return false; |
3668 | } |
3669 | |
3670 | // Skip if True has side effect. |
3671 | // TODO: Support vleff and vlsegff. |
3672 | if (TII->get(Opcode: TrueOpc).hasUnmodeledSideEffects()) |
3673 | return false; |
3674 | |
3675 | // The last operand of a masked instruction may be glued. |
3676 | bool HasGlueOp = True->getGluedNode() != nullptr; |
3677 | |
3678 | // The chain operand may exist either before the glued operands or in the last |
3679 | // position. |
3680 | unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1; |
3681 | bool HasChainOp = |
3682 | True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other; |
3683 | |
3684 | if (HasChainOp) { |
3685 | // Avoid creating cycles in the DAG. We must ensure that none of the other |
    // operands depend on True through its Chain.
3687 | SmallVector<const SDNode *, 4> LoopWorklist; |
3688 | SmallPtrSet<const SDNode *, 16> Visited; |
3689 | LoopWorklist.push_back(Elt: False.getNode()); |
3690 | if (Mask) |
3691 | LoopWorklist.push_back(Elt: Mask.getNode()); |
3692 | LoopWorklist.push_back(Elt: VL.getNode()); |
3693 | if (Glue) |
3694 | LoopWorklist.push_back(Elt: Glue.getNode()); |
3695 | if (SDNode::hasPredecessorHelper(N: True.getNode(), Visited, Worklist&: LoopWorklist)) |
3696 | return false; |
3697 | } |
3698 | |
  // The vector policy operand may be present for masked intrinsics.
3700 | bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags: TrueTSFlags); |
3701 | unsigned TrueVLIndex = |
3702 | True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2; |
3703 | SDValue TrueVL = True.getOperand(i: TrueVLIndex); |
3704 | SDValue SEW = True.getOperand(i: TrueVLIndex + 1); |
3705 | |
3706 | auto GetMinVL = [](SDValue LHS, SDValue RHS) { |
3707 | if (LHS == RHS) |
3708 | return LHS; |
3709 | if (isAllOnesConstant(V: LHS)) |
3710 | return RHS; |
3711 | if (isAllOnesConstant(V: RHS)) |
3712 | return LHS; |
3713 | auto *CLHS = dyn_cast<ConstantSDNode>(Val&: LHS); |
3714 | auto *CRHS = dyn_cast<ConstantSDNode>(Val&: RHS); |
3715 | if (!CLHS || !CRHS) |
3716 | return SDValue(); |
3717 | return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS; |
3718 | }; |
3719 | |
3720 | // Because N and True must have the same merge operand (or True's operand is |
3721 | // implicit_def), the "effective" body is the minimum of their VLs. |
3722 | SDValue OrigVL = VL; |
3723 | VL = GetMinVL(TrueVL, VL); |
3724 | if (!VL) |
3725 | return false; |
3726 | |
3727 | // If we end up changing the VL or mask of True, then we need to make sure it |
3728 | // doesn't raise any observable fp exceptions, since changing the active |
3729 | // elements will affect how fflags is set. |
3730 | if (TrueVL != VL || !IsMasked) |
3731 | if (mayRaiseFPException(Node: True.getNode()) && |
3732 | !True->getFlags().hasNoFPExcept()) |
3733 | return false; |
3734 | |
3735 | SDLoc DL(N); |
3736 | |
3737 | // From the preconditions we checked above, we know the mask and thus glue |
3738 | // for the result node will be taken from True. |
3739 | if (IsMasked) { |
3740 | Mask = True->getOperand(Num: Info->MaskOpIdx); |
3741 | Glue = True->getOperand(Num: True->getNumOperands() - 1); |
3742 | assert(Glue.getValueType() == MVT::Glue); |
3743 | } |
  // Otherwise, if the "vmerge" is actually a vmv.v.v (which carries no mask
  // operand), create an all-ones mask to use.
3746 | else if (IsVMv(N)) { |
3747 | unsigned TSFlags = TII->get(Opcode: N->getMachineOpcode()).TSFlags; |
3748 | unsigned VMSetOpc = GetVMSetForLMul(LMUL: RISCVII::getLMul(TSFlags)); |
3749 | ElementCount EC = N->getValueType(ResNo: 0).getVectorElementCount(); |
3750 | MVT MaskVT = MVT::getVectorVT(MVT::i1, EC); |
3751 | |
3752 | SDValue AllOnesMask = |
3753 | SDValue(CurDAG->getMachineNode(Opcode: VMSetOpc, dl: DL, VT: MaskVT, Op1: VL, Op2: SEW), 0); |
3754 | SDValue MaskCopy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, |
3755 | RISCV::V0, AllOnesMask, SDValue()); |
3756 | Mask = CurDAG->getRegister(RISCV::V0, MaskVT); |
3757 | Glue = MaskCopy.getValue(R: 1); |
3758 | } |
3759 | |
3760 | unsigned MaskedOpc = Info->MaskedPseudo; |
3761 | #ifndef NDEBUG |
3762 | const MCInstrDesc &MaskedMCID = TII->get(Opcode: MaskedOpc); |
3763 | assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) && |
3764 | "Expected instructions with mask have policy operand." ); |
3765 | assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(), |
3766 | MCOI::TIED_TO) == 0 && |
3767 | "Expected instructions with mask have a tied dest." ); |
3768 | #endif |
3769 | |
3770 | // Use a tumu policy, relaxing it to tail agnostic provided that the merge |
3771 | // operand is undefined. |
3772 | // |
3773 | // However, if the VL became smaller than what the vmerge had originally, then |
3774 | // elements past VL that were previously in the vmerge's body will have moved |
3775 | // to the tail. In that case we always need to use tail undisturbed to |
3776 | // preserve them. |
3777 | bool MergeVLShrunk = VL != OrigVL; |
3778 | uint64_t Policy = (isImplicitDef(V: Merge) && !MergeVLShrunk) |
3779 | ? RISCVII::TAIL_AGNOSTIC |
3780 | : /*TUMU*/ 0; |
3781 | SDValue PolicyOp = |
3782 | CurDAG->getTargetConstant(Val: Policy, DL, VT: Subtarget->getXLenVT()); |
3783 | |
3784 | |
3785 | SmallVector<SDValue, 8> Ops; |
3786 | Ops.push_back(Elt: False); |
3787 | |
3788 | const bool HasRoundingMode = RISCVII::hasRoundModeOp(TSFlags: TrueTSFlags); |
3789 | const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode; |
3790 | assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx); |
3791 | Ops.append(in_start: True->op_begin() + HasTiedDest, in_end: True->op_begin() + NormalOpsEnd); |
3792 | |
3793 | Ops.push_back(Elt: Mask); |
3794 | |
3795 | // For unmasked "VOp" with rounding mode operand, that is interfaces like |
3796 | // (..., rm, vl) or (..., rm, vl, policy). |
3797 | // Its masked version is (..., vm, rm, vl, policy). |
3798 | // Check the rounding mode pseudo nodes under RISCVInstrInfoVPseudos.td |
3799 | if (HasRoundingMode) |
3800 | Ops.push_back(Elt: True->getOperand(Num: TrueVLIndex - 1)); |
3801 | |
3802 | Ops.append(IL: {VL, SEW, PolicyOp}); |
3803 | |
3804 | // Result node should have chain operand of True. |
3805 | if (HasChainOp) |
3806 | Ops.push_back(Elt: True.getOperand(i: TrueChainOpIdx)); |
3807 | |
3808 | // Add the glue for the CopyToReg of mask->v0. |
3809 | Ops.push_back(Elt: Glue); |
3810 | |
3811 | MachineSDNode *Result = |
3812 | CurDAG->getMachineNode(Opcode: MaskedOpc, dl: DL, VTs: True->getVTList(), Ops); |
3813 | Result->setFlags(True->getFlags()); |
3814 | |
3815 | if (!cast<MachineSDNode>(Val&: True)->memoperands_empty()) |
3816 | CurDAG->setNodeMemRefs(N: Result, NewMemRefs: cast<MachineSDNode>(Val&: True)->memoperands()); |
3817 | |
3818 | // Replace vmerge.vvm node by Result. |
3819 | ReplaceUses(F: SDValue(N, 0), T: SDValue(Result, 0)); |
3820 | |
3821 | // Replace another value of True. E.g. chain and VL. |
3822 | for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx) |
3823 | ReplaceUses(F: True.getValue(R: Idx), T: SDValue(Result, Idx)); |
3824 | |
3825 | return true; |
3826 | } |
3827 | |
3828 | bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() { |
3829 | bool MadeChange = false; |
3830 | SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); |
3831 | |
3832 | while (Position != CurDAG->allnodes_begin()) { |
3833 | SDNode *N = &*--Position; |
3834 | if (N->use_empty() || !N->isMachineOpcode()) |
3835 | continue; |
3836 | |
3837 | if (IsVMerge(N) || IsVMv(N)) |
3838 | MadeChange |= performCombineVMergeAndVOps(N); |
3839 | } |
3840 | return MadeChange; |
3841 | } |
3842 | |
/// If our passthru is an implicit_def, use noreg instead. This sidesteps
/// issues with MachineCSE not being able to CSE expressions with
/// IMPLICIT_DEF operands while preserving the semantic intent. See
/// pr64282 for context. Note that this transform is the last one
/// performed at ISel DAG to DAG.
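/// For example (an illustrative sketch): a vector pseudo whose tied passthru
/// operand is an IMPLICIT_DEF is rebuilt with $noreg as the passthru, letting
/// MachineCSE match it against an otherwise identical node.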
3848 | bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() { |
3849 | bool MadeChange = false; |
3850 | SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); |
3851 | |
3852 | while (Position != CurDAG->allnodes_begin()) { |
3853 | SDNode *N = &*--Position; |
3854 | if (N->use_empty() || !N->isMachineOpcode()) |
3855 | continue; |
3856 | |
3857 | const unsigned Opc = N->getMachineOpcode(); |
3858 | if (!RISCVVPseudosTable::getPseudoInfo(Opc) || |
3859 | !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) || |
3860 | !isImplicitDef(N->getOperand(0))) |
3861 | continue; |
3862 | |
3863 | SmallVector<SDValue> Ops; |
3864 | Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0))); |
3865 | for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) { |
3866 | SDValue Op = N->getOperand(Num: I); |
3867 | Ops.push_back(Elt: Op); |
3868 | } |
3869 | |
3870 | MachineSDNode *Result = |
3871 | CurDAG->getMachineNode(Opcode: Opc, dl: SDLoc(N), VTs: N->getVTList(), Ops); |
3872 | Result->setFlags(N->getFlags()); |
3873 | CurDAG->setNodeMemRefs(N: Result, NewMemRefs: cast<MachineSDNode>(Val: N)->memoperands()); |
3874 | ReplaceUses(F: N, T: Result); |
3875 | MadeChange = true; |
3876 | } |
3877 | return MadeChange; |
3878 | } |
3879 | |
3880 | |
// This pass converts a legalized DAG into a RISC-V-specific DAG, ready
// for instruction scheduling.
3883 | FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM, |
3884 | CodeGenOptLevel OptLevel) { |
3885 | return new RISCVDAGToDAGISel(TM, OptLevel); |
3886 | } |
3887 | |
3888 | char RISCVDAGToDAGISel::ID = 0; |
3889 | |
3890 | INITIALIZE_PASS(RISCVDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false) |
3891 | |