1 | //===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the SelectionDAG::LegalizeVectors method. |
10 | // |
11 | // The vector legalizer looks for vector operations which might need to be |
12 | // scalarized and legalizes them. This is a separate step from Legalize because |
13 | // scalarizing can introduce illegal types. For example, suppose we have an |
14 | // ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition |
15 | // on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the |
16 | // operation, which introduces nodes with the illegal type i64 which must be |
17 | // expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC; |
18 | // the operation must be unrolled, which introduces nodes with the illegal |
19 | // type i8 which must be promoted. |
20 | // |
21 | // This does not legalize vector manipulations like ISD::BUILD_VECTOR, |
22 | // or operations that happen to take a vector which are custom-lowered; |
23 | // the legalization for such operations never produces nodes |
24 | // with illegal types, so it's okay to put off legalizing them until |
25 | // SelectionDAG::Legalize runs. |
26 | // |
27 | //===----------------------------------------------------------------------===// |
28 | |
29 | #include "llvm/ADT/DenseMap.h" |
30 | #include "llvm/ADT/SmallVector.h" |
31 | #include "llvm/CodeGen/ISDOpcodes.h" |
32 | #include "llvm/CodeGen/SelectionDAG.h" |
33 | #include "llvm/CodeGen/SelectionDAGNodes.h" |
34 | #include "llvm/CodeGen/TargetLowering.h" |
35 | #include "llvm/CodeGen/ValueTypes.h" |
36 | #include "llvm/CodeGenTypes/MachineValueType.h" |
37 | #include "llvm/IR/DataLayout.h" |
38 | #include "llvm/Support/Casting.h" |
39 | #include "llvm/Support/Compiler.h" |
40 | #include "llvm/Support/Debug.h" |
41 | #include "llvm/Support/ErrorHandling.h" |
42 | #include <cassert> |
43 | #include <cstdint> |
44 | #include <iterator> |
45 | #include <utility> |
46 | |
47 | using namespace llvm; |
48 | |
49 | #define DEBUG_TYPE "legalizevectorops" |
50 | |
51 | namespace { |
52 | |
/// Drives the vector-operation legalization pass over a SelectionDAG.
///
/// Each node is legalized after its operands; the value that replaces a node
/// is memoized in LegalizedNodes so that repeated requests return the same
/// replacement.
53 | class VectorLegalizer { |
/// The DAG being legalized.
54 | SelectionDAG& DAG; |
/// Lowering information obtained from the DAG in the constructor.
55 | const TargetLowering &TLI; |
56 | bool Changed = false; // Keep track of whether anything changed |
57 | |
58 | /// For nodes that are of legal width, and that have more than one use, this |
59 | /// map indicates what regularized operand to use. This allows us to avoid |
60 | /// legalizing the same thing more than once. |
61 | SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes; |
62 | |
63 | /// Adds a node to the translation cache. |
64 | void AddLegalizedOperand(SDValue From, SDValue To) { |
65 | LegalizedNodes.insert(KV: std::make_pair(x&: From, y&: To)); |
66 | // If someone requests legalization of the new node, return itself. |
67 | if (From != To) |
68 | LegalizedNodes.insert(KV: std::make_pair(x&: To, y&: To)); |
69 | } |
70 | |
71 | /// Legalizes the given node. |
72 | SDValue LegalizeOp(SDValue Op); |
73 | |
74 | /// Assuming the node is legal, "legalize" the results. |
75 | SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result); |
76 | |
77 | /// Make sure Results are legal and update the translation cache. |
78 | SDValue RecursivelyLegalizeResults(SDValue Op, |
79 | MutableArrayRef<SDValue> Results); |
80 | |
81 | /// Wrapper to interface LowerOperation with a vector of Results. |
82 | /// Returns false if the target wants to use default expansion. Otherwise |
83 | /// returns true. If return is true and the Results are empty, then the |
84 | /// target wants to keep the input node as is. |
85 | bool LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results); |
86 | |
87 | /// Implements unrolling a VSETCC. |
88 | SDValue UnrollVSETCC(SDNode *Node); |
89 | |
90 | /// Implement expand-based legalization of vector operations. |
91 | /// |
92 | /// This is just a high-level routine to dispatch to specific code paths for |
93 | /// operations to legalize them. |
94 | void Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
95 | |
96 | /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if |
97 | /// FP_TO_SINT isn't legal. |
98 | void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
99 | |
100 | /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if |
101 | /// SINT_TO_FLOAT and SHR on vectors isn't legal. |
102 | void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
103 | |
104 | /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. |
105 | SDValue ExpandSEXTINREG(SDNode *Node); |
106 | |
107 | /// Implement expansion for ANY_EXTEND_VECTOR_INREG. |
108 | /// |
109 | /// Shuffles the low lanes of the operand into place and bitcasts to the proper |
110 | /// type. The contents of the bits in the extended part of each element are |
111 | /// undef. |
112 | SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node); |
113 | |
114 | /// Implement expansion for SIGN_EXTEND_VECTOR_INREG. |
115 | /// |
116 | /// Shuffles the low lanes of the operand into place, bitcasts to the proper |
117 | /// type, then shifts left and arithmetic shifts right to introduce a sign |
118 | /// extension. |
119 | SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node); |
120 | |
121 | /// Implement expansion for ZERO_EXTEND_VECTOR_INREG. |
122 | /// |
123 | /// Shuffles the low lanes of the operand into place and blends zeros into |
124 | /// the remaining lanes, finally bitcasting to the proper type. |
125 | SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node); |
126 | |
127 | /// Expand bswap of vectors into a shuffle if legal. |
128 | SDValue ExpandBSWAP(SDNode *Node); |
129 | |
130 | /// Implement vselect in terms of XOR, AND, OR when blend is not |
131 | /// supported by the target. |
132 | SDValue ExpandVSELECT(SDNode *Node); |
// NOTE(review): the Expand* helpers from here through ExpandREM carry no
// per-member documentation; presumably each one expands the opcode named in
// its identifier -- confirm against the definitions.
133 | SDValue ExpandVP_SELECT(SDNode *Node); |
134 | SDValue ExpandVP_MERGE(SDNode *Node); |
135 | SDValue ExpandVP_REM(SDNode *Node); |
136 | SDValue ExpandSELECT(SDNode *Node); |
137 | std::pair<SDValue, SDValue> ExpandLoad(SDNode *N); |
138 | SDValue ExpandStore(SDNode *N); |
139 | SDValue ExpandFNEG(SDNode *Node); |
140 | void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
141 | void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
142 | void ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
143 | void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
144 | void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
145 | void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
146 | void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
147 | void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
148 | void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
149 | |
150 | void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
151 | |
152 | /// Implements vector promotion. |
153 | /// |
154 | /// This is essentially just bitcasting the operands to a different type and |
155 | /// bitcasting the result back to the original type. |
156 | void Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
157 | |
158 | /// Implements [SU]INT_TO_FP vector promotion. |
159 | /// |
160 | /// This is a [zs]ext of the input operand to a larger integer type. |
161 | void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
162 | |
163 | /// Implements FP_TO_[SU]INT vector promotion of the result type. |
164 | /// |
165 | /// It is promoted to a larger integer type. The result is then |
166 | /// truncated back to the original type. |
167 | void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
168 | |
169 | /// Implements vector reduce operation promotion. |
170 | /// |
171 | /// All vector operands are promoted to a vector type with larger element |
172 | /// type, and the start value is promoted to a larger scalar type. Then the |
173 | /// result is truncated back to the original scalar type. |
174 | void PromoteReduction(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
175 | |
176 | /// Implements vector setcc operation promotion. |
177 | /// |
178 | /// All vector operands are promoted to a vector type with larger element |
179 | /// type. |
180 | void PromoteSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
181 | |
/// Implements promotion of strict floating-point vector operations.
///
/// Vector operands are widened with STRICT_FP_EXTEND, the operation is
/// re-issued on the promoted type, and the result is narrowed back with
/// STRICT_FP_ROUND.
182 | void PromoteSTRICT(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
183 | |
184 | public: |
/// Binds the legalizer to \p dag and caches its TargetLowering.
185 | VectorLegalizer(SelectionDAG& dag) : |
186 | DAG(dag), TLI(dag.getTargetLoweringInfo()) {} |
187 | |
188 | /// Begin legalizing the vector operations in the DAG. |
/// Returns true if any node was changed.
189 | bool Run(); |
190 | }; |
191 | |
192 | } // end anonymous namespace |
193 | |
194 | bool VectorLegalizer::Run() { |
195 | // Before we start legalizing vector nodes, check if there are any vectors. |
196 | bool HasVectors = false; |
197 | for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), |
198 | E = std::prev(x: DAG.allnodes_end()); I != std::next(x: E); ++I) { |
199 | // Check if the values of the nodes contain vectors. We don't need to check |
200 | // the operands because we are going to check their values at some point. |
201 | HasVectors = llvm::any_of(Range: I->values(), P: [](EVT T) { return T.isVector(); }); |
202 | |
203 | // If we found a vector node we can start the legalization. |
204 | if (HasVectors) |
205 | break; |
206 | } |
207 | |
208 | // If this basic block has no vectors then no need to legalize vectors. |
209 | if (!HasVectors) |
210 | return false; |
211 | |
212 | // The legalize process is inherently a bottom-up recursive process (users |
213 | // legalize their uses before themselves). Given infinite stack space, we |
214 | // could just start legalizing on the root and traverse the whole graph. In |
215 | // practice however, this causes us to run out of stack space on large basic |
216 | // blocks. To avoid this problem, compute an ordering of the nodes where each |
217 | // node is only legalized after all of its operands are legalized. |
218 | DAG.AssignTopologicalOrder(); |
219 | for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), |
220 | E = std::prev(x: DAG.allnodes_end()); I != std::next(x: E); ++I) |
221 | LegalizeOp(Op: SDValue(&*I, 0)); |
222 | |
223 | // Finally, it's possible the root changed. Get the new root. |
224 | SDValue OldRoot = DAG.getRoot(); |
225 | assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?" ); |
226 | DAG.setRoot(LegalizedNodes[OldRoot]); |
227 | |
228 | LegalizedNodes.clear(); |
229 | |
230 | // Remove dead nodes now. |
231 | DAG.RemoveDeadNodes(); |
232 | |
233 | return Changed; |
234 | } |
235 | |
236 | SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) { |
237 | assert(Op->getNumValues() == Result->getNumValues() && |
238 | "Unexpected number of results" ); |
239 | // Generic legalization: just pass the operand through. |
240 | for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i) |
241 | AddLegalizedOperand(From: Op.getValue(R: i), To: SDValue(Result, i)); |
242 | return SDValue(Result, Op.getResNo()); |
243 | } |
244 | |
245 | SDValue |
246 | VectorLegalizer::RecursivelyLegalizeResults(SDValue Op, |
247 | MutableArrayRef<SDValue> Results) { |
248 | assert(Results.size() == Op->getNumValues() && |
249 | "Unexpected number of results" ); |
250 | // Make sure that the generated code is itself legal. |
251 | for (unsigned i = 0, e = Results.size(); i != e; ++i) { |
252 | Results[i] = LegalizeOp(Op: Results[i]); |
253 | AddLegalizedOperand(From: Op.getValue(R: i), To: Results[i]); |
254 | } |
255 | |
256 | return Results[Op.getResNo()]; |
257 | } |
258 | |
// Legalizes the node that defines Op and returns the value that replaces Op.
//
// The result is memoized in LegalizedNodes. Operands are legalized first
// (recursively); then the target's action for the opcode is looked up and the
// node is promoted, custom-lowered, expanded, or kept as is.
259 | SDValue VectorLegalizer::LegalizeOp(SDValue Op) { |
260 | // Note that LegalizeOp may be reentered even from single-use nodes, which |
261 | // means that we always must cache transformed nodes. |
262 | DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Val: Op); |
263 | if (I != LegalizedNodes.end()) return I->second; |
264 | |
265 | // Legalize the operands |
266 | SmallVector<SDValue, 8> Ops; |
267 | for (const SDValue &Oper : Op->op_values()) |
268 | Ops.push_back(Elt: LegalizeOp(Op: Oper)); |
269 | |
// Point the node at the legalized operands; the node returned here is the one
// used for the rest of this function.
270 | SDNode *Node = DAG.UpdateNodeOperands(N: Op.getNode(), Ops); |
271 | |
// Nodes that neither produce nor consume a vector are passed through as is.
272 | bool HasVectorValueOrOp = |
273 | llvm::any_of(Range: Node->values(), P: [](EVT T) { return T.isVector(); }) || |
274 | llvm::any_of(Range: Node->op_values(), |
275 | P: [](SDValue O) { return O.getValueType().isVector(); }); |
276 | if (!HasVectorValueOrOp) |
277 | return TranslateLegalizeResults(Op, Result: Node); |
278 | |
// Determine which action the target requests for this opcode. Opcodes not
// listed in the switch are passed through unchanged.
279 | TargetLowering::LegalizeAction Action = TargetLowering::Legal; |
280 | EVT ValVT; |
281 | switch (Op.getOpcode()) { |
282 | default: |
283 | return TranslateLegalizeResults(Op, Result: Node); |
284 | case ISD::LOAD: { |
285 | LoadSDNode *LD = cast<LoadSDNode>(Val: Node); |
286 | ISD::LoadExtType ExtType = LD->getExtensionType(); |
287 | EVT LoadedVT = LD->getMemoryVT(); |
// Only extending loads with a vector memory type are queried; any other load
// keeps the default Legal action.
288 | if (LoadedVT.isVector() && ExtType != ISD::NON_EXTLOAD) |
289 | Action = TLI.getLoadExtAction(ExtType, ValVT: LD->getValueType(ResNo: 0), MemVT: LoadedVT); |
290 | break; |
291 | } |
292 | case ISD::STORE: { |
293 | StoreSDNode *ST = cast<StoreSDNode>(Val: Node); |
294 | EVT StVT = ST->getMemoryVT(); |
// Note: this ValVT is local to the STORE case and shadows the function-level
// ValVT declared above the switch.
295 | MVT ValVT = ST->getValue().getSimpleValueType(); |
// Only truncating stores with a vector memory type are queried.
296 | if (StVT.isVector() && ST->isTruncatingStore()) |
297 | Action = TLI.getTruncStoreAction(ValVT, MemVT: StVT); |
298 | break; |
299 | } |
300 | case ISD::MERGE_VALUES: |
301 | Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: Node->getValueType(ResNo: 0)); |
302 | // This operation lies about being legal: when it claims to be legal, |
303 | // it should actually be expanded. |
304 | if (Action == TargetLowering::Legal) |
305 | Action = TargetLowering::Expand; |
306 | break; |
// The macro below turns every entry of ConstrainedOps.def into a
// `case ISD::STRICT_<op>:` label, so all of them share the code that follows.
307 | #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ |
308 | case ISD::STRICT_##DAGN: |
309 | #include "llvm/IR/ConstrainedOps.def" |
310 | ValVT = Node->getValueType(ResNo: 0); |
// Strict int-to-FP conversions are keyed on the type of operand 1 instead of
// the result type.
311 | if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP || |
312 | Op.getOpcode() == ISD::STRICT_UINT_TO_FP) |
313 | ValVT = Node->getOperand(Num: 1).getValueType(); |
// Strict FP compares first consult the condition-code action (condition code
// is operand 3) and only then the operation action, both on operand 1's type.
314 | if (Op.getOpcode() == ISD::STRICT_FSETCC || |
315 | Op.getOpcode() == ISD::STRICT_FSETCCS) { |
316 | MVT OpVT = Node->getOperand(Num: 1).getSimpleValueType(); |
317 | ISD::CondCode CCCode = cast<CondCodeSDNode>(Val: Node->getOperand(Num: 3))->get(); |
318 | Action = TLI.getCondCodeAction(CC: CCCode, VT: OpVT); |
319 | if (Action == TargetLowering::Legal) |
320 | Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: OpVT); |
321 | } else { |
322 | Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: ValVT); |
323 | } |
324 | // If we're asked to expand a strict vector floating-point operation, |
325 | // by default we're going to simply unroll it. That is usually the |
326 | // best approach, except in the case where the resulting strict (scalar) |
327 | // operations would themselves use the fallback mutation to non-strict. |
328 | // In that specific case, just do the fallback on the vector op. |
329 | if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() && |
330 | TLI.getStrictFPOperationAction(Op: Node->getOpcode(), VT: ValVT) == |
331 | TargetLowering::Legal) { |
332 | EVT EltVT = ValVT.getVectorElementType(); |
333 | if (TLI.getOperationAction(Op: Node->getOpcode(), VT: EltVT) |
334 | == TargetLowering::Expand && |
335 | TLI.getStrictFPOperationAction(Op: Node->getOpcode(), VT: EltVT) |
336 | == TargetLowering::Legal) |
337 | Action = TargetLowering::Legal; |
338 | } |
339 | break; |
// The opcodes below are all keyed on the type of result 0.
340 | case ISD::ADD: |
341 | case ISD::SUB: |
342 | case ISD::MUL: |
343 | case ISD::MULHS: |
344 | case ISD::MULHU: |
345 | case ISD::SDIV: |
346 | case ISD::UDIV: |
347 | case ISD::SREM: |
348 | case ISD::UREM: |
349 | case ISD::SDIVREM: |
350 | case ISD::UDIVREM: |
351 | case ISD::FADD: |
352 | case ISD::FSUB: |
353 | case ISD::FMUL: |
354 | case ISD::FDIV: |
355 | case ISD::FREM: |
356 | case ISD::AND: |
357 | case ISD::OR: |
358 | case ISD::XOR: |
359 | case ISD::SHL: |
360 | case ISD::SRA: |
361 | case ISD::SRL: |
362 | case ISD::FSHL: |
363 | case ISD::FSHR: |
364 | case ISD::ROTL: |
365 | case ISD::ROTR: |
366 | case ISD::ABS: |
367 | case ISD::BSWAP: |
368 | case ISD::BITREVERSE: |
369 | case ISD::CTLZ: |
370 | case ISD::CTTZ: |
371 | case ISD::CTLZ_ZERO_UNDEF: |
372 | case ISD::CTTZ_ZERO_UNDEF: |
373 | case ISD::CTPOP: |
374 | case ISD::SELECT: |
375 | case ISD::VSELECT: |
376 | case ISD::SELECT_CC: |
377 | case ISD::ZERO_EXTEND: |
378 | case ISD::ANY_EXTEND: |
379 | case ISD::TRUNCATE: |
380 | case ISD::SIGN_EXTEND: |
381 | case ISD::FP_TO_SINT: |
382 | case ISD::FP_TO_UINT: |
383 | case ISD::FNEG: |
384 | case ISD::FABS: |
385 | case ISD::FMINNUM: |
386 | case ISD::FMAXNUM: |
387 | case ISD::FMINNUM_IEEE: |
388 | case ISD::FMAXNUM_IEEE: |
389 | case ISD::FMINIMUM: |
390 | case ISD::FMAXIMUM: |
391 | case ISD::FCOPYSIGN: |
392 | case ISD::FSQRT: |
393 | case ISD::FSIN: |
394 | case ISD::FCOS: |
395 | case ISD::FLDEXP: |
396 | case ISD::FPOWI: |
397 | case ISD::FPOW: |
398 | case ISD::FLOG: |
399 | case ISD::FLOG2: |
400 | case ISD::FLOG10: |
401 | case ISD::FEXP: |
402 | case ISD::FEXP2: |
403 | case ISD::FEXP10: |
404 | case ISD::FCEIL: |
405 | case ISD::FTRUNC: |
406 | case ISD::FRINT: |
407 | case ISD::LRINT: |
408 | case ISD::LLRINT: |
409 | case ISD::FNEARBYINT: |
410 | case ISD::FROUND: |
411 | case ISD::FROUNDEVEN: |
412 | case ISD::FFLOOR: |
413 | case ISD::FP_ROUND: |
414 | case ISD::FP_EXTEND: |
415 | case ISD::FMA: |
416 | case ISD::SIGN_EXTEND_INREG: |
417 | case ISD::ANY_EXTEND_VECTOR_INREG: |
418 | case ISD::SIGN_EXTEND_VECTOR_INREG: |
419 | case ISD::ZERO_EXTEND_VECTOR_INREG: |
420 | case ISD::SMIN: |
421 | case ISD::SMAX: |
422 | case ISD::UMIN: |
423 | case ISD::UMAX: |
424 | case ISD::SMUL_LOHI: |
425 | case ISD::UMUL_LOHI: |
426 | case ISD::SADDO: |
427 | case ISD::UADDO: |
428 | case ISD::SSUBO: |
429 | case ISD::USUBO: |
430 | case ISD::SMULO: |
431 | case ISD::UMULO: |
432 | case ISD::FCANONICALIZE: |
433 | case ISD::FFREXP: |
434 | case ISD::SADDSAT: |
435 | case ISD::UADDSAT: |
436 | case ISD::SSUBSAT: |
437 | case ISD::USUBSAT: |
438 | case ISD::SSHLSAT: |
439 | case ISD::USHLSAT: |
440 | case ISD::FP_TO_SINT_SAT: |
441 | case ISD::FP_TO_UINT_SAT: |
442 | case ISD::MGATHER: |
443 | Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: Node->getValueType(ResNo: 0)); |
444 | break; |
// Fixed-point operations: the query additionally takes the scale, read as the
// constant in operand 2.
445 | case ISD::SMULFIX: |
446 | case ISD::SMULFIXSAT: |
447 | case ISD::UMULFIX: |
448 | case ISD::UMULFIXSAT: |
449 | case ISD::SDIVFIX: |
450 | case ISD::SDIVFIXSAT: |
451 | case ISD::UDIVFIX: |
452 | case ISD::UDIVFIXSAT: { |
453 | unsigned Scale = Node->getConstantOperandVal(Num: 2); |
454 | Action = TLI.getFixedPointOperationAction(Op: Node->getOpcode(), |
455 | VT: Node->getValueType(ResNo: 0), Scale); |
456 | break; |
457 | } |
// These opcodes are keyed on the type of operand 0 rather than the result.
458 | case ISD::SINT_TO_FP: |
459 | case ISD::UINT_TO_FP: |
460 | case ISD::VECREDUCE_ADD: |
461 | case ISD::VECREDUCE_MUL: |
462 | case ISD::VECREDUCE_AND: |
463 | case ISD::VECREDUCE_OR: |
464 | case ISD::VECREDUCE_XOR: |
465 | case ISD::VECREDUCE_SMAX: |
466 | case ISD::VECREDUCE_SMIN: |
467 | case ISD::VECREDUCE_UMAX: |
468 | case ISD::VECREDUCE_UMIN: |
469 | case ISD::VECREDUCE_FADD: |
470 | case ISD::VECREDUCE_FMUL: |
471 | case ISD::VECREDUCE_FMAX: |
472 | case ISD::VECREDUCE_FMIN: |
473 | case ISD::VECREDUCE_FMAXIMUM: |
474 | case ISD::VECREDUCE_FMINIMUM: |
475 | Action = TLI.getOperationAction(Op: Node->getOpcode(), |
476 | VT: Node->getOperand(Num: 0).getValueType()); |
477 | break; |
// Sequential reductions are keyed on the type of operand 1.
478 | case ISD::VECREDUCE_SEQ_FADD: |
479 | case ISD::VECREDUCE_SEQ_FMUL: |
480 | Action = TLI.getOperationAction(Op: Node->getOpcode(), |
481 | VT: Node->getOperand(Num: 1).getValueType()); |
482 | break; |
// SETCC: consult the condition-code action first (condition code is operand
// 2); the operation action is only queried if the condition code is legal.
483 | case ISD::SETCC: { |
484 | MVT OpVT = Node->getOperand(Num: 0).getSimpleValueType(); |
485 | ISD::CondCode CCCode = cast<CondCodeSDNode>(Val: Node->getOperand(Num: 2))->get(); |
486 | Action = TLI.getCondCodeAction(CC: CCCode, VT: OpVT); |
487 | if (Action == TargetLowering::Legal) |
488 | Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: OpVT); |
489 | break; |
490 | } |
491 | |
// Generates one case per VP opcode registered in VPIntrinsics.def. LEGALPOS
// selects whose type keys the query: a negative value picks result number
// -(1 + LEGALPOS), a non-negative value picks that operand.
492 | #define BEGIN_REGISTER_VP_SDNODE(VPID, LEGALPOS, ...) \ |
493 | case ISD::VPID: { \ |
494 | EVT LegalizeVT = LEGALPOS < 0 ? Node->getValueType(-(1 + LEGALPOS)) \ |
495 | : Node->getOperand(LEGALPOS).getValueType(); \ |
496 | if (ISD::VPID == ISD::VP_SETCC) { \ |
497 | ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); \ |
498 | Action = TLI.getCondCodeAction(CCCode, LegalizeVT.getSimpleVT()); \ |
499 | if (Action != TargetLowering::Legal) \ |
500 | break; \ |
501 | } \ |
502 | Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT); \ |
503 | } break; |
504 | #include "llvm/IR/VPIntrinsics.def" |
505 | } |
506 | |
507 | LLVM_DEBUG(dbgs() << "\nLegalizing vector op: " ; Node->dump(&DAG)); |
508 | |
// Apply the chosen action. ResultVals stays empty when the node is kept.
509 | SmallVector<SDValue, 8> ResultVals; |
510 | switch (Action) { |
511 | default: llvm_unreachable("This action is not supported yet!" ); |
512 | case TargetLowering::Promote: |
513 | assert((Op.getOpcode() != ISD::LOAD && Op.getOpcode() != ISD::STORE) && |
514 | "This action is not supported yet!" ); |
515 | LLVM_DEBUG(dbgs() << "Promoting\n" ); |
516 | Promote(Node, Results&: ResultVals); |
517 | assert(!ResultVals.empty() && "No results for promotion?" ); |
518 | break; |
519 | case TargetLowering::Legal: |
520 | LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n" ); |
521 | break; |
522 | case TargetLowering::Custom: |
523 | LLVM_DEBUG(dbgs() << "Trying custom legalization\n" ); |
// A false return means the target declined; fall through to Expand.
524 | if (LowerOperationWrapper(N: Node, Results&: ResultVals)) |
525 | break; |
526 | LLVM_DEBUG(dbgs() << "Could not custom legalize node\n" ); |
527 | [[fallthrough]]; |
528 | case TargetLowering::Expand: |
529 | LLVM_DEBUG(dbgs() << "Expanding\n" ); |
530 | Expand(Node, Results&: ResultVals); |
531 | break; |
532 | } |
533 | |
// No replacement values: keep Node and record it as the legalized form of Op.
534 | if (ResultVals.empty()) |
535 | return TranslateLegalizeResults(Op, Result: Node); |
536 | |
// Otherwise the node was replaced; legalize the replacement values as well.
537 | Changed = true; |
538 | return RecursivelyLegalizeResults(Op, Results: ResultVals); |
539 | } |
540 | |
541 | // FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we |
542 | // merge them somehow? |
543 | bool VectorLegalizer::LowerOperationWrapper(SDNode *Node, |
544 | SmallVectorImpl<SDValue> &Results) { |
545 | SDValue Res = TLI.LowerOperation(Op: SDValue(Node, 0), DAG); |
546 | |
547 | if (!Res.getNode()) |
548 | return false; |
549 | |
550 | if (Res == SDValue(Node, 0)) |
551 | return true; |
552 | |
553 | // If the original node has one result, take the return value from |
554 | // LowerOperation as is. It might not be result number 0. |
555 | if (Node->getNumValues() == 1) { |
556 | Results.push_back(Elt: Res); |
557 | return true; |
558 | } |
559 | |
560 | // If the original node has multiple results, then the return node should |
561 | // have the same number of results. |
562 | assert((Node->getNumValues() == Res->getNumValues()) && |
563 | "Lowering returned the wrong number of results!" ); |
564 | |
565 | // Places new result values base on N result number. |
566 | for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I) |
567 | Results.push_back(Elt: Res.getValue(R: I)); |
568 | |
569 | return true; |
570 | } |
571 | |
572 | void VectorLegalizer::PromoteReduction(SDNode *Node, |
573 | SmallVectorImpl<SDValue> &Results) { |
574 | MVT VecVT = Node->getOperand(Num: 1).getSimpleValueType(); |
575 | MVT NewVecVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: VecVT); |
576 | MVT ScalarVT = Node->getSimpleValueType(ResNo: 0); |
577 | MVT NewScalarVT = NewVecVT.getVectorElementType(); |
578 | |
579 | SDLoc DL(Node); |
580 | SmallVector<SDValue, 4> Operands(Node->getNumOperands()); |
581 | |
582 | // promote the initial value. |
583 | if (Node->getOperand(Num: 0).getValueType().isFloatingPoint()) |
584 | Operands[0] = |
585 | DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: NewScalarVT, Operand: Node->getOperand(Num: 0)); |
586 | else |
587 | Operands[0] = |
588 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: NewScalarVT, Operand: Node->getOperand(Num: 0)); |
589 | |
590 | for (unsigned j = 1; j != Node->getNumOperands(); ++j) |
591 | if (Node->getOperand(Num: j).getValueType().isVector() && |
592 | !(ISD::isVPOpcode(Opcode: Node->getOpcode()) && |
593 | ISD::getVPMaskIdx(Opcode: Node->getOpcode()) == j)) // Skip mask operand. |
594 | // promote the vector operand. |
595 | if (Node->getOperand(Num: j).getValueType().isFloatingPoint()) |
596 | Operands[j] = |
597 | DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: NewVecVT, Operand: Node->getOperand(Num: j)); |
598 | else |
599 | Operands[j] = |
600 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: NewVecVT, Operand: Node->getOperand(Num: j)); |
601 | else |
602 | Operands[j] = Node->getOperand(Num: j); // Skip VL operand. |
603 | |
604 | SDValue Res = DAG.getNode(Opcode: Node->getOpcode(), DL, VT: NewScalarVT, Ops: Operands, |
605 | Flags: Node->getFlags()); |
606 | |
607 | if (ScalarVT.isFloatingPoint()) |
608 | Res = DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: ScalarVT, N1: Res, |
609 | N2: DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true)); |
610 | else |
611 | Res = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: ScalarVT, Operand: Res); |
612 | |
613 | Results.push_back(Elt: Res); |
614 | } |
615 | |
616 | void VectorLegalizer::PromoteSETCC(SDNode *Node, |
617 | SmallVectorImpl<SDValue> &Results) { |
618 | MVT VecVT = Node->getOperand(Num: 0).getSimpleValueType(); |
619 | MVT NewVecVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: VecVT); |
620 | |
621 | unsigned ExtOp = VecVT.isFloatingPoint() ? ISD::FP_EXTEND : ISD::ANY_EXTEND; |
622 | |
623 | SDLoc DL(Node); |
624 | SmallVector<SDValue, 5> Operands(Node->getNumOperands()); |
625 | |
626 | Operands[0] = DAG.getNode(Opcode: ExtOp, DL, VT: NewVecVT, Operand: Node->getOperand(Num: 0)); |
627 | Operands[1] = DAG.getNode(Opcode: ExtOp, DL, VT: NewVecVT, Operand: Node->getOperand(Num: 1)); |
628 | Operands[2] = Node->getOperand(Num: 2); |
629 | |
630 | if (Node->getOpcode() == ISD::VP_SETCC) { |
631 | Operands[3] = Node->getOperand(Num: 3); // mask |
632 | Operands[4] = Node->getOperand(Num: 4); // evl |
633 | } |
634 | |
635 | SDValue Res = DAG.getNode(Opcode: Node->getOpcode(), DL, VT: Node->getSimpleValueType(ResNo: 0), |
636 | Ops: Operands, Flags: Node->getFlags()); |
637 | |
638 | Results.push_back(Elt: Res); |
639 | } |
640 | |
641 | void VectorLegalizer::PromoteSTRICT(SDNode *Node, |
642 | SmallVectorImpl<SDValue> &Results) { |
643 | MVT VecVT = Node->getOperand(Num: 1).getSimpleValueType(); |
644 | MVT NewVecVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: VecVT); |
645 | |
646 | assert(VecVT.isFloatingPoint()); |
647 | |
648 | SDLoc DL(Node); |
649 | SmallVector<SDValue, 5> Operands(Node->getNumOperands()); |
650 | SmallVector<SDValue, 2> Chains; |
651 | |
652 | for (unsigned j = 1; j != Node->getNumOperands(); ++j) |
653 | if (Node->getOperand(Num: j).getValueType().isVector() && |
654 | !(ISD::isVPOpcode(Opcode: Node->getOpcode()) && |
655 | ISD::getVPMaskIdx(Opcode: Node->getOpcode()) == j)) // Skip mask operand. |
656 | { |
657 | // promote the vector operand. |
658 | SDValue Ext = |
659 | DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {NewVecVT, MVT::Other}, |
660 | {Node->getOperand(0), Node->getOperand(j)}); |
661 | Operands[j] = Ext.getValue(R: 0); |
662 | Chains.push_back(Elt: Ext.getValue(R: 1)); |
663 | } else |
664 | Operands[j] = Node->getOperand(Num: j); // Skip no vector operand. |
665 | |
666 | SDVTList VTs = DAG.getVTList(VT1: NewVecVT, VT2: Node->getValueType(ResNo: 1)); |
667 | |
668 | Operands[0] = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); |
669 | |
670 | SDValue Res = |
671 | DAG.getNode(Opcode: Node->getOpcode(), DL, VTList: VTs, Ops: Operands, Flags: Node->getFlags()); |
672 | |
673 | SDValue Round = |
674 | DAG.getNode(ISD::STRICT_FP_ROUND, DL, {VecVT, MVT::Other}, |
675 | {Res.getValue(1), Res.getValue(0), |
676 | DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)}); |
677 | |
678 | Results.push_back(Elt: Round.getValue(R: 0)); |
679 | Results.push_back(Elt: Round.getValue(R: 1)); |
680 | } |
681 | |
682 | void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) { |
683 | // For a few operations there is a specific concept for promotion based on |
684 | // the operand's type. |
685 | switch (Node->getOpcode()) { |
686 | case ISD::SINT_TO_FP: |
687 | case ISD::UINT_TO_FP: |
688 | case ISD::STRICT_SINT_TO_FP: |
689 | case ISD::STRICT_UINT_TO_FP: |
690 | // "Promote" the operation by extending the operand. |
691 | PromoteINT_TO_FP(Node, Results); |
692 | return; |
693 | case ISD::FP_TO_UINT: |
694 | case ISD::FP_TO_SINT: |
695 | case ISD::STRICT_FP_TO_UINT: |
696 | case ISD::STRICT_FP_TO_SINT: |
697 | // Promote the operation by extending the operand. |
698 | PromoteFP_TO_INT(Node, Results); |
699 | return; |
700 | case ISD::VP_REDUCE_ADD: |
701 | case ISD::VP_REDUCE_MUL: |
702 | case ISD::VP_REDUCE_AND: |
703 | case ISD::VP_REDUCE_OR: |
704 | case ISD::VP_REDUCE_XOR: |
705 | case ISD::VP_REDUCE_SMAX: |
706 | case ISD::VP_REDUCE_SMIN: |
707 | case ISD::VP_REDUCE_UMAX: |
708 | case ISD::VP_REDUCE_UMIN: |
709 | case ISD::VP_REDUCE_FADD: |
710 | case ISD::VP_REDUCE_FMUL: |
711 | case ISD::VP_REDUCE_FMAX: |
712 | case ISD::VP_REDUCE_FMIN: |
713 | case ISD::VP_REDUCE_SEQ_FADD: |
714 | // Promote the operation by extending the operand. |
715 | PromoteReduction(Node, Results); |
716 | return; |
717 | case ISD::VP_SETCC: |
718 | case ISD::SETCC: |
719 | // Promote the operation by extending the operand. |
720 | PromoteSETCC(Node, Results); |
721 | return; |
722 | case ISD::STRICT_FADD: |
723 | case ISD::STRICT_FSUB: |
724 | case ISD::STRICT_FMUL: |
725 | case ISD::STRICT_FDIV: |
726 | case ISD::STRICT_FSQRT: |
727 | case ISD::STRICT_FMA: |
728 | PromoteSTRICT(Node, Results); |
729 | return; |
730 | case ISD::FP_ROUND: |
731 | case ISD::FP_EXTEND: |
732 | // These operations are used to do promotion so they can't be promoted |
733 | // themselves. |
734 | llvm_unreachable("Don't know how to promote this operation!" ); |
735 | } |
736 | |
737 | // There are currently two cases of vector promotion: |
738 | // 1) Bitcasting a vector of integers to a different type to a vector of the |
739 | // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64. |
740 | // 2) Extending a vector of floats to a vector of the same number of larger |
741 | // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32. |
742 | assert(Node->getNumValues() == 1 && |
743 | "Can't promote a vector with multiple results!" ); |
744 | MVT VT = Node->getSimpleValueType(ResNo: 0); |
745 | MVT NVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT); |
746 | SDLoc dl(Node); |
747 | SmallVector<SDValue, 4> Operands(Node->getNumOperands()); |
748 | |
749 | for (unsigned j = 0; j != Node->getNumOperands(); ++j) { |
750 | // Do not promote the mask operand of a VP OP. |
751 | bool SkipPromote = ISD::isVPOpcode(Opcode: Node->getOpcode()) && |
752 | ISD::getVPMaskIdx(Opcode: Node->getOpcode()) == j; |
753 | if (Node->getOperand(Num: j).getValueType().isVector() && !SkipPromote) |
754 | if (Node->getOperand(Num: j) |
755 | .getValueType() |
756 | .getVectorElementType() |
757 | .isFloatingPoint() && |
758 | NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()) |
759 | Operands[j] = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: NVT, Operand: Node->getOperand(Num: j)); |
760 | else |
761 | Operands[j] = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: NVT, Operand: Node->getOperand(Num: j)); |
762 | else |
763 | Operands[j] = Node->getOperand(Num: j); |
764 | } |
765 | |
766 | SDValue Res = |
767 | DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VT: NVT, Ops: Operands, Flags: Node->getFlags()); |
768 | |
769 | if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) || |
770 | (VT.isVector() && VT.getVectorElementType().isFloatingPoint() && |
771 | NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())) |
772 | Res = DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT, N1: Res, |
773 | N2: DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true)); |
774 | else |
775 | Res = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: Res); |
776 | |
777 | Results.push_back(Elt: Res); |
778 | } |
779 | |
780 | void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node, |
781 | SmallVectorImpl<SDValue> &Results) { |
782 | // INT_TO_FP operations may require the input operand be promoted even |
783 | // when the type is otherwise legal. |
784 | bool IsStrict = Node->isStrictFPOpcode(); |
785 | MVT VT = Node->getOperand(Num: IsStrict ? 1 : 0).getSimpleValueType(); |
786 | MVT NVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT); |
787 | assert(NVT.getVectorNumElements() == VT.getVectorNumElements() && |
788 | "Vectors have different number of elements!" ); |
789 | |
790 | SDLoc dl(Node); |
791 | SmallVector<SDValue, 4> Operands(Node->getNumOperands()); |
792 | |
793 | unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP || |
794 | Node->getOpcode() == ISD::STRICT_UINT_TO_FP) |
795 | ? ISD::ZERO_EXTEND |
796 | : ISD::SIGN_EXTEND; |
797 | for (unsigned j = 0; j != Node->getNumOperands(); ++j) { |
798 | if (Node->getOperand(Num: j).getValueType().isVector()) |
799 | Operands[j] = DAG.getNode(Opcode: Opc, DL: dl, VT: NVT, Operand: Node->getOperand(Num: j)); |
800 | else |
801 | Operands[j] = Node->getOperand(Num: j); |
802 | } |
803 | |
804 | if (IsStrict) { |
805 | SDValue Res = DAG.getNode(Node->getOpcode(), dl, |
806 | {Node->getValueType(0), MVT::Other}, Operands); |
807 | Results.push_back(Elt: Res); |
808 | Results.push_back(Elt: Res.getValue(R: 1)); |
809 | return; |
810 | } |
811 | |
812 | SDValue Res = |
813 | DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VT: Node->getValueType(ResNo: 0), Ops: Operands); |
814 | Results.push_back(Elt: Res); |
815 | } |
816 | |
817 | // For FP_TO_INT we promote the result type to a vector type with wider |
818 | // elements and then truncate the result. This is different from the default |
819 | // PromoteVector which uses bitcast to promote thus assumning that the |
820 | // promoted vector type has the same overall size. |
821 | void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node, |
822 | SmallVectorImpl<SDValue> &Results) { |
823 | MVT VT = Node->getSimpleValueType(ResNo: 0); |
824 | MVT NVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT); |
825 | bool IsStrict = Node->isStrictFPOpcode(); |
826 | assert(NVT.getVectorNumElements() == VT.getVectorNumElements() && |
827 | "Vectors have different number of elements!" ); |
828 | |
829 | unsigned NewOpc = Node->getOpcode(); |
830 | // Change FP_TO_UINT to FP_TO_SINT if possible. |
831 | // TODO: Should we only do this if FP_TO_UINT itself isn't legal? |
832 | if (NewOpc == ISD::FP_TO_UINT && |
833 | TLI.isOperationLegalOrCustom(Op: ISD::FP_TO_SINT, VT: NVT)) |
834 | NewOpc = ISD::FP_TO_SINT; |
835 | |
836 | if (NewOpc == ISD::STRICT_FP_TO_UINT && |
837 | TLI.isOperationLegalOrCustom(Op: ISD::STRICT_FP_TO_SINT, VT: NVT)) |
838 | NewOpc = ISD::STRICT_FP_TO_SINT; |
839 | |
840 | SDLoc dl(Node); |
841 | SDValue Promoted, Chain; |
842 | if (IsStrict) { |
843 | Promoted = DAG.getNode(NewOpc, dl, {NVT, MVT::Other}, |
844 | {Node->getOperand(0), Node->getOperand(1)}); |
845 | Chain = Promoted.getValue(R: 1); |
846 | } else |
847 | Promoted = DAG.getNode(Opcode: NewOpc, DL: dl, VT: NVT, Operand: Node->getOperand(Num: 0)); |
848 | |
849 | // Assert that the converted value fits in the original type. If it doesn't |
850 | // (eg: because the value being converted is too big), then the result of the |
851 | // original operation was undefined anyway, so the assert is still correct. |
852 | if (Node->getOpcode() == ISD::FP_TO_UINT || |
853 | Node->getOpcode() == ISD::STRICT_FP_TO_UINT) |
854 | NewOpc = ISD::AssertZext; |
855 | else |
856 | NewOpc = ISD::AssertSext; |
857 | |
858 | Promoted = DAG.getNode(Opcode: NewOpc, DL: dl, VT: NVT, N1: Promoted, |
859 | N2: DAG.getValueType(VT.getScalarType())); |
860 | Promoted = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Promoted); |
861 | Results.push_back(Elt: Promoted); |
862 | if (IsStrict) |
863 | Results.push_back(Elt: Chain); |
864 | } |
865 | |
866 | std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) { |
867 | LoadSDNode *LD = cast<LoadSDNode>(Val: N); |
868 | return TLI.scalarizeVectorLoad(LD, DAG); |
869 | } |
870 | |
871 | SDValue VectorLegalizer::ExpandStore(SDNode *N) { |
872 | StoreSDNode *ST = cast<StoreSDNode>(Val: N); |
873 | SDValue TF = TLI.scalarizeVectorStore(ST, DAG); |
874 | return TF; |
875 | } |
876 | |
877 | void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { |
878 | switch (Node->getOpcode()) { |
879 | case ISD::LOAD: { |
880 | std::pair<SDValue, SDValue> Tmp = ExpandLoad(N: Node); |
881 | Results.push_back(Elt: Tmp.first); |
882 | Results.push_back(Elt: Tmp.second); |
883 | return; |
884 | } |
885 | case ISD::STORE: |
886 | Results.push_back(Elt: ExpandStore(N: Node)); |
887 | return; |
888 | case ISD::MERGE_VALUES: |
889 | for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) |
890 | Results.push_back(Elt: Node->getOperand(Num: i)); |
891 | return; |
892 | case ISD::SIGN_EXTEND_INREG: |
893 | Results.push_back(Elt: ExpandSEXTINREG(Node)); |
894 | return; |
895 | case ISD::ANY_EXTEND_VECTOR_INREG: |
896 | Results.push_back(Elt: ExpandANY_EXTEND_VECTOR_INREG(Node)); |
897 | return; |
898 | case ISD::SIGN_EXTEND_VECTOR_INREG: |
899 | Results.push_back(Elt: ExpandSIGN_EXTEND_VECTOR_INREG(Node)); |
900 | return; |
901 | case ISD::ZERO_EXTEND_VECTOR_INREG: |
902 | Results.push_back(Elt: ExpandZERO_EXTEND_VECTOR_INREG(Node)); |
903 | return; |
904 | case ISD::BSWAP: |
905 | Results.push_back(Elt: ExpandBSWAP(Node)); |
906 | return; |
907 | case ISD::VP_BSWAP: |
908 | Results.push_back(Elt: TLI.expandVPBSWAP(N: Node, DAG)); |
909 | return; |
910 | case ISD::VSELECT: |
911 | Results.push_back(Elt: ExpandVSELECT(Node)); |
912 | return; |
913 | case ISD::VP_SELECT: |
914 | Results.push_back(Elt: ExpandVP_SELECT(Node)); |
915 | return; |
916 | case ISD::VP_SREM: |
917 | case ISD::VP_UREM: |
918 | if (SDValue Expanded = ExpandVP_REM(Node)) { |
919 | Results.push_back(Elt: Expanded); |
920 | return; |
921 | } |
922 | break; |
923 | case ISD::SELECT: |
924 | Results.push_back(Elt: ExpandSELECT(Node)); |
925 | return; |
926 | case ISD::SELECT_CC: { |
927 | if (Node->getValueType(ResNo: 0).isScalableVector()) { |
928 | EVT CondVT = TLI.getSetCCResultType( |
929 | DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: 0)); |
930 | SDValue SetCC = |
931 | DAG.getNode(Opcode: ISD::SETCC, DL: SDLoc(Node), VT: CondVT, N1: Node->getOperand(Num: 0), |
932 | N2: Node->getOperand(Num: 1), N3: Node->getOperand(Num: 4)); |
933 | Results.push_back(Elt: DAG.getSelect(DL: SDLoc(Node), VT: Node->getValueType(ResNo: 0), Cond: SetCC, |
934 | LHS: Node->getOperand(Num: 2), |
935 | RHS: Node->getOperand(Num: 3))); |
936 | return; |
937 | } |
938 | break; |
939 | } |
940 | case ISD::FP_TO_UINT: |
941 | ExpandFP_TO_UINT(Node, Results); |
942 | return; |
943 | case ISD::UINT_TO_FP: |
944 | ExpandUINT_TO_FLOAT(Node, Results); |
945 | return; |
946 | case ISD::FNEG: |
947 | Results.push_back(Elt: ExpandFNEG(Node)); |
948 | return; |
949 | case ISD::FSUB: |
950 | ExpandFSUB(Node, Results); |
951 | return; |
952 | case ISD::SETCC: |
953 | case ISD::VP_SETCC: |
954 | ExpandSETCC(Node, Results); |
955 | return; |
956 | case ISD::ABS: |
957 | if (SDValue Expanded = TLI.expandABS(N: Node, DAG)) { |
958 | Results.push_back(Elt: Expanded); |
959 | return; |
960 | } |
961 | break; |
962 | case ISD::ABDS: |
963 | case ISD::ABDU: |
964 | if (SDValue Expanded = TLI.expandABD(N: Node, DAG)) { |
965 | Results.push_back(Elt: Expanded); |
966 | return; |
967 | } |
968 | break; |
969 | case ISD::BITREVERSE: |
970 | ExpandBITREVERSE(Node, Results); |
971 | return; |
972 | case ISD::VP_BITREVERSE: |
973 | if (SDValue Expanded = TLI.expandVPBITREVERSE(N: Node, DAG)) { |
974 | Results.push_back(Elt: Expanded); |
975 | return; |
976 | } |
977 | break; |
978 | case ISD::CTPOP: |
979 | if (SDValue Expanded = TLI.expandCTPOP(N: Node, DAG)) { |
980 | Results.push_back(Elt: Expanded); |
981 | return; |
982 | } |
983 | break; |
984 | case ISD::VP_CTPOP: |
985 | if (SDValue Expanded = TLI.expandVPCTPOP(N: Node, DAG)) { |
986 | Results.push_back(Elt: Expanded); |
987 | return; |
988 | } |
989 | break; |
990 | case ISD::CTLZ: |
991 | case ISD::CTLZ_ZERO_UNDEF: |
992 | if (SDValue Expanded = TLI.expandCTLZ(N: Node, DAG)) { |
993 | Results.push_back(Elt: Expanded); |
994 | return; |
995 | } |
996 | break; |
997 | case ISD::VP_CTLZ: |
998 | case ISD::VP_CTLZ_ZERO_UNDEF: |
999 | if (SDValue Expanded = TLI.expandVPCTLZ(N: Node, DAG)) { |
1000 | Results.push_back(Elt: Expanded); |
1001 | return; |
1002 | } |
1003 | break; |
1004 | case ISD::CTTZ: |
1005 | case ISD::CTTZ_ZERO_UNDEF: |
1006 | if (SDValue Expanded = TLI.expandCTTZ(N: Node, DAG)) { |
1007 | Results.push_back(Elt: Expanded); |
1008 | return; |
1009 | } |
1010 | break; |
1011 | case ISD::VP_CTTZ: |
1012 | case ISD::VP_CTTZ_ZERO_UNDEF: |
1013 | if (SDValue Expanded = TLI.expandVPCTTZ(N: Node, DAG)) { |
1014 | Results.push_back(Elt: Expanded); |
1015 | return; |
1016 | } |
1017 | break; |
1018 | case ISD::FSHL: |
1019 | case ISD::VP_FSHL: |
1020 | case ISD::FSHR: |
1021 | case ISD::VP_FSHR: |
1022 | if (SDValue Expanded = TLI.expandFunnelShift(N: Node, DAG)) { |
1023 | Results.push_back(Elt: Expanded); |
1024 | return; |
1025 | } |
1026 | break; |
1027 | case ISD::ROTL: |
1028 | case ISD::ROTR: |
1029 | if (SDValue Expanded = TLI.expandROT(N: Node, AllowVectorOps: false /*AllowVectorOps*/, DAG)) { |
1030 | Results.push_back(Elt: Expanded); |
1031 | return; |
1032 | } |
1033 | break; |
1034 | case ISD::FMINNUM: |
1035 | case ISD::FMAXNUM: |
1036 | if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(N: Node, DAG)) { |
1037 | Results.push_back(Elt: Expanded); |
1038 | return; |
1039 | } |
1040 | break; |
1041 | case ISD::SMIN: |
1042 | case ISD::SMAX: |
1043 | case ISD::UMIN: |
1044 | case ISD::UMAX: |
1045 | if (SDValue Expanded = TLI.expandIntMINMAX(Node, DAG)) { |
1046 | Results.push_back(Elt: Expanded); |
1047 | return; |
1048 | } |
1049 | break; |
1050 | case ISD::UADDO: |
1051 | case ISD::USUBO: |
1052 | ExpandUADDSUBO(Node, Results); |
1053 | return; |
1054 | case ISD::SADDO: |
1055 | case ISD::SSUBO: |
1056 | ExpandSADDSUBO(Node, Results); |
1057 | return; |
1058 | case ISD::UMULO: |
1059 | case ISD::SMULO: |
1060 | ExpandMULO(Node, Results); |
1061 | return; |
1062 | case ISD::USUBSAT: |
1063 | case ISD::SSUBSAT: |
1064 | case ISD::UADDSAT: |
1065 | case ISD::SADDSAT: |
1066 | if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) { |
1067 | Results.push_back(Elt: Expanded); |
1068 | return; |
1069 | } |
1070 | break; |
1071 | case ISD::USHLSAT: |
1072 | case ISD::SSHLSAT: |
1073 | if (SDValue Expanded = TLI.expandShlSat(Node, DAG)) { |
1074 | Results.push_back(Elt: Expanded); |
1075 | return; |
1076 | } |
1077 | break; |
1078 | case ISD::FP_TO_SINT_SAT: |
1079 | case ISD::FP_TO_UINT_SAT: |
1080 | // Expand the fpsosisat if it is scalable to prevent it from unrolling below. |
1081 | if (Node->getValueType(ResNo: 0).isScalableVector()) { |
1082 | if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(N: Node, DAG)) { |
1083 | Results.push_back(Elt: Expanded); |
1084 | return; |
1085 | } |
1086 | } |
1087 | break; |
1088 | case ISD::SMULFIX: |
1089 | case ISD::UMULFIX: |
1090 | if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) { |
1091 | Results.push_back(Elt: Expanded); |
1092 | return; |
1093 | } |
1094 | break; |
1095 | case ISD::SMULFIXSAT: |
1096 | case ISD::UMULFIXSAT: |
1097 | // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly |
1098 | // why. Maybe it results in worse codegen compared to the unroll for some |
1099 | // targets? This should probably be investigated. And if we still prefer to |
1100 | // unroll an explanation could be helpful. |
1101 | break; |
1102 | case ISD::SDIVFIX: |
1103 | case ISD::UDIVFIX: |
1104 | ExpandFixedPointDiv(Node, Results); |
1105 | return; |
1106 | case ISD::SDIVFIXSAT: |
1107 | case ISD::UDIVFIXSAT: |
1108 | break; |
1109 | #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ |
1110 | case ISD::STRICT_##DAGN: |
1111 | #include "llvm/IR/ConstrainedOps.def" |
1112 | ExpandStrictFPOp(Node, Results); |
1113 | return; |
1114 | case ISD::VECREDUCE_ADD: |
1115 | case ISD::VECREDUCE_MUL: |
1116 | case ISD::VECREDUCE_AND: |
1117 | case ISD::VECREDUCE_OR: |
1118 | case ISD::VECREDUCE_XOR: |
1119 | case ISD::VECREDUCE_SMAX: |
1120 | case ISD::VECREDUCE_SMIN: |
1121 | case ISD::VECREDUCE_UMAX: |
1122 | case ISD::VECREDUCE_UMIN: |
1123 | case ISD::VECREDUCE_FADD: |
1124 | case ISD::VECREDUCE_FMUL: |
1125 | case ISD::VECREDUCE_FMAX: |
1126 | case ISD::VECREDUCE_FMIN: |
1127 | case ISD::VECREDUCE_FMAXIMUM: |
1128 | case ISD::VECREDUCE_FMINIMUM: |
1129 | Results.push_back(Elt: TLI.expandVecReduce(Node, DAG)); |
1130 | return; |
1131 | case ISD::VECREDUCE_SEQ_FADD: |
1132 | case ISD::VECREDUCE_SEQ_FMUL: |
1133 | Results.push_back(Elt: TLI.expandVecReduceSeq(Node, DAG)); |
1134 | return; |
1135 | case ISD::SREM: |
1136 | case ISD::UREM: |
1137 | ExpandREM(Node, Results); |
1138 | return; |
1139 | case ISD::VP_MERGE: |
1140 | Results.push_back(Elt: ExpandVP_MERGE(Node)); |
1141 | return; |
1142 | } |
1143 | |
1144 | SDValue Unrolled = DAG.UnrollVectorOp(N: Node); |
1145 | for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I) |
1146 | Results.push_back(Elt: Unrolled.getValue(R: I)); |
1147 | } |
1148 | |
1149 | SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) { |
1150 | // Lower a select instruction where the condition is a scalar and the |
1151 | // operands are vectors. Lower this select to VSELECT and implement it |
1152 | // using XOR AND OR. The selector bit is broadcasted. |
1153 | EVT VT = Node->getValueType(ResNo: 0); |
1154 | SDLoc DL(Node); |
1155 | |
1156 | SDValue Mask = Node->getOperand(Num: 0); |
1157 | SDValue Op1 = Node->getOperand(Num: 1); |
1158 | SDValue Op2 = Node->getOperand(Num: 2); |
1159 | |
1160 | assert(VT.isVector() && !Mask.getValueType().isVector() |
1161 | && Op1.getValueType() == Op2.getValueType() && "Invalid type" ); |
1162 | |
1163 | // If we can't even use the basic vector operations of |
1164 | // AND,OR,XOR, we will have to scalarize the op. |
1165 | // Notice that the operation may be 'promoted' which means that it is |
1166 | // 'bitcasted' to another type which is handled. |
1167 | // Also, we need to be able to construct a splat vector using either |
1168 | // BUILD_VECTOR or SPLAT_VECTOR. |
1169 | // FIXME: Should we also permit fixed-length SPLAT_VECTOR as a fallback to |
1170 | // BUILD_VECTOR? |
1171 | if (TLI.getOperationAction(Op: ISD::AND, VT) == TargetLowering::Expand || |
1172 | TLI.getOperationAction(Op: ISD::XOR, VT) == TargetLowering::Expand || |
1173 | TLI.getOperationAction(Op: ISD::OR, VT) == TargetLowering::Expand || |
1174 | TLI.getOperationAction(Op: VT.isFixedLengthVector() ? ISD::BUILD_VECTOR |
1175 | : ISD::SPLAT_VECTOR, |
1176 | VT) == TargetLowering::Expand) |
1177 | return DAG.UnrollVectorOp(N: Node); |
1178 | |
1179 | // Generate a mask operand. |
1180 | EVT MaskTy = VT.changeVectorElementTypeToInteger(); |
1181 | |
1182 | // What is the size of each element in the vector mask. |
1183 | EVT BitTy = MaskTy.getScalarType(); |
1184 | |
1185 | Mask = DAG.getSelect(DL, VT: BitTy, Cond: Mask, LHS: DAG.getAllOnesConstant(DL, VT: BitTy), |
1186 | RHS: DAG.getConstant(Val: 0, DL, VT: BitTy)); |
1187 | |
1188 | // Broadcast the mask so that the entire vector is all one or all zero. |
1189 | Mask = DAG.getSplat(VT: MaskTy, DL, Op: Mask); |
1190 | |
1191 | // Bitcast the operands to be the same type as the mask. |
1192 | // This is needed when we select between FP types because |
1193 | // the mask is a vector of integers. |
1194 | Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MaskTy, Operand: Op1); |
1195 | Op2 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MaskTy, Operand: Op2); |
1196 | |
1197 | SDValue NotMask = DAG.getNOT(DL, Val: Mask, VT: MaskTy); |
1198 | |
1199 | Op1 = DAG.getNode(Opcode: ISD::AND, DL, VT: MaskTy, N1: Op1, N2: Mask); |
1200 | Op2 = DAG.getNode(Opcode: ISD::AND, DL, VT: MaskTy, N1: Op2, N2: NotMask); |
1201 | SDValue Val = DAG.getNode(Opcode: ISD::OR, DL, VT: MaskTy, N1: Op1, N2: Op2); |
1202 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Node->getValueType(ResNo: 0), Operand: Val); |
1203 | } |
1204 | |
1205 | SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) { |
1206 | EVT VT = Node->getValueType(ResNo: 0); |
1207 | |
1208 | // Make sure that the SRA and SHL instructions are available. |
1209 | if (TLI.getOperationAction(Op: ISD::SRA, VT) == TargetLowering::Expand || |
1210 | TLI.getOperationAction(Op: ISD::SHL, VT) == TargetLowering::Expand) |
1211 | return DAG.UnrollVectorOp(N: Node); |
1212 | |
1213 | SDLoc DL(Node); |
1214 | EVT OrigTy = cast<VTSDNode>(Val: Node->getOperand(Num: 1))->getVT(); |
1215 | |
1216 | unsigned BW = VT.getScalarSizeInBits(); |
1217 | unsigned OrigBW = OrigTy.getScalarSizeInBits(); |
1218 | SDValue ShiftSz = DAG.getConstant(Val: BW - OrigBW, DL, VT); |
1219 | |
1220 | SDValue Op = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Node->getOperand(Num: 0), N2: ShiftSz); |
1221 | return DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Op, N2: ShiftSz); |
1222 | } |
1223 | |
1224 | // Generically expand a vector anyext in register to a shuffle of the relevant |
1225 | // lanes into the appropriate locations, with other lanes left undef. |
1226 | SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) { |
1227 | SDLoc DL(Node); |
1228 | EVT VT = Node->getValueType(ResNo: 0); |
1229 | int NumElements = VT.getVectorNumElements(); |
1230 | SDValue Src = Node->getOperand(Num: 0); |
1231 | EVT SrcVT = Src.getValueType(); |
1232 | int NumSrcElements = SrcVT.getVectorNumElements(); |
1233 | |
1234 | // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector |
1235 | // into a larger vector type. |
1236 | if (SrcVT.bitsLE(VT)) { |
1237 | assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 && |
1238 | "ANY_EXTEND_VECTOR_INREG vector size mismatch" ); |
1239 | NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits(); |
1240 | SrcVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: SrcVT.getScalarType(), |
1241 | NumElements: NumSrcElements); |
1242 | Src = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: SrcVT, N1: DAG.getUNDEF(VT: SrcVT), |
1243 | N2: Src, N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
1244 | } |
1245 | |
1246 | // Build a base mask of undef shuffles. |
1247 | SmallVector<int, 16> ShuffleMask; |
1248 | ShuffleMask.resize(N: NumSrcElements, NV: -1); |
1249 | |
1250 | // Place the extended lanes into the correct locations. |
1251 | int ExtLaneScale = NumSrcElements / NumElements; |
1252 | int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0; |
1253 | for (int i = 0; i < NumElements; ++i) |
1254 | ShuffleMask[i * ExtLaneScale + EndianOffset] = i; |
1255 | |
1256 | return DAG.getNode( |
1257 | Opcode: ISD::BITCAST, DL, VT, |
1258 | Operand: DAG.getVectorShuffle(VT: SrcVT, dl: DL, N1: Src, N2: DAG.getUNDEF(VT: SrcVT), Mask: ShuffleMask)); |
1259 | } |
1260 | |
1261 | SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) { |
1262 | SDLoc DL(Node); |
1263 | EVT VT = Node->getValueType(ResNo: 0); |
1264 | SDValue Src = Node->getOperand(Num: 0); |
1265 | EVT SrcVT = Src.getValueType(); |
1266 | |
1267 | // First build an any-extend node which can be legalized above when we |
1268 | // recurse through it. |
1269 | SDValue Op = DAG.getNode(Opcode: ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Operand: Src); |
1270 | |
1271 | // Now we need sign extend. Do this by shifting the elements. Even if these |
1272 | // aren't legal operations, they have a better chance of being legalized |
1273 | // without full scalarization than the sign extension does. |
1274 | unsigned EltWidth = VT.getScalarSizeInBits(); |
1275 | unsigned SrcEltWidth = SrcVT.getScalarSizeInBits(); |
1276 | SDValue ShiftAmount = DAG.getConstant(Val: EltWidth - SrcEltWidth, DL, VT); |
1277 | return DAG.getNode(Opcode: ISD::SRA, DL, VT, |
1278 | N1: DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Op, N2: ShiftAmount), |
1279 | N2: ShiftAmount); |
1280 | } |
1281 | |
1282 | // Generically expand a vector zext in register to a shuffle of the relevant |
1283 | // lanes into the appropriate locations, a blend of zero into the high bits, |
1284 | // and a bitcast to the wider element type. |
1285 | SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) { |
1286 | SDLoc DL(Node); |
1287 | EVT VT = Node->getValueType(ResNo: 0); |
1288 | int NumElements = VT.getVectorNumElements(); |
1289 | SDValue Src = Node->getOperand(Num: 0); |
1290 | EVT SrcVT = Src.getValueType(); |
1291 | int NumSrcElements = SrcVT.getVectorNumElements(); |
1292 | |
1293 | // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector |
1294 | // into a larger vector type. |
1295 | if (SrcVT.bitsLE(VT)) { |
1296 | assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 && |
1297 | "ZERO_EXTEND_VECTOR_INREG vector size mismatch" ); |
1298 | NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits(); |
1299 | SrcVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: SrcVT.getScalarType(), |
1300 | NumElements: NumSrcElements); |
1301 | Src = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: SrcVT, N1: DAG.getUNDEF(VT: SrcVT), |
1302 | N2: Src, N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
1303 | } |
1304 | |
1305 | // Build up a zero vector to blend into this one. |
1306 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT: SrcVT); |
1307 | |
1308 | // Shuffle the incoming lanes into the correct position, and pull all other |
1309 | // lanes from the zero vector. |
1310 | auto ShuffleMask = llvm::to_vector<16>(Range: llvm::seq<int>(Begin: 0, End: NumSrcElements)); |
1311 | |
1312 | int ExtLaneScale = NumSrcElements / NumElements; |
1313 | int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0; |
1314 | for (int i = 0; i < NumElements; ++i) |
1315 | ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i; |
1316 | |
1317 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, |
1318 | Operand: DAG.getVectorShuffle(VT: SrcVT, dl: DL, N1: Zero, N2: Src, Mask: ShuffleMask)); |
1319 | } |
1320 | |
1321 | static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) { |
1322 | int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8; |
1323 | for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I) |
1324 | for (int J = ScalarSizeInBytes - 1; J >= 0; --J) |
1325 | ShuffleMask.push_back(Elt: (I * ScalarSizeInBytes) + J); |
1326 | } |
1327 | |
1328 | SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) { |
1329 | EVT VT = Node->getValueType(ResNo: 0); |
1330 | |
1331 | // Scalable vectors can't use shuffle expansion. |
1332 | if (VT.isScalableVector()) |
1333 | return TLI.expandBSWAP(N: Node, DAG); |
1334 | |
1335 | // Generate a byte wise shuffle mask for the BSWAP. |
1336 | SmallVector<int, 16> ShuffleMask; |
1337 | createBSWAPShuffleMask(VT, ShuffleMask); |
1338 | EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size()); |
1339 | |
1340 | // Only emit a shuffle if the mask is legal. |
1341 | if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) { |
1342 | SDLoc DL(Node); |
1343 | SDValue Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ByteVT, Operand: Node->getOperand(Num: 0)); |
1344 | Op = DAG.getVectorShuffle(VT: ByteVT, dl: DL, N1: Op, N2: DAG.getUNDEF(VT: ByteVT), Mask: ShuffleMask); |
1345 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op); |
1346 | } |
1347 | |
1348 | // If we have the appropriate vector bit operations, it is better to use them |
1349 | // than unrolling and expanding each component. |
1350 | if (TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT) && |
1351 | TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) && |
1352 | TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) && |
1353 | TLI.isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)) |
1354 | return TLI.expandBSWAP(N: Node, DAG); |
1355 | |
1356 | // Otherwise unroll. |
1357 | return DAG.UnrollVectorOp(N: Node); |
1358 | } |
1359 | |
1360 | void VectorLegalizer::ExpandBITREVERSE(SDNode *Node, |
1361 | SmallVectorImpl<SDValue> &Results) { |
1362 | EVT VT = Node->getValueType(ResNo: 0); |
1363 | |
1364 | // We can't unroll or use shuffles for scalable vectors. |
1365 | if (VT.isScalableVector()) { |
1366 | Results.push_back(Elt: TLI.expandBITREVERSE(N: Node, DAG)); |
1367 | return; |
1368 | } |
1369 | |
1370 | // If we have the scalar operation, it's probably cheaper to unroll it. |
1371 | if (TLI.isOperationLegalOrCustom(Op: ISD::BITREVERSE, VT: VT.getScalarType())) { |
1372 | SDValue Tmp = DAG.UnrollVectorOp(N: Node); |
1373 | Results.push_back(Elt: Tmp); |
1374 | return; |
1375 | } |
1376 | |
1377 | // If the vector element width is a whole number of bytes, test if its legal |
1378 | // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte |
1379 | // vector. This greatly reduces the number of bit shifts necessary. |
1380 | unsigned ScalarSizeInBits = VT.getScalarSizeInBits(); |
1381 | if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) { |
1382 | SmallVector<int, 16> BSWAPMask; |
1383 | createBSWAPShuffleMask(VT, ShuffleMask&: BSWAPMask); |
1384 | |
1385 | EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size()); |
1386 | if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) && |
1387 | (TLI.isOperationLegalOrCustom(Op: ISD::BITREVERSE, VT: ByteVT) || |
1388 | (TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT: ByteVT) && |
1389 | TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT: ByteVT) && |
1390 | TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT: ByteVT) && |
1391 | TLI.isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT: ByteVT)))) { |
1392 | SDLoc DL(Node); |
1393 | SDValue Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ByteVT, Operand: Node->getOperand(Num: 0)); |
1394 | Op = DAG.getVectorShuffle(VT: ByteVT, dl: DL, N1: Op, N2: DAG.getUNDEF(VT: ByteVT), |
1395 | Mask: BSWAPMask); |
1396 | Op = DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT: ByteVT, Operand: Op); |
1397 | Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op); |
1398 | Results.push_back(Elt: Op); |
1399 | return; |
1400 | } |
1401 | } |
1402 | |
1403 | // If we have the appropriate vector bit operations, it is better to use them |
1404 | // than unrolling and expanding each component. |
1405 | if (TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT) && |
1406 | TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) && |
1407 | TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) && |
1408 | TLI.isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)) { |
1409 | Results.push_back(Elt: TLI.expandBITREVERSE(N: Node, DAG)); |
1410 | return; |
1411 | } |
1412 | |
1413 | // Otherwise unroll. |
1414 | SDValue Tmp = DAG.UnrollVectorOp(N: Node); |
1415 | Results.push_back(Elt: Tmp); |
1416 | } |
1417 | |
1418 | SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) { |
1419 | // Implement VSELECT in terms of XOR, AND, OR |
1420 | // on platforms which do not support blend natively. |
1421 | SDLoc DL(Node); |
1422 | |
1423 | SDValue Mask = Node->getOperand(Num: 0); |
1424 | SDValue Op1 = Node->getOperand(Num: 1); |
1425 | SDValue Op2 = Node->getOperand(Num: 2); |
1426 | |
1427 | EVT VT = Mask.getValueType(); |
1428 | |
1429 | // If we can't even use the basic vector operations of |
1430 | // AND,OR,XOR, we will have to scalarize the op. |
1431 | // Notice that the operation may be 'promoted' which means that it is |
1432 | // 'bitcasted' to another type which is handled. |
1433 | if (TLI.getOperationAction(Op: ISD::AND, VT) == TargetLowering::Expand || |
1434 | TLI.getOperationAction(Op: ISD::XOR, VT) == TargetLowering::Expand || |
1435 | TLI.getOperationAction(Op: ISD::OR, VT) == TargetLowering::Expand) |
1436 | return DAG.UnrollVectorOp(N: Node); |
1437 | |
1438 | // This operation also isn't safe with AND, OR, XOR when the boolean type is |
1439 | // 0/1 and the select operands aren't also booleans, as we need an all-ones |
1440 | // vector constant to mask with. |
1441 | // FIXME: Sign extend 1 to all ones if that's legal on the target. |
1442 | auto BoolContents = TLI.getBooleanContents(Type: Op1.getValueType()); |
1443 | if (BoolContents != TargetLowering::ZeroOrNegativeOneBooleanContent && |
1444 | !(BoolContents == TargetLowering::ZeroOrOneBooleanContent && |
1445 | Op1.getValueType().getVectorElementType() == MVT::i1)) |
1446 | return DAG.UnrollVectorOp(N: Node); |
1447 | |
1448 | // If the mask and the type are different sizes, unroll the vector op. This |
1449 | // can occur when getSetCCResultType returns something that is different in |
1450 | // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8. |
1451 | if (VT.getSizeInBits() != Op1.getValueSizeInBits()) |
1452 | return DAG.UnrollVectorOp(N: Node); |
1453 | |
1454 | // Bitcast the operands to be the same type as the mask. |
1455 | // This is needed when we select between FP types because |
1456 | // the mask is a vector of integers. |
1457 | Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op1); |
1458 | Op2 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op2); |
1459 | |
1460 | SDValue NotMask = DAG.getNOT(DL, Val: Mask, VT); |
1461 | |
1462 | Op1 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op1, N2: Mask); |
1463 | Op2 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op2, N2: NotMask); |
1464 | SDValue Val = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Op1, N2: Op2); |
1465 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Node->getValueType(ResNo: 0), Operand: Val); |
1466 | } |
1467 | |
1468 | SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) { |
1469 | // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which |
1470 | // do not support it natively. |
1471 | SDLoc DL(Node); |
1472 | |
1473 | SDValue Mask = Node->getOperand(Num: 0); |
1474 | SDValue Op1 = Node->getOperand(Num: 1); |
1475 | SDValue Op2 = Node->getOperand(Num: 2); |
1476 | SDValue EVL = Node->getOperand(Num: 3); |
1477 | |
1478 | EVT VT = Mask.getValueType(); |
1479 | |
1480 | // If we can't even use the basic vector operations of |
1481 | // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op. |
1482 | if (TLI.getOperationAction(Op: ISD::VP_AND, VT) == TargetLowering::Expand || |
1483 | TLI.getOperationAction(Op: ISD::VP_XOR, VT) == TargetLowering::Expand || |
1484 | TLI.getOperationAction(Op: ISD::VP_OR, VT) == TargetLowering::Expand) |
1485 | return DAG.UnrollVectorOp(N: Node); |
1486 | |
1487 | // This operation also isn't safe when the operands aren't also booleans. |
1488 | if (Op1.getValueType().getVectorElementType() != MVT::i1) |
1489 | return DAG.UnrollVectorOp(N: Node); |
1490 | |
1491 | SDValue Ones = DAG.getAllOnesConstant(DL, VT); |
1492 | SDValue NotMask = DAG.getNode(Opcode: ISD::VP_XOR, DL, VT, N1: Mask, N2: Ones, N3: Ones, N4: EVL); |
1493 | |
1494 | Op1 = DAG.getNode(Opcode: ISD::VP_AND, DL, VT, N1: Op1, N2: Mask, N3: Ones, N4: EVL); |
1495 | Op2 = DAG.getNode(Opcode: ISD::VP_AND, DL, VT, N1: Op2, N2: NotMask, N3: Ones, N4: EVL); |
1496 | return DAG.getNode(Opcode: ISD::VP_OR, DL, VT, N1: Op1, N2: Op2, N3: Ones, N4: EVL); |
1497 | } |
1498 | |
1499 | SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) { |
1500 | // Implement VP_MERGE in terms of VSELECT. Construct a mask where vector |
1501 | // indices less than the EVL/pivot are true. Combine that with the original |
1502 | // mask for a full-length mask. Use a full-length VSELECT to select between |
1503 | // the true and false values. |
1504 | SDLoc DL(Node); |
1505 | |
1506 | SDValue Mask = Node->getOperand(Num: 0); |
1507 | SDValue Op1 = Node->getOperand(Num: 1); |
1508 | SDValue Op2 = Node->getOperand(Num: 2); |
1509 | SDValue EVL = Node->getOperand(Num: 3); |
1510 | |
1511 | EVT MaskVT = Mask.getValueType(); |
1512 | bool IsFixedLen = MaskVT.isFixedLengthVector(); |
1513 | |
1514 | EVT EVLVecVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: EVL.getValueType(), |
1515 | EC: MaskVT.getVectorElementCount()); |
1516 | |
1517 | // If we can't construct the EVL mask efficiently, it's better to unroll. |
1518 | if ((IsFixedLen && |
1519 | !TLI.isOperationLegalOrCustom(Op: ISD::BUILD_VECTOR, VT: EVLVecVT)) || |
1520 | (!IsFixedLen && |
1521 | (!TLI.isOperationLegalOrCustom(Op: ISD::STEP_VECTOR, VT: EVLVecVT) || |
1522 | !TLI.isOperationLegalOrCustom(Op: ISD::SPLAT_VECTOR, VT: EVLVecVT)))) |
1523 | return DAG.UnrollVectorOp(N: Node); |
1524 | |
1525 | // If using a SETCC would result in a different type than the mask type, |
1526 | // unroll. |
1527 | if (TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), |
1528 | VT: EVLVecVT) != MaskVT) |
1529 | return DAG.UnrollVectorOp(N: Node); |
1530 | |
1531 | SDValue StepVec = DAG.getStepVector(DL, ResVT: EVLVecVT); |
1532 | SDValue SplatEVL = DAG.getSplat(VT: EVLVecVT, DL, Op: EVL); |
1533 | SDValue EVLMask = |
1534 | DAG.getSetCC(DL, VT: MaskVT, LHS: StepVec, RHS: SplatEVL, Cond: ISD::CondCode::SETULT); |
1535 | |
1536 | SDValue FullMask = DAG.getNode(Opcode: ISD::AND, DL, VT: MaskVT, N1: Mask, N2: EVLMask); |
1537 | return DAG.getSelect(DL, VT: Node->getValueType(ResNo: 0), Cond: FullMask, LHS: Op1, RHS: Op2); |
1538 | } |
1539 | |
1540 | SDValue VectorLegalizer::ExpandVP_REM(SDNode *Node) { |
1541 | // Implement VP_SREM/UREM in terms of VP_SDIV/VP_UDIV, VP_MUL, VP_SUB. |
1542 | EVT VT = Node->getValueType(ResNo: 0); |
1543 | |
1544 | unsigned DivOpc = Node->getOpcode() == ISD::VP_SREM ? ISD::VP_SDIV : ISD::VP_UDIV; |
1545 | |
1546 | if (!TLI.isOperationLegalOrCustom(Op: DivOpc, VT) || |
1547 | !TLI.isOperationLegalOrCustom(Op: ISD::VP_MUL, VT) || |
1548 | !TLI.isOperationLegalOrCustom(Op: ISD::VP_SUB, VT)) |
1549 | return SDValue(); |
1550 | |
1551 | SDLoc DL(Node); |
1552 | |
1553 | SDValue Dividend = Node->getOperand(Num: 0); |
1554 | SDValue Divisor = Node->getOperand(Num: 1); |
1555 | SDValue Mask = Node->getOperand(Num: 2); |
1556 | SDValue EVL = Node->getOperand(Num: 3); |
1557 | |
1558 | // X % Y -> X-X/Y*Y |
1559 | SDValue Div = DAG.getNode(Opcode: DivOpc, DL, VT, N1: Dividend, N2: Divisor, N3: Mask, N4: EVL); |
1560 | SDValue Mul = DAG.getNode(Opcode: ISD::VP_MUL, DL, VT, N1: Divisor, N2: Div, N3: Mask, N4: EVL); |
1561 | return DAG.getNode(Opcode: ISD::VP_SUB, DL, VT, N1: Dividend, N2: Mul, N3: Mask, N4: EVL); |
1562 | } |
1563 | |
1564 | void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node, |
1565 | SmallVectorImpl<SDValue> &Results) { |
1566 | // Attempt to expand using TargetLowering. |
1567 | SDValue Result, Chain; |
1568 | if (TLI.expandFP_TO_UINT(N: Node, Result, Chain, DAG)) { |
1569 | Results.push_back(Elt: Result); |
1570 | if (Node->isStrictFPOpcode()) |
1571 | Results.push_back(Elt: Chain); |
1572 | return; |
1573 | } |
1574 | |
1575 | // Otherwise go ahead and unroll. |
1576 | if (Node->isStrictFPOpcode()) { |
1577 | UnrollStrictFPOp(Node, Results); |
1578 | return; |
1579 | } |
1580 | |
1581 | Results.push_back(Elt: DAG.UnrollVectorOp(N: Node)); |
1582 | } |
1583 | |
1584 | void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node, |
1585 | SmallVectorImpl<SDValue> &Results) { |
1586 | bool IsStrict = Node->isStrictFPOpcode(); |
1587 | unsigned OpNo = IsStrict ? 1 : 0; |
1588 | SDValue Src = Node->getOperand(Num: OpNo); |
1589 | EVT VT = Src.getValueType(); |
1590 | SDLoc DL(Node); |
1591 | |
1592 | // Attempt to expand using TargetLowering. |
1593 | SDValue Result; |
1594 | SDValue Chain; |
1595 | if (TLI.expandUINT_TO_FP(N: Node, Result, Chain, DAG)) { |
1596 | Results.push_back(Elt: Result); |
1597 | if (IsStrict) |
1598 | Results.push_back(Elt: Chain); |
1599 | return; |
1600 | } |
1601 | |
1602 | // Make sure that the SINT_TO_FP and SRL instructions are available. |
1603 | if (((!IsStrict && TLI.getOperationAction(Op: ISD::SINT_TO_FP, VT) == |
1604 | TargetLowering::Expand) || |
1605 | (IsStrict && TLI.getOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT) == |
1606 | TargetLowering::Expand)) || |
1607 | TLI.getOperationAction(Op: ISD::SRL, VT) == TargetLowering::Expand) { |
1608 | if (IsStrict) { |
1609 | UnrollStrictFPOp(Node, Results); |
1610 | return; |
1611 | } |
1612 | |
1613 | Results.push_back(Elt: DAG.UnrollVectorOp(N: Node)); |
1614 | return; |
1615 | } |
1616 | |
1617 | unsigned BW = VT.getScalarSizeInBits(); |
1618 | assert((BW == 64 || BW == 32) && |
1619 | "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide" ); |
1620 | |
1621 | SDValue HalfWord = DAG.getConstant(Val: BW / 2, DL, VT); |
1622 | |
1623 | // Constants to clear the upper part of the word. |
1624 | // Notice that we can also use SHL+SHR, but using a constant is slightly |
1625 | // faster on x86. |
1626 | uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF; |
1627 | SDValue HalfWordMask = DAG.getConstant(Val: HWMask, DL, VT); |
1628 | |
1629 | // Two to the power of half-word-size. |
1630 | SDValue TWOHW = |
1631 | DAG.getConstantFP(Val: 1ULL << (BW / 2), DL, VT: Node->getValueType(ResNo: 0)); |
1632 | |
1633 | // Clear upper part of LO, lower HI |
1634 | SDValue HI = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Src, N2: HalfWord); |
1635 | SDValue LO = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Src, N2: HalfWordMask); |
1636 | |
1637 | if (IsStrict) { |
1638 | // Convert hi and lo to floats |
1639 | // Convert the hi part back to the upper values |
1640 | // TODO: Can any fast-math-flags be set on these nodes? |
1641 | SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, |
1642 | {Node->getValueType(0), MVT::Other}, |
1643 | {Node->getOperand(0), HI}); |
1644 | fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {Node->getValueType(0), MVT::Other}, |
1645 | {fHI.getValue(1), fHI, TWOHW}); |
1646 | SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, |
1647 | {Node->getValueType(0), MVT::Other}, |
1648 | {Node->getOperand(0), LO}); |
1649 | |
1650 | SDValue TF = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, fHI.getValue(R: 1), |
1651 | fLO.getValue(R: 1)); |
1652 | |
1653 | // Add the two halves |
1654 | SDValue Result = |
1655 | DAG.getNode(ISD::STRICT_FADD, DL, {Node->getValueType(0), MVT::Other}, |
1656 | {TF, fHI, fLO}); |
1657 | |
1658 | Results.push_back(Elt: Result); |
1659 | Results.push_back(Elt: Result.getValue(R: 1)); |
1660 | return; |
1661 | } |
1662 | |
1663 | // Convert hi and lo to floats |
1664 | // Convert the hi part back to the upper values |
1665 | // TODO: Can any fast-math-flags be set on these nodes? |
1666 | SDValue fHI = DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT: Node->getValueType(ResNo: 0), Operand: HI); |
1667 | fHI = DAG.getNode(Opcode: ISD::FMUL, DL, VT: Node->getValueType(ResNo: 0), N1: fHI, N2: TWOHW); |
1668 | SDValue fLO = DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT: Node->getValueType(ResNo: 0), Operand: LO); |
1669 | |
1670 | // Add the two halves |
1671 | Results.push_back( |
1672 | Elt: DAG.getNode(Opcode: ISD::FADD, DL, VT: Node->getValueType(ResNo: 0), N1: fHI, N2: fLO)); |
1673 | } |
1674 | |
1675 | SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) { |
1676 | if (TLI.isOperationLegalOrCustom(Op: ISD::FSUB, VT: Node->getValueType(ResNo: 0))) { |
1677 | SDLoc DL(Node); |
1678 | SDValue Zero = DAG.getConstantFP(Val: -0.0, DL, VT: Node->getValueType(ResNo: 0)); |
1679 | // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB. |
1680 | return DAG.getNode(Opcode: ISD::FSUB, DL, VT: Node->getValueType(ResNo: 0), N1: Zero, |
1681 | N2: Node->getOperand(Num: 0)); |
1682 | } |
1683 | return DAG.UnrollVectorOp(N: Node); |
1684 | } |
1685 | |
1686 | void VectorLegalizer::ExpandFSUB(SDNode *Node, |
1687 | SmallVectorImpl<SDValue> &Results) { |
1688 | // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal, |
1689 | // we can defer this to operation legalization where it will be lowered as |
1690 | // a+(-b). |
1691 | EVT VT = Node->getValueType(ResNo: 0); |
1692 | if (TLI.isOperationLegalOrCustom(Op: ISD::FNEG, VT) && |
1693 | TLI.isOperationLegalOrCustom(Op: ISD::FADD, VT)) |
1694 | return; // Defer to LegalizeDAG |
1695 | |
1696 | SDValue Tmp = DAG.UnrollVectorOp(N: Node); |
1697 | Results.push_back(Elt: Tmp); |
1698 | } |
1699 | |
1700 | void VectorLegalizer::ExpandSETCC(SDNode *Node, |
1701 | SmallVectorImpl<SDValue> &Results) { |
1702 | bool NeedInvert = false; |
1703 | bool IsVP = Node->getOpcode() == ISD::VP_SETCC; |
1704 | bool IsStrict = Node->getOpcode() == ISD::STRICT_FSETCC || |
1705 | Node->getOpcode() == ISD::STRICT_FSETCCS; |
1706 | bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS; |
1707 | unsigned Offset = IsStrict ? 1 : 0; |
1708 | |
1709 | SDValue Chain = IsStrict ? Node->getOperand(Num: 0) : SDValue(); |
1710 | SDValue LHS = Node->getOperand(Num: 0 + Offset); |
1711 | SDValue RHS = Node->getOperand(Num: 1 + Offset); |
1712 | SDValue CC = Node->getOperand(Num: 2 + Offset); |
1713 | |
1714 | MVT OpVT = LHS.getSimpleValueType(); |
1715 | ISD::CondCode CCCode = cast<CondCodeSDNode>(Val&: CC)->get(); |
1716 | |
1717 | if (TLI.getCondCodeAction(CC: CCCode, VT: OpVT) != TargetLowering::Expand) { |
1718 | if (IsStrict) { |
1719 | UnrollStrictFPOp(Node, Results); |
1720 | return; |
1721 | } |
1722 | Results.push_back(Elt: UnrollVSETCC(Node)); |
1723 | return; |
1724 | } |
1725 | |
1726 | SDValue Mask, EVL; |
1727 | if (IsVP) { |
1728 | Mask = Node->getOperand(Num: 3 + Offset); |
1729 | EVL = Node->getOperand(Num: 4 + Offset); |
1730 | } |
1731 | |
1732 | SDLoc dl(Node); |
1733 | bool Legalized = |
1734 | TLI.LegalizeSetCCCondCode(DAG, VT: Node->getValueType(ResNo: 0), LHS, RHS, CC, Mask, |
1735 | EVL, NeedInvert, dl, Chain, IsSignaling); |
1736 | |
1737 | if (Legalized) { |
1738 | // If we expanded the SETCC by swapping LHS and RHS, or by inverting the |
1739 | // condition code, create a new SETCC node. |
1740 | if (CC.getNode()) { |
1741 | if (IsStrict) { |
1742 | LHS = DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VTList: Node->getVTList(), |
1743 | Ops: {Chain, LHS, RHS, CC}, Flags: Node->getFlags()); |
1744 | Chain = LHS.getValue(R: 1); |
1745 | } else if (IsVP) { |
1746 | LHS = DAG.getNode(Opcode: ISD::VP_SETCC, DL: dl, VT: Node->getValueType(ResNo: 0), |
1747 | Ops: {LHS, RHS, CC, Mask, EVL}, Flags: Node->getFlags()); |
1748 | } else { |
1749 | LHS = DAG.getNode(Opcode: ISD::SETCC, DL: dl, VT: Node->getValueType(ResNo: 0), N1: LHS, N2: RHS, N3: CC, |
1750 | Flags: Node->getFlags()); |
1751 | } |
1752 | } |
1753 | |
1754 | // If we expanded the SETCC by inverting the condition code, then wrap |
1755 | // the existing SETCC in a NOT to restore the intended condition. |
1756 | if (NeedInvert) { |
1757 | if (!IsVP) |
1758 | LHS = DAG.getLogicalNOT(DL: dl, Val: LHS, VT: LHS->getValueType(ResNo: 0)); |
1759 | else |
1760 | LHS = DAG.getVPLogicalNOT(DL: dl, Val: LHS, Mask, EVL, VT: LHS->getValueType(ResNo: 0)); |
1761 | } |
1762 | } else { |
1763 | assert(!IsStrict && "Don't know how to expand for strict nodes." ); |
1764 | |
1765 | // Otherwise, SETCC for the given comparison type must be completely |
1766 | // illegal; expand it into a SELECT_CC. |
1767 | EVT VT = Node->getValueType(ResNo: 0); |
1768 | LHS = |
1769 | DAG.getNode(Opcode: ISD::SELECT_CC, DL: dl, VT, N1: LHS, N2: RHS, |
1770 | N3: DAG.getBoolConstant(V: true, DL: dl, VT, OpVT: LHS.getValueType()), |
1771 | N4: DAG.getBoolConstant(V: false, DL: dl, VT, OpVT: LHS.getValueType()), N5: CC); |
1772 | LHS->setFlags(Node->getFlags()); |
1773 | } |
1774 | |
1775 | Results.push_back(Elt: LHS); |
1776 | if (IsStrict) |
1777 | Results.push_back(Elt: Chain); |
1778 | } |
1779 | |
1780 | void VectorLegalizer::ExpandUADDSUBO(SDNode *Node, |
1781 | SmallVectorImpl<SDValue> &Results) { |
1782 | SDValue Result, Overflow; |
1783 | TLI.expandUADDSUBO(Node, Result, Overflow, DAG); |
1784 | Results.push_back(Elt: Result); |
1785 | Results.push_back(Elt: Overflow); |
1786 | } |
1787 | |
1788 | void VectorLegalizer::ExpandSADDSUBO(SDNode *Node, |
1789 | SmallVectorImpl<SDValue> &Results) { |
1790 | SDValue Result, Overflow; |
1791 | TLI.expandSADDSUBO(Node, Result, Overflow, DAG); |
1792 | Results.push_back(Elt: Result); |
1793 | Results.push_back(Elt: Overflow); |
1794 | } |
1795 | |
1796 | void VectorLegalizer::ExpandMULO(SDNode *Node, |
1797 | SmallVectorImpl<SDValue> &Results) { |
1798 | SDValue Result, Overflow; |
1799 | if (!TLI.expandMULO(Node, Result, Overflow, DAG)) |
1800 | std::tie(args&: Result, args&: Overflow) = DAG.UnrollVectorOverflowOp(N: Node); |
1801 | |
1802 | Results.push_back(Elt: Result); |
1803 | Results.push_back(Elt: Overflow); |
1804 | } |
1805 | |
1806 | void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node, |
1807 | SmallVectorImpl<SDValue> &Results) { |
1808 | SDNode *N = Node; |
1809 | if (SDValue Expanded = TLI.expandFixedPointDiv(Opcode: N->getOpcode(), dl: SDLoc(N), |
1810 | LHS: N->getOperand(Num: 0), RHS: N->getOperand(Num: 1), Scale: N->getConstantOperandVal(Num: 2), DAG)) |
1811 | Results.push_back(Elt: Expanded); |
1812 | } |
1813 | |
1814 | void VectorLegalizer::ExpandStrictFPOp(SDNode *Node, |
1815 | SmallVectorImpl<SDValue> &Results) { |
1816 | if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) { |
1817 | ExpandUINT_TO_FLOAT(Node, Results); |
1818 | return; |
1819 | } |
1820 | if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) { |
1821 | ExpandFP_TO_UINT(Node, Results); |
1822 | return; |
1823 | } |
1824 | |
1825 | if (Node->getOpcode() == ISD::STRICT_FSETCC || |
1826 | Node->getOpcode() == ISD::STRICT_FSETCCS) { |
1827 | ExpandSETCC(Node, Results); |
1828 | return; |
1829 | } |
1830 | |
1831 | UnrollStrictFPOp(Node, Results); |
1832 | } |
1833 | |
1834 | void VectorLegalizer::ExpandREM(SDNode *Node, |
1835 | SmallVectorImpl<SDValue> &Results) { |
1836 | assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) && |
1837 | "Expected REM node" ); |
1838 | |
1839 | SDValue Result; |
1840 | if (!TLI.expandREM(Node, Result, DAG)) |
1841 | Result = DAG.UnrollVectorOp(N: Node); |
1842 | Results.push_back(Elt: Result); |
1843 | } |
1844 | |
1845 | void VectorLegalizer::UnrollStrictFPOp(SDNode *Node, |
1846 | SmallVectorImpl<SDValue> &Results) { |
1847 | EVT VT = Node->getValueType(ResNo: 0); |
1848 | EVT EltVT = VT.getVectorElementType(); |
1849 | unsigned NumElems = VT.getVectorNumElements(); |
1850 | unsigned NumOpers = Node->getNumOperands(); |
1851 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
1852 | |
1853 | EVT TmpEltVT = EltVT; |
1854 | if (Node->getOpcode() == ISD::STRICT_FSETCC || |
1855 | Node->getOpcode() == ISD::STRICT_FSETCCS) |
1856 | TmpEltVT = TLI.getSetCCResultType(DL: DAG.getDataLayout(), |
1857 | Context&: *DAG.getContext(), VT: TmpEltVT); |
1858 | |
1859 | EVT ValueVTs[] = {TmpEltVT, MVT::Other}; |
1860 | SDValue Chain = Node->getOperand(Num: 0); |
1861 | SDLoc dl(Node); |
1862 | |
1863 | SmallVector<SDValue, 32> OpValues; |
1864 | SmallVector<SDValue, 32> OpChains; |
1865 | for (unsigned i = 0; i < NumElems; ++i) { |
1866 | SmallVector<SDValue, 4> Opers; |
1867 | SDValue Idx = DAG.getVectorIdxConstant(Val: i, DL: dl); |
1868 | |
1869 | // The Chain is the first operand. |
1870 | Opers.push_back(Elt: Chain); |
1871 | |
1872 | // Now process the remaining operands. |
1873 | for (unsigned j = 1; j < NumOpers; ++j) { |
1874 | SDValue Oper = Node->getOperand(Num: j); |
1875 | EVT OperVT = Oper.getValueType(); |
1876 | |
1877 | if (OperVT.isVector()) |
1878 | Oper = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, |
1879 | VT: OperVT.getVectorElementType(), N1: Oper, N2: Idx); |
1880 | |
1881 | Opers.push_back(Elt: Oper); |
1882 | } |
1883 | |
1884 | SDValue ScalarOp = DAG.getNode(Node->getOpcode(), dl, ValueVTs, Opers); |
1885 | SDValue ScalarResult = ScalarOp.getValue(R: 0); |
1886 | SDValue ScalarChain = ScalarOp.getValue(R: 1); |
1887 | |
1888 | if (Node->getOpcode() == ISD::STRICT_FSETCC || |
1889 | Node->getOpcode() == ISD::STRICT_FSETCCS) |
1890 | ScalarResult = DAG.getSelect(DL: dl, VT: EltVT, Cond: ScalarResult, |
1891 | LHS: DAG.getAllOnesConstant(DL: dl, VT: EltVT), |
1892 | RHS: DAG.getConstant(Val: 0, DL: dl, VT: EltVT)); |
1893 | |
1894 | OpValues.push_back(Elt: ScalarResult); |
1895 | OpChains.push_back(Elt: ScalarChain); |
1896 | } |
1897 | |
1898 | SDValue Result = DAG.getBuildVector(VT, DL: dl, Ops: OpValues); |
1899 | SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains); |
1900 | |
1901 | Results.push_back(Elt: Result); |
1902 | Results.push_back(Elt: NewChain); |
1903 | } |
1904 | |
1905 | SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) { |
1906 | EVT VT = Node->getValueType(ResNo: 0); |
1907 | unsigned NumElems = VT.getVectorNumElements(); |
1908 | EVT EltVT = VT.getVectorElementType(); |
1909 | SDValue LHS = Node->getOperand(Num: 0); |
1910 | SDValue RHS = Node->getOperand(Num: 1); |
1911 | SDValue CC = Node->getOperand(Num: 2); |
1912 | EVT TmpEltVT = LHS.getValueType().getVectorElementType(); |
1913 | SDLoc dl(Node); |
1914 | SmallVector<SDValue, 8> Ops(NumElems); |
1915 | for (unsigned i = 0; i < NumElems; ++i) { |
1916 | SDValue LHSElem = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: TmpEltVT, N1: LHS, |
1917 | N2: DAG.getVectorIdxConstant(Val: i, DL: dl)); |
1918 | SDValue RHSElem = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: TmpEltVT, N1: RHS, |
1919 | N2: DAG.getVectorIdxConstant(Val: i, DL: dl)); |
1920 | Ops[i] = DAG.getNode(Opcode: ISD::SETCC, DL: dl, |
1921 | VT: TLI.getSetCCResultType(DL: DAG.getDataLayout(), |
1922 | Context&: *DAG.getContext(), VT: TmpEltVT), |
1923 | N1: LHSElem, N2: RHSElem, N3: CC); |
1924 | Ops[i] = DAG.getSelect(DL: dl, VT: EltVT, Cond: Ops[i], LHS: DAG.getAllOnesConstant(DL: dl, VT: EltVT), |
1925 | RHS: DAG.getConstant(Val: 0, DL: dl, VT: EltVT)); |
1926 | } |
1927 | return DAG.getBuildVector(VT, DL: dl, Ops); |
1928 | } |
1929 | |
1930 | bool SelectionDAG::LegalizeVectors() { |
1931 | return VectorLegalizer(*this).Run(); |
1932 | } |
1933 | |