1 | //===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the SelectionDAG::LegalizeVectors method. |
10 | // |
11 | // The vector legalizer looks for vector operations which might need to be |
12 | // scalarized and legalizes them. This is a separate step from Legalize because |
13 | // scalarizing can introduce illegal types. For example, suppose we have an |
14 | // ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition |
15 | // on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the |
16 | // operation, which introduces nodes with the illegal type i64 which must be |
17 | // expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC; |
18 | // the operation must be unrolled, which introduces nodes with the illegal |
19 | // type i8 which must be promoted. |
20 | // |
21 | // This does not legalize vector manipulations like ISD::BUILD_VECTOR, |
22 | // or operations that happen to take a vector which are custom-lowered; |
23 | // the legalization for such operations never produces nodes |
24 | // with illegal types, so it's okay to put off legalizing them until |
25 | // SelectionDAG::Legalize runs. |
26 | // |
27 | //===----------------------------------------------------------------------===// |
28 | |
29 | #include "llvm/ADT/DenseMap.h" |
30 | #include "llvm/ADT/SmallVector.h" |
31 | #include "llvm/Analysis/TargetLibraryInfo.h" |
32 | #include "llvm/Analysis/VectorUtils.h" |
33 | #include "llvm/CodeGen/ISDOpcodes.h" |
34 | #include "llvm/CodeGen/SelectionDAG.h" |
35 | #include "llvm/CodeGen/SelectionDAGNodes.h" |
36 | #include "llvm/CodeGen/TargetLowering.h" |
37 | #include "llvm/CodeGen/ValueTypes.h" |
38 | #include "llvm/CodeGenTypes/MachineValueType.h" |
39 | #include "llvm/IR/DataLayout.h" |
40 | #include "llvm/Support/Casting.h" |
41 | #include "llvm/Support/Compiler.h" |
42 | #include "llvm/Support/Debug.h" |
43 | #include "llvm/Support/ErrorHandling.h" |
44 | #include <cassert> |
45 | #include <cstdint> |
46 | #include <iterator> |
47 | #include <utility> |
48 | |
49 | using namespace llvm; |
50 | |
51 | #define DEBUG_TYPE "legalizevectorops" |
52 | |
53 | namespace { |
54 | |
55 | class VectorLegalizer { |
56 | SelectionDAG& DAG; |
57 | const TargetLowering &TLI; |
58 | bool Changed = false; // Keep track of whether anything changed |
59 | |
60 | /// For nodes that are of legal width, and that have more than one use, this |
61 | /// map indicates what regularized operand to use. This allows us to avoid |
62 | /// legalizing the same thing more than once. |
63 | SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes; |
64 | |
65 | /// Adds a node to the translation cache. |
66 | void AddLegalizedOperand(SDValue From, SDValue To) { |
67 | LegalizedNodes.insert(KV: std::make_pair(x&: From, y&: To)); |
68 | // If someone requests legalization of the new node, return itself. |
69 | if (From != To) |
70 | LegalizedNodes.insert(KV: std::make_pair(x&: To, y&: To)); |
71 | } |
72 | |
73 | /// Legalizes the given node. |
74 | SDValue LegalizeOp(SDValue Op); |
75 | |
76 | /// Assuming the node is legal, "legalize" the results. |
77 | SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result); |
78 | |
79 | /// Make sure Results are legal and update the translation cache. |
80 | SDValue RecursivelyLegalizeResults(SDValue Op, |
81 | MutableArrayRef<SDValue> Results); |
82 | |
83 | /// Wrapper to interface LowerOperation with a vector of Results. |
84 | /// Returns false if the target wants to use default expansion. Otherwise |
85 | /// returns true. If return is true and the Results are empty, then the |
86 | /// target wants to keep the input node as is. |
87 | bool LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results); |
88 | |
89 | /// Implements unrolling a VSETCC. |
90 | SDValue UnrollVSETCC(SDNode *Node); |
91 | |
92 | /// Implement expand-based legalization of vector operations. |
93 | /// |
94 | /// This is just a high-level routine to dispatch to specific code paths for |
95 | /// operations to legalize them. |
96 | void Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
97 | |
98 | /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if |
99 | /// FP_TO_SINT isn't legal. |
100 | void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
101 | |
102 | /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if |
103 | /// SINT_TO_FLOAT and SHR on vectors isn't legal. |
104 | void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
105 | |
106 | /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. |
107 | SDValue ExpandSEXTINREG(SDNode *Node); |
108 | |
109 | /// Implement expansion for ANY_EXTEND_VECTOR_INREG. |
110 | /// |
111 | /// Shuffles the low lanes of the operand into place and bitcasts to the proper |
112 | /// type. The contents of the bits in the extended part of each element are |
113 | /// undef. |
114 | SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node); |
115 | |
116 | /// Implement expansion for SIGN_EXTEND_VECTOR_INREG. |
117 | /// |
118 | /// Shuffles the low lanes of the operand into place, bitcasts to the proper |
119 | /// type, then shifts left and arithmetic shifts right to introduce a sign |
120 | /// extension. |
121 | SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node); |
122 | |
123 | /// Implement expansion for ZERO_EXTEND_VECTOR_INREG. |
124 | /// |
125 | /// Shuffles the low lanes of the operand into place and blends zeros into |
126 | /// the remaining lanes, finally bitcasting to the proper type. |
127 | SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node); |
128 | |
129 | /// Expand bswap of vectors into a shuffle if legal. |
130 | SDValue ExpandBSWAP(SDNode *Node); |
131 | |
132 | /// Implement vselect in terms of XOR, AND, OR when blend is not |
133 | /// supported by the target. |
134 | SDValue ExpandVSELECT(SDNode *Node); |
135 | SDValue ExpandVP_SELECT(SDNode *Node); |
136 | SDValue ExpandVP_MERGE(SDNode *Node); |
137 | SDValue ExpandVP_REM(SDNode *Node); |
138 | SDValue ExpandSELECT(SDNode *Node); |
139 | std::pair<SDValue, SDValue> ExpandLoad(SDNode *N); |
140 | SDValue ExpandStore(SDNode *N); |
141 | SDValue ExpandFNEG(SDNode *Node); |
142 | void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
143 | void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
144 | void ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
145 | void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
146 | void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
147 | void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
148 | void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
149 | void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
150 | void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
151 | |
152 | bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC, |
153 | SmallVectorImpl<SDValue> &Results); |
154 | bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall Call_F32, |
155 | RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, |
156 | RTLIB::Libcall Call_F128, |
157 | RTLIB::Libcall Call_PPCF128, |
158 | SmallVectorImpl<SDValue> &Results); |
159 | |
160 | void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
161 | |
162 | /// Implements vector promotion. |
163 | /// |
164 | /// This is essentially just bitcasting the operands to a different type and |
165 | /// bitcasting the result back to the original type. |
166 | void Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
167 | |
168 | /// Implements [SU]INT_TO_FP vector promotion. |
169 | /// |
170 | /// This is a [zs]ext of the input operand to a larger integer type. |
171 | void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
172 | |
173 | /// Implements FP_TO_[SU]INT vector promotion of the result type. |
174 | /// |
175 | /// It is promoted to a larger integer type. The result is then |
176 | /// truncated back to the original type. |
177 | void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
178 | |
179 | /// Implements vector reduce operation promotion. |
180 | /// |
181 | /// All vector operands are promoted to a vector type with larger element |
182 | /// type, and the start value is promoted to a larger scalar type. Then the |
183 | /// result is truncated back to the original scalar type. |
184 | void PromoteReduction(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
185 | |
186 | /// Implements vector setcc operation promotion. |
187 | /// |
188 | /// All vector operands are promoted to a vector type with larger element |
189 | /// type. |
190 | void PromoteSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
191 | |
192 | void PromoteSTRICT(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
193 | |
194 | public: |
195 | VectorLegalizer(SelectionDAG& dag) : |
196 | DAG(dag), TLI(dag.getTargetLoweringInfo()) {} |
197 | |
198 | /// Begin legalizer the vector operations in the DAG. |
199 | bool Run(); |
200 | }; |
201 | |
202 | } // end anonymous namespace |
203 | |
204 | bool VectorLegalizer::Run() { |
205 | // Before we start legalizing vector nodes, check if there are any vectors. |
206 | bool HasVectors = false; |
207 | for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), |
208 | E = std::prev(x: DAG.allnodes_end()); I != std::next(x: E); ++I) { |
209 | // Check if the values of the nodes contain vectors. We don't need to check |
210 | // the operands because we are going to check their values at some point. |
211 | HasVectors = llvm::any_of(Range: I->values(), P: [](EVT T) { return T.isVector(); }); |
212 | |
213 | // If we found a vector node we can start the legalization. |
214 | if (HasVectors) |
215 | break; |
216 | } |
217 | |
218 | // If this basic block has no vectors then no need to legalize vectors. |
219 | if (!HasVectors) |
220 | return false; |
221 | |
222 | // The legalize process is inherently a bottom-up recursive process (users |
223 | // legalize their uses before themselves). Given infinite stack space, we |
224 | // could just start legalizing on the root and traverse the whole graph. In |
225 | // practice however, this causes us to run out of stack space on large basic |
226 | // blocks. To avoid this problem, compute an ordering of the nodes where each |
227 | // node is only legalized after all of its operands are legalized. |
228 | DAG.AssignTopologicalOrder(); |
229 | for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), |
230 | E = std::prev(x: DAG.allnodes_end()); I != std::next(x: E); ++I) |
231 | LegalizeOp(Op: SDValue(&*I, 0)); |
232 | |
233 | // Finally, it's possible the root changed. Get the new root. |
234 | SDValue OldRoot = DAG.getRoot(); |
235 | assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?" ); |
236 | DAG.setRoot(LegalizedNodes[OldRoot]); |
237 | |
238 | LegalizedNodes.clear(); |
239 | |
240 | // Remove dead nodes now. |
241 | DAG.RemoveDeadNodes(); |
242 | |
243 | return Changed; |
244 | } |
245 | |
246 | SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) { |
247 | assert(Op->getNumValues() == Result->getNumValues() && |
248 | "Unexpected number of results" ); |
249 | // Generic legalization: just pass the operand through. |
250 | for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i) |
251 | AddLegalizedOperand(From: Op.getValue(R: i), To: SDValue(Result, i)); |
252 | return SDValue(Result, Op.getResNo()); |
253 | } |
254 | |
255 | SDValue |
256 | VectorLegalizer::RecursivelyLegalizeResults(SDValue Op, |
257 | MutableArrayRef<SDValue> Results) { |
258 | assert(Results.size() == Op->getNumValues() && |
259 | "Unexpected number of results" ); |
260 | // Make sure that the generated code is itself legal. |
261 | for (unsigned i = 0, e = Results.size(); i != e; ++i) { |
262 | Results[i] = LegalizeOp(Op: Results[i]); |
263 | AddLegalizedOperand(From: Op.getValue(R: i), To: Results[i]); |
264 | } |
265 | |
266 | return Results[Op.getResNo()]; |
267 | } |
268 | |
269 | SDValue VectorLegalizer::LegalizeOp(SDValue Op) { |
270 | // Note that LegalizeOp may be reentered even from single-use nodes, which |
271 | // means that we always must cache transformed nodes. |
272 | DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Val: Op); |
273 | if (I != LegalizedNodes.end()) return I->second; |
274 | |
275 | // Legalize the operands |
276 | SmallVector<SDValue, 8> Ops; |
277 | for (const SDValue &Oper : Op->op_values()) |
278 | Ops.push_back(Elt: LegalizeOp(Op: Oper)); |
279 | |
280 | SDNode *Node = DAG.UpdateNodeOperands(N: Op.getNode(), Ops); |
281 | |
282 | bool HasVectorValueOrOp = |
283 | llvm::any_of(Range: Node->values(), P: [](EVT T) { return T.isVector(); }) || |
284 | llvm::any_of(Range: Node->op_values(), |
285 | P: [](SDValue O) { return O.getValueType().isVector(); }); |
286 | if (!HasVectorValueOrOp) |
287 | return TranslateLegalizeResults(Op, Result: Node); |
288 | |
289 | TargetLowering::LegalizeAction Action = TargetLowering::Legal; |
290 | EVT ValVT; |
291 | switch (Op.getOpcode()) { |
292 | default: |
293 | return TranslateLegalizeResults(Op, Result: Node); |
294 | case ISD::LOAD: { |
295 | LoadSDNode *LD = cast<LoadSDNode>(Val: Node); |
296 | ISD::LoadExtType ExtType = LD->getExtensionType(); |
297 | EVT LoadedVT = LD->getMemoryVT(); |
298 | if (LoadedVT.isVector() && ExtType != ISD::NON_EXTLOAD) |
299 | Action = TLI.getLoadExtAction(ExtType, ValVT: LD->getValueType(ResNo: 0), MemVT: LoadedVT); |
300 | break; |
301 | } |
302 | case ISD::STORE: { |
303 | StoreSDNode *ST = cast<StoreSDNode>(Val: Node); |
304 | EVT StVT = ST->getMemoryVT(); |
305 | MVT ValVT = ST->getValue().getSimpleValueType(); |
306 | if (StVT.isVector() && ST->isTruncatingStore()) |
307 | Action = TLI.getTruncStoreAction(ValVT, MemVT: StVT); |
308 | break; |
309 | } |
310 | case ISD::MERGE_VALUES: |
311 | Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: Node->getValueType(ResNo: 0)); |
312 | // This operation lies about being legal: when it claims to be legal, |
313 | // it should actually be expanded. |
314 | if (Action == TargetLowering::Legal) |
315 | Action = TargetLowering::Expand; |
316 | break; |
317 | #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ |
318 | case ISD::STRICT_##DAGN: |
319 | #include "llvm/IR/ConstrainedOps.def" |
320 | ValVT = Node->getValueType(ResNo: 0); |
321 | if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP || |
322 | Op.getOpcode() == ISD::STRICT_UINT_TO_FP) |
323 | ValVT = Node->getOperand(Num: 1).getValueType(); |
324 | if (Op.getOpcode() == ISD::STRICT_FSETCC || |
325 | Op.getOpcode() == ISD::STRICT_FSETCCS) { |
326 | MVT OpVT = Node->getOperand(Num: 1).getSimpleValueType(); |
327 | ISD::CondCode CCCode = cast<CondCodeSDNode>(Val: Node->getOperand(Num: 3))->get(); |
328 | Action = TLI.getCondCodeAction(CC: CCCode, VT: OpVT); |
329 | if (Action == TargetLowering::Legal) |
330 | Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: OpVT); |
331 | } else { |
332 | Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: ValVT); |
333 | } |
334 | // If we're asked to expand a strict vector floating-point operation, |
335 | // by default we're going to simply unroll it. That is usually the |
336 | // best approach, except in the case where the resulting strict (scalar) |
337 | // operations would themselves use the fallback mutation to non-strict. |
338 | // In that specific case, just do the fallback on the vector op. |
339 | if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() && |
340 | TLI.getStrictFPOperationAction(Op: Node->getOpcode(), VT: ValVT) == |
341 | TargetLowering::Legal) { |
342 | EVT EltVT = ValVT.getVectorElementType(); |
343 | if (TLI.getOperationAction(Op: Node->getOpcode(), VT: EltVT) |
344 | == TargetLowering::Expand && |
345 | TLI.getStrictFPOperationAction(Op: Node->getOpcode(), VT: EltVT) |
346 | == TargetLowering::Legal) |
347 | Action = TargetLowering::Legal; |
348 | } |
349 | break; |
350 | case ISD::ADD: |
351 | case ISD::SUB: |
352 | case ISD::MUL: |
353 | case ISD::MULHS: |
354 | case ISD::MULHU: |
355 | case ISD::SDIV: |
356 | case ISD::UDIV: |
357 | case ISD::SREM: |
358 | case ISD::UREM: |
359 | case ISD::SDIVREM: |
360 | case ISD::UDIVREM: |
361 | case ISD::FADD: |
362 | case ISD::FSUB: |
363 | case ISD::FMUL: |
364 | case ISD::FDIV: |
365 | case ISD::FREM: |
366 | case ISD::AND: |
367 | case ISD::OR: |
368 | case ISD::XOR: |
369 | case ISD::SHL: |
370 | case ISD::SRA: |
371 | case ISD::SRL: |
372 | case ISD::FSHL: |
373 | case ISD::FSHR: |
374 | case ISD::ROTL: |
375 | case ISD::ROTR: |
376 | case ISD::ABS: |
377 | case ISD::BSWAP: |
378 | case ISD::BITREVERSE: |
379 | case ISD::CTLZ: |
380 | case ISD::CTTZ: |
381 | case ISD::CTLZ_ZERO_UNDEF: |
382 | case ISD::CTTZ_ZERO_UNDEF: |
383 | case ISD::CTPOP: |
384 | case ISD::SELECT: |
385 | case ISD::VSELECT: |
386 | case ISD::SELECT_CC: |
387 | case ISD::ZERO_EXTEND: |
388 | case ISD::ANY_EXTEND: |
389 | case ISD::TRUNCATE: |
390 | case ISD::SIGN_EXTEND: |
391 | case ISD::FP_TO_SINT: |
392 | case ISD::FP_TO_UINT: |
393 | case ISD::FNEG: |
394 | case ISD::FABS: |
395 | case ISD::FMINNUM: |
396 | case ISD::FMAXNUM: |
397 | case ISD::FMINNUM_IEEE: |
398 | case ISD::FMAXNUM_IEEE: |
399 | case ISD::FMINIMUM: |
400 | case ISD::FMAXIMUM: |
401 | case ISD::FCOPYSIGN: |
402 | case ISD::FSQRT: |
403 | case ISD::FSIN: |
404 | case ISD::FCOS: |
405 | case ISD::FLDEXP: |
406 | case ISD::FPOWI: |
407 | case ISD::FPOW: |
408 | case ISD::FLOG: |
409 | case ISD::FLOG2: |
410 | case ISD::FLOG10: |
411 | case ISD::FEXP: |
412 | case ISD::FEXP2: |
413 | case ISD::FEXP10: |
414 | case ISD::FCEIL: |
415 | case ISD::FTRUNC: |
416 | case ISD::FRINT: |
417 | case ISD::FNEARBYINT: |
418 | case ISD::FROUND: |
419 | case ISD::FROUNDEVEN: |
420 | case ISD::FFLOOR: |
421 | case ISD::FP_ROUND: |
422 | case ISD::FP_EXTEND: |
423 | case ISD::FPTRUNC_ROUND: |
424 | case ISD::FMA: |
425 | case ISD::SIGN_EXTEND_INREG: |
426 | case ISD::ANY_EXTEND_VECTOR_INREG: |
427 | case ISD::SIGN_EXTEND_VECTOR_INREG: |
428 | case ISD::ZERO_EXTEND_VECTOR_INREG: |
429 | case ISD::SMIN: |
430 | case ISD::SMAX: |
431 | case ISD::UMIN: |
432 | case ISD::UMAX: |
433 | case ISD::SMUL_LOHI: |
434 | case ISD::UMUL_LOHI: |
435 | case ISD::SADDO: |
436 | case ISD::UADDO: |
437 | case ISD::SSUBO: |
438 | case ISD::USUBO: |
439 | case ISD::SMULO: |
440 | case ISD::UMULO: |
441 | case ISD::FCANONICALIZE: |
442 | case ISD::FFREXP: |
443 | case ISD::SADDSAT: |
444 | case ISD::UADDSAT: |
445 | case ISD::SSUBSAT: |
446 | case ISD::USUBSAT: |
447 | case ISD::SSHLSAT: |
448 | case ISD::USHLSAT: |
449 | case ISD::FP_TO_SINT_SAT: |
450 | case ISD::FP_TO_UINT_SAT: |
451 | case ISD::MGATHER: |
452 | Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: Node->getValueType(ResNo: 0)); |
453 | break; |
454 | case ISD::SMULFIX: |
455 | case ISD::SMULFIXSAT: |
456 | case ISD::UMULFIX: |
457 | case ISD::UMULFIXSAT: |
458 | case ISD::SDIVFIX: |
459 | case ISD::SDIVFIXSAT: |
460 | case ISD::UDIVFIX: |
461 | case ISD::UDIVFIXSAT: { |
462 | unsigned Scale = Node->getConstantOperandVal(Num: 2); |
463 | Action = TLI.getFixedPointOperationAction(Op: Node->getOpcode(), |
464 | VT: Node->getValueType(ResNo: 0), Scale); |
465 | break; |
466 | } |
467 | case ISD::LRINT: |
468 | case ISD::LLRINT: |
469 | case ISD::SINT_TO_FP: |
470 | case ISD::UINT_TO_FP: |
471 | case ISD::VECREDUCE_ADD: |
472 | case ISD::VECREDUCE_MUL: |
473 | case ISD::VECREDUCE_AND: |
474 | case ISD::VECREDUCE_OR: |
475 | case ISD::VECREDUCE_XOR: |
476 | case ISD::VECREDUCE_SMAX: |
477 | case ISD::VECREDUCE_SMIN: |
478 | case ISD::VECREDUCE_UMAX: |
479 | case ISD::VECREDUCE_UMIN: |
480 | case ISD::VECREDUCE_FADD: |
481 | case ISD::VECREDUCE_FMUL: |
482 | case ISD::VECREDUCE_FMAX: |
483 | case ISD::VECREDUCE_FMIN: |
484 | case ISD::VECREDUCE_FMAXIMUM: |
485 | case ISD::VECREDUCE_FMINIMUM: |
486 | Action = TLI.getOperationAction(Op: Node->getOpcode(), |
487 | VT: Node->getOperand(Num: 0).getValueType()); |
488 | break; |
489 | case ISD::VECREDUCE_SEQ_FADD: |
490 | case ISD::VECREDUCE_SEQ_FMUL: |
491 | Action = TLI.getOperationAction(Op: Node->getOpcode(), |
492 | VT: Node->getOperand(Num: 1).getValueType()); |
493 | break; |
494 | case ISD::SETCC: { |
495 | MVT OpVT = Node->getOperand(Num: 0).getSimpleValueType(); |
496 | ISD::CondCode CCCode = cast<CondCodeSDNode>(Val: Node->getOperand(Num: 2))->get(); |
497 | Action = TLI.getCondCodeAction(CC: CCCode, VT: OpVT); |
498 | if (Action == TargetLowering::Legal) |
499 | Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: OpVT); |
500 | break; |
501 | } |
502 | |
503 | #define BEGIN_REGISTER_VP_SDNODE(VPID, LEGALPOS, ...) \ |
504 | case ISD::VPID: { \ |
505 | EVT LegalizeVT = LEGALPOS < 0 ? Node->getValueType(-(1 + LEGALPOS)) \ |
506 | : Node->getOperand(LEGALPOS).getValueType(); \ |
507 | if (ISD::VPID == ISD::VP_SETCC) { \ |
508 | ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); \ |
509 | Action = TLI.getCondCodeAction(CCCode, LegalizeVT.getSimpleVT()); \ |
510 | if (Action != TargetLowering::Legal) \ |
511 | break; \ |
512 | } \ |
513 | Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT); \ |
514 | } break; |
515 | #include "llvm/IR/VPIntrinsics.def" |
516 | } |
517 | |
518 | LLVM_DEBUG(dbgs() << "\nLegalizing vector op: " ; Node->dump(&DAG)); |
519 | |
520 | SmallVector<SDValue, 8> ResultVals; |
521 | switch (Action) { |
522 | default: llvm_unreachable("This action is not supported yet!" ); |
523 | case TargetLowering::Promote: |
524 | assert((Op.getOpcode() != ISD::LOAD && Op.getOpcode() != ISD::STORE) && |
525 | "This action is not supported yet!" ); |
526 | LLVM_DEBUG(dbgs() << "Promoting\n" ); |
527 | Promote(Node, Results&: ResultVals); |
528 | assert(!ResultVals.empty() && "No results for promotion?" ); |
529 | break; |
530 | case TargetLowering::Legal: |
531 | LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n" ); |
532 | break; |
533 | case TargetLowering::Custom: |
534 | LLVM_DEBUG(dbgs() << "Trying custom legalization\n" ); |
535 | if (LowerOperationWrapper(N: Node, Results&: ResultVals)) |
536 | break; |
537 | LLVM_DEBUG(dbgs() << "Could not custom legalize node\n" ); |
538 | [[fallthrough]]; |
539 | case TargetLowering::Expand: |
540 | LLVM_DEBUG(dbgs() << "Expanding\n" ); |
541 | Expand(Node, Results&: ResultVals); |
542 | break; |
543 | } |
544 | |
545 | if (ResultVals.empty()) |
546 | return TranslateLegalizeResults(Op, Result: Node); |
547 | |
548 | Changed = true; |
549 | return RecursivelyLegalizeResults(Op, Results: ResultVals); |
550 | } |
551 | |
552 | // FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we |
553 | // merge them somehow? |
554 | bool VectorLegalizer::LowerOperationWrapper(SDNode *Node, |
555 | SmallVectorImpl<SDValue> &Results) { |
556 | SDValue Res = TLI.LowerOperation(Op: SDValue(Node, 0), DAG); |
557 | |
558 | if (!Res.getNode()) |
559 | return false; |
560 | |
561 | if (Res == SDValue(Node, 0)) |
562 | return true; |
563 | |
564 | // If the original node has one result, take the return value from |
565 | // LowerOperation as is. It might not be result number 0. |
566 | if (Node->getNumValues() == 1) { |
567 | Results.push_back(Elt: Res); |
568 | return true; |
569 | } |
570 | |
571 | // If the original node has multiple results, then the return node should |
572 | // have the same number of results. |
573 | assert((Node->getNumValues() == Res->getNumValues()) && |
574 | "Lowering returned the wrong number of results!" ); |
575 | |
576 | // Places new result values base on N result number. |
577 | for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I) |
578 | Results.push_back(Elt: Res.getValue(R: I)); |
579 | |
580 | return true; |
581 | } |
582 | |
583 | void VectorLegalizer::PromoteReduction(SDNode *Node, |
584 | SmallVectorImpl<SDValue> &Results) { |
585 | MVT VecVT = Node->getOperand(Num: 1).getSimpleValueType(); |
586 | MVT NewVecVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: VecVT); |
587 | MVT ScalarVT = Node->getSimpleValueType(ResNo: 0); |
588 | MVT NewScalarVT = NewVecVT.getVectorElementType(); |
589 | |
590 | SDLoc DL(Node); |
591 | SmallVector<SDValue, 4> Operands(Node->getNumOperands()); |
592 | |
593 | // promote the initial value. |
594 | if (Node->getOperand(Num: 0).getValueType().isFloatingPoint()) |
595 | Operands[0] = |
596 | DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: NewScalarVT, Operand: Node->getOperand(Num: 0)); |
597 | else |
598 | Operands[0] = |
599 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: NewScalarVT, Operand: Node->getOperand(Num: 0)); |
600 | |
601 | for (unsigned j = 1; j != Node->getNumOperands(); ++j) |
602 | if (Node->getOperand(Num: j).getValueType().isVector() && |
603 | !(ISD::isVPOpcode(Opcode: Node->getOpcode()) && |
604 | ISD::getVPMaskIdx(Opcode: Node->getOpcode()) == j)) // Skip mask operand. |
605 | // promote the vector operand. |
606 | if (Node->getOperand(Num: j).getValueType().isFloatingPoint()) |
607 | Operands[j] = |
608 | DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: NewVecVT, Operand: Node->getOperand(Num: j)); |
609 | else |
610 | Operands[j] = |
611 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: NewVecVT, Operand: Node->getOperand(Num: j)); |
612 | else |
613 | Operands[j] = Node->getOperand(Num: j); // Skip VL operand. |
614 | |
615 | SDValue Res = DAG.getNode(Opcode: Node->getOpcode(), DL, VT: NewScalarVT, Ops: Operands, |
616 | Flags: Node->getFlags()); |
617 | |
618 | if (ScalarVT.isFloatingPoint()) |
619 | Res = DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: ScalarVT, N1: Res, |
620 | N2: DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true)); |
621 | else |
622 | Res = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: ScalarVT, Operand: Res); |
623 | |
624 | Results.push_back(Elt: Res); |
625 | } |
626 | |
627 | void VectorLegalizer::PromoteSETCC(SDNode *Node, |
628 | SmallVectorImpl<SDValue> &Results) { |
629 | MVT VecVT = Node->getOperand(Num: 0).getSimpleValueType(); |
630 | MVT NewVecVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: VecVT); |
631 | |
632 | unsigned ExtOp = VecVT.isFloatingPoint() ? ISD::FP_EXTEND : ISD::ANY_EXTEND; |
633 | |
634 | SDLoc DL(Node); |
635 | SmallVector<SDValue, 5> Operands(Node->getNumOperands()); |
636 | |
637 | Operands[0] = DAG.getNode(Opcode: ExtOp, DL, VT: NewVecVT, Operand: Node->getOperand(Num: 0)); |
638 | Operands[1] = DAG.getNode(Opcode: ExtOp, DL, VT: NewVecVT, Operand: Node->getOperand(Num: 1)); |
639 | Operands[2] = Node->getOperand(Num: 2); |
640 | |
641 | if (Node->getOpcode() == ISD::VP_SETCC) { |
642 | Operands[3] = Node->getOperand(Num: 3); // mask |
643 | Operands[4] = Node->getOperand(Num: 4); // evl |
644 | } |
645 | |
646 | SDValue Res = DAG.getNode(Opcode: Node->getOpcode(), DL, VT: Node->getSimpleValueType(ResNo: 0), |
647 | Ops: Operands, Flags: Node->getFlags()); |
648 | |
649 | Results.push_back(Elt: Res); |
650 | } |
651 | |
652 | void VectorLegalizer::PromoteSTRICT(SDNode *Node, |
653 | SmallVectorImpl<SDValue> &Results) { |
654 | MVT VecVT = Node->getOperand(Num: 1).getSimpleValueType(); |
655 | MVT NewVecVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: VecVT); |
656 | |
657 | assert(VecVT.isFloatingPoint()); |
658 | |
659 | SDLoc DL(Node); |
660 | SmallVector<SDValue, 5> Operands(Node->getNumOperands()); |
661 | SmallVector<SDValue, 2> Chains; |
662 | |
663 | for (unsigned j = 1; j != Node->getNumOperands(); ++j) |
664 | if (Node->getOperand(Num: j).getValueType().isVector() && |
665 | !(ISD::isVPOpcode(Opcode: Node->getOpcode()) && |
666 | ISD::getVPMaskIdx(Opcode: Node->getOpcode()) == j)) // Skip mask operand. |
667 | { |
668 | // promote the vector operand. |
669 | SDValue Ext = |
670 | DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {NewVecVT, MVT::Other}, |
671 | {Node->getOperand(0), Node->getOperand(j)}); |
672 | Operands[j] = Ext.getValue(R: 0); |
673 | Chains.push_back(Elt: Ext.getValue(R: 1)); |
674 | } else |
675 | Operands[j] = Node->getOperand(Num: j); // Skip no vector operand. |
676 | |
677 | SDVTList VTs = DAG.getVTList(VT1: NewVecVT, VT2: Node->getValueType(ResNo: 1)); |
678 | |
679 | Operands[0] = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); |
680 | |
681 | SDValue Res = |
682 | DAG.getNode(Opcode: Node->getOpcode(), DL, VTList: VTs, Ops: Operands, Flags: Node->getFlags()); |
683 | |
684 | SDValue Round = |
685 | DAG.getNode(ISD::STRICT_FP_ROUND, DL, {VecVT, MVT::Other}, |
686 | {Res.getValue(1), Res.getValue(0), |
687 | DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)}); |
688 | |
689 | Results.push_back(Elt: Round.getValue(R: 0)); |
690 | Results.push_back(Elt: Round.getValue(R: 1)); |
691 | } |
692 | |
693 | void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) { |
694 | // For a few operations there is a specific concept for promotion based on |
695 | // the operand's type. |
696 | switch (Node->getOpcode()) { |
697 | case ISD::SINT_TO_FP: |
698 | case ISD::UINT_TO_FP: |
699 | case ISD::STRICT_SINT_TO_FP: |
700 | case ISD::STRICT_UINT_TO_FP: |
701 | // "Promote" the operation by extending the operand. |
702 | PromoteINT_TO_FP(Node, Results); |
703 | return; |
704 | case ISD::FP_TO_UINT: |
705 | case ISD::FP_TO_SINT: |
706 | case ISD::STRICT_FP_TO_UINT: |
707 | case ISD::STRICT_FP_TO_SINT: |
708 | // Promote the operation by extending the operand. |
709 | PromoteFP_TO_INT(Node, Results); |
710 | return; |
711 | case ISD::VP_REDUCE_ADD: |
712 | case ISD::VP_REDUCE_MUL: |
713 | case ISD::VP_REDUCE_AND: |
714 | case ISD::VP_REDUCE_OR: |
715 | case ISD::VP_REDUCE_XOR: |
716 | case ISD::VP_REDUCE_SMAX: |
717 | case ISD::VP_REDUCE_SMIN: |
718 | case ISD::VP_REDUCE_UMAX: |
719 | case ISD::VP_REDUCE_UMIN: |
720 | case ISD::VP_REDUCE_FADD: |
721 | case ISD::VP_REDUCE_FMUL: |
722 | case ISD::VP_REDUCE_FMAX: |
723 | case ISD::VP_REDUCE_FMIN: |
724 | case ISD::VP_REDUCE_SEQ_FADD: |
725 | // Promote the operation by extending the operand. |
726 | PromoteReduction(Node, Results); |
727 | return; |
728 | case ISD::VP_SETCC: |
729 | case ISD::SETCC: |
730 | // Promote the operation by extending the operand. |
731 | PromoteSETCC(Node, Results); |
732 | return; |
733 | case ISD::STRICT_FADD: |
734 | case ISD::STRICT_FSUB: |
735 | case ISD::STRICT_FMUL: |
736 | case ISD::STRICT_FDIV: |
737 | case ISD::STRICT_FSQRT: |
738 | case ISD::STRICT_FMA: |
739 | PromoteSTRICT(Node, Results); |
740 | return; |
741 | case ISD::FP_ROUND: |
742 | case ISD::FP_EXTEND: |
743 | // These operations are used to do promotion so they can't be promoted |
744 | // themselves. |
745 | llvm_unreachable("Don't know how to promote this operation!" ); |
746 | } |
747 | |
748 | // There are currently two cases of vector promotion: |
749 | // 1) Bitcasting a vector of integers to a different type to a vector of the |
750 | // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64. |
751 | // 2) Extending a vector of floats to a vector of the same number of larger |
752 | // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32. |
753 | assert(Node->getNumValues() == 1 && |
754 | "Can't promote a vector with multiple results!" ); |
755 | MVT VT = Node->getSimpleValueType(ResNo: 0); |
756 | MVT NVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT); |
757 | SDLoc dl(Node); |
758 | SmallVector<SDValue, 4> Operands(Node->getNumOperands()); |
759 | |
760 | for (unsigned j = 0; j != Node->getNumOperands(); ++j) { |
761 | // Do not promote the mask operand of a VP OP. |
762 | bool SkipPromote = ISD::isVPOpcode(Opcode: Node->getOpcode()) && |
763 | ISD::getVPMaskIdx(Opcode: Node->getOpcode()) == j; |
764 | if (Node->getOperand(Num: j).getValueType().isVector() && !SkipPromote) |
765 | if (Node->getOperand(Num: j) |
766 | .getValueType() |
767 | .getVectorElementType() |
768 | .isFloatingPoint() && |
769 | NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()) |
770 | Operands[j] = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: NVT, Operand: Node->getOperand(Num: j)); |
771 | else |
772 | Operands[j] = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: NVT, Operand: Node->getOperand(Num: j)); |
773 | else |
774 | Operands[j] = Node->getOperand(Num: j); |
775 | } |
776 | |
777 | SDValue Res = |
778 | DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VT: NVT, Ops: Operands, Flags: Node->getFlags()); |
779 | |
780 | if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) || |
781 | (VT.isVector() && VT.getVectorElementType().isFloatingPoint() && |
782 | NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())) |
783 | Res = DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT, N1: Res, |
784 | N2: DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true)); |
785 | else |
786 | Res = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: Res); |
787 | |
788 | Results.push_back(Elt: Res); |
789 | } |
790 | |
791 | void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node, |
792 | SmallVectorImpl<SDValue> &Results) { |
793 | // INT_TO_FP operations may require the input operand be promoted even |
794 | // when the type is otherwise legal. |
795 | bool IsStrict = Node->isStrictFPOpcode(); |
796 | MVT VT = Node->getOperand(Num: IsStrict ? 1 : 0).getSimpleValueType(); |
797 | MVT NVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT); |
798 | assert(NVT.getVectorNumElements() == VT.getVectorNumElements() && |
799 | "Vectors have different number of elements!" ); |
800 | |
801 | SDLoc dl(Node); |
802 | SmallVector<SDValue, 4> Operands(Node->getNumOperands()); |
803 | |
804 | unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP || |
805 | Node->getOpcode() == ISD::STRICT_UINT_TO_FP) |
806 | ? ISD::ZERO_EXTEND |
807 | : ISD::SIGN_EXTEND; |
808 | for (unsigned j = 0; j != Node->getNumOperands(); ++j) { |
809 | if (Node->getOperand(Num: j).getValueType().isVector()) |
810 | Operands[j] = DAG.getNode(Opcode: Opc, DL: dl, VT: NVT, Operand: Node->getOperand(Num: j)); |
811 | else |
812 | Operands[j] = Node->getOperand(Num: j); |
813 | } |
814 | |
815 | if (IsStrict) { |
816 | SDValue Res = DAG.getNode(Node->getOpcode(), dl, |
817 | {Node->getValueType(0), MVT::Other}, Operands); |
818 | Results.push_back(Elt: Res); |
819 | Results.push_back(Elt: Res.getValue(R: 1)); |
820 | return; |
821 | } |
822 | |
823 | SDValue Res = |
824 | DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VT: Node->getValueType(ResNo: 0), Ops: Operands); |
825 | Results.push_back(Elt: Res); |
826 | } |
827 | |
828 | // For FP_TO_INT we promote the result type to a vector type with wider |
829 | // elements and then truncate the result. This is different from the default |
830 | // PromoteVector which uses bitcast to promote thus assumning that the |
831 | // promoted vector type has the same overall size. |
832 | void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node, |
833 | SmallVectorImpl<SDValue> &Results) { |
834 | MVT VT = Node->getSimpleValueType(ResNo: 0); |
835 | MVT NVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT); |
836 | bool IsStrict = Node->isStrictFPOpcode(); |
837 | assert(NVT.getVectorNumElements() == VT.getVectorNumElements() && |
838 | "Vectors have different number of elements!" ); |
839 | |
840 | unsigned NewOpc = Node->getOpcode(); |
841 | // Change FP_TO_UINT to FP_TO_SINT if possible. |
842 | // TODO: Should we only do this if FP_TO_UINT itself isn't legal? |
843 | if (NewOpc == ISD::FP_TO_UINT && |
844 | TLI.isOperationLegalOrCustom(Op: ISD::FP_TO_SINT, VT: NVT)) |
845 | NewOpc = ISD::FP_TO_SINT; |
846 | |
847 | if (NewOpc == ISD::STRICT_FP_TO_UINT && |
848 | TLI.isOperationLegalOrCustom(Op: ISD::STRICT_FP_TO_SINT, VT: NVT)) |
849 | NewOpc = ISD::STRICT_FP_TO_SINT; |
850 | |
851 | SDLoc dl(Node); |
852 | SDValue Promoted, Chain; |
853 | if (IsStrict) { |
854 | Promoted = DAG.getNode(NewOpc, dl, {NVT, MVT::Other}, |
855 | {Node->getOperand(0), Node->getOperand(1)}); |
856 | Chain = Promoted.getValue(R: 1); |
857 | } else |
858 | Promoted = DAG.getNode(Opcode: NewOpc, DL: dl, VT: NVT, Operand: Node->getOperand(Num: 0)); |
859 | |
860 | // Assert that the converted value fits in the original type. If it doesn't |
861 | // (eg: because the value being converted is too big), then the result of the |
862 | // original operation was undefined anyway, so the assert is still correct. |
863 | if (Node->getOpcode() == ISD::FP_TO_UINT || |
864 | Node->getOpcode() == ISD::STRICT_FP_TO_UINT) |
865 | NewOpc = ISD::AssertZext; |
866 | else |
867 | NewOpc = ISD::AssertSext; |
868 | |
869 | Promoted = DAG.getNode(Opcode: NewOpc, DL: dl, VT: NVT, N1: Promoted, |
870 | N2: DAG.getValueType(VT.getScalarType())); |
871 | Promoted = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Promoted); |
872 | Results.push_back(Elt: Promoted); |
873 | if (IsStrict) |
874 | Results.push_back(Elt: Chain); |
875 | } |
876 | |
877 | std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) { |
878 | LoadSDNode *LD = cast<LoadSDNode>(Val: N); |
879 | return TLI.scalarizeVectorLoad(LD, DAG); |
880 | } |
881 | |
882 | SDValue VectorLegalizer::ExpandStore(SDNode *N) { |
883 | StoreSDNode *ST = cast<StoreSDNode>(Val: N); |
884 | SDValue TF = TLI.scalarizeVectorStore(ST, DAG); |
885 | return TF; |
886 | } |
887 | |
// Expand the given node into a sequence of simpler operations. Each case
// either pushes its replacement value(s) into Results and returns, or breaks
// out of the switch to fall through to the generic unroll at the bottom.
void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
  switch (Node->getOpcode()) {
  case ISD::LOAD: {
    // Loads produce two results: the loaded value and the chain.
    std::pair<SDValue, SDValue> Tmp = ExpandLoad(N: Node);
    Results.push_back(Elt: Tmp.first);
    Results.push_back(Elt: Tmp.second);
    return;
  }
  case ISD::STORE:
    Results.push_back(Elt: ExpandStore(N: Node));
    return;
  case ISD::MERGE_VALUES:
    // MERGE_VALUES just forwards its operands as results.
    for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
      Results.push_back(Elt: Node->getOperand(Num: i));
    return;
  case ISD::SIGN_EXTEND_INREG:
    Results.push_back(Elt: ExpandSEXTINREG(Node));
    return;
  case ISD::ANY_EXTEND_VECTOR_INREG:
    Results.push_back(Elt: ExpandANY_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    Results.push_back(Elt: ExpandSIGN_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    Results.push_back(Elt: ExpandZERO_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::BSWAP:
    Results.push_back(Elt: ExpandBSWAP(Node));
    return;
  case ISD::VP_BSWAP:
    Results.push_back(Elt: TLI.expandVPBSWAP(N: Node, DAG));
    return;
  case ISD::VSELECT:
    Results.push_back(Elt: ExpandVSELECT(Node));
    return;
  case ISD::VP_SELECT:
    Results.push_back(Elt: ExpandVP_SELECT(Node));
    return;
  case ISD::VP_SREM:
  case ISD::VP_UREM:
    // The helper can fail; fall through to unrolling if it does.
    if (SDValue Expanded = ExpandVP_REM(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::SELECT:
    Results.push_back(Elt: ExpandSELECT(Node));
    return;
  case ISD::SELECT_CC: {
    // Scalable vectors can't be unrolled, so lower SELECT_CC into
    // SETCC + SELECT instead.
    if (Node->getValueType(ResNo: 0).isScalableVector()) {
      EVT CondVT = TLI.getSetCCResultType(
          DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: 0));
      SDValue SetCC =
          DAG.getNode(Opcode: ISD::SETCC, DL: SDLoc(Node), VT: CondVT, N1: Node->getOperand(Num: 0),
                      N2: Node->getOperand(Num: 1), N3: Node->getOperand(Num: 4));
      Results.push_back(Elt: DAG.getSelect(DL: SDLoc(Node), VT: Node->getValueType(ResNo: 0), Cond: SetCC,
                                    LHS: Node->getOperand(Num: 2),
                                    RHS: Node->getOperand(Num: 3)));
      return;
    }
    break;
  }
  case ISD::FP_TO_UINT:
    ExpandFP_TO_UINT(Node, Results);
    return;
  case ISD::UINT_TO_FP:
    ExpandUINT_TO_FLOAT(Node, Results);
    return;
  case ISD::FNEG:
    Results.push_back(Elt: ExpandFNEG(Node));
    return;
  case ISD::FSUB:
    ExpandFSUB(Node, Results);
    return;
  case ISD::SETCC:
  case ISD::VP_SETCC:
    ExpandSETCC(Node, Results);
    return;
  case ISD::ABS:
    if (SDValue Expanded = TLI.expandABS(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::ABDS:
  case ISD::ABDU:
    if (SDValue Expanded = TLI.expandABD(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::BITREVERSE:
    ExpandBITREVERSE(Node, Results);
    return;
  case ISD::VP_BITREVERSE:
    if (SDValue Expanded = TLI.expandVPBITREVERSE(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::CTPOP:
    if (SDValue Expanded = TLI.expandCTPOP(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_CTPOP:
    if (SDValue Expanded = TLI.expandVPCTPOP(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::CTLZ:
  case ISD::CTLZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandCTLZ(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_CTLZ:
  case ISD::VP_CTLZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandVPCTLZ(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::CTTZ:
  case ISD::CTTZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandCTTZ(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_CTTZ:
  case ISD::VP_CTTZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandVPCTTZ(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FSHL:
  case ISD::VP_FSHL:
  case ISD::FSHR:
  case ISD::VP_FSHR:
    if (SDValue Expanded = TLI.expandFunnelShift(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::ROTL:
  case ISD::ROTR:
    if (SDValue Expanded = TLI.expandROT(N: Node, AllowVectorOps: false /*AllowVectorOps*/, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
    if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:
    if (SDValue Expanded = TLI.expandIntMINMAX(Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::UADDO:
  case ISD::USUBO:
    ExpandUADDSUBO(Node, Results);
    return;
  case ISD::SADDO:
  case ISD::SSUBO:
    ExpandSADDSUBO(Node, Results);
    return;
  case ISD::UMULO:
  case ISD::SMULO:
    ExpandMULO(Node, Results);
    return;
  case ISD::USUBSAT:
  case ISD::SSUBSAT:
  case ISD::UADDSAT:
  case ISD::SADDSAT:
    if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::USHLSAT:
  case ISD::SSHLSAT:
    if (SDValue Expanded = TLI.expandShlSat(Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
    // Expand the fp_to_int_sat if it is scalable to prevent it from unrolling below.
    if (Node->getValueType(ResNo: 0).isScalableVector()) {
      if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(N: Node, DAG)) {
        Results.push_back(Elt: Expanded);
        return;
      }
    }
    break;
  case ISD::SMULFIX:
  case ISD::UMULFIX:
    if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::SMULFIXSAT:
  case ISD::UMULFIXSAT:
    // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly
    // why. Maybe it results in worse codegen compared to the unroll for some
    // targets? This should probably be investigated. And if we still prefer to
    // unroll an explanation could be helpful.
    break;
  case ISD::SDIVFIX:
  case ISD::UDIVFIX:
    ExpandFixedPointDiv(Node, Results);
    return;
  case ISD::SDIVFIXSAT:
  case ISD::UDIVFIXSAT:
    // Saturating fixed-point division is deliberately left to the unroll
    // fallback below.
    break;
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
  case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
    ExpandStrictFPOp(Node, Results);
    return;
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_FMUL:
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMIN:
  case ISD::VECREDUCE_FMAXIMUM:
  case ISD::VECREDUCE_FMINIMUM:
    Results.push_back(Elt: TLI.expandVecReduce(Node, DAG));
    return;
  case ISD::VECREDUCE_SEQ_FADD:
  case ISD::VECREDUCE_SEQ_FMUL:
    Results.push_back(Elt: TLI.expandVecReduceSeq(Node, DAG));
    return;
  case ISD::SREM:
  case ISD::UREM:
    ExpandREM(Node, Results);
    return;
  case ISD::VP_MERGE:
    Results.push_back(Elt: ExpandVP_MERGE(Node));
    return;
  case ISD::FREM:
    if (tryExpandVecMathCall(Node, Call_F32: RTLIB::REM_F32, Call_F64: RTLIB::REM_F64,
                             Call_F80: RTLIB::REM_F80, Call_F128: RTLIB::REM_F128,
                             Call_PPCF128: RTLIB::REM_PPCF128, Results))
      return;

    break;
  }

  // Generic fallback: scalarize the operation element by element.
  SDValue Unrolled = DAG.UnrollVectorOp(N: Node);
  if (Node->getNumValues() == 1) {
    Results.push_back(Elt: Unrolled);
  } else {
    assert(Node->getNumValues() == Unrolled->getNumValues() &&
           "VectorLegalizer Expand returned wrong number of results!" );
    for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I)
      Results.push_back(Elt: Unrolled.getValue(R: I));
  }
}
1172 | |
1173 | SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) { |
1174 | // Lower a select instruction where the condition is a scalar and the |
1175 | // operands are vectors. Lower this select to VSELECT and implement it |
1176 | // using XOR AND OR. The selector bit is broadcasted. |
1177 | EVT VT = Node->getValueType(ResNo: 0); |
1178 | SDLoc DL(Node); |
1179 | |
1180 | SDValue Mask = Node->getOperand(Num: 0); |
1181 | SDValue Op1 = Node->getOperand(Num: 1); |
1182 | SDValue Op2 = Node->getOperand(Num: 2); |
1183 | |
1184 | assert(VT.isVector() && !Mask.getValueType().isVector() |
1185 | && Op1.getValueType() == Op2.getValueType() && "Invalid type" ); |
1186 | |
1187 | // If we can't even use the basic vector operations of |
1188 | // AND,OR,XOR, we will have to scalarize the op. |
1189 | // Notice that the operation may be 'promoted' which means that it is |
1190 | // 'bitcasted' to another type which is handled. |
1191 | // Also, we need to be able to construct a splat vector using either |
1192 | // BUILD_VECTOR or SPLAT_VECTOR. |
1193 | // FIXME: Should we also permit fixed-length SPLAT_VECTOR as a fallback to |
1194 | // BUILD_VECTOR? |
1195 | if (TLI.getOperationAction(Op: ISD::AND, VT) == TargetLowering::Expand || |
1196 | TLI.getOperationAction(Op: ISD::XOR, VT) == TargetLowering::Expand || |
1197 | TLI.getOperationAction(Op: ISD::OR, VT) == TargetLowering::Expand || |
1198 | TLI.getOperationAction(Op: VT.isFixedLengthVector() ? ISD::BUILD_VECTOR |
1199 | : ISD::SPLAT_VECTOR, |
1200 | VT) == TargetLowering::Expand) |
1201 | return DAG.UnrollVectorOp(N: Node); |
1202 | |
1203 | // Generate a mask operand. |
1204 | EVT MaskTy = VT.changeVectorElementTypeToInteger(); |
1205 | |
1206 | // What is the size of each element in the vector mask. |
1207 | EVT BitTy = MaskTy.getScalarType(); |
1208 | |
1209 | Mask = DAG.getSelect(DL, VT: BitTy, Cond: Mask, LHS: DAG.getAllOnesConstant(DL, VT: BitTy), |
1210 | RHS: DAG.getConstant(Val: 0, DL, VT: BitTy)); |
1211 | |
1212 | // Broadcast the mask so that the entire vector is all one or all zero. |
1213 | Mask = DAG.getSplat(VT: MaskTy, DL, Op: Mask); |
1214 | |
1215 | // Bitcast the operands to be the same type as the mask. |
1216 | // This is needed when we select between FP types because |
1217 | // the mask is a vector of integers. |
1218 | Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MaskTy, Operand: Op1); |
1219 | Op2 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MaskTy, Operand: Op2); |
1220 | |
1221 | SDValue NotMask = DAG.getNOT(DL, Val: Mask, VT: MaskTy); |
1222 | |
1223 | Op1 = DAG.getNode(Opcode: ISD::AND, DL, VT: MaskTy, N1: Op1, N2: Mask); |
1224 | Op2 = DAG.getNode(Opcode: ISD::AND, DL, VT: MaskTy, N1: Op2, N2: NotMask); |
1225 | SDValue Val = DAG.getNode(Opcode: ISD::OR, DL, VT: MaskTy, N1: Op1, N2: Op2); |
1226 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Node->getValueType(ResNo: 0), Operand: Val); |
1227 | } |
1228 | |
1229 | SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) { |
1230 | EVT VT = Node->getValueType(ResNo: 0); |
1231 | |
1232 | // Make sure that the SRA and SHL instructions are available. |
1233 | if (TLI.getOperationAction(Op: ISD::SRA, VT) == TargetLowering::Expand || |
1234 | TLI.getOperationAction(Op: ISD::SHL, VT) == TargetLowering::Expand) |
1235 | return DAG.UnrollVectorOp(N: Node); |
1236 | |
1237 | SDLoc DL(Node); |
1238 | EVT OrigTy = cast<VTSDNode>(Val: Node->getOperand(Num: 1))->getVT(); |
1239 | |
1240 | unsigned BW = VT.getScalarSizeInBits(); |
1241 | unsigned OrigBW = OrigTy.getScalarSizeInBits(); |
1242 | SDValue ShiftSz = DAG.getConstant(Val: BW - OrigBW, DL, VT); |
1243 | |
1244 | SDValue Op = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Node->getOperand(Num: 0), N2: ShiftSz); |
1245 | return DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Op, N2: ShiftSz); |
1246 | } |
1247 | |
1248 | // Generically expand a vector anyext in register to a shuffle of the relevant |
1249 | // lanes into the appropriate locations, with other lanes left undef. |
1250 | SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) { |
1251 | SDLoc DL(Node); |
1252 | EVT VT = Node->getValueType(ResNo: 0); |
1253 | int NumElements = VT.getVectorNumElements(); |
1254 | SDValue Src = Node->getOperand(Num: 0); |
1255 | EVT SrcVT = Src.getValueType(); |
1256 | int NumSrcElements = SrcVT.getVectorNumElements(); |
1257 | |
1258 | // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector |
1259 | // into a larger vector type. |
1260 | if (SrcVT.bitsLE(VT)) { |
1261 | assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 && |
1262 | "ANY_EXTEND_VECTOR_INREG vector size mismatch" ); |
1263 | NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits(); |
1264 | SrcVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: SrcVT.getScalarType(), |
1265 | NumElements: NumSrcElements); |
1266 | Src = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: SrcVT, N1: DAG.getUNDEF(VT: SrcVT), |
1267 | N2: Src, N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
1268 | } |
1269 | |
1270 | // Build a base mask of undef shuffles. |
1271 | SmallVector<int, 16> ShuffleMask; |
1272 | ShuffleMask.resize(N: NumSrcElements, NV: -1); |
1273 | |
1274 | // Place the extended lanes into the correct locations. |
1275 | int ExtLaneScale = NumSrcElements / NumElements; |
1276 | int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0; |
1277 | for (int i = 0; i < NumElements; ++i) |
1278 | ShuffleMask[i * ExtLaneScale + EndianOffset] = i; |
1279 | |
1280 | return DAG.getNode( |
1281 | Opcode: ISD::BITCAST, DL, VT, |
1282 | Operand: DAG.getVectorShuffle(VT: SrcVT, dl: DL, N1: Src, N2: DAG.getUNDEF(VT: SrcVT), Mask: ShuffleMask)); |
1283 | } |
1284 | |
1285 | SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) { |
1286 | SDLoc DL(Node); |
1287 | EVT VT = Node->getValueType(ResNo: 0); |
1288 | SDValue Src = Node->getOperand(Num: 0); |
1289 | EVT SrcVT = Src.getValueType(); |
1290 | |
1291 | // First build an any-extend node which can be legalized above when we |
1292 | // recurse through it. |
1293 | SDValue Op = DAG.getNode(Opcode: ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Operand: Src); |
1294 | |
1295 | // Now we need sign extend. Do this by shifting the elements. Even if these |
1296 | // aren't legal operations, they have a better chance of being legalized |
1297 | // without full scalarization than the sign extension does. |
1298 | unsigned EltWidth = VT.getScalarSizeInBits(); |
1299 | unsigned SrcEltWidth = SrcVT.getScalarSizeInBits(); |
1300 | SDValue ShiftAmount = DAG.getConstant(Val: EltWidth - SrcEltWidth, DL, VT); |
1301 | return DAG.getNode(Opcode: ISD::SRA, DL, VT, |
1302 | N1: DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Op, N2: ShiftAmount), |
1303 | N2: ShiftAmount); |
1304 | } |
1305 | |
1306 | // Generically expand a vector zext in register to a shuffle of the relevant |
1307 | // lanes into the appropriate locations, a blend of zero into the high bits, |
1308 | // and a bitcast to the wider element type. |
1309 | SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) { |
1310 | SDLoc DL(Node); |
1311 | EVT VT = Node->getValueType(ResNo: 0); |
1312 | int NumElements = VT.getVectorNumElements(); |
1313 | SDValue Src = Node->getOperand(Num: 0); |
1314 | EVT SrcVT = Src.getValueType(); |
1315 | int NumSrcElements = SrcVT.getVectorNumElements(); |
1316 | |
1317 | // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector |
1318 | // into a larger vector type. |
1319 | if (SrcVT.bitsLE(VT)) { |
1320 | assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 && |
1321 | "ZERO_EXTEND_VECTOR_INREG vector size mismatch" ); |
1322 | NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits(); |
1323 | SrcVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: SrcVT.getScalarType(), |
1324 | NumElements: NumSrcElements); |
1325 | Src = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: SrcVT, N1: DAG.getUNDEF(VT: SrcVT), |
1326 | N2: Src, N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
1327 | } |
1328 | |
1329 | // Build up a zero vector to blend into this one. |
1330 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT: SrcVT); |
1331 | |
1332 | // Shuffle the incoming lanes into the correct position, and pull all other |
1333 | // lanes from the zero vector. |
1334 | auto ShuffleMask = llvm::to_vector<16>(Range: llvm::seq<int>(Begin: 0, End: NumSrcElements)); |
1335 | |
1336 | int ExtLaneScale = NumSrcElements / NumElements; |
1337 | int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0; |
1338 | for (int i = 0; i < NumElements; ++i) |
1339 | ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i; |
1340 | |
1341 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, |
1342 | Operand: DAG.getVectorShuffle(VT: SrcVT, dl: DL, N1: Zero, N2: Src, Mask: ShuffleMask)); |
1343 | } |
1344 | |
1345 | static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) { |
1346 | int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8; |
1347 | for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I) |
1348 | for (int J = ScalarSizeInBytes - 1; J >= 0; --J) |
1349 | ShuffleMask.push_back(Elt: (I * ScalarSizeInBytes) + J); |
1350 | } |
1351 | |
1352 | SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) { |
1353 | EVT VT = Node->getValueType(ResNo: 0); |
1354 | |
1355 | // Scalable vectors can't use shuffle expansion. |
1356 | if (VT.isScalableVector()) |
1357 | return TLI.expandBSWAP(N: Node, DAG); |
1358 | |
1359 | // Generate a byte wise shuffle mask for the BSWAP. |
1360 | SmallVector<int, 16> ShuffleMask; |
1361 | createBSWAPShuffleMask(VT, ShuffleMask); |
1362 | EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size()); |
1363 | |
1364 | // Only emit a shuffle if the mask is legal. |
1365 | if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) { |
1366 | SDLoc DL(Node); |
1367 | SDValue Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ByteVT, Operand: Node->getOperand(Num: 0)); |
1368 | Op = DAG.getVectorShuffle(VT: ByteVT, dl: DL, N1: Op, N2: DAG.getUNDEF(VT: ByteVT), Mask: ShuffleMask); |
1369 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op); |
1370 | } |
1371 | |
1372 | // If we have the appropriate vector bit operations, it is better to use them |
1373 | // than unrolling and expanding each component. |
1374 | if (TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT) && |
1375 | TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) && |
1376 | TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) && |
1377 | TLI.isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)) |
1378 | return TLI.expandBSWAP(N: Node, DAG); |
1379 | |
1380 | // Otherwise unroll. |
1381 | return DAG.UnrollVectorOp(N: Node); |
1382 | } |
1383 | |
1384 | void VectorLegalizer::ExpandBITREVERSE(SDNode *Node, |
1385 | SmallVectorImpl<SDValue> &Results) { |
1386 | EVT VT = Node->getValueType(ResNo: 0); |
1387 | |
1388 | // We can't unroll or use shuffles for scalable vectors. |
1389 | if (VT.isScalableVector()) { |
1390 | Results.push_back(Elt: TLI.expandBITREVERSE(N: Node, DAG)); |
1391 | return; |
1392 | } |
1393 | |
1394 | // If we have the scalar operation, it's probably cheaper to unroll it. |
1395 | if (TLI.isOperationLegalOrCustom(Op: ISD::BITREVERSE, VT: VT.getScalarType())) { |
1396 | SDValue Tmp = DAG.UnrollVectorOp(N: Node); |
1397 | Results.push_back(Elt: Tmp); |
1398 | return; |
1399 | } |
1400 | |
1401 | // If the vector element width is a whole number of bytes, test if its legal |
1402 | // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte |
1403 | // vector. This greatly reduces the number of bit shifts necessary. |
1404 | unsigned ScalarSizeInBits = VT.getScalarSizeInBits(); |
1405 | if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) { |
1406 | SmallVector<int, 16> BSWAPMask; |
1407 | createBSWAPShuffleMask(VT, ShuffleMask&: BSWAPMask); |
1408 | |
1409 | EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size()); |
1410 | if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) && |
1411 | (TLI.isOperationLegalOrCustom(Op: ISD::BITREVERSE, VT: ByteVT) || |
1412 | (TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT: ByteVT) && |
1413 | TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT: ByteVT) && |
1414 | TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT: ByteVT) && |
1415 | TLI.isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT: ByteVT)))) { |
1416 | SDLoc DL(Node); |
1417 | SDValue Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ByteVT, Operand: Node->getOperand(Num: 0)); |
1418 | Op = DAG.getVectorShuffle(VT: ByteVT, dl: DL, N1: Op, N2: DAG.getUNDEF(VT: ByteVT), |
1419 | Mask: BSWAPMask); |
1420 | Op = DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT: ByteVT, Operand: Op); |
1421 | Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op); |
1422 | Results.push_back(Elt: Op); |
1423 | return; |
1424 | } |
1425 | } |
1426 | |
1427 | // If we have the appropriate vector bit operations, it is better to use them |
1428 | // than unrolling and expanding each component. |
1429 | if (TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT) && |
1430 | TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) && |
1431 | TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) && |
1432 | TLI.isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)) { |
1433 | Results.push_back(Elt: TLI.expandBITREVERSE(N: Node, DAG)); |
1434 | return; |
1435 | } |
1436 | |
1437 | // Otherwise unroll. |
1438 | SDValue Tmp = DAG.UnrollVectorOp(N: Node); |
1439 | Results.push_back(Elt: Tmp); |
1440 | } |
1441 | |
1442 | SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) { |
1443 | // Implement VSELECT in terms of XOR, AND, OR |
1444 | // on platforms which do not support blend natively. |
1445 | SDLoc DL(Node); |
1446 | |
1447 | SDValue Mask = Node->getOperand(Num: 0); |
1448 | SDValue Op1 = Node->getOperand(Num: 1); |
1449 | SDValue Op2 = Node->getOperand(Num: 2); |
1450 | |
1451 | EVT VT = Mask.getValueType(); |
1452 | |
1453 | // If we can't even use the basic vector operations of |
1454 | // AND,OR,XOR, we will have to scalarize the op. |
1455 | // Notice that the operation may be 'promoted' which means that it is |
1456 | // 'bitcasted' to another type which is handled. |
1457 | if (TLI.getOperationAction(Op: ISD::AND, VT) == TargetLowering::Expand || |
1458 | TLI.getOperationAction(Op: ISD::XOR, VT) == TargetLowering::Expand || |
1459 | TLI.getOperationAction(Op: ISD::OR, VT) == TargetLowering::Expand) |
1460 | return DAG.UnrollVectorOp(N: Node); |
1461 | |
1462 | // This operation also isn't safe with AND, OR, XOR when the boolean type is |
1463 | // 0/1 and the select operands aren't also booleans, as we need an all-ones |
1464 | // vector constant to mask with. |
1465 | // FIXME: Sign extend 1 to all ones if that's legal on the target. |
1466 | auto BoolContents = TLI.getBooleanContents(Type: Op1.getValueType()); |
1467 | if (BoolContents != TargetLowering::ZeroOrNegativeOneBooleanContent && |
1468 | !(BoolContents == TargetLowering::ZeroOrOneBooleanContent && |
1469 | Op1.getValueType().getVectorElementType() == MVT::i1)) |
1470 | return DAG.UnrollVectorOp(N: Node); |
1471 | |
1472 | // If the mask and the type are different sizes, unroll the vector op. This |
1473 | // can occur when getSetCCResultType returns something that is different in |
1474 | // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8. |
1475 | if (VT.getSizeInBits() != Op1.getValueSizeInBits()) |
1476 | return DAG.UnrollVectorOp(N: Node); |
1477 | |
1478 | // Bitcast the operands to be the same type as the mask. |
1479 | // This is needed when we select between FP types because |
1480 | // the mask is a vector of integers. |
1481 | Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op1); |
1482 | Op2 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op2); |
1483 | |
1484 | SDValue NotMask = DAG.getNOT(DL, Val: Mask, VT); |
1485 | |
1486 | Op1 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op1, N2: Mask); |
1487 | Op2 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op2, N2: NotMask); |
1488 | SDValue Val = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Op1, N2: Op2); |
1489 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Node->getValueType(ResNo: 0), Operand: Val); |
1490 | } |
1491 | |
1492 | SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) { |
1493 | // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which |
1494 | // do not support it natively. |
1495 | SDLoc DL(Node); |
1496 | |
1497 | SDValue Mask = Node->getOperand(Num: 0); |
1498 | SDValue Op1 = Node->getOperand(Num: 1); |
1499 | SDValue Op2 = Node->getOperand(Num: 2); |
1500 | SDValue EVL = Node->getOperand(Num: 3); |
1501 | |
1502 | EVT VT = Mask.getValueType(); |
1503 | |
1504 | // If we can't even use the basic vector operations of |
1505 | // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op. |
1506 | if (TLI.getOperationAction(Op: ISD::VP_AND, VT) == TargetLowering::Expand || |
1507 | TLI.getOperationAction(Op: ISD::VP_XOR, VT) == TargetLowering::Expand || |
1508 | TLI.getOperationAction(Op: ISD::VP_OR, VT) == TargetLowering::Expand) |
1509 | return DAG.UnrollVectorOp(N: Node); |
1510 | |
1511 | // This operation also isn't safe when the operands aren't also booleans. |
1512 | if (Op1.getValueType().getVectorElementType() != MVT::i1) |
1513 | return DAG.UnrollVectorOp(N: Node); |
1514 | |
1515 | SDValue Ones = DAG.getAllOnesConstant(DL, VT); |
1516 | SDValue NotMask = DAG.getNode(Opcode: ISD::VP_XOR, DL, VT, N1: Mask, N2: Ones, N3: Ones, N4: EVL); |
1517 | |
1518 | Op1 = DAG.getNode(Opcode: ISD::VP_AND, DL, VT, N1: Op1, N2: Mask, N3: Ones, N4: EVL); |
1519 | Op2 = DAG.getNode(Opcode: ISD::VP_AND, DL, VT, N1: Op2, N2: NotMask, N3: Ones, N4: EVL); |
1520 | return DAG.getNode(Opcode: ISD::VP_OR, DL, VT, N1: Op1, N2: Op2, N3: Ones, N4: EVL); |
1521 | } |
1522 | |
SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) {
  // Implement VP_MERGE in terms of VSELECT. Construct a mask where vector
  // indices less than the EVL/pivot are true. Combine that with the original
  // mask for a full-length mask. Use a full-length VSELECT to select between
  // the true and false values.
  SDLoc DL(Node);

  SDValue Mask = Node->getOperand(Num: 0);
  SDValue Op1 = Node->getOperand(Num: 1);
  SDValue Op2 = Node->getOperand(Num: 2);
  SDValue EVL = Node->getOperand(Num: 3);

  EVT MaskVT = Mask.getValueType();
  bool IsFixedLen = MaskVT.isFixedLengthVector();

  // Integer vector with one EVL-typed element per mask lane, used to build
  // the {0,1,2,...} < splat(EVL) comparison below.
  EVT EVLVecVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: EVL.getValueType(),
                                  EC: MaskVT.getVectorElementCount());

  // If we can't construct the EVL mask efficiently, it's better to unroll.
  // Fixed-length vectors need BUILD_VECTOR for the step sequence; scalable
  // vectors need STEP_VECTOR plus SPLAT_VECTOR.
  if ((IsFixedLen &&
       !TLI.isOperationLegalOrCustom(Op: ISD::BUILD_VECTOR, VT: EVLVecVT)) ||
      (!IsFixedLen &&
       (!TLI.isOperationLegalOrCustom(Op: ISD::STEP_VECTOR, VT: EVLVecVT) ||
        !TLI.isOperationLegalOrCustom(Op: ISD::SPLAT_VECTOR, VT: EVLVecVT))))
    return DAG.UnrollVectorOp(N: Node);

  // If using a SETCC would result in a different type than the mask type,
  // unroll.
  if (TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(),
                             VT: EVLVecVT) != MaskVT)
    return DAG.UnrollVectorOp(N: Node);

  // EVLMask[i] = (i < EVL); lanes at or beyond the pivot must take Op2.
  SDValue StepVec = DAG.getStepVector(DL, ResVT: EVLVecVT);
  SDValue SplatEVL = DAG.getSplat(VT: EVLVecVT, DL, Op: EVL);
  SDValue EVLMask =
      DAG.getSetCC(DL, VT: MaskVT, LHS: StepVec, RHS: SplatEVL, Cond: ISD::CondCode::SETULT);

  // A lane selects Op1 only if it is both below the pivot and set in the
  // original mask.
  SDValue FullMask = DAG.getNode(Opcode: ISD::AND, DL, VT: MaskVT, N1: Mask, N2: EVLMask);
  return DAG.getSelect(DL, VT: Node->getValueType(ResNo: 0), Cond: FullMask, LHS: Op1, RHS: Op2);
}
1563 | |
1564 | SDValue VectorLegalizer::ExpandVP_REM(SDNode *Node) { |
1565 | // Implement VP_SREM/UREM in terms of VP_SDIV/VP_UDIV, VP_MUL, VP_SUB. |
1566 | EVT VT = Node->getValueType(ResNo: 0); |
1567 | |
1568 | unsigned DivOpc = Node->getOpcode() == ISD::VP_SREM ? ISD::VP_SDIV : ISD::VP_UDIV; |
1569 | |
1570 | if (!TLI.isOperationLegalOrCustom(Op: DivOpc, VT) || |
1571 | !TLI.isOperationLegalOrCustom(Op: ISD::VP_MUL, VT) || |
1572 | !TLI.isOperationLegalOrCustom(Op: ISD::VP_SUB, VT)) |
1573 | return SDValue(); |
1574 | |
1575 | SDLoc DL(Node); |
1576 | |
1577 | SDValue Dividend = Node->getOperand(Num: 0); |
1578 | SDValue Divisor = Node->getOperand(Num: 1); |
1579 | SDValue Mask = Node->getOperand(Num: 2); |
1580 | SDValue EVL = Node->getOperand(Num: 3); |
1581 | |
1582 | // X % Y -> X-X/Y*Y |
1583 | SDValue Div = DAG.getNode(Opcode: DivOpc, DL, VT, N1: Dividend, N2: Divisor, N3: Mask, N4: EVL); |
1584 | SDValue Mul = DAG.getNode(Opcode: ISD::VP_MUL, DL, VT, N1: Divisor, N2: Div, N3: Mask, N4: EVL); |
1585 | return DAG.getNode(Opcode: ISD::VP_SUB, DL, VT, N1: Dividend, N2: Mul, N3: Mask, N4: EVL); |
1586 | } |
1587 | |
1588 | void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node, |
1589 | SmallVectorImpl<SDValue> &Results) { |
1590 | // Attempt to expand using TargetLowering. |
1591 | SDValue Result, Chain; |
1592 | if (TLI.expandFP_TO_UINT(N: Node, Result, Chain, DAG)) { |
1593 | Results.push_back(Elt: Result); |
1594 | if (Node->isStrictFPOpcode()) |
1595 | Results.push_back(Elt: Chain); |
1596 | return; |
1597 | } |
1598 | |
1599 | // Otherwise go ahead and unroll. |
1600 | if (Node->isStrictFPOpcode()) { |
1601 | UnrollStrictFPOp(Node, Results); |
1602 | return; |
1603 | } |
1604 | |
1605 | Results.push_back(Elt: DAG.UnrollVectorOp(N: Node)); |
1606 | } |
1607 | |
// Expand [STRICT_]UINT_TO_FP. Prefer the generic TargetLowering expansion;
// otherwise convert via two signed halves: split the integer into high and
// low half-words, convert each with SINT_TO_FP (both halves are non-negative,
// so signed conversion is exact), scale the high part by 2^(BW/2) and add.
void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
                                          SmallVectorImpl<SDValue> &Results) {
  bool IsStrict = Node->isStrictFPOpcode();
  // Strict nodes carry the chain as operand 0, shifting the source operand.
  unsigned OpNo = IsStrict ? 1 : 0;
  SDValue Src = Node->getOperand(Num: OpNo);
  EVT VT = Src.getValueType();
  SDLoc DL(Node);

  // Attempt to expand using TargetLowering.
  SDValue Result;
  SDValue Chain;
  if (TLI.expandUINT_TO_FP(N: Node, Result, Chain, DAG)) {
    Results.push_back(Elt: Result);
    if (IsStrict)
      Results.push_back(Elt: Chain);
    return;
  }

  // Make sure that the SINT_TO_FP and SRL instructions are available.
  // Without them the two-half trick below cannot be built, so unroll.
  if (((!IsStrict && TLI.getOperationAction(Op: ISD::SINT_TO_FP, VT) ==
                         TargetLowering::Expand) ||
       (IsStrict && TLI.getOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT) ==
                        TargetLowering::Expand)) ||
      TLI.getOperationAction(Op: ISD::SRL, VT) == TargetLowering::Expand) {
    if (IsStrict) {
      UnrollStrictFPOp(Node, Results);
      return;
    }

    Results.push_back(Elt: DAG.UnrollVectorOp(N: Node));
    return;
  }

  unsigned BW = VT.getScalarSizeInBits();
  assert((BW == 64 || BW == 32) &&
         "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide" );

  // Shift amount that moves the high half-word into the low half-word.
  SDValue HalfWord = DAG.getConstant(Val: BW / 2, DL, VT);

  // Constants to clear the upper part of the word.
  // Notice that we can also use SHL+SHR, but using a constant is slightly
  // faster on x86.
  uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
  SDValue HalfWordMask = DAG.getConstant(Val: HWMask, DL, VT);

  // Two to the power of half-word-size. Scale factor to restore the high
  // half to its original magnitude after conversion.
  SDValue TWOHW =
      DAG.getConstantFP(Val: 1ULL << (BW / 2), DL, VT: Node->getValueType(ResNo: 0));

  // Clear upper part of LO, lower HI
  SDValue HI = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Src, N2: HalfWord);
  SDValue LO = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Src, N2: HalfWordMask);

  if (IsStrict) {
    // Convert hi and lo to floats
    // Convert the hi part back to the upper values
    // TODO: Can any fast-math-flags be set on these nodes?
    SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL,
                              {Node->getValueType(0), MVT::Other},
                              {Node->getOperand(0), HI});
    fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {Node->getValueType(0), MVT::Other},
                      {fHI.getValue(1), fHI, TWOHW});
    SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL,
                              {Node->getValueType(0), MVT::Other},
                              {Node->getOperand(0), LO});

    // Merge the two conversion chains so the final add depends on both.
    SDValue TF = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, fHI.getValue(R: 1),
                             fLO.getValue(R: 1));

    // Add the two halves
    SDValue Result =
        DAG.getNode(ISD::STRICT_FADD, DL, {Node->getValueType(0), MVT::Other},
                    {TF, fHI, fLO});

    Results.push_back(Elt: Result);
    Results.push_back(Elt: Result.getValue(R: 1));
    return;
  }

  // Convert hi and lo to floats
  // Convert the hi part back to the upper values
  // TODO: Can any fast-math-flags be set on these nodes?
  SDValue fHI = DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT: Node->getValueType(ResNo: 0), Operand: HI);
  fHI = DAG.getNode(Opcode: ISD::FMUL, DL, VT: Node->getValueType(ResNo: 0), N1: fHI, N2: TWOHW);
  SDValue fLO = DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT: Node->getValueType(ResNo: 0), Operand: LO);

  // Add the two halves
  Results.push_back(
      Elt: DAG.getNode(Opcode: ISD::FADD, DL, VT: Node->getValueType(ResNo: 0), N1: fHI, N2: fLO));
}
1698 | |
1699 | SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) { |
1700 | if (TLI.isOperationLegalOrCustom(Op: ISD::FSUB, VT: Node->getValueType(ResNo: 0))) { |
1701 | SDLoc DL(Node); |
1702 | SDValue Zero = DAG.getConstantFP(Val: -0.0, DL, VT: Node->getValueType(ResNo: 0)); |
1703 | // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB. |
1704 | return DAG.getNode(Opcode: ISD::FSUB, DL, VT: Node->getValueType(ResNo: 0), N1: Zero, |
1705 | N2: Node->getOperand(Num: 0)); |
1706 | } |
1707 | return DAG.UnrollVectorOp(N: Node); |
1708 | } |
1709 | |
1710 | void VectorLegalizer::ExpandFSUB(SDNode *Node, |
1711 | SmallVectorImpl<SDValue> &Results) { |
1712 | // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal, |
1713 | // we can defer this to operation legalization where it will be lowered as |
1714 | // a+(-b). |
1715 | EVT VT = Node->getValueType(ResNo: 0); |
1716 | if (TLI.isOperationLegalOrCustom(Op: ISD::FNEG, VT) && |
1717 | TLI.isOperationLegalOrCustom(Op: ISD::FADD, VT)) |
1718 | return; // Defer to LegalizeDAG |
1719 | |
1720 | SDValue Tmp = DAG.UnrollVectorOp(N: Node); |
1721 | Results.push_back(Elt: Tmp); |
1722 | } |
1723 | |
// Expand a vector SETCC / VP_SETCC / STRICT_FSETCC[S] whose condition code is
// not natively supported: legalize the condition code (possibly swapping
// operands or inverting the predicate), rebuild the comparison, and fall back
// to unrolling or SELECT_CC when that fails.
void VectorLegalizer::ExpandSETCC(SDNode *Node,
                                  SmallVectorImpl<SDValue> &Results) {
  bool NeedInvert = false;
  bool IsVP = Node->getOpcode() == ISD::VP_SETCC;
  bool IsStrict = Node->getOpcode() == ISD::STRICT_FSETCC ||
                  Node->getOpcode() == ISD::STRICT_FSETCCS;
  bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS;
  // Strict nodes carry the chain as operand 0, shifting all other operands.
  unsigned Offset = IsStrict ? 1 : 0;

  SDValue Chain = IsStrict ? Node->getOperand(Num: 0) : SDValue();
  SDValue LHS = Node->getOperand(Num: 0 + Offset);
  SDValue RHS = Node->getOperand(Num: 1 + Offset);
  SDValue CC = Node->getOperand(Num: 2 + Offset);

  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(Val&: CC)->get();

  // If the condition code itself doesn't need expansion, the whole SETCC op
  // must be the illegal part; scalarize it instead.
  if (TLI.getCondCodeAction(CC: CCCode, VT: OpVT) != TargetLowering::Expand) {
    if (IsStrict) {
      UnrollStrictFPOp(Node, Results);
      return;
    }
    Results.push_back(Elt: UnrollVSETCC(Node));
    return;
  }

  // VP nodes carry a mask and explicit vector length after the cond code.
  SDValue Mask, EVL;
  if (IsVP) {
    Mask = Node->getOperand(Num: 3 + Offset);
    EVL = Node->getOperand(Num: 4 + Offset);
  }

  SDLoc dl(Node);
  // May rewrite LHS/RHS/CC in place (e.g. swap operands, change predicate)
  // and set NeedInvert when the rewritten compare computes the negation.
  bool Legalized =
      TLI.LegalizeSetCCCondCode(DAG, VT: Node->getValueType(ResNo: 0), LHS, RHS, CC, Mask,
                                EVL, NeedInvert, dl, Chain, IsSignaling);

  if (Legalized) {
    // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
    // condition code, create a new SETCC node.
    if (CC.getNode()) {
      if (IsStrict) {
        LHS = DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VTList: Node->getVTList(),
                          Ops: {Chain, LHS, RHS, CC}, Flags: Node->getFlags());
        Chain = LHS.getValue(R: 1);
      } else if (IsVP) {
        LHS = DAG.getNode(Opcode: ISD::VP_SETCC, DL: dl, VT: Node->getValueType(ResNo: 0),
                          Ops: {LHS, RHS, CC, Mask, EVL}, Flags: Node->getFlags());
      } else {
        LHS = DAG.getNode(Opcode: ISD::SETCC, DL: dl, VT: Node->getValueType(ResNo: 0), N1: LHS, N2: RHS, N3: CC,
                          Flags: Node->getFlags());
      }
    }

    // If we expanded the SETCC by inverting the condition code, then wrap
    // the existing SETCC in a NOT to restore the intended condition.
    if (NeedInvert) {
      if (!IsVP)
        LHS = DAG.getLogicalNOT(DL: dl, Val: LHS, VT: LHS->getValueType(ResNo: 0));
      else
        LHS = DAG.getVPLogicalNOT(DL: dl, Val: LHS, Mask, EVL, VT: LHS->getValueType(ResNo: 0));
    }
  } else {
    assert(!IsStrict && "Don't know how to expand for strict nodes." );

    // Otherwise, SETCC for the given comparison type must be completely
    // illegal; expand it into a SELECT_CC.
    EVT VT = Node->getValueType(ResNo: 0);
    LHS =
        DAG.getNode(Opcode: ISD::SELECT_CC, DL: dl, VT, N1: LHS, N2: RHS,
                    N3: DAG.getBoolConstant(V: true, DL: dl, VT, OpVT: LHS.getValueType()),
                    N4: DAG.getBoolConstant(V: false, DL: dl, VT, OpVT: LHS.getValueType()), N5: CC);
    LHS->setFlags(Node->getFlags());
  }

  Results.push_back(Elt: LHS);
  if (IsStrict)
    Results.push_back(Elt: Chain);
}
1803 | |
1804 | void VectorLegalizer::ExpandUADDSUBO(SDNode *Node, |
1805 | SmallVectorImpl<SDValue> &Results) { |
1806 | SDValue Result, Overflow; |
1807 | TLI.expandUADDSUBO(Node, Result, Overflow, DAG); |
1808 | Results.push_back(Elt: Result); |
1809 | Results.push_back(Elt: Overflow); |
1810 | } |
1811 | |
1812 | void VectorLegalizer::ExpandSADDSUBO(SDNode *Node, |
1813 | SmallVectorImpl<SDValue> &Results) { |
1814 | SDValue Result, Overflow; |
1815 | TLI.expandSADDSUBO(Node, Result, Overflow, DAG); |
1816 | Results.push_back(Elt: Result); |
1817 | Results.push_back(Elt: Overflow); |
1818 | } |
1819 | |
1820 | void VectorLegalizer::ExpandMULO(SDNode *Node, |
1821 | SmallVectorImpl<SDValue> &Results) { |
1822 | SDValue Result, Overflow; |
1823 | if (!TLI.expandMULO(Node, Result, Overflow, DAG)) |
1824 | std::tie(args&: Result, args&: Overflow) = DAG.UnrollVectorOverflowOp(N: Node); |
1825 | |
1826 | Results.push_back(Elt: Result); |
1827 | Results.push_back(Elt: Overflow); |
1828 | } |
1829 | |
1830 | void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node, |
1831 | SmallVectorImpl<SDValue> &Results) { |
1832 | SDNode *N = Node; |
1833 | if (SDValue Expanded = TLI.expandFixedPointDiv(Opcode: N->getOpcode(), dl: SDLoc(N), |
1834 | LHS: N->getOperand(Num: 0), RHS: N->getOperand(Num: 1), Scale: N->getConstantOperandVal(Num: 2), DAG)) |
1835 | Results.push_back(Elt: Expanded); |
1836 | } |
1837 | |
1838 | void VectorLegalizer::ExpandStrictFPOp(SDNode *Node, |
1839 | SmallVectorImpl<SDValue> &Results) { |
1840 | if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) { |
1841 | ExpandUINT_TO_FLOAT(Node, Results); |
1842 | return; |
1843 | } |
1844 | if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) { |
1845 | ExpandFP_TO_UINT(Node, Results); |
1846 | return; |
1847 | } |
1848 | |
1849 | if (Node->getOpcode() == ISD::STRICT_FSETCC || |
1850 | Node->getOpcode() == ISD::STRICT_FSETCCS) { |
1851 | ExpandSETCC(Node, Results); |
1852 | return; |
1853 | } |
1854 | |
1855 | UnrollStrictFPOp(Node, Results); |
1856 | } |
1857 | |
1858 | void VectorLegalizer::ExpandREM(SDNode *Node, |
1859 | SmallVectorImpl<SDValue> &Results) { |
1860 | assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) && |
1861 | "Expected REM node" ); |
1862 | |
1863 | SDValue Result; |
1864 | if (!TLI.expandREM(Node, Result, DAG)) |
1865 | Result = DAG.UnrollVectorOp(N: Node); |
1866 | Results.push_back(Elt: Result); |
1867 | } |
1868 | |
1869 | // Try to expand libm nodes into vector math routine calls. Callers provide the |
1870 | // LibFunc equivalent of the passed in Node, which is used to lookup mappings |
1871 | // within TargetLibraryInfo. The only mappings considered are those where the |
1872 | // result and all operands are the same vector type. While predicated nodes are |
1873 | // not supported, we will emit calls to masked routines by passing in an all |
1874 | // true mask. |
bool VectorLegalizer::tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
                                           SmallVectorImpl<SDValue> &Results) {
  // Chain must be propagated but currently strict fp operations are down
  // converted to their none strict counterpart.
  assert(!Node->isStrictFPOpcode() && "Unexpected strict fp operation!" );

  // No libcall name registered for this target means no vector mapping
  // can exist either.
  const char *LCName = TLI.getLibcallName(Call: LC);
  if (!LCName)
    return false;
  LLVM_DEBUG(dbgs() << "Looking for vector variant of " << LCName << "\n" );

  EVT VT = Node->getValueType(ResNo: 0);
  ElementCount VL = VT.getVectorElementCount();

  // Lookup a vector function equivalent to the specified libcall. Prefer
  // unmasked variants but we will generate a mask if need be.
  const TargetLibraryInfo &TLibInfo = DAG.getLibInfo();
  const VecDesc *VD = TLibInfo.getVectorMappingInfo(F: LCName, VF: VL, Masked: false);
  if (!VD)
    VD = TLibInfo.getVectorMappingInfo(F: LCName, VF: VL, /*Masked=*/true);
  if (!VD)
    return false;

  LLVMContext *Ctx = DAG.getContext();
  Type *Ty = VT.getTypeForEVT(Context&: *Ctx);
  Type *ScalarTy = Ty->getScalarType();

  // Construct a scalar function type based on Node's operands. This is the
  // type the VFABI demangler expects for the scalar counterpart.
  SmallVector<Type *, 8> ArgTys;
  for (unsigned i = 0; i < Node->getNumOperands(); ++i) {
    assert(Node->getOperand(i).getValueType() == VT &&
           "Expected matching vector types!" );
    ArgTys.push_back(Elt: ScalarTy);
  }
  FunctionType *ScalarFTy = FunctionType::get(Result: ScalarTy, Params: ArgTys, isVarArg: false);

  // Generate call information for the vector function by demangling its
  // VFABI variant string.
  const std::string MangledName = VD->getVectorFunctionABIVariantString();
  auto OptVFInfo = VFABI::tryDemangleForVFABI(MangledName, FTy: ScalarFTy);
  if (!OptVFInfo)
    return false;

  LLVM_DEBUG(dbgs() << "Found vector variant " << VD->getVectorFnName()
                    << "\n" );

  // Sanity check just in case OptVFInfo has unexpected parameters.
  // A masked variant carries exactly one extra (predicate) parameter.
  if (OptVFInfo->Shape.Parameters.size() !=
      Node->getNumOperands() + VD->isMasked())
    return false;

  // Collect vector call operands.

  SDLoc DL(Node);
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  Entry.IsSExt = false;
  Entry.IsZExt = false;

  unsigned OpNum = 0;
  for (auto &VFParam : OptVFInfo->Shape.Parameters) {
    if (VFParam.ParamKind == VFParamKind::GlobalPredicate) {
      // Masked routine but unpredicated node: pass an all-true mask.
      EVT MaskVT = TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *Ctx, VT);
      Entry.Node = DAG.getBoolConstant(V: true, DL, VT: MaskVT, OpVT: VT);
      Entry.Ty = MaskVT.getTypeForEVT(Context&: *Ctx);
      Args.push_back(x: Entry);
      continue;
    }

    // Only vector operands are supported.
    if (VFParam.ParamKind != VFParamKind::Vector)
      return false;

    Entry.Node = Node->getOperand(Num: OpNum++);
    Entry.Ty = Ty;
    Args.push_back(x: Entry);
  }

  // Emit a call to the vector function.
  SDValue Callee = DAG.getExternalSymbol(Sym: VD->getVectorFnName().data(),
                                         VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CC: CallingConv::C, ResultType: Ty, Target: Callee, ArgsList: std::move(Args));

  std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
  Results.push_back(Elt: CallResult.first);
  return true;
}
1964 | |
1965 | /// Try to expand the node to a vector libcall based on the result type. |
1966 | bool VectorLegalizer::tryExpandVecMathCall( |
1967 | SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, |
1968 | RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, |
1969 | RTLIB::Libcall Call_PPCF128, SmallVectorImpl<SDValue> &Results) { |
1970 | RTLIB::Libcall LC = RTLIB::getFPLibCall( |
1971 | VT: Node->getValueType(ResNo: 0).getVectorElementType(), Call_F32, Call_F64, |
1972 | Call_F80, Call_F128, Call_PPCF128); |
1973 | |
1974 | if (LC == RTLIB::UNKNOWN_LIBCALL) |
1975 | return false; |
1976 | |
1977 | return tryExpandVecMathCall(Node, LC, Results); |
1978 | } |
1979 | |
// Scalarize a strict FP vector op: perform the operation element-by-element
// as scalar strict nodes (all sharing the incoming chain), then rebuild the
// vector result and merge the per-element chains with a TokenFactor.
void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  EVT VT = Node->getValueType(ResNo: 0);
  EVT EltVT = VT.getVectorElementType();
  unsigned NumElems = VT.getVectorNumElements();
  unsigned NumOpers = Node->getNumOperands();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Strict FSETCC produces a boolean per element; use the target's SETCC
  // result type for the scalar compares instead of the element type.
  EVT TmpEltVT = EltVT;
  if (Node->getOpcode() == ISD::STRICT_FSETCC ||
      Node->getOpcode() == ISD::STRICT_FSETCCS)
    TmpEltVT = TLI.getSetCCResultType(DL: DAG.getDataLayout(),
                                      Context&: *DAG.getContext(), VT: TmpEltVT);

  EVT ValueVTs[] = {TmpEltVT, MVT::Other};
  SDValue Chain = Node->getOperand(Num: 0);
  SDLoc dl(Node);

  SmallVector<SDValue, 32> OpValues;
  SmallVector<SDValue, 32> OpChains;
  for (unsigned i = 0; i < NumElems; ++i) {
    SmallVector<SDValue, 4> Opers;
    SDValue Idx = DAG.getVectorIdxConstant(Val: i, DL: dl);

    // The Chain is the first operand.
    Opers.push_back(Elt: Chain);

    // Now process the remaining operands.
    for (unsigned j = 1; j < NumOpers; ++j) {
      SDValue Oper = Node->getOperand(Num: j);
      EVT OperVT = Oper.getValueType();

      // Vector operands are scalarized; scalar operands (e.g. rounding-mode
      // metadata) are passed through untouched.
      if (OperVT.isVector())
        Oper = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl,
                           VT: OperVT.getVectorElementType(), N1: Oper, N2: Idx);

      Opers.push_back(Elt: Oper);
    }

    SDValue ScalarOp = DAG.getNode(Node->getOpcode(), dl, ValueVTs, Opers);
    SDValue ScalarResult = ScalarOp.getValue(R: 0);
    SDValue ScalarChain = ScalarOp.getValue(R: 1);

    // Widen each boolean compare result to an all-ones/zero element of the
    // vector's element type.
    if (Node->getOpcode() == ISD::STRICT_FSETCC ||
        Node->getOpcode() == ISD::STRICT_FSETCCS)
      ScalarResult = DAG.getSelect(DL: dl, VT: EltVT, Cond: ScalarResult,
                                   LHS: DAG.getAllOnesConstant(DL: dl, VT: EltVT),
                                   RHS: DAG.getConstant(Val: 0, DL: dl, VT: EltVT));

    OpValues.push_back(Elt: ScalarResult);
    OpChains.push_back(Elt: ScalarChain);
  }

  SDValue Result = DAG.getBuildVector(VT, DL: dl, Ops: OpValues);
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);

  Results.push_back(Elt: Result);
  Results.push_back(Elt: NewChain);
}
2039 | |
2040 | SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) { |
2041 | EVT VT = Node->getValueType(ResNo: 0); |
2042 | unsigned NumElems = VT.getVectorNumElements(); |
2043 | EVT EltVT = VT.getVectorElementType(); |
2044 | SDValue LHS = Node->getOperand(Num: 0); |
2045 | SDValue RHS = Node->getOperand(Num: 1); |
2046 | SDValue CC = Node->getOperand(Num: 2); |
2047 | EVT TmpEltVT = LHS.getValueType().getVectorElementType(); |
2048 | SDLoc dl(Node); |
2049 | SmallVector<SDValue, 8> Ops(NumElems); |
2050 | for (unsigned i = 0; i < NumElems; ++i) { |
2051 | SDValue LHSElem = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: TmpEltVT, N1: LHS, |
2052 | N2: DAG.getVectorIdxConstant(Val: i, DL: dl)); |
2053 | SDValue RHSElem = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: TmpEltVT, N1: RHS, |
2054 | N2: DAG.getVectorIdxConstant(Val: i, DL: dl)); |
2055 | Ops[i] = DAG.getNode(Opcode: ISD::SETCC, DL: dl, |
2056 | VT: TLI.getSetCCResultType(DL: DAG.getDataLayout(), |
2057 | Context&: *DAG.getContext(), VT: TmpEltVT), |
2058 | N1: LHSElem, N2: RHSElem, N3: CC); |
2059 | Ops[i] = DAG.getSelect(DL: dl, VT: EltVT, Cond: Ops[i], LHS: DAG.getAllOnesConstant(DL: dl, VT: EltVT), |
2060 | RHS: DAG.getConstant(Val: 0, DL: dl, VT: EltVT)); |
2061 | } |
2062 | return DAG.getBuildVector(VT, DL: dl, Ops); |
2063 | } |
2064 | |
2065 | bool SelectionDAG::LegalizeVectors() { |
2066 | return VectorLegalizer(*this).Run(); |
2067 | } |
2068 | |