1 | //===- llvm/CodeGen/TargetLowering.h - Target Lowering Info -----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
/// This file describes how to lower LLVM code to machine code. This has three
/// main components:
12 | /// |
13 | /// 1. Which ValueTypes are natively supported by the target. |
14 | /// 2. Which operations are supported for supported ValueTypes. |
15 | /// 3. Cost thresholds for alternative implementations of certain operations. |
16 | /// |
17 | /// In addition it has a few other components, like information about FP |
18 | /// immediates. |
19 | /// |
20 | //===----------------------------------------------------------------------===// |
21 | |
22 | #ifndef LLVM_CODEGEN_TARGETLOWERING_H |
23 | #define LLVM_CODEGEN_TARGETLOWERING_H |
24 | |
25 | #include "llvm/ADT/APInt.h" |
26 | #include "llvm/ADT/ArrayRef.h" |
27 | #include "llvm/ADT/DenseMap.h" |
28 | #include "llvm/ADT/SmallVector.h" |
29 | #include "llvm/ADT/StringRef.h" |
30 | #include "llvm/CodeGen/DAGCombine.h" |
31 | #include "llvm/CodeGen/ISDOpcodes.h" |
32 | #include "llvm/CodeGen/LowLevelTypeUtils.h" |
33 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
34 | #include "llvm/CodeGen/RuntimeLibcalls.h" |
35 | #include "llvm/CodeGen/SelectionDAG.h" |
36 | #include "llvm/CodeGen/SelectionDAGNodes.h" |
37 | #include "llvm/CodeGen/TargetCallingConv.h" |
38 | #include "llvm/CodeGen/ValueTypes.h" |
39 | #include "llvm/CodeGenTypes/MachineValueType.h" |
40 | #include "llvm/IR/Attributes.h" |
41 | #include "llvm/IR/CallingConv.h" |
42 | #include "llvm/IR/DataLayout.h" |
43 | #include "llvm/IR/DerivedTypes.h" |
44 | #include "llvm/IR/Function.h" |
45 | #include "llvm/IR/InlineAsm.h" |
46 | #include "llvm/IR/Instruction.h" |
47 | #include "llvm/IR/Instructions.h" |
48 | #include "llvm/IR/Type.h" |
49 | #include "llvm/Support/Alignment.h" |
50 | #include "llvm/Support/AtomicOrdering.h" |
51 | #include "llvm/Support/Casting.h" |
52 | #include "llvm/Support/ErrorHandling.h" |
53 | #include <algorithm> |
54 | #include <cassert> |
55 | #include <climits> |
56 | #include <cstdint> |
57 | #include <iterator> |
58 | #include <map> |
59 | #include <string> |
60 | #include <utility> |
61 | #include <vector> |
62 | |
63 | namespace llvm { |
64 | |
65 | class AssumptionCache; |
66 | class CCState; |
67 | class CCValAssign; |
68 | enum class ComplexDeinterleavingOperation; |
69 | enum class ComplexDeinterleavingRotation; |
70 | class Constant; |
71 | class FastISel; |
72 | class FunctionLoweringInfo; |
73 | class GlobalValue; |
74 | class Loop; |
75 | class GISelKnownBits; |
76 | class IntrinsicInst; |
77 | class IRBuilderBase; |
78 | struct KnownBits; |
79 | class LLVMContext; |
80 | class MachineBasicBlock; |
81 | class MachineFunction; |
82 | class MachineInstr; |
83 | class MachineJumpTableInfo; |
84 | class MachineLoop; |
85 | class MachineRegisterInfo; |
86 | class MCContext; |
87 | class MCExpr; |
88 | class Module; |
89 | class ProfileSummaryInfo; |
90 | class TargetLibraryInfo; |
91 | class TargetMachine; |
92 | class TargetRegisterClass; |
93 | class TargetRegisterInfo; |
94 | class TargetTransformInfo; |
95 | class Value; |
96 | |
namespace Sched {

/// Preferred instruction-scheduling strategy for a target.
enum Preference {
  None,        // No preference
  Source,      // Follow source order.
  RegPressure, // Scheduling for lowest register pressure.
  Hybrid,      // Scheduling for both latency and register pressure.
  ILP,         // Scheduling for ILP in low register pressure mode.
  VLIW,        // Scheduling for VLIW targets.
  Fast,        // Fast suboptimal list scheduling
  Linearize    // Linearize DAG, no scheduling
};

} // end namespace Sched
111 | |
112 | // MemOp models a memory operation, either memset or memcpy/memmove. |
113 | struct MemOp { |
114 | private: |
115 | // Shared |
116 | uint64_t Size; |
117 | bool DstAlignCanChange; // true if destination alignment can satisfy any |
118 | // constraint. |
119 | Align DstAlign; // Specified alignment of the memory operation. |
120 | |
121 | bool AllowOverlap; |
122 | // memset only |
123 | bool IsMemset; // If setthis memory operation is a memset. |
124 | bool ZeroMemset; // If set clears out memory with zeros. |
125 | // memcpy only |
126 | bool MemcpyStrSrc; // Indicates whether the memcpy source is an in-register |
127 | // constant so it does not need to be loaded. |
128 | Align SrcAlign; // Inferred alignment of the source or default value if the |
129 | // memory operation does not need to load the value. |
130 | public: |
131 | static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, |
132 | Align SrcAlign, bool IsVolatile, |
133 | bool MemcpyStrSrc = false) { |
134 | MemOp Op; |
135 | Op.Size = Size; |
136 | Op.DstAlignCanChange = DstAlignCanChange; |
137 | Op.DstAlign = DstAlign; |
138 | Op.AllowOverlap = !IsVolatile; |
139 | Op.IsMemset = false; |
140 | Op.ZeroMemset = false; |
141 | Op.MemcpyStrSrc = MemcpyStrSrc; |
142 | Op.SrcAlign = SrcAlign; |
143 | return Op; |
144 | } |
145 | |
146 | static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, |
147 | bool IsZeroMemset, bool IsVolatile) { |
148 | MemOp Op; |
149 | Op.Size = Size; |
150 | Op.DstAlignCanChange = DstAlignCanChange; |
151 | Op.DstAlign = DstAlign; |
152 | Op.AllowOverlap = !IsVolatile; |
153 | Op.IsMemset = true; |
154 | Op.ZeroMemset = IsZeroMemset; |
155 | Op.MemcpyStrSrc = false; |
156 | return Op; |
157 | } |
158 | |
159 | uint64_t size() const { return Size; } |
160 | Align getDstAlign() const { |
161 | assert(!DstAlignCanChange); |
162 | return DstAlign; |
163 | } |
164 | bool isFixedDstAlign() const { return !DstAlignCanChange; } |
165 | bool allowOverlap() const { return AllowOverlap; } |
166 | bool isMemset() const { return IsMemset; } |
167 | bool isMemcpy() const { return !IsMemset; } |
168 | bool isMemcpyWithFixedDstAlign() const { |
169 | return isMemcpy() && !DstAlignCanChange; |
170 | } |
171 | bool isZeroMemset() const { return isMemset() && ZeroMemset; } |
172 | bool isMemcpyStrSrc() const { |
173 | assert(isMemcpy() && "Must be a memcpy" ); |
174 | return MemcpyStrSrc; |
175 | } |
176 | Align getSrcAlign() const { |
177 | assert(isMemcpy() && "Must be a memcpy" ); |
178 | return SrcAlign; |
179 | } |
180 | bool isSrcAligned(Align AlignCheck) const { |
181 | return isMemset() || llvm::isAligned(Lhs: AlignCheck, SizeInBytes: SrcAlign.value()); |
182 | } |
183 | bool isDstAligned(Align AlignCheck) const { |
184 | return DstAlignCanChange || llvm::isAligned(Lhs: AlignCheck, SizeInBytes: DstAlign.value()); |
185 | } |
186 | bool isAligned(Align AlignCheck) const { |
187 | return isSrcAligned(AlignCheck) && isDstAligned(AlignCheck); |
188 | } |
189 | }; |
190 | |
191 | /// This base class for TargetLowering contains the SelectionDAG-independent |
192 | /// parts that can be used from the rest of CodeGen. |
193 | class TargetLoweringBase { |
194 | public: |
  /// This enum indicates whether operations are valid for a target, and if not,
  /// what action should be used to make them valid.
  enum LegalizeAction : uint8_t {
    Legal,   // The target natively supports this operation.
    Promote, // This operation should be executed in a larger type.
    Expand,  // Try to expand this to other ops, otherwise use a libcall.
    LibCall, // Don't try to expand this to other ops, always use a libcall.
    Custom   // Use the LowerOperation hook to implement custom lowering.
  };

  /// This enum indicates whether types are legal for a target, and if not,
  /// what action should be used to make them valid.
  enum LegalizeTypeAction : uint8_t {
    TypeLegal,           // The target natively supports this type.
    TypePromoteInteger,  // Replace this integer with a larger one.
    TypeExpandInteger,   // Split this integer into two of half the size.
    TypeSoftenFloat,     // Convert this float to a same size integer type.
    TypeExpandFloat,     // Split this float into two of half the size.
    TypeScalarizeVector, // Replace this one-element vector with its element.
    TypeSplitVector,     // Split this vector into two of half the size.
    TypeWidenVector,     // This vector should be widened into a larger vector.
    TypePromoteFloat,    // Replace this float with a larger one.
    TypeSoftPromoteHalf, // Soften half to i16 and use float to do arithmetic.
    TypeScalarizeScalableVector, // This action is explicitly left unimplemented.
                                 // While it is theoretically possible to
                                 // legalize operations on scalable types with a
                                 // loop that handles the vscale * #lanes of the
                                 // vector, this is non-trivial at SelectionDAG
                                 // level and these types are better to be
                                 // widened or promoted.
  };

  /// LegalizeKind holds the legalization kind that needs to happen to EVT
  /// in order to type-legalize it.
  using LegalizeKind = std::pair<LegalizeTypeAction, EVT>;

  /// Enum that describes how the target represents true/false values.
  enum BooleanContent {
    UndefinedBooleanContent,    // Only bit 0 counts, the rest can hold garbage.
    ZeroOrOneBooleanContent,    // All bits zero except for bit 0.
    ZeroOrNegativeOneBooleanContent // All bits equal to bit 0.
  };

  /// Enum that describes what type of support for selects the target has.
  enum SelectSupportKind {
    ScalarValSelect,     // The target supports scalar selects (ex: cmov).
    ScalarCondVectorVal, // The target supports selects with a scalar condition
                         // and vector values (ex: cmov).
    VectorMaskSelect     // The target supports vector selects with a vector
                         // mask (ex: x86 blends).
  };

  /// Enum that specifies what an atomic load/AtomicRMWInst is expanded
  /// to, if at all. Exists because different targets have different levels of
  /// support for these atomic instructions, and also have different options
  /// w.r.t. what they should expand to.
  enum class AtomicExpansionKind {
    None,          // Don't expand the instruction.
    CastToInteger, // Cast the atomic instruction to another type, e.g. from
                   // floating-point to integer type.
    LLSC,          // Expand the instruction into loadlinked/storeconditional; used
                   // by ARM/AArch64.
    LLOnly,        // Expand the (load) instruction into just a load-linked, which has
                   // greater atomic guarantees than a normal load.
    CmpXChg,       // Expand the instruction into cmpxchg; used by at least X86.
    MaskedIntrinsic,  // Use a target-specific intrinsic for the LL/SC loop.
    BitTestIntrinsic, // Use a target-specific intrinsic for special bit
                      // operations; used by X86.
    CmpArithIntrinsic, // Use a target-specific intrinsic for special compare
                       // operations; used by X86.
    Expand,        // Generic expansion in terms of other atomic operations.

    // Rewrite to a non-atomic form for use in a known non-preemptible
    // environment.
    NotAtomic
  };

  /// Enum that specifies when a multiplication should be expanded.
  enum class MulExpansionKind {
    Always,            // Always expand the instruction.
    OnlyLegalOrCustom, // Only expand when the resulting instructions are legal
                       // or custom.
  };

  /// Enum that specifies when a float negation is beneficial.
  enum class NegatibleCost {
    Cheaper = 0,  // Negated expression is cheaper.
    Neutral = 1,  // Negated expression has the same cost.
    Expensive = 2 // Negated expression is more expensive.
  };

  /// Enum of different potentially desirable ways to fold (and/or (setcc ...),
  /// (setcc ...)).
  enum AndOrSETCCFoldKind : uint8_t {
    None = 0,   // No fold is preferable.
    AddAnd = 1, // Fold with `Add` op and `And` op is preferable.
    NotAnd = 2, // Fold with `Not` op and `And` op is preferable.
    ABS = 4,    // Fold with `llvm.abs` op is preferable.
  };
294 | |
  /// Describes one argument of a call being lowered: the IR value, the
  /// corresponding DAG node, its type, and the ABI attributes attached to it.
  class ArgListEntry {
  public:
    Value *Val = nullptr;
    SDValue Node = SDValue();
    Type *Ty = nullptr;
    bool IsSExt : 1;
    bool IsZExt : 1;
    bool IsInReg : 1;
    bool IsSRet : 1;
    bool IsNest : 1;
    bool IsByVal : 1;
    bool IsByRef : 1;
    bool IsInAlloca : 1;
    bool IsPreallocated : 1;
    bool IsReturned : 1;
    bool IsSwiftSelf : 1;
    bool IsSwiftAsync : 1;
    bool IsSwiftError : 1;
    bool IsCFGuardTarget : 1;
    MaybeAlign Alignment = std::nullopt;
    Type *IndirectType = nullptr;

    // All attribute flags default to false; bit-fields cannot use in-class
    // initializers pre-C++20, so they are cleared here.
    ArgListEntry()
        : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
          IsNest(false), IsByVal(false), IsByRef(false), IsInAlloca(false),
          IsPreallocated(false), IsReturned(false), IsSwiftSelf(false),
          IsSwiftAsync(false), IsSwiftError(false), IsCFGuardTarget(false) {}

    /// Populate the flags above from argument \p ArgIdx of \p Call.
    void setAttributes(const CallBase *Call, unsigned ArgIdx);
  };
  using ArgListTy = std::vector<ArgListEntry>;
326 | |
327 | virtual void markLibCallAttributes(MachineFunction *MF, unsigned CC, |
328 | ArgListTy &Args) const {}; |
329 | |
  /// Map a BooleanContent representation to the extension opcode that widens
  /// a boolean value while preserving that representation.
  static ISD::NodeType getExtendForContent(BooleanContent Content) {
    switch (Content) {
    case UndefinedBooleanContent:
      // Extend by adding rubbish bits.
      return ISD::ANY_EXTEND;
    case ZeroOrOneBooleanContent:
      // Extend by adding zero bits.
      return ISD::ZERO_EXTEND;
    case ZeroOrNegativeOneBooleanContent:
      // Extend by copying the sign bit.
      return ISD::SIGN_EXTEND;
    }
    llvm_unreachable("Invalid content kind");
  }
344 | |
  explicit TargetLoweringBase(const TargetMachine &TM);
  // Non-copyable: lowering state is tied to a single TargetMachine.
  TargetLoweringBase(const TargetLoweringBase &) = delete;
  TargetLoweringBase &operator=(const TargetLoweringBase &) = delete;
  virtual ~TargetLoweringBase() = default;

  /// Return true if the target supports strict float operations.
  bool isStrictFPEnabled() const {
    return IsStrictFPEnabled;
  }

protected:
  /// Initialize all of the actions to default values.
  void initActions();

public:
  const TargetMachine &getTargetMachine() const { return TM; }

  virtual bool useSoftFloat() const { return false; }
363 | |
364 | /// Return the pointer type for the given address space, defaults to |
365 | /// the pointer type from the data layout. |
366 | /// FIXME: The default needs to be removed once all the code is updated. |
367 | virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const { |
368 | return MVT::getIntegerVT(BitWidth: DL.getPointerSizeInBits(AS)); |
369 | } |
370 | |
371 | /// Return the in-memory pointer type for the given address space, defaults to |
372 | /// the pointer type from the data layout. |
373 | /// FIXME: The default needs to be removed once all the code is updated. |
374 | virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const { |
375 | return MVT::getIntegerVT(BitWidth: DL.getPointerSizeInBits(AS)); |
376 | } |
377 | |
378 | /// Return the type for frame index, which is determined by |
379 | /// the alloca address space specified through the data layout. |
380 | MVT getFrameIndexTy(const DataLayout &DL) const { |
381 | return getPointerTy(DL, AS: DL.getAllocaAddrSpace()); |
382 | } |
383 | |
384 | /// Return the type for code pointers, which is determined by the program |
385 | /// address space specified through the data layout. |
386 | MVT getProgramPointerTy(const DataLayout &DL) const { |
387 | return getPointerTy(DL, AS: DL.getProgramAddressSpace()); |
388 | } |
389 | |
390 | /// Return the type for operands of fence. |
391 | /// TODO: Let fence operands be of i32 type and remove this. |
392 | virtual MVT getFenceOperandTy(const DataLayout &DL) const { |
393 | return getPointerTy(DL); |
394 | } |
395 | |
  /// Return the type to use for a scalar shift opcode, given the shifted amount
  /// type. Targets should return a legal type if the input type is legal.
  /// Targets can return a type that is too small if the input type is illegal.
  virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const;

  /// Returns the type for the shift amount of a shift opcode. For vectors,
  /// returns the input type. For scalars, behavior depends on \p LegalTypes. If
  /// \p LegalTypes is true, calls getScalarShiftAmountTy, otherwise uses
  /// pointer type. If getScalarShiftAmountTy or pointer type cannot represent
  /// all possible shift amounts, returns MVT::i32. In general, \p LegalTypes
  /// should be set to true for calls during type legalization and after type
  /// legalization has been completed.
  EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
                       bool LegalTypes = true) const;

  /// Return the preferred type to use for a shift opcode, given the shifted
  /// amount type is \p ShiftValueTy.
  LLVM_READONLY
  virtual LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const {
    return ShiftValueTy;
  }

  /// Returns the type to be used for the index operand of:
  /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
  /// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR
  virtual MVT getVectorIdxTy(const DataLayout &DL) const {
    return getPointerTy(DL);
  }

  /// Returns the type to be used for the EVL/AVL operand of VP nodes:
  /// ISD::VP_ADD, ISD::VP_SUB, etc. It must be a legal scalar integer type,
  /// and must be at least as large as i32. The EVL is implicitly zero-extended
  /// to any larger type.
  virtual MVT getVPExplicitVectorLengthTy() const { return MVT::i32; }

  /// This callback is used to inspect load/store instructions and add
  /// target-specific MachineMemOperand flags to them. The default
  /// implementation does nothing.
  virtual MachineMemOperand::Flags
  getTargetMMOFlags(const Instruction &I) const {
    return MachineMemOperand::MONone;
  }

  /// This callback is used to inspect load/store SDNode.
  /// The default implementation does nothing.
  virtual MachineMemOperand::Flags
  getTargetMMOFlags(const MemSDNode &Node) const {
    return MachineMemOperand::MONone;
  }

  MachineMemOperand::Flags
  getLoadMemOperandFlags(const LoadInst &LI, const DataLayout &DL,
                         AssumptionCache *AC = nullptr,
                         const TargetLibraryInfo *LibInfo = nullptr) const;
  MachineMemOperand::Flags getStoreMemOperandFlags(const StoreInst &SI,
                                                   const DataLayout &DL) const;
  MachineMemOperand::Flags getAtomicMemOperandFlags(const Instruction &AI,
                                                    const DataLayout &DL) const;
453 | |
  /// Return true if the target supports the given kind of select; the default
  /// accepts every SelectSupportKind.
  virtual bool isSelectSupported(SelectSupportKind /*kind*/) const {
    return true;
  }

  /// Return true if the @llvm.get.active.lane.mask intrinsic should be expanded
  /// using generic code in SelectionDAGBuilder.
  virtual bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const {
    return true;
  }

  virtual bool shouldExpandGetVectorLength(EVT CountVT, unsigned VF,
                                           bool IsScalable) const {
    return true;
  }

  /// Return true if the @llvm.experimental.cttz.elts intrinsic should be
  /// expanded using generic code in SelectionDAGBuilder.
  virtual bool shouldExpandCttzElements(EVT VT) const { return true; }

  // Return true if op(vecreduce(x), vecreduce(y)) should be reassociated to
  // vecreduce(op(x, y)) for the reduction opcode RedOpc.
  virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const {
    return true;
  }

  /// Return true if it is profitable to convert a select of FP constants into
  /// a constant pool load whose address depends on the select condition. The
  /// parameter may be used to differentiate a select with FP compare from
  /// integer compare.
  virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
    return true;
  }

  /// Return true if multiple condition registers are available.
  bool hasMultipleConditionRegisters() const {
    return HasMultipleConditionRegisters;
  }
491 | |
492 | /// Return true if the target has BitExtract instructions. |
493 | bool () const { return HasExtractBitsInsn; } |
494 | |
  /// Return the preferred vector type legalization action.
  virtual TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const {
    // The default action for one element vectors is to scalarize
    if (VT.getVectorElementCount().isScalar())
      return TypeScalarizeVector;
    // The default action for an odd-width vector is to widen.
    if (!VT.isPow2VectorType())
      return TypeWidenVector;
    // The default action for other vectors is to promote
    return TypePromoteInteger;
  }

  // Return true if the half type should be promoted using soft promotion rules
  // where each operation is promoted to f32 individually, then converted to
  // fp16. The default behavior is to promote chains of operations, keeping
  // intermediate results in f32 precision and range.
  virtual bool softPromoteHalfType() const { return false; }

  // Return true if, for soft-promoted half, the half type should be passed
  // to and returned from functions as f32. The default behavior is to
  // pass as i16. If soft-promoted half is not used, this function is ignored
  // and values are always passed and returned as f32.
  virtual bool useFPRegsForHalfType() const { return false; }

  // There are two general methods for expanding a BUILD_VECTOR node:
  //  1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle
  //     them together.
  //  2. Build the vector on the stack and then load it.
  // If this function returns true, then method (1) will be used, subject to
  // the constraint that all of the necessary shuffles are legal (as determined
  // by isShuffleMaskLegal). If this function returns false, then method (2) is
  // always used. The vector type, and the number of defined values, are
  // provided.
  virtual bool
  shouldExpandBuildVectorWithShuffles(EVT /* VT */,
                                      unsigned DefinedValues) const {
    return DefinedValues < 3;
  }

  /// Return true if integer divide is usually cheaper than a sequence of
  /// several shifts, adds, and multiplies for this target.
  /// The definition of "cheaper" may depend on whether we're optimizing
  /// for speed or for size.
  virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const { return false; }

  /// Return true if the target can handle a standalone remainder operation.
  virtual bool hasStandaloneRem(EVT VT) const {
    return true;
  }

  /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
  virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const {
    // Default behavior is to replace SQRT(X) with X*RSQRT(X).
    return false;
  }

  /// Reciprocal estimate status values used by the functions below.
  enum ReciprocalEstimate : int {
    Unspecified = -1,
    Disabled = 0,
    Enabled = 1
  };
558 | |
  /// Return a ReciprocalEstimate enum value for a square root of the given type
  /// based on the function's attributes. If the operation is not overridden by
  /// the function's attributes, "Unspecified" is returned and target defaults
  /// are expected to be used for instruction selection.
  int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const;

  /// Return a ReciprocalEstimate enum value for a division of the given type
  /// based on the function's attributes. If the operation is not overridden by
  /// the function's attributes, "Unspecified" is returned and target defaults
  /// are expected to be used for instruction selection.
  int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const;

  /// Return the refinement step count for a square root of the given type based
  /// on the function's attributes. If the operation is not overridden by
  /// the function's attributes, "Unspecified" is returned and target defaults
  /// are expected to be used for instruction selection.
  int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const;

  /// Return the refinement step count for a division of the given type based
  /// on the function's attributes. If the operation is not overridden by
  /// the function's attributes, "Unspecified" is returned and target defaults
  /// are expected to be used for instruction selection.
  int getDivRefinementSteps(EVT VT, MachineFunction &MF) const;

  /// Returns true if target has indicated at least one type should be bypassed.
  bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); }

  /// Returns map of slow types for division or remainder with corresponding
  /// fast types
  const DenseMap<unsigned int, unsigned int> &getBypassSlowDivWidths() const {
    return BypassSlowDivWidths;
  }

  /// Return true only if vscale must be a power of two.
  virtual bool isVScaleKnownToBeAPowerOfTwo() const { return false; }
594 | |
  /// Return true if Flow Control is an expensive operation that should be
  /// avoided.
  bool isJumpExpensive() const { return JumpIsExpensive; }

  // Costs parameters used by
  // SelectionDAGBuilder::shouldKeepJumpConditionsTogether.
  // shouldKeepJumpConditionsTogether will use these parameter value to
  // determine if two conditions in the form `br (and/or cond1, cond2)` should
  // be split into two branches or left as one.
  //
  // BaseCost is the cost threshold (in latency). If the estimated latency of
  // computing both `cond1` and `cond2` is below the cost of just computing
  // `cond1` + BaseCost, the two conditions will be kept together. Otherwise
  // they will be split.
  //
  // LikelyBias increases BaseCost if branch probability info indicates that it
  // is likely that both `cond1` and `cond2` will be computed.
  //
  // UnlikelyBias decreases BaseCost if branch probability info indicates that
  // it is unlikely that both `cond1` and `cond2` will be computed.
  //
  // Set any field to -1 to make it ignored (setting BaseCost to -1 results in
  // `shouldKeepJumpConditionsTogether` always returning false).
  struct CondMergingParams {
    int BaseCost;
    int LikelyBias;
    int UnlikelyBias;
  };
623 | // Return params for deciding if we should keep two branch conditions merged |
624 | // or split them into two separate branches. |
625 | // Arg0: The binary op joining the two conditions (and/or). |
626 | // Arg1: The first condition (cond1) |
627 | // Arg2: The second condition (cond2) |
628 | virtual CondMergingParams |
629 | getJumpConditionMergingParams(Instruction::BinaryOps, const Value *, |
630 | const Value *) const { |
631 | // -1 will always result in splitting. |
632 | return {.BaseCost: -1, .LikelyBias: -1, .UnlikelyBias: -1}; |
633 | } |
634 | |
  /// Return true if selects are only cheaper than branches if the branch is
  /// unlikely to be predicted right.
  bool isPredictableSelectExpensive() const {
    return PredictableSelectIsExpensive;
  }

  /// Return true if GlobalISel should give up on \p Inst and fall back to
  /// SelectionDAG; the default never falls back.
  virtual bool fallBackToDAGISel(const Instruction &Inst) const {
    return false;
  }

  /// Return true if the following transform is beneficial:
  /// fold (conv (load x)) -> (load (conv*)x)
  /// On architectures that don't natively support some vector loads
  /// efficiently, casting the load to a smaller vector of larger types and
  /// loading is more efficient, however, this can be undone by optimizations in
  /// dag combiner.
  virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
                                       const SelectionDAG &DAG,
                                       const MachineMemOperand &MMO) const;
654 | |
655 | /// Return true if the following transform is beneficial: |
656 | /// (store (y (conv x)), y*)) -> (store x, (x*)) |
657 | virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, |
658 | const SelectionDAG &DAG, |
659 | const MachineMemOperand &MMO) const { |
660 | // Default to the same logic as loads. |
661 | return isLoadBitCastBeneficial(LoadVT: StoreVT, BitcastVT, DAG, MMO); |
662 | } |
663 | |
  /// Return true if it is expected to be cheaper to do a store of vector
  /// constant with the given size and type for the address space than to
  /// store the individual scalar element constants.
  virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT,
                                            unsigned NumElem,
                                            unsigned AddrSpace) const {
    // By default only an all-zero vector constant is considered cheap.
    return IsZero;
  }

  /// Allow store merging for the specified type after legalization in addition
  /// to before legalization. This may transform stores that do not exist
  /// earlier (for example, stores created from intrinsics).
  virtual bool mergeStoresAfterLegalization(EVT MemVT) const {
    return true;
  }

  /// Returns if it's reasonable to merge stores to MemVT size.
  virtual bool canMergeStoresTo(unsigned AS, EVT MemVT,
                                const MachineFunction &MF) const {
    return true;
  }

  /// Return true if it is cheap to speculate a call to intrinsic cttz.
  virtual bool isCheapToSpeculateCttz(Type *Ty) const {
    return false;
  }

  /// Return true if it is cheap to speculate a call to intrinsic ctlz.
  virtual bool isCheapToSpeculateCtlz(Type *Ty) const {
    return false;
  }

  /// Return true if ctlz instruction is fast.
  virtual bool isCtlzFast() const {
    return false;
  }
700 | |
701 | /// Return true if ctpop instruction is fast. |
702 | virtual bool isCtpopFast(EVT VT) const { |
703 | return isOperationLegal(Op: ISD::CTPOP, VT); |
704 | } |
705 | |
706 | /// Return the maximum number of "x & (x - 1)" operations that can be done |
707 | /// instead of deferring to a custom CTPOP. |
708 | virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const { |
709 | return 1; |
710 | } |
711 | |
712 | /// Return true if instruction generated for equality comparison is folded |
713 | /// with instruction generated for signed comparison. |
714 | virtual bool isEqualityCmpFoldedWithSignedCmp() const { return true; } |
715 | |
/// Return true if the heuristic to prefer icmp eq zero should be used in code
/// gen prepare. Targets opt in explicitly; disabled by default.
virtual bool preferZeroCompareBranch() const { return false; }
719 | |
/// Return true if it is cheaper to split the store of a merged int val
/// from a pair of smaller values into multiple stores.
/// \p LTy/\p HTy are the types of the low and high halves.
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const {
  return false;
}
725 | |
/// Return if the target supports combining a
/// chain like:
/// \code
///   %andResult = and %val1, #mask
///   %icmpResult = icmp %andResult, 0
/// \endcode
/// into a single machine instruction of a form like:
/// \code
///   cc = test %register, #mask
/// \endcode
/// \p AndI is the IR 'and' instruction that would be combined.
virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
  return false;
}
739 | |
/// Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
/// The default assumes target flags never prevent merging.
virtual bool
areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX,
                                    const MemSDNode &NodeY) const {
  return true;
}
746 | |
/// Use bitwise logic to make pairs of compares more efficient. For example:
/// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
/// This should be true when it takes more than one instruction to lower
/// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on
/// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win.
virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const {
  // Disabled by default; targets opt in per value type.
  return false;
}
755 | |
756 | /// Return the preferred operand type if the target has a quick way to compare |
757 | /// integer values of the given size. Assume that any legal integer type can |
758 | /// be compared efficiently. Targets may override this to allow illegal wide |
759 | /// types to return a vector type if there is support to compare that type. |
760 | virtual MVT hasFastEqualityCompare(unsigned NumBits) const { |
761 | MVT VT = MVT::getIntegerVT(BitWidth: NumBits); |
762 | return isTypeLegal(VT) ? VT : MVT::INVALID_SIMPLE_VALUE_TYPE; |
763 | } |
764 | |
/// Return true if the target should transform:
///   (X & Y) == Y ---> (~X & Y) == 0
///   (X & Y) != Y ---> (~X & Y) != 0
///
/// This may be profitable if the target has a bitwise and-not operation that
/// sets comparison flags. A target may want to limit the transformation based
/// on the type of Y or if Y is a constant.
///
/// Note that the transform will not occur if Y is known to be a power-of-2
/// because a mask and compare of a single bit can be handled by inverting the
/// predicate, for example:
///   (X & 8) == 8 ---> (X & 8) != 0
virtual bool hasAndNotCompare(SDValue Y) const {
  // Disabled by default; targets with flag-setting and-not opt in.
  return false;
}
780 | |
781 | /// Return true if the target has a bitwise and-not operation: |
782 | /// X = ~A & B |
783 | /// This can be used to simplify select or other instructions. |
784 | virtual bool hasAndNot(SDValue X) const { |
785 | // If the target has the more complex version of this operation, assume that |
786 | // it has this operation too. |
787 | return hasAndNotCompare(Y: X); |
788 | } |
789 | |
/// Return true if the target has a bit-test instruction:
///   (X & (1 << Y)) ==/!= 0
/// This knowledge can be used to prevent breaking the pattern,
/// or creating it if it could be recognized.
/// \p X is the value being masked, \p Y the (possibly variable) bit index.
virtual bool hasBitTest(SDValue X, SDValue Y) const { return false; }
795 | |
/// There are two ways to clear extreme bits (either low or high):
///   Mask:    x & (-1 << y)  (the instcombine canonical form)
///   Shifts:  x >> y << y
/// Return true if the variant with 2 variable shifts is preferred.
/// Return false if there is no preference.
/// \p X corresponds to 'x' in the patterns above.
virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const {
  // By default, let's assume that no one prefers shifts.
  return false;
}
805 | |
/// Return true if it is profitable to fold a pair of shifts into a mask.
/// This is usually true on most targets. But some targets, like Thumb1,
/// have immediate shift instructions, but no immediate "and" instruction;
/// this makes the fold unprofitable.
/// \p N is the shift node; \p Level is the current DAG-combine phase.
virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                               CombineLevel Level) const {
  return true;
}
814 | |
/// Should we transform the IR-optimal check for whether given truncation
/// down into KeptBits would be truncating or not:
///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
/// Into its more traditional form:
///   ((%x << C) a>> C) dstcond %x
/// Return true if we should transform.
/// Return false if there is no preference.
virtual bool shouldTransformSignedTruncationCheck(EVT XVT,
                                                  unsigned KeptBits) const {
  // By default, let's assume that no one prefers shifts.
  return false;
}
827 | |
/// Given the pattern
///   (X & (C l>>/<< Y)) ==/!= 0
/// return true if it should be transformed into:
///   ((X <</l>> Y) & C) ==/!= 0
/// WARNING: if 'X' is a constant, the fold may deadlock (see the combine-loop
/// note below)!
/// FIXME: we could avoid passing XC, but we can't use isConstOrConstSplat()
/// here because it can end up being not linked in.
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
    SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
    unsigned OldShiftOpcode, unsigned NewShiftOpcode,
    SelectionDAG &DAG) const {
  if (hasBitTest(X, Y)) {
    // One interesting pattern that we'd want to form is 'bit test':
    //   ((1 << Y)  &  C)  ==/!=  0
    // But we also need to be careful not to try to reverse that fold.

    // Is this '1 << Y' ?
    if (OldShiftOpcode == ISD::SHL && CC->isOne())
      return false; // Keep the 'bit test' pattern.

    // Will it be '1 << Y' after the transform ?
    if (XC && NewShiftOpcode == ISD::SHL && XC->isOne())
      return true; // Do form the 'bit test' pattern.
  }

  // If 'X' is a constant, and we transform, then we will immediately
  // try to undo the fold, thus causing endless combine loop.
  // So by default, let's assume everyone prefers the fold
  // iff 'X' is not a constant.
  return !XC;
}
859 | |
// Return true if it's desirable to perform the following transform:
//   (fmul C, (uitofp Pow2))
//       -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
//   (fdiv C, (uitofp Pow2))
//       -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
//
// This is only queried after we have verified the transform will be bitwise
// equals.
//
// SDNode *N      : The FDiv/FMul node we want to transform.
// SDValue FPConst: The Float constant operand in `N`.
// SDValue IntPow2: The Integer power of 2 operand in `N`.
virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst,
                                                 SDValue IntPow2) const {
  // Default to avoiding fdiv which is often very expensive.
  return N->getOpcode() == ISD::FDIV;
}
877 | |
// Given:
//   (icmp eq/ne (and X, C0), (shift X, C1))
// or
//   (icmp eq/ne X, (rotate X, CPow2))
//
// If C0 is a mask or shifted mask and the shift amt (C1) isolates the
// remaining bits (i.e something like `(x64 & UINT32_MAX) == (x64 >> 32)`),
// do we prefer the shift to be shift-right, shift-left, or rotate.
// Note: It's only valid to convert the rotate version to the shift version
// iff the shift-amt (`C1`) is a power of 2 (including 0).
// If ShiftOpc (current Opcode) is returned, do nothing.
virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(
    EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
    const APInt &ShiftOrRotateAmt,
    const std::optional<APInt> &AndMask) const {
  return ShiftOpc;
}
895 | |
/// These two forms are equivalent:
///   sub %y, (xor %x, -1)
///   add (add %x, 1), %y
/// The variant with two add's is IR-canonical.
/// Some targets may prefer one to the other.
virtual bool preferIncOfAddToSubOfNot(EVT VT) const {
  // By default, let's assume that everyone prefers the form with two add's.
  return true;
}
905 | |
// By default prefer folding (abs (sub nsw x, y)) -> abds(x, y). Some targets
// may want to avoid this to prevent loss of sub_nsw pattern.
virtual bool preferABDSToABSWithNSW(EVT VT) const {
  return true;
}
911 | |
// Return true if the target wants to transform Op(Splat(X)) -> Splat(Op(X)),
// i.e. scalarize an operation whose operand is a splat.
virtual bool preferScalarizeSplat(SDNode *N) const { return true; }
914 | |
// Return true if the target wants to transform:
//   (TruncVT truncate(sext_in_reg(VT X, ExtVT))
//   -> (TruncVT sext_in_reg(truncate(VT X), ExtVT))
// Some targets might prefer pre-sextinreg to improve truncation/saturation.
virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const {
  return true;
}
922 | |
/// Return true if the target wants to use the optimization that
/// turns ext(promotableInst1(...(promotableInstN(load)))) into
/// promotedInst1(...(promotedInstN(ext(load)))).
/// Simply reads the EnableExtLdPromotion member flag.
bool enableExtLdPromotion() const { return EnableExtLdPromotion; }
927 | |
/// Return true if the target can combine store(extractelement VectorTy,
/// Idx).
/// \p Cost[out] gives the cost of that transformation when this is true.
virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
                                       unsigned &Cost) const {
  // Default: combining is not supported; Cost is left untouched.
  return false;
}
935 | |
936 | /// Return true if the target shall perform extract vector element and store |
937 | /// given that the vector is known to be splat of constant. |
938 | /// \p Index[out] gives the index of the vector element to be extracted when |
939 | /// this is true. |
940 | virtual bool ( |
941 | Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const { |
942 | return false; |
943 | } |
944 | |
/// Return true if inserting a scalar into a variable element of an undef
/// vector is more efficiently handled by splatting the scalar instead.
virtual bool shouldSplatInsEltVarIndex(EVT) const {
  return false;
}
950 | |
/// Return true if target always benefits from combining into FMA for a
/// given value type. This must typically return false on targets where FMA
/// takes more cycles to execute than FADD. (SelectionDAG overload.)
virtual bool enableAggressiveFMAFusion(EVT VT) const { return false; }
955 | |
/// Return true if target always benefits from combining into FMA for a
/// given value type. This must typically return false on targets where FMA
/// takes more cycles to execute than FADD. (GlobalISel/LLT overload.)
virtual bool enableAggressiveFMAFusion(LLT Ty) const { return false; }
960 | |
961 | /// Return the ValueType of the result of SETCC operations. |
962 | virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, |
963 | EVT VT) const; |
964 | |
965 | /// Return the ValueType for comparison libcalls. Comparison libcalls include |
966 | /// floating point comparison calls, and Ordered/Unordered check calls on |
967 | /// floating point numbers. |
968 | virtual |
969 | MVT::SimpleValueType getCmpLibcallReturnType() const; |
970 | |
971 | /// For targets without i1 registers, this gives the nature of the high-bits |
972 | /// of boolean values held in types wider than i1. |
973 | /// |
974 | /// "Boolean values" are special true/false values produced by nodes like |
975 | /// SETCC and consumed (as the condition) by nodes like SELECT and BRCOND. |
976 | /// Not to be confused with general values promoted from i1. Some cpus |
977 | /// distinguish between vectors of boolean and scalars; the isVec parameter |
978 | /// selects between the two kinds. For example on X86 a scalar boolean should |
979 | /// be zero extended from i1, while the elements of a vector of booleans |
980 | /// should be sign extended from i1. |
981 | /// |
982 | /// Some cpus also treat floating point types the same way as they treat |
983 | /// vectors instead of the way they treat scalars. |
984 | BooleanContent getBooleanContents(bool isVec, bool isFloat) const { |
985 | if (isVec) |
986 | return BooleanVectorContents; |
987 | return isFloat ? BooleanFloatContents : BooleanContents; |
988 | } |
989 | |
990 | BooleanContent getBooleanContents(EVT Type) const { |
991 | return getBooleanContents(isVec: Type.isVector(), isFloat: Type.isFloatingPoint()); |
992 | } |
993 | |
994 | /// Promote the given target boolean to a target boolean of the given type. |
995 | /// A target boolean is an integer value, not necessarily of type i1, the bits |
996 | /// of which conform to getBooleanContents. |
997 | /// |
998 | /// ValVT is the type of values that produced the boolean. |
999 | SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, |
1000 | EVT ValVT) const { |
1001 | SDLoc dl(Bool); |
1002 | EVT BoolVT = |
1003 | getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: ValVT); |
1004 | ISD::NodeType ExtendCode = getExtendForContent(Content: getBooleanContents(Type: ValVT)); |
1005 | return DAG.getNode(Opcode: ExtendCode, DL: dl, VT: BoolVT, Operand: Bool); |
1006 | } |
1007 | |
/// Return target scheduling preference (reads the SchedPreferenceInfo
/// member set by the target).
Sched::Preference getSchedulingPreference() const {
  return SchedPreferenceInfo;
}
1012 | |
/// Some scheduler, e.g. hybrid, can switch to different scheduling heuristics
/// for different nodes. This function returns the preference (or none) for
/// the given node.
virtual Sched::Preference getSchedulingPreference(SDNode *) const {
  // Default: no per-node preference.
  return Sched::None;
}
1019 | |
1020 | /// Return the register class that should be used for the specified value |
1021 | /// type. |
1022 | virtual const TargetRegisterClass *getRegClassFor(MVT VT, bool isDivergent = false) const { |
1023 | (void)isDivergent; |
1024 | const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy]; |
1025 | assert(RC && "This value type is not natively supported!" ); |
1026 | return RC; |
1027 | } |
1028 | |
/// Allows target to decide about the register class of the
/// specific value that is live outside the defining block.
/// Returns true if the value needs uniform register class.
virtual bool requiresUniformRegister(MachineFunction &MF,
                                     const Value *) const {
  // Default: no uniformity requirement.
  return false;
}
1036 | |
1037 | /// Return the 'representative' register class for the specified value |
1038 | /// type. |
1039 | /// |
1040 | /// The 'representative' register class is the largest legal super-reg |
1041 | /// register class for the register class of the value type. For example, on |
1042 | /// i386 the rep register class for i8, i16, and i32 are GR32; while the rep |
1043 | /// register class is GR64 on x86_64. |
1044 | virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const { |
1045 | const TargetRegisterClass *RC = RepRegClassForVT[VT.SimpleTy]; |
1046 | return RC; |
1047 | } |
1048 | |
/// Return the cost of the 'representative' register class for the specified
/// value type (table lookup by simple value type).
virtual uint8_t getRepRegClassCostFor(MVT VT) const {
  return RepRegClassCostForVT[VT.SimpleTy];
}
1054 | |
/// Return the preferred strategy to legalize this SHIFT instruction, with
/// \p ExpansionFactor being the recursion depth - how many expansion needed.
enum class ShiftLegalizationStrategy {
  ExpandToParts,
  ExpandThroughStack,
  LowerToLibcall
};
virtual ShiftLegalizationStrategy
preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                   unsigned ExpansionFactor) const {
  // A single expansion is cheap as a pair of parts; deeper recursion goes
  // through a stack temporary instead.
  if (ExpansionFactor == 1)
    return ShiftLegalizationStrategy::ExpandToParts;
  return ShiftLegalizationStrategy::ExpandThroughStack;
}
1069 | |
/// Return true if the target has native support for the specified value type.
/// This means that it has a register that directly holds it without
/// promotions or expansions.
bool isTypeLegal(EVT VT) const {
  // Extended (non-simple) types can never index the table below.
  assert(!VT.isSimple() ||
         (unsigned)VT.getSimpleVT().SimpleTy < std::size(RegClassForVT));
  // A type is legal iff a register class was registered for it.
  return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr;
}
1078 | |
1079 | class ValueTypeActionImpl { |
1080 | /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum |
1081 | /// that indicates how instruction selection should deal with the type. |
1082 | LegalizeTypeAction ValueTypeActions[MVT::VALUETYPE_SIZE]; |
1083 | |
1084 | public: |
1085 | ValueTypeActionImpl() { |
1086 | std::fill(first: std::begin(arr&: ValueTypeActions), last: std::end(arr&: ValueTypeActions), |
1087 | value: TypeLegal); |
1088 | } |
1089 | |
1090 | LegalizeTypeAction getTypeAction(MVT VT) const { |
1091 | return ValueTypeActions[VT.SimpleTy]; |
1092 | } |
1093 | |
1094 | void setTypeAction(MVT VT, LegalizeTypeAction Action) { |
1095 | ValueTypeActions[VT.SimpleTy] = Action; |
1096 | } |
1097 | }; |
1098 | |
/// Read-only access to the per-MVT legalization action table.
const ValueTypeActionImpl &getValueTypeActions() const {
  return ValueTypeActions;
}
1102 | |
1103 | /// Return pair that represents the legalization kind (first) that needs to |
1104 | /// happen to EVT (second) in order to type-legalize it. |
1105 | /// |
1106 | /// First: how we should legalize values of this type, either it is already |
1107 | /// legal (return 'Legal') or we need to promote it to a larger type (return |
1108 | /// 'Promote'), or we need to expand it into multiple registers of smaller |
1109 | /// integer type (return 'Expand'). 'Custom' is not an option. |
1110 | /// |
1111 | /// Second: for types supported by the target, this is an identity function. |
1112 | /// For types that must be promoted to larger types, this returns the larger |
1113 | /// type to promote to. For integer types that are larger than the largest |
1114 | /// integer register, this contains one step in the expansion to get to the |
1115 | /// smaller register. For illegal floating point types, this returns the |
1116 | /// integer type to transform to. |
1117 | LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const; |
1118 | |
/// Return how we should legalize values of this type, either it is already
/// legal (return 'Legal') or we need to promote it to a larger type (return
/// 'Promote'), or we need to expand it into multiple registers of smaller
/// integer type (return 'Expand'). 'Custom' is not an option.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const {
  // Delegates to getTypeConversion and keeps only the action half.
  return getTypeConversion(Context, VT).first;
}
/// Simple-type overload: direct table lookup, no context needed.
LegalizeTypeAction getTypeAction(MVT VT) const {
  return ValueTypeActions.getTypeAction(VT);
}
1129 | |
/// For types supported by the target, this is an identity function. For
/// types that must be promoted to larger types, this returns the larger type
/// to promote to. For integer types that are larger than the largest integer
/// register, this contains one step in the expansion to get to the smaller
/// register. For illegal floating point types, this returns the integer type
/// to transform to.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const {
  // Delegates to getTypeConversion and keeps only the destination type.
  return getTypeConversion(Context, VT).second;
}
1139 | |
1140 | /// For types supported by the target, this is an identity function. For |
1141 | /// types that must be expanded (i.e. integer types that are larger than the |
1142 | /// largest integer register or illegal floating point types), this returns |
1143 | /// the largest legal type it will be expanded to. |
1144 | EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const { |
1145 | assert(!VT.isVector()); |
1146 | while (true) { |
1147 | switch (getTypeAction(Context, VT)) { |
1148 | case TypeLegal: |
1149 | return VT; |
1150 | case TypeExpandInteger: |
1151 | VT = getTypeToTransformTo(Context, VT); |
1152 | break; |
1153 | default: |
1154 | llvm_unreachable("Type is not legal nor is it to be expanded!" ); |
1155 | } |
1156 | } |
1157 | } |
1158 | |
1159 | /// Vector types are broken down into some number of legal first class types. |
1160 | /// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8 |
1161 | /// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64 |
1162 | /// turns into 4 EVT::i32 values with both PPC and X86. |
1163 | /// |
1164 | /// This method returns the number of registers needed, and the VT for each |
1165 | /// register. It also returns the VT and quantity of the intermediate values |
1166 | /// before they are promoted/expanded. |
1167 | unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT, |
1168 | EVT &IntermediateVT, |
1169 | unsigned &NumIntermediates, |
1170 | MVT &RegisterVT) const; |
1171 | |
/// Certain targets such as MIPS require that some types such as vectors are
/// always broken down into scalars in some contexts. This occurs even if the
/// vector type is legal.
/// The base implementation ignores the calling convention and defers to the
/// generic getVectorTypeBreakdown.
virtual unsigned getVectorTypeBreakdownForCallingConv(
    LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
    unsigned &NumIntermediates, MVT &RegisterVT) const {
  return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates,
                                RegisterVT);
}
1181 | |
/// Memory-access description a target fills in from getTgtMemIntrinsic.
struct IntrinsicInfo {
  unsigned opc = 0;       // target opcode
  EVT memVT;              // memory VT

  // value representing memory location
  PointerUnion<const Value *, const PseudoSourceValue *> ptrVal;

  // Fallback address space for use if ptrVal is nullptr. std::nullopt means
  // unknown address space.
  std::optional<unsigned> fallbackAddressSpace;

  int offset = 0;         // offset off of ptrVal
  uint64_t size = 0;      // the size of the memory location
                          // (taken from memVT if zero)
  MaybeAlign align = Align(1); // alignment

  MachineMemOperand::Flags flags = MachineMemOperand::MONone;
  IntrinsicInfo() = default;
};
1201 | |
/// Given an intrinsic, checks if on the target the intrinsic will need to map
/// to a MemIntrinsicNode (touches memory). If this is the case, it returns
/// true and store the intrinsic information into the IntrinsicInfo that was
/// passed to the function.
virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
                                MachineFunction &,
                                unsigned /*Intrinsic*/) const {
  // Default: no target intrinsics touch memory.
  return false;
}
1211 | |
/// Returns true if the target can instruction select the specified FP
/// immediate natively. If false, the legalizer will materialize the FP
/// immediate as a load from a constant pool.
/// \p ForCodeSize selects the size-optimized answer where it differs.
virtual bool isFPImmLegal(const APFloat & /*Imm*/, EVT /*VT*/,
                          bool ForCodeSize = false) const {
  return false;
}
1219 | |
/// Targets can use this to indicate that they only support *some*
/// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
/// target supports the VECTOR_SHUFFLE node, all mask values are assumed to be
/// legal.
virtual bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const {
  return true;
}
1227 | |
1228 | /// Returns true if the operation can trap for the value type. |
1229 | /// |
1230 | /// VT must be a legal type. By default, we optimistically assume most |
1231 | /// operations don't trap except for integer divide and remainder. |
1232 | virtual bool canOpTrap(unsigned Op, EVT VT) const; |
1233 | |
/// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
/// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
/// constant pool entry.
virtual bool isVectorClearMaskLegal(ArrayRef<int> /*Mask*/,
                                    EVT /*VT*/) const {
  // Default: no such shuffle is assumed to exist.
  return false;
}
1241 | |
/// How to legalize this custom operation? Queried for target-specific
/// (post-ISD) opcodes; default treats them as already legal.
virtual LegalizeAction getCustomOperationAction(SDNode &Op) const {
  return Legal;
}
1246 | |
/// Return how this operation should be treated: either it is legal, needs to
/// be promoted to a larger size, needs to be expanded to some other code
/// sequence, or the target has a custom expander for it.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const {
  // Extended types have no table entry and always require expansion.
  if (VT.isExtended()) return Expand;
  // If a target-specific SDNode requires legalization, require the target
  // to provide custom legalization for it.
  if (Op >= std::size(OpActions[0]))
    return Custom;
  return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op];
}
1258 | |
/// Custom method defined by each target to indicate if an operation which
/// may require a scale is supported natively by the target.
/// If not, the operation is illegal.
virtual bool isSupportedFixedPointOperation(unsigned Op, EVT VT,
                                            unsigned Scale) const {
  return false;
}
1266 | |
1267 | /// Some fixed point operations may be natively supported by the target but |
1268 | /// only for specific scales. This method allows for checking |
1269 | /// if the width is supported by the target for a given operation that may |
1270 | /// depend on scale. |
1271 | LegalizeAction getFixedPointOperationAction(unsigned Op, EVT VT, |
1272 | unsigned Scale) const { |
1273 | auto Action = getOperationAction(Op, VT); |
1274 | if (Action != Legal) |
1275 | return Action; |
1276 | |
1277 | // This operation is supported in this type but may only work on specific |
1278 | // scales. |
1279 | bool Supported; |
1280 | switch (Op) { |
1281 | default: |
1282 | llvm_unreachable("Unexpected fixed point operation." ); |
1283 | case ISD::SMULFIX: |
1284 | case ISD::SMULFIXSAT: |
1285 | case ISD::UMULFIX: |
1286 | case ISD::UMULFIXSAT: |
1287 | case ISD::SDIVFIX: |
1288 | case ISD::SDIVFIXSAT: |
1289 | case ISD::UDIVFIX: |
1290 | case ISD::UDIVFIXSAT: |
1291 | Supported = isSupportedFixedPointOperation(Op, VT, Scale); |
1292 | break; |
1293 | } |
1294 | |
1295 | return Supported ? Action : Expand; |
1296 | } |
1297 | |
1298 | // If Op is a strict floating-point operation, return the result |
1299 | // of getOperationAction for the equivalent non-strict operation. |
1300 | LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const { |
1301 | unsigned EqOpc; |
1302 | switch (Op) { |
1303 | default: llvm_unreachable("Unexpected FP pseudo-opcode" ); |
1304 | #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ |
1305 | case ISD::STRICT_##DAGN: EqOpc = ISD::DAGN; break; |
1306 | #define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ |
1307 | case ISD::STRICT_##DAGN: EqOpc = ISD::SETCC; break; |
1308 | #include "llvm/IR/ConstrainedOps.def" |
1309 | } |
1310 | |
1311 | return getOperationAction(Op: EqOpc, VT); |
1312 | } |
1313 | |
1314 | /// Return true if the specified operation is legal on this target or can be |
1315 | /// made legal with custom lowering. This is used to help guide high-level |
1316 | /// lowering decisions. LegalOnly is an optional convenience for code paths |
1317 | /// traversed pre and post legalisation. |
1318 | bool isOperationLegalOrCustom(unsigned Op, EVT VT, |
1319 | bool LegalOnly = false) const { |
1320 | if (LegalOnly) |
1321 | return isOperationLegal(Op, VT); |
1322 | |
1323 | return (VT == MVT::Other || isTypeLegal(VT)) && |
1324 | (getOperationAction(Op, VT) == Legal || |
1325 | getOperationAction(Op, VT) == Custom); |
1326 | } |
1327 | |
1328 | /// Return true if the specified operation is legal on this target or can be |
1329 | /// made legal using promotion. This is used to help guide high-level lowering |
1330 | /// decisions. LegalOnly is an optional convenience for code paths traversed |
1331 | /// pre and post legalisation. |
1332 | bool isOperationLegalOrPromote(unsigned Op, EVT VT, |
1333 | bool LegalOnly = false) const { |
1334 | if (LegalOnly) |
1335 | return isOperationLegal(Op, VT); |
1336 | |
1337 | return (VT == MVT::Other || isTypeLegal(VT)) && |
1338 | (getOperationAction(Op, VT) == Legal || |
1339 | getOperationAction(Op, VT) == Promote); |
1340 | } |
1341 | |
1342 | /// Return true if the specified operation is legal on this target or can be |
1343 | /// made legal with custom lowering or using promotion. This is used to help |
1344 | /// guide high-level lowering decisions. LegalOnly is an optional convenience |
1345 | /// for code paths traversed pre and post legalisation. |
1346 | bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, |
1347 | bool LegalOnly = false) const { |
1348 | if (LegalOnly) |
1349 | return isOperationLegal(Op, VT); |
1350 | |
1351 | return (VT == MVT::Other || isTypeLegal(VT)) && |
1352 | (getOperationAction(Op, VT) == Legal || |
1353 | getOperationAction(Op, VT) == Custom || |
1354 | getOperationAction(Op, VT) == Promote); |
1355 | } |
1356 | |
/// Return true if the operation uses custom lowering, regardless of whether
/// the type is legal or not.
bool isOperationCustom(unsigned Op, EVT VT) const {
  return getOperationAction(Op, VT) == Custom;
}
1362 | |
1363 | /// Return true if lowering to a jump table is allowed. |
1364 | virtual bool areJTsAllowed(const Function *Fn) const { |
1365 | if (Fn->getFnAttribute(Kind: "no-jump-tables" ).getValueAsBool()) |
1366 | return false; |
1367 | |
1368 | return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || |
1369 | isOperationLegalOrCustom(ISD::BRIND, MVT::Other); |
1370 | } |
1371 | |
1372 | /// Check whether the range [Low,High] fits in a machine word. |
1373 | bool rangeFitsInWord(const APInt &Low, const APInt &High, |
1374 | const DataLayout &DL) const { |
1375 | // FIXME: Using the pointer type doesn't seem ideal. |
1376 | uint64_t BW = DL.getIndexSizeInBits(AS: 0u); |
1377 | uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; |
1378 | return Range <= BW; |
1379 | } |
1380 | |
1381 | /// Return true if lowering to a jump table is suitable for a set of case |
1382 | /// clusters which may contain \p NumCases cases, \p Range range of values. |
1383 | virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, |
1384 | uint64_t Range, ProfileSummaryInfo *PSI, |
1385 | BlockFrequencyInfo *BFI) const; |
1386 | |
1387 | /// Returns preferred type for switch condition. |
1388 | virtual MVT getPreferredSwitchConditionType(LLVMContext &Context, |
1389 | EVT ConditionVT) const; |
1390 | |
/// Return true if lowering to a bit test is suitable for a set of case
/// clusters which contains \p NumDests unique destinations, \p Low and
/// \p High as its lowest and highest case values, and expects \p NumCmps
/// case value comparisons. Check if the number of destinations, comparison
/// metric, and range are all suitable.
bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
                           const APInt &Low, const APInt &High,
                           const DataLayout &DL) const {
  // FIXME: I don't think NumCmps is the correct metric: a single case and a
  // range of cases both require only one branch to lower. Just looking at the
  // number of clusters and destinations should be enough to decide whether to
  // build bit tests.

  // To lower a range with bit tests, the range must fit the bitwidth of a
  // machine word.
  if (!rangeFitsInWord(Low, High, DL))
    return false;

  // Decide whether it's profitable to lower this range with bit tests. Each
  // destination requires a bit test and branch, and there is an overall range
  // check branch. For a small number of clusters, separate comparisons might
  // be cheaper, and for many destinations, splitting the range might be
  // better.
  return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) ||
         (NumDests == 3 && NumCmps >= 6);
}
1417 | |
1418 | /// Return true if the specified operation is illegal on this target or |
1419 | /// unlikely to be made legal with custom lowering. This is used to help guide |
1420 | /// high-level lowering decisions. |
1421 | bool isOperationExpand(unsigned Op, EVT VT) const { |
1422 | return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand); |
1423 | } |
1424 | |
1425 | /// Return true if the specified operation is legal on this target. |
1426 | bool isOperationLegal(unsigned Op, EVT VT) const { |
1427 | return (VT == MVT::Other || isTypeLegal(VT)) && |
1428 | getOperationAction(Op, VT) == Legal; |
1429 | } |
1430 | |
/// Return how this load with extension should be treated: either it is legal,
/// needs to be promoted to a larger size, needs to be expanded to some other
/// code sequence, or the target has a custom expander for it.
LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT,
                                EVT MemVT) const {
  // Extended (non-simple) types have no table entry; they always expand.
  if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
  unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
  unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
  assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::VALUETYPE_SIZE &&
         MemI < MVT::VALUETYPE_SIZE && "Table isn't big enough!" );
  // Each (ValVT, MemVT) table entry packs one 4-bit action per ext kind.
  unsigned Shift = 4 * ExtType;
  return (LegalizeAction)((LoadExtActions[ValI][MemI] >> Shift) & 0xf);
}
1444 | |
1445 | /// Return true if the specified load with extension is legal on this target. |
1446 | bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const { |
1447 | return getLoadExtAction(ExtType, ValVT, MemVT) == Legal; |
1448 | } |
1449 | |
1450 | /// Return true if the specified load with extension is legal or custom |
1451 | /// on this target. |
1452 | bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const { |
1453 | return getLoadExtAction(ExtType, ValVT, MemVT) == Legal || |
1454 | getLoadExtAction(ExtType, ValVT, MemVT) == Custom; |
1455 | } |
1456 | |
/// Same as getLoadExtAction, but for atomic loads.
LegalizeAction getAtomicLoadExtAction(unsigned ExtType, EVT ValVT,
                                      EVT MemVT) const {
  // Extended (non-simple) types have no table entry; they always expand.
  if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
  unsigned ValI = (unsigned)ValVT.getSimpleVT().SimpleTy;
  unsigned MemI = (unsigned)MemVT.getSimpleVT().SimpleTy;
  assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::VALUETYPE_SIZE &&
         MemI < MVT::VALUETYPE_SIZE && "Table isn't big enough!" );
  // Same 4-bits-per-extension-kind packing as LoadExtActions.
  unsigned Shift = 4 * ExtType;
  LegalizeAction Action =
      (LegalizeAction)((AtomicLoadExtActions[ValI][MemI] >> Shift) & 0xf);
  // Atomic load extensions only support the Legal/Expand actions.
  assert((Action == Legal || Action == Expand) &&
         "Unsupported atomic load extension action." );
  return Action;
}
1472 | |
1473 | /// Return true if the specified atomic load with extension is legal on |
1474 | /// this target. |
1475 | bool isAtomicLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const { |
1476 | return getAtomicLoadExtAction(ExtType, ValVT, MemVT) == Legal; |
1477 | } |
1478 | |
/// Return how this store with truncation should be treated: either it is
/// legal, needs to be promoted to a larger size, needs to be expanded to some
/// other code sequence, or the target has a custom expander for it.
LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const {
  // Extended (non-simple) types have no table entry; they always expand.
  if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
  unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
  unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
  assert(ValI < MVT::VALUETYPE_SIZE && MemI < MVT::VALUETYPE_SIZE &&
         "Table isn't big enough!" );
  return TruncStoreActions[ValI][MemI];
}
1490 | |
1491 | /// Return true if the specified store with truncation is legal on this |
1492 | /// target. |
1493 | bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const { |
1494 | return isTypeLegal(VT: ValVT) && getTruncStoreAction(ValVT, MemVT) == Legal; |
1495 | } |
1496 | |
1497 | /// Return true if the specified store with truncation has solution on this |
1498 | /// target. |
1499 | bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const { |
1500 | return isTypeLegal(VT: ValVT) && |
1501 | (getTruncStoreAction(ValVT, MemVT) == Legal || |
1502 | getTruncStoreAction(ValVT, MemVT) == Custom); |
1503 | } |
1504 | |
1505 | virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, |
1506 | bool LegalOnly) const { |
1507 | if (LegalOnly) |
1508 | return isTruncStoreLegal(ValVT, MemVT); |
1509 | |
1510 | return isTruncStoreLegalOrCustom(ValVT, MemVT); |
1511 | } |
1512 | |
1513 | /// Return how the indexed load should be treated: either it is legal, needs |
1514 | /// to be promoted to a larger size, needs to be expanded to some other code |
1515 | /// sequence, or the target has a custom expander for it. |
1516 | LegalizeAction getIndexedLoadAction(unsigned IdxMode, MVT VT) const { |
1517 | return getIndexedModeAction(IdxMode, VT, Shift: IMAB_Load); |
1518 | } |
1519 | |
1520 | /// Return true if the specified indexed load is legal on this target. |
1521 | bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const { |
1522 | return VT.isSimple() && |
1523 | (getIndexedLoadAction(IdxMode, VT: VT.getSimpleVT()) == Legal || |
1524 | getIndexedLoadAction(IdxMode, VT: VT.getSimpleVT()) == Custom); |
1525 | } |
1526 | |
1527 | /// Return how the indexed store should be treated: either it is legal, needs |
1528 | /// to be promoted to a larger size, needs to be expanded to some other code |
1529 | /// sequence, or the target has a custom expander for it. |
1530 | LegalizeAction getIndexedStoreAction(unsigned IdxMode, MVT VT) const { |
1531 | return getIndexedModeAction(IdxMode, VT, Shift: IMAB_Store); |
1532 | } |
1533 | |
1534 | /// Return true if the specified indexed load is legal on this target. |
1535 | bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const { |
1536 | return VT.isSimple() && |
1537 | (getIndexedStoreAction(IdxMode, VT: VT.getSimpleVT()) == Legal || |
1538 | getIndexedStoreAction(IdxMode, VT: VT.getSimpleVT()) == Custom); |
1539 | } |
1540 | |
1541 | /// Return how the indexed load should be treated: either it is legal, needs |
1542 | /// to be promoted to a larger size, needs to be expanded to some other code |
1543 | /// sequence, or the target has a custom expander for it. |
1544 | LegalizeAction getIndexedMaskedLoadAction(unsigned IdxMode, MVT VT) const { |
1545 | return getIndexedModeAction(IdxMode, VT, Shift: IMAB_MaskedLoad); |
1546 | } |
1547 | |
1548 | /// Return true if the specified indexed load is legal on this target. |
1549 | bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const { |
1550 | return VT.isSimple() && |
1551 | (getIndexedMaskedLoadAction(IdxMode, VT: VT.getSimpleVT()) == Legal || |
1552 | getIndexedMaskedLoadAction(IdxMode, VT: VT.getSimpleVT()) == Custom); |
1553 | } |
1554 | |
1555 | /// Return how the indexed store should be treated: either it is legal, needs |
1556 | /// to be promoted to a larger size, needs to be expanded to some other code |
1557 | /// sequence, or the target has a custom expander for it. |
1558 | LegalizeAction getIndexedMaskedStoreAction(unsigned IdxMode, MVT VT) const { |
1559 | return getIndexedModeAction(IdxMode, VT, Shift: IMAB_MaskedStore); |
1560 | } |
1561 | |
1562 | /// Return true if the specified indexed load is legal on this target. |
1563 | bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const { |
1564 | return VT.isSimple() && |
1565 | (getIndexedMaskedStoreAction(IdxMode, VT: VT.getSimpleVT()) == Legal || |
1566 | getIndexedMaskedStoreAction(IdxMode, VT: VT.getSimpleVT()) == Custom); |
1567 | } |
1568 | |
/// Returns true if the index type for a masked gather/scatter requires
/// extending. \p EltTy is an out-parameter; presumably it receives the
/// element type to extend to — confirm against overriding targets.
virtual bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const { return false; }

/// Returns true if \p Extend can be folded into the index of a masked
/// gather/scatter with data type \p DataVT on this target. Default: no.
virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const {
  return false;
}
1578 | |
1579 | // Return true if the target supports a scatter/gather instruction with |
1580 | // indices which are scaled by the particular value. Note that all targets |
1581 | // must by definition support scale of 1. |
1582 | virtual bool isLegalScaleForGatherScatter(uint64_t Scale, |
1583 | uint64_t ElemSize) const { |
1584 | // MGATHER/MSCATTER are only required to support scaling by one or by the |
1585 | // element size. |
1586 | if (Scale != ElemSize && Scale != 1) |
1587 | return false; |
1588 | return true; |
1589 | } |
1590 | |
/// Return how the condition code should be treated: either it is legal, needs
/// to be expanded to some other code sequence, or the target has a custom
/// expander for it.
LegalizeAction
getCondCodeAction(ISD::CondCode CC, MVT VT) const {
  assert((unsigned)CC < std::size(CondCodeActions) &&
         ((unsigned)VT.SimpleTy >> 3) < std::size(CondCodeActions[0]) &&
         "Table isn't big enough!" );
  // See setCondCodeAction for how this is encoded: each uint32_t packs
  // eight 4-bit actions, indexed by the low three bits of the type.
  uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
  uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3];
  LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF);
  assert(Action != Promote && "Can't promote condition code!" );
  return Action;
}
1606 | |
1607 | /// Return true if the specified condition code is legal on this target. |
1608 | bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const { |
1609 | return getCondCodeAction(CC, VT) == Legal; |
1610 | } |
1611 | |
1612 | /// Return true if the specified condition code is legal or custom on this |
1613 | /// target. |
1614 | bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const { |
1615 | return getCondCodeAction(CC, VT) == Legal || |
1616 | getCondCodeAction(CC, VT) == Custom; |
1617 | } |
1618 | |
/// If the action for this operation is to promote, this method returns the
/// ValueType to promote to.
MVT getTypeToPromoteTo(unsigned Op, MVT VT) const {
  assert(getOperationAction(Op, VT) == Promote &&
         "This operation isn't promoted!" );

  // See if this has an explicit type specified.
  std::map<std::pair<unsigned, MVT::SimpleValueType>,
           MVT::SimpleValueType>::const_iterator PTTI =
      PromoteToType.find(x: std::make_pair(x&: Op, y&: VT.SimpleTy));
  if (PTTI != PromoteToType.end()) return PTTI->second;

  // Autopromotion only works for integer and FP scalar/vector types.
  assert((VT.isInteger() || VT.isFloatingPoint()) &&
         "Cannot autopromote this type, add it with AddPromotedToType." );

  // Walk upward through the MVT enum until we find a strictly wider type of
  // the same class that is legal and not itself marked Promote for this op.
  uint64_t VTBits = VT.getScalarSizeInBits();
  MVT NVT = VT;
  do {
    NVT = (MVT::SimpleValueType)(NVT.SimpleTy+1);
    // Running past the integer/FP range (or into isVoid) means no wider
    // candidate exists — that's a setup bug in the target's tables.
    assert(NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid &&
           "Didn't find type to promote to!" );
  } while (VTBits >= NVT.getScalarSizeInBits() || !isTypeLegal(VT: NVT) ||
           getOperationAction(Op, VT: NVT) == Promote);
  return NVT;
}
1644 | |
1645 | virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, |
1646 | bool AllowUnknown = false) const { |
1647 | return getValueType(DL, Ty, AllowUnknown); |
1648 | } |
1649 | |
1650 | /// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM |
1651 | /// operations except for the pointer size. If AllowUnknown is true, this |
1652 | /// will return MVT::Other for types with no EVT counterpart (e.g. structs), |
1653 | /// otherwise it will assert. |
1654 | EVT getValueType(const DataLayout &DL, Type *Ty, |
1655 | bool AllowUnknown = false) const { |
1656 | // Lower scalar pointers to native pointer types. |
1657 | if (auto *PTy = dyn_cast<PointerType>(Val: Ty)) |
1658 | return getPointerTy(DL, AS: PTy->getAddressSpace()); |
1659 | |
1660 | if (auto *VTy = dyn_cast<VectorType>(Val: Ty)) { |
1661 | Type *EltTy = VTy->getElementType(); |
1662 | // Lower vectors of pointers to native pointer types. |
1663 | if (auto *PTy = dyn_cast<PointerType>(Val: EltTy)) { |
1664 | EVT PointerTy(getPointerTy(DL, AS: PTy->getAddressSpace())); |
1665 | EltTy = PointerTy.getTypeForEVT(Context&: Ty->getContext()); |
1666 | } |
1667 | return EVT::getVectorVT(Context&: Ty->getContext(), VT: EVT::getEVT(Ty: EltTy, HandleUnknown: false), |
1668 | EC: VTy->getElementCount()); |
1669 | } |
1670 | |
1671 | return EVT::getEVT(Ty, HandleUnknown: AllowUnknown); |
1672 | } |
1673 | |
1674 | EVT getMemValueType(const DataLayout &DL, Type *Ty, |
1675 | bool AllowUnknown = false) const { |
1676 | // Lower scalar pointers to native pointer types. |
1677 | if (auto *PTy = dyn_cast<PointerType>(Val: Ty)) |
1678 | return getPointerMemTy(DL, AS: PTy->getAddressSpace()); |
1679 | |
1680 | if (auto *VTy = dyn_cast<VectorType>(Val: Ty)) { |
1681 | Type *EltTy = VTy->getElementType(); |
1682 | if (auto *PTy = dyn_cast<PointerType>(Val: EltTy)) { |
1683 | EVT PointerTy(getPointerMemTy(DL, AS: PTy->getAddressSpace())); |
1684 | EltTy = PointerTy.getTypeForEVT(Context&: Ty->getContext()); |
1685 | } |
1686 | return EVT::getVectorVT(Context&: Ty->getContext(), VT: EVT::getEVT(Ty: EltTy, HandleUnknown: false), |
1687 | EC: VTy->getElementCount()); |
1688 | } |
1689 | |
1690 | return getValueType(DL, Ty, AllowUnknown); |
1691 | } |
1692 | |
1693 | |
1694 | /// Return the MVT corresponding to this LLVM type. See getValueType. |
1695 | MVT getSimpleValueType(const DataLayout &DL, Type *Ty, |
1696 | bool AllowUnknown = false) const { |
1697 | return getValueType(DL, Ty, AllowUnknown).getSimpleVT(); |
1698 | } |
1699 | |
/// Return the desired alignment for ByVal or InAlloca aggregate function
/// arguments in the caller parameter area. This is the actual alignment, not
/// its logarithm.
virtual uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const;

/// Return the type of registers that this ValueType will eventually require.
/// Simple types are a direct lookup in the RegisterTypeForVT table.
MVT getRegisterType(MVT VT) const {
  assert((unsigned)VT.SimpleTy < std::size(RegisterTypeForVT));
  return RegisterTypeForVT[VT.SimpleTy];
}
1710 | |
/// Return the type of registers that this ValueType will eventually require.
/// Handles extended (non-simple) vector and integer types by breaking them
/// down or transforming them until a simple register type is reached.
MVT getRegisterType(LLVMContext &Context, EVT VT) const {
  // Simple types: direct table lookup.
  if (VT.isSimple())
    return getRegisterType(VT: VT.getSimpleVT());
  // Extended vectors: the register type from the vector breakdown.
  if (VT.isVector()) {
    EVT VT1;
    MVT RegisterVT;
    unsigned NumIntermediates;
    (void)getVectorTypeBreakdown(Context, VT, IntermediateVT&: VT1,
                                 NumIntermediates, RegisterVT);
    return RegisterVT;
  }
  // Extended integers: recurse on the legalized (transformed) type.
  if (VT.isInteger()) {
    return getRegisterType(Context, VT: getTypeToTransformTo(Context, VT));
  }
  llvm_unreachable("Unsupported extended type!" );
}
1728 | |
/// Return the number of registers that this ValueType will eventually
/// require.
///
/// This is one for any types promoted to live in larger registers, but may be
/// more than one for types (like i64) that are split into pieces. For types
/// like i140, which are first promoted then expanded, it is the number of
/// registers needed to hold all the bits of the original type. For an i140
/// on a 32 bit machine this means 5 registers.
///
/// RegisterVT may be passed as a way to override the default settings, for
/// instance with i128 inline assembly operands on SystemZ.
virtual unsigned
getNumRegisters(LLVMContext &Context, EVT VT,
                std::optional<MVT> RegisterVT = std::nullopt) const {
  // Simple types: direct table lookup.
  if (VT.isSimple()) {
    assert((unsigned)VT.getSimpleVT().SimpleTy <
           std::size(NumRegistersForVT));
    return NumRegistersForVT[VT.getSimpleVT().SimpleTy];
  }
  // Extended vectors: count comes from the vector breakdown.
  if (VT.isVector()) {
    EVT VT1;
    MVT VT2;
    unsigned NumIntermediates;
    return getVectorTypeBreakdown(Context, VT, IntermediateVT&: VT1, NumIntermediates, RegisterVT&: VT2);
  }
  // Extended integers: ceil(bit width / register width).
  if (VT.isInteger()) {
    unsigned BitWidth = VT.getSizeInBits();
    unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits();
    return (BitWidth + RegWidth - 1) / RegWidth;
  }
  llvm_unreachable("Unsupported extended type!" );
}
1761 | |
/// Certain combinations of ABIs, Targets and features require that types
/// are legal for some operations and not for other operations.
/// For MIPS all vector types must be passed through the integer register set.
/// Default: same as getRegisterType, ignoring the calling convention.
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
                                          CallingConv::ID CC, EVT VT) const {
  return getRegisterType(Context, VT);
}

/// Certain targets require unusual breakdowns of certain types. For MIPS,
/// this occurs when a vector type is used, as vector are passed through the
/// integer register set.
/// Default: same as getNumRegisters, ignoring the calling convention.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                               CallingConv::ID CC,
                                               EVT VT) const {
  return getNumRegisters(Context, VT);
}

/// Certain targets have context sensitive alignment requirements, where one
/// type has the alignment requirement of another type.
/// Default: the plain ABI alignment from the DataLayout.
virtual Align getABIAlignmentForCallingConv(Type *ArgTy,
                                            const DataLayout &DL) const {
  return DL.getABITypeAlign(Ty: ArgTy);
}
1785 | |
/// If true, then instruction selection should seek to shrink the FP constant
/// of the specified type to a smaller type in order to save space and / or
/// reduce runtime. Default: true for every type.
virtual bool ShouldShrinkFPConstant(EVT) const { return true; }
1790 | |
1791 | /// Return true if it is profitable to reduce a load to a smaller type. |
1792 | /// Example: (i16 (trunc (i32 (load x))) -> i16 load x |
1793 | virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, |
1794 | EVT NewVT) const { |
1795 | // By default, assume that it is cheaper to extract a subvector from a wide |
1796 | // vector load rather than creating multiple narrow vector loads. |
1797 | if (NewVT.isVector() && !Load->hasOneUse()) |
1798 | return false; |
1799 | |
1800 | return true; |
1801 | } |
1802 | |
/// Return true (the default) if it is profitable to remove a sext_inreg(x)
/// where the sext is redundant, and use x directly. Targets override this to
/// keep the extend when it is free or required.
virtual bool shouldRemoveRedundantExtend(SDValue Op) const { return true; }
1806 | |
1807 | /// Indicates if any padding is guaranteed to go at the most significant bits |
1808 | /// when storing the type to memory and the type size isn't equal to the store |
1809 | /// size. |
1810 | bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const { |
1811 | return VT.isScalarInteger() && !VT.isByteSized(); |
1812 | } |
1813 | |
1814 | /// When splitting a value of the specified type into parts, does the Lo |
1815 | /// or Hi part come first? This usually follows the endianness, except |
1816 | /// for ppcf128, where the Hi part always comes first. |
1817 | bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const { |
1818 | return DL.isBigEndian() || VT == MVT::ppcf128; |
1819 | } |
1820 | |
/// If true, the target has custom DAG combine transformations that it can
/// perform for the specified node.
bool hasTargetDAGCombine(ISD::NodeType NT) const {
  assert(unsigned(NT >> 3) < std::size(TargetDAGCombineArray));
  // One bit per node type, eight node types packed per array byte.
  return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7));
}
1827 | |
/// Return the configured GatherAllAliasesMaxDepth limit (how far alias
/// gathering may search).
unsigned getGatherAllAliasesMaxDepth() const {
  return GatherAllAliasesMaxDepth;
}
1831 | |
1832 | /// Returns the size of the platform's va_list object. |
1833 | virtual unsigned getVaListSizeInBits(const DataLayout &DL) const { |
1834 | return getPointerTy(DL).getSizeInBits(); |
1835 | } |
1836 | |
1837 | /// Get maximum # of store operations permitted for llvm.memset |
1838 | /// |
1839 | /// This function returns the maximum number of store operations permitted |
1840 | /// to replace a call to llvm.memset. The value is set by the target at the |
1841 | /// performance threshold for such a replacement. If OptSize is true, |
1842 | /// return the limit for functions that have OptSize attribute. |
1843 | unsigned getMaxStoresPerMemset(bool OptSize) const { |
1844 | return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset; |
1845 | } |
1846 | |
1847 | /// Get maximum # of store operations permitted for llvm.memcpy |
1848 | /// |
1849 | /// This function returns the maximum number of store operations permitted |
1850 | /// to replace a call to llvm.memcpy. The value is set by the target at the |
1851 | /// performance threshold for such a replacement. If OptSize is true, |
1852 | /// return the limit for functions that have OptSize attribute. |
1853 | unsigned getMaxStoresPerMemcpy(bool OptSize) const { |
1854 | return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy; |
1855 | } |
1856 | |
/// \brief Get maximum # of store operations to be glued together
///
/// This function returns the maximum number of store operations permitted
/// to glue together during lowering of llvm.memcpy. The value is set by
/// the target at the performance threshold for such a replacement.
virtual unsigned getMaxGluedStoresPerMemcpy() const {
  return MaxGluedStoresPerMemcpy;
}
1865 | |
1866 | /// Get maximum # of load operations permitted for memcmp |
1867 | /// |
1868 | /// This function returns the maximum number of load operations permitted |
1869 | /// to replace a call to memcmp. The value is set by the target at the |
1870 | /// performance threshold for such a replacement. If OptSize is true, |
1871 | /// return the limit for functions that have OptSize attribute. |
1872 | unsigned getMaxExpandSizeMemcmp(bool OptSize) const { |
1873 | return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp; |
1874 | } |
1875 | |
1876 | /// Get maximum # of store operations permitted for llvm.memmove |
1877 | /// |
1878 | /// This function returns the maximum number of store operations permitted |
1879 | /// to replace a call to llvm.memmove. The value is set by the target at the |
1880 | /// performance threshold for such a replacement. If OptSize is true, |
1881 | /// return the limit for functions that have OptSize attribute. |
1882 | unsigned getMaxStoresPerMemmove(bool OptSize) const { |
1883 | return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove; |
1884 | } |
1885 | |
/// Determine if the target supports unaligned memory accesses.
///
/// This function returns true if the target allows unaligned memory accesses
/// of the specified type in the given address space. If true, it also returns
/// a relative speed of the unaligned memory access in the last argument by
/// reference. The higher the speed number the faster the operation comparing
/// to a number returned by another such call. This is used, for example, in
/// situations where an array copy/move/set is converted to a sequence of
/// store operations. Its use helps to ensure that such replacements don't
/// generate code that causes an alignment error (trap) on the target machine.
/// Default: misaligned accesses are never allowed; targets must override.
virtual bool allowsMisalignedMemoryAccesses(
    EVT, unsigned AddrSpace = 0, Align Alignment = Align(1),
    MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
    unsigned * /*Fast*/ = nullptr) const {
  return false;
}
1902 | |
/// LLT handling variant of allowsMisalignedMemoryAccesses (GlobalISel).
/// Default: misaligned accesses are never allowed; targets must override.
virtual bool allowsMisalignedMemoryAccesses(
    LLT, unsigned AddrSpace = 0, Align Alignment = Align(1),
    MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
    unsigned * /*Fast*/ = nullptr) const {
  return false;
}
1910 | |
/// This function returns true if the memory access is aligned or if the
/// target allows this specific unaligned memory access. If the access is
/// allowed, the optional final parameter returns a relative speed of the
/// access (as defined by the target).
bool allowsMemoryAccessForAlignment(
    LLVMContext &Context, const DataLayout &DL, EVT VT,
    unsigned AddrSpace = 0, Align Alignment = Align(1),
    MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
    unsigned *Fast = nullptr) const;

/// Return true if the memory access of this type is aligned or if the target
/// allows this specific unaligned access for the given MachineMemOperand.
/// If the access is allowed, the optional final parameter returns a relative
/// speed of the access (as defined by the target).
bool allowsMemoryAccessForAlignment(LLVMContext &Context,
                                    const DataLayout &DL, EVT VT,
                                    const MachineMemOperand &MMO,
                                    unsigned *Fast = nullptr) const;

/// Return true if the target supports a memory access of this type for the
/// given address space and alignment. If the access is allowed, the optional
/// final parameter returns the relative speed of the access (as defined by
/// the target).
virtual bool
allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
                   unsigned AddrSpace = 0, Align Alignment = Align(1),
                   MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
                   unsigned *Fast = nullptr) const;

/// Return true if the target supports a memory access of this type for the
/// given MachineMemOperand. If the access is allowed, the optional
/// final parameter returns the relative access speed (as defined by the
/// target).
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
                        const MachineMemOperand &MMO,
                        unsigned *Fast = nullptr) const;

/// LLT handling variant of allowsMemoryAccess (GlobalISel).
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, LLT Ty,
                        const MachineMemOperand &MMO,
                        unsigned *Fast = nullptr) const;
1952 | |
/// Returns the target specific optimal type for load and store operations as
/// a result of memset, memcpy, and memmove lowering.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic. Default: always defer to generic logic.
virtual EVT
getOptimalMemOpType(const MemOp &Op,
                    const AttributeList & /*FuncAttributes*/) const {
  return MVT::Other;
}
1962 | |
/// LLT returning variant of getOptimalMemOpType (GlobalISel).
/// An invalid (default-constructed) LLT means "use generic logic".
virtual LLT
getOptimalMemOpLLT(const MemOp &Op,
                   const AttributeList & /*FuncAttributes*/) const {
  return LLT();
}
1969 | |
/// Returns true if it's safe to use load / store of the specified type to
/// expand memcpy / memset inline.
///
/// This is mostly true for all types except for some special cases. For
/// example, on X86 targets without SSE2 f64 load / store are done with fldl /
/// fstpl which also does type conversion. Note the specified type doesn't
/// have to be legal as the hook is used before type legalization.
/// Default: all types are safe.
virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; }
1978 | |
/// Return lower limit for number of blocks in a jump table.
virtual unsigned getMinimumJumpTableEntries() const;

/// Return lower limit of the density in a jump table.
unsigned getMinimumJumpTableDensity(bool OptForSize) const;

/// Return upper limit for number of entries in a jump table.
/// Zero if no limit.
unsigned getMaximumJumpTableSize() const;

/// Return true if this target's jump tables are relative (presumably
/// entries are offsets rather than absolute addresses — confirm against
/// target overrides).
virtual bool isJumpTableRelative() const;
1990 | |
/// If a physical register, this specifies the register that
/// llvm.savestack/llvm.restorestack should save and restore.
Register getStackPointerRegisterToSaveRestore() const {
  return StackPointerRegisterToSaveRestore;
}
1996 | |
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
/// Default: no register (invalid Register); EH-capable targets override.
virtual Register
getExceptionPointerRegister(const Constant *PersonalityFn) const {
  return Register();
}
2003 | |
/// If a physical register, this returns the register that receives the
/// exception typeid on entry to a landing pad.
/// Default: no register (invalid Register); EH-capable targets override.
virtual Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const {
  return Register();
}
2010 | |
/// Return true if catch objects need fixed stack slots (funclet-based EH).
/// The default aborts: targets that support funclet EH must override.
virtual bool needsFixedCatchObjects() const {
  report_fatal_error(reason: "Funclet EH is not implemented for this target" );
}
2014 | |
/// Return the minimum stack alignment of an argument.
Align getMinStackArgumentAlignment() const {
  return MinStackArgumentAlignment;
}

/// Return the minimum function alignment.
Align getMinFunctionAlignment() const { return MinFunctionAlignment; }

/// Return the preferred function alignment.
Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
2025 | |
/// Return the preferred loop alignment. \p ML may be null when no loop
/// context is available.
virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const;

/// Return the maximum amount of bytes allowed to be emitted when padding for
/// alignment
virtual unsigned
getMaxPermittedBytesForAlignment(MachineBasicBlock *MBB) const;

/// Should loops be aligned even when the function is marked OptSize (but not
/// MinSize). Default: no.
virtual bool alignLoopsWithOptSize() const { return false; }

/// If the target has a standard location for the stack protector guard,
/// returns the address of that location. Otherwise, returns nullptr.
/// DEPRECATED: please override useLoadStackGuardNode and customize
/// LOAD_STACK_GUARD, or customize \@llvm.stackguard().
virtual Value *getIRStackGuard(IRBuilderBase &IRB) const;

/// Inserts necessary declarations for SSP (stack protection) purpose.
/// Should be used only when getIRStackGuard returns nullptr.
virtual void insertSSPDeclarations(Module &M) const;

/// Return the variable that's previously inserted by insertSSPDeclarations,
/// if any, otherwise return nullptr. Should be used only when
/// getIRStackGuard returns nullptr.
virtual Value *getSDagStackGuard(const Module &M) const;

/// If this function returns true, stack protection checks should XOR the
/// frame pointer (or whichever pointer is used to address locals) into the
/// stack guard value before checking it. getIRStackGuard must return nullptr
/// if this returns true. Default: no XOR hardening.
virtual bool useStackGuardXorFP() const { return false; }

/// If the target has a standard stack protection check function that
/// performs validation and error handling, returns the function. Otherwise,
/// returns nullptr. Must be previously inserted by insertSSPDeclarations.
/// Should be used only when getIRStackGuard returns nullptr.
virtual Function *getSSPStackGuardCheck(const Module &M) const;
2064 | |
protected:
  // Shared helper for targets: returns the default (TLS or non-TLS) location
  // of the unsafe stack pointer for the SafeStack instrumentation.
  Value *getDefaultSafeStackPointerLocation(IRBuilderBase &IRB,
                                            bool UseTLS) const;

public:
  /// Returns the target-specific address of the unsafe stack pointer.
  virtual Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const;
2072 | |
  /// Returns true if the target emits calls to a stack probe symbol
  /// (see getStackProbeSymbolName). Default: no stack probing.
  virtual bool hasStackProbeSymbol(const MachineFunction &MF) const { return false; }

  /// Returns true if the target emits inline code (rather than a symbol
  /// call) to probe the stack. Default: no inline stack probing.
  virtual bool hasInlineStackProbe(const MachineFunction &MF) const { return false; }

  /// Returns the name of the symbol used to emit stack probes or the empty
  /// string if not applicable.
  virtual StringRef getStackProbeSymbolName(const MachineFunction &MF) const {
    return "" ;
  }
2082 | |
  /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we
  /// are happy to sink it into basic blocks. A cast may be free, but not
  /// necessarily a no-op. e.g. a free truncate from a 64-bit to 32-bit pointer.
  virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const;

  /// Return true if the pointer arguments to CI should be aligned by aligning
  /// the object whose address is being passed. If so then MinSize is set to the
  /// minimum size the object must be to be aligned and PrefAlign is set to the
  /// preferred alignment. The default declines the transformation.
  virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/,
                                      Align & /*PrefAlign*/) const {
    return false;
  }

  //===--------------------------------------------------------------------===//
  /// \name Helpers for TargetTransformInfo implementations
  /// @{

  /// Get the ISD node that corresponds to the Instruction class opcode.
  int InstructionOpcodeToISD(unsigned Opcode) const;

  /// @}
2105 | |
2106 | //===--------------------------------------------------------------------===// |
2107 | /// \name Helpers for atomic expansion. |
2108 | /// @{ |
2109 | |
2110 | /// Returns the maximum atomic operation size (in bits) supported by |
2111 | /// the backend. Atomic operations greater than this size (as well |
2112 | /// as ones that are not naturally aligned), will be expanded by |
2113 | /// AtomicExpandPass into an __atomic_* library call. |
2114 | unsigned getMaxAtomicSizeInBitsSupported() const { |
2115 | return MaxAtomicSizeInBitsSupported; |
2116 | } |
2117 | |
2118 | /// Returns the size in bits of the maximum div/rem the backend supports. |
2119 | /// Larger operations will be expanded by ExpandLargeDivRem. |
2120 | unsigned getMaxDivRemBitWidthSupported() const { |
2121 | return MaxDivRemBitWidthSupported; |
2122 | } |
2123 | |
2124 | /// Returns the size in bits of the maximum larget fp convert the backend |
2125 | /// supports. Larger operations will be expanded by ExpandLargeFPConvert. |
2126 | unsigned getMaxLargeFPConvertBitWidthSupported() const { |
2127 | return MaxLargeFPConvertBitWidthSupported; |
2128 | } |
2129 | |
2130 | /// Returns the size of the smallest cmpxchg or ll/sc instruction |
2131 | /// the backend supports. Any smaller operations are widened in |
2132 | /// AtomicExpandPass. |
2133 | /// |
2134 | /// Note that *unlike* operations above the maximum size, atomic ops |
2135 | /// are still natively supported below the minimum; they just |
2136 | /// require a more complex expansion. |
2137 | unsigned getMinCmpXchgSizeInBits() const { return MinCmpXchgSizeInBits; } |
2138 | |
2139 | /// Whether the target supports unaligned atomic operations. |
2140 | bool supportsUnalignedAtomics() const { return SupportsUnalignedAtomics; } |
2141 | |
2142 | /// Whether AtomicExpandPass should automatically insert fences and reduce |
2143 | /// ordering for this atomic. This should be true for most architectures with |
2144 | /// weak memory ordering. Defaults to false. |
2145 | virtual bool shouldInsertFencesForAtomic(const Instruction *I) const { |
2146 | return false; |
2147 | } |
2148 | |
2149 | /// Whether AtomicExpandPass should automatically insert a trailing fence |
2150 | /// without reducing the ordering for this atomic. Defaults to false. |
2151 | virtual bool |
2152 | shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const { |
2153 | return false; |
2154 | } |
2155 | |
  /// Perform a load-linked operation on Addr, returning a "Value *" with the
  /// corresponding pointee type. This may entail some non-trivial operations to
  /// truncate or reconstruct types that will be illegal in the backend. See
  /// ARMISelLowering for an example implementation.
  /// The default implementation is unreachable; targets that request LL/SC
  /// expansion must override it.
  virtual Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
                                Value *Addr, AtomicOrdering Ord) const {
    llvm_unreachable("Load linked unimplemented on this target" );
  }

  /// Perform a store-conditional operation to Addr. Return the status of the
  /// store. This should be 0 if the store succeeded, non-zero otherwise.
  virtual Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val,
                                      Value *Addr, AtomicOrdering Ord) const {
    llvm_unreachable("Store conditional unimplemented on this target" );
  }

  /// Perform a masked atomicrmw using a target-specific intrinsic. This
  /// represents the core LL/SC loop which will be lowered at a late stage by
  /// the backend. The target-specific intrinsic returns the loaded value and
  /// is not responsible for masking and shifting the result.
  virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder,
                                              AtomicRMWInst *AI,
                                              Value *AlignedAddr, Value *Incr,
                                              Value *Mask, Value *ShiftAmt,
                                              AtomicOrdering Ord) const {
    llvm_unreachable("Masked atomicrmw expansion unimplemented on this target" );
  }

  /// Perform a atomicrmw expansion using a target-specific way. This is
  /// expected to be called when masked atomicrmw and bit test atomicrmw don't
  /// work, and the target supports another way to lower atomicrmw.
  virtual void emitExpandAtomicRMW(AtomicRMWInst *AI) const {
    llvm_unreachable(
        "Generic atomicrmw expansion unimplemented on this target" );
  }

  /// Perform a bit test atomicrmw using a target-specific intrinsic. This
  /// represents the combined bit test intrinsic which will be lowered at a late
  /// stage by the backend.
  virtual void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
    llvm_unreachable(
        "Bit test atomicrmw expansion unimplemented on this target" );
  }

  /// Perform a atomicrmw which the result is only used by comparison, using a
  /// target-specific intrinsic. This represents the combined atomic and compare
  /// intrinsic which will be lowered at a late stage by the backend.
  virtual void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
    llvm_unreachable(
        "Compare arith atomicrmw expansion unimplemented on this target" );
  }

  /// Perform a masked cmpxchg using a target-specific intrinsic. This
  /// represents the core LL/SC loop which will be lowered at a late stage by
  /// the backend. The target-specific intrinsic returns the loaded value and
  /// is not responsible for masking and shifting the result.
  virtual Value *emitMaskedAtomicCmpXchgIntrinsic(
      IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
      Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
    llvm_unreachable("Masked cmpxchg expansion unimplemented on this target" );
  }

  //===--------------------------------------------------------------------===//
  /// \name KCFI check lowering.
  /// @{

  /// Emit the target-specific KCFI (kernel control-flow integrity) check for
  /// the call at \p MBBI. The default is unreachable: targets without KCFI
  /// support must not reach this hook.
  virtual MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
                                      MachineBasicBlock::instr_iterator &MBBI,
                                      const TargetInstrInfo *TII) const {
    llvm_unreachable("KCFI is not supported on this target" );
  }

  /// @}
2227 | |
2228 | /// @} |
2229 | |
  /// Inserts in the IR a target-specific intrinsic specifying a fence.
  /// It is called by AtomicExpandPass before expanding an
  /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad
  /// if shouldInsertFencesForAtomic returns true.
  ///
  /// Inst is the original atomic instruction, prior to other expansions that
  /// may be performed.
  ///
  /// This function should either return a nullptr, or a pointer to an IR-level
  /// Instruction*. Even complex fence sequences can be represented by a
  /// single Instruction* through an intrinsic to be lowered later.
  ///
  /// The default implementation emits an IR fence before any release (or
  /// stronger) operation that stores, and after any acquire (or stronger)
  /// operation. This is generally a correct implementation, but backends may
  /// override if they wish to use alternative schemes (e.g. the PowerPC
  /// standard ABI uses a fence before a seq_cst load instead of after a
  /// seq_cst store).
  /// @{
  virtual Instruction *emitLeadingFence(IRBuilderBase &Builder,
                                        Instruction *Inst,
                                        AtomicOrdering Ord) const;

  virtual Instruction *emitTrailingFence(IRBuilderBase &Builder,
                                         Instruction *Inst,
                                         AtomicOrdering Ord) const;
  /// @}

  // Emits code that executes when the comparison result in the ll/sc
  // expansion of a cmpxchg instruction is such that the store-conditional will
  // not execute. This makes it possible to balance out the load-linked with
  // a dedicated instruction, if desired.
  // E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would
  // be unnecessarily held, except if clrex, inserted by this hook, is executed.
  // The default inserts nothing.
  virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const {}
2265 | |
  /// Returns true if arguments should be sign-extended in lib calls.
  /// The default extends exactly when the value is signed.
  virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
    return IsSigned;
  }

  /// Returns true if arguments should be extended in lib calls.
  virtual bool shouldExtendTypeInLibCall(EVT Type) const {
    return true;
  }

  /// Returns how the given (atomic) load should be expanded by the
  /// IR-level AtomicExpand pass. The default requests no expansion.
  virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const {
    return AtomicExpansionKind::None;
  }
2281 | |
2282 | /// Returns how the given (atomic) load should be cast by the IR-level |
2283 | /// AtomicExpand pass. |
2284 | virtual AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const { |
2285 | if (LI->getType()->isFloatingPointTy()) |
2286 | return AtomicExpansionKind::CastToInteger; |
2287 | return AtomicExpansionKind::None; |
2288 | } |
2289 | |
  /// Returns how the given (atomic) store should be expanded by the IR-level
  /// AtomicExpand pass into. For instance AtomicExpansionKind::Expand will try
  /// to use an atomicrmw xchg. The default requests no expansion.
  virtual AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const {
    return AtomicExpansionKind::None;
  }
2296 | |
2297 | /// Returns how the given (atomic) store should be cast by the IR-level |
2298 | /// AtomicExpand pass into. For instance AtomicExpansionKind::CastToInteger |
2299 | /// will try to cast the operands to integer values. |
2300 | virtual AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const { |
2301 | if (SI->getValueOperand()->getType()->isFloatingPointTy()) |
2302 | return AtomicExpansionKind::CastToInteger; |
2303 | return AtomicExpansionKind::None; |
2304 | } |
2305 | |
  /// Returns how the given atomic cmpxchg should be expanded by the IR-level
  /// AtomicExpand pass. The default requests no expansion.
  virtual AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
    return AtomicExpansionKind::None;
  }

  /// Returns how the IR-level AtomicExpand pass should expand the given
  /// AtomicRMW, if at all. Default is to never expand.
  /// (Floating-point RMW operations default to a cmpxchg-loop expansion.)
  virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
    return RMW->isFloatingPointOperation() ?
      AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None;
  }
2319 | |
2320 | /// Returns how the given atomic atomicrmw should be cast by the IR-level |
2321 | /// AtomicExpand pass. |
2322 | virtual AtomicExpansionKind |
2323 | shouldCastAtomicRMWIInIR(AtomicRMWInst *RMWI) const { |
2324 | if (RMWI->getOperation() == AtomicRMWInst::Xchg && |
2325 | (RMWI->getValOperand()->getType()->isFloatingPointTy() || |
2326 | RMWI->getValOperand()->getType()->isPointerTy())) |
2327 | return AtomicExpansionKind::CastToInteger; |
2328 | |
2329 | return AtomicExpansionKind::None; |
2330 | } |
2331 | |
  /// On some platforms, an AtomicRMW that never actually modifies the value
  /// (such as fetch_add of 0) can be turned into a fence followed by an
  /// atomic load. This may sound useless, but it makes it possible for the
  /// processor to keep the cacheline shared, dramatically improving
  /// performance. And such idempotent RMWs are useful for implementing some
  /// kinds of locks, see for example (justification + benchmarks):
  /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
  /// This method tries doing that transformation, returning the atomic load if
  /// it succeeds, and nullptr otherwise.
  /// If shouldExpandAtomicLoadInIR returns true on that load, it will undergo
  /// another round of expansion.
  /// The default never performs the transformation.
  virtual LoadInst *
  lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const {
    return nullptr;
  }

  /// Returns how the platform's atomic operations are extended (ZERO_EXTEND,
  /// SIGN_EXTEND, or ANY_EXTEND). Default: ZERO_EXTEND.
  virtual ISD::NodeType getExtendForAtomicOps() const {
    return ISD::ZERO_EXTEND;
  }

  /// Returns how the platform's atomic compare and swap expects its comparison
  /// value to be extended (ZERO_EXTEND, SIGN_EXTEND, or ANY_EXTEND). This is
  /// separate from getExtendForAtomicOps, which is concerned with the
  /// sign-extension of the instruction's output, whereas here we are concerned
  /// with the sign-extension of the input. For targets with compare-and-swap
  /// instructions (or sub-word comparisons in their LL/SC loop expansions),
  /// the input can be ANY_EXTEND, but the output will still have a specific
  /// extension.
  virtual ISD::NodeType getExtendForAtomicCmpSwapArg() const {
    return ISD::ANY_EXTEND;
  }
2365 | |
2366 | /// @} |
2367 | |
2368 | /// Returns true if we should normalize |
2369 | /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and |
2370 | /// select(N0|N1, X, Y) => select(N0, select(N1, X, Y, Y)) if it is likely |
2371 | /// that it saves us from materializing N0 and N1 in an integer register. |
2372 | /// Targets that are able to perform and/or on flags should return false here. |
2373 | virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, |
2374 | EVT VT) const { |
2375 | // If a target has multiple condition registers, then it likely has logical |
2376 | // operations on those registers. |
2377 | if (hasMultipleConditionRegisters()) |
2378 | return false; |
2379 | // Only do the transform if the value won't be split into multiple |
2380 | // registers. |
2381 | LegalizeTypeAction Action = getTypeAction(Context, VT); |
2382 | return Action != TypeExpandInteger && Action != TypeExpandFloat && |
2383 | Action != TypeSplitVector; |
2384 | } |
2385 | |
  /// Return true if combining FMINNUM/FMAXNUM-style nodes is profitable for
  /// the given type. Defaults to true.
  virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const { return true; }

  /// Return true if a select of constants (select Cond, C1, C2) should be
  /// transformed into simple math ops with the condition value. For example:
  /// select Cond, C1, C1-1 --> add (zext Cond), C1-1
  /// Defaults to false.
  virtual bool convertSelectOfConstantsToMath(EVT VT) const {
    return false;
  }

  /// Return true if it is profitable to transform an integer
  /// multiplication-by-constant into simpler operations like shifts and adds.
  /// This may be true if the target does not directly support the
  /// multiplication operation for the specified type or the sequence of simpler
  /// ops is faster than the multiply. Defaults to false.
  virtual bool decomposeMulByConstant(LLVMContext &Context,
                                      EVT VT, SDValue C) const {
    return false;
  }

  /// Return true if it may be profitable to transform
  /// (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
  /// This may not be true if c1 and c2 can be represented as immediates but
  /// c1*c2 cannot, for example.
  /// The target should check if c1, c2 and c1*c2 can be represented as
  /// immediates, or have to be materialized into registers. If it is not sure
  /// about some cases, a default true can be returned to let the DAGCombiner
  /// decide.
  /// AddNode is (add x, c1), and ConstNode is c2.
  virtual bool isMulAddWithConstProfitable(SDValue AddNode,
                                           SDValue ConstNode) const {
    return true;
  }

  /// Return true if it is more correct/profitable to use strict FP_TO_INT
  /// conversion operations - canonicalizing the FP source value instead of
  /// converting all cases and then selecting based on value.
  /// This may be true if the target throws exceptions for out of bounds
  /// conversions or has fast FP CMOV. Defaults to false.
  virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
                                        bool IsSigned) const {
    return false;
  }
2428 | |
2429 | /// Return true if it is beneficial to expand an @llvm.powi.* intrinsic. |
2430 | /// If not optimizing for size, expanding @llvm.powi.* intrinsics is always |
2431 | /// considered beneficial. |
2432 | /// If optimizing for size, expansion is only considered beneficial for upto |
2433 | /// 5 multiplies and a divide (if the exponent is negative). |
2434 | bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const { |
2435 | if (Exponent < 0) |
2436 | Exponent = -Exponent; |
2437 | uint64_t E = static_cast<uint64_t>(Exponent); |
2438 | return !OptForSize || (llvm::popcount(Value: E) + Log2_64(Value: E) < 7); |
2439 | } |
2440 | |
2441 | //===--------------------------------------------------------------------===// |
2442 | // TargetLowering Configuration Methods - These methods should be invoked by |
2443 | // the derived class constructor to configure this object for the target. |
2444 | // |
2445 | protected: |
2446 | /// Specify how the target extends the result of integer and floating point |
2447 | /// boolean values from i1 to a wider type. See getBooleanContents. |
2448 | void setBooleanContents(BooleanContent Ty) { |
2449 | BooleanContents = Ty; |
2450 | BooleanFloatContents = Ty; |
2451 | } |
2452 | |
2453 | /// Specify how the target extends the result of integer and floating point |
2454 | /// boolean values from i1 to a wider type. See getBooleanContents. |
2455 | void setBooleanContents(BooleanContent IntTy, BooleanContent FloatTy) { |
2456 | BooleanContents = IntTy; |
2457 | BooleanFloatContents = FloatTy; |
2458 | } |
2459 | |
2460 | /// Specify how the target extends the result of a vector boolean value from a |
2461 | /// vector of i1 to a wider type. See getBooleanContents. |
2462 | void setBooleanVectorContents(BooleanContent Ty) { |
2463 | BooleanVectorContents = Ty; |
2464 | } |
2465 | |
2466 | /// Specify the target scheduling preference. |
2467 | void setSchedulingPreference(Sched::Preference Pref) { |
2468 | SchedPreferenceInfo = Pref; |
2469 | } |
2470 | |
  /// Indicate the minimum number of blocks to generate jump tables.
  void setMinimumJumpTableEntries(unsigned Val);

  /// Indicate the maximum number of entries in jump tables.
  /// Set to zero to generate unlimited jump tables.
  void setMaximumJumpTableSize(unsigned);

  /// If set to a physical register, this specifies the register that
  /// llvm.savestack/llvm.restorestack should save and restore.
  void setStackPointerRegisterToSaveRestore(Register R) {
    StackPointerRegisterToSaveRestore = R;
  }

  /// Tells the code generator that the target has multiple (allocatable)
  /// condition registers that can be used to store the results of comparisons
  /// for use by selects and conditional branches. With multiple condition
  /// registers, the code generator will not aggressively sink comparisons into
  /// the blocks of their users.
  void setHasMultipleConditionRegisters(bool hasManyRegs = true) {
    HasMultipleConditionRegisters = hasManyRegs;
  }
2492 | |
2493 | /// Tells the code generator that the target has BitExtract instructions. |
2494 | /// The code generator will aggressively sink "shift"s into the blocks of |
2495 | /// their users if the users will generate "and" instructions which can be |
2496 | /// combined with "shift" to BitExtract instructions. |
2497 | void (bool = true) { |
2498 | HasExtractBitsInsn = hasExtractInsn; |
2499 | } |
2500 | |
  /// Tells the code generator not to expand logic operations on comparison
  /// predicates into separate sequences that increase the amount of flow
  /// control.
  void setJumpIsExpensive(bool isExpensive = true);

  /// Tells the code generator which bitwidths to bypass.
  /// A division of SlowBitWidth is rewritten to a runtime check plus a
  /// FastBitWidth division when the operands fit.
  void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) {
    BypassSlowDivWidths[SlowBitWidth] = FastBitWidth;
  }

  /// Add the specified register class as an available regclass for the
  /// specified value type. This indicates the selector can handle values of
  /// that class natively.
  void addRegisterClass(MVT VT, const TargetRegisterClass *RC) {
    assert((unsigned)VT.SimpleTy < std::size(RegClassForVT));
    RegClassForVT[VT.SimpleTy] = RC;
  }

  /// Return the largest legal super-reg register class of the register class
  /// for the specified type and its associated "cost".
  virtual std::pair<const TargetRegisterClass *, uint8_t>
  findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const;

  /// Once all of the register classes are added, this allows us to compute
  /// derived properties we expose.
  void computeRegisterProperties(const TargetRegisterInfo *TRI);
2527 | |
  /// Indicate that the specified operation does not work with the specified
  /// type and indicate what to do about it. Note that VT may refer to either
  /// the type of a result or that of an operand of Op.
  void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action) {
    assert(Op < std::size(OpActions[0]) && "Table isn't big enough!" );
    OpActions[(unsigned)VT.SimpleTy][Op] = Action;
  }
  // Convenience overload: apply the same action to several opcodes.
  void setOperationAction(ArrayRef<unsigned> Ops, MVT VT,
                          LegalizeAction Action) {
    for (auto Op : Ops)
      setOperationAction(Op, VT, Action);
  }
  // Convenience overload: apply the same action to several opcodes and types.
  void setOperationAction(ArrayRef<unsigned> Ops, ArrayRef<MVT> VTs,
                          LegalizeAction Action) {
    for (auto VT : VTs)
      setOperationAction(Ops, VT, Action);
  }
2545 | |
  /// Indicate that the specified load with extension does not work with the
  /// specified type and indicate what to do about it.
  /// Each (ValVT, MemVT) entry packs one 4-bit action per extension type.
  void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT,
                        LegalizeAction Action) {
    assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() &&
           MemVT.isValid() && "Table isn't big enough!" );
    assert((unsigned)Action < 0x10 && "too many bits for bitfield array" );
    unsigned Shift = 4 * ExtType;
    LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift);
    LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift;
  }
  // Convenience overload: apply the same action to several extension types.
  void setLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT, MVT MemVT,
                        LegalizeAction Action) {
    for (auto ExtType : ExtTypes)
      setLoadExtAction(ExtType, ValVT, MemVT, Action);
  }
  // Convenience overload: apply the same action to several memory types.
  void setLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT,
                        ArrayRef<MVT> MemVTs, LegalizeAction Action) {
    for (auto MemVT : MemVTs)
      setLoadExtAction(ExtTypes, ValVT, MemVT, Action);
  }
2567 | |
  /// Let target indicate that an extending atomic load of the specified type
  /// is legal.
  /// Mirrors setLoadExtAction: one packed 4-bit action per extension type.
  void setAtomicLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT,
                              LegalizeAction Action) {
    assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() &&
           MemVT.isValid() && "Table isn't big enough!" );
    assert((unsigned)Action < 0x10 && "too many bits for bitfield array" );
    unsigned Shift = 4 * ExtType;
    AtomicLoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &=
        ~((uint16_t)0xF << Shift);
    AtomicLoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |=
        ((uint16_t)Action << Shift);
  }
  // Convenience overload: apply the same action to several extension types.
  void setAtomicLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT, MVT MemVT,
                              LegalizeAction Action) {
    for (auto ExtType : ExtTypes)
      setAtomicLoadExtAction(ExtType, ValVT, MemVT, Action);
  }
  // Convenience overload: apply the same action to several memory types.
  void setAtomicLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT,
                              ArrayRef<MVT> MemVTs, LegalizeAction Action) {
    for (auto MemVT : MemVTs)
      setAtomicLoadExtAction(ExtTypes, ValVT, MemVT, Action);
  }
2591 | |
  /// Indicate that the specified truncating store does not work with the
  /// specified type and indicate what to do about it.
  void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action) {
    assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!" );
    TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action;
  }
2598 | |
2599 | /// Indicate that the specified indexed load does or does not work with the |
2600 | /// specified type and indicate what to do abort it. |
2601 | /// |
2602 | /// NOTE: All indexed mode loads are initialized to Expand in |
2603 | /// TargetLowering.cpp |
2604 | void setIndexedLoadAction(ArrayRef<unsigned> IdxModes, MVT VT, |
2605 | LegalizeAction Action) { |
2606 | for (auto IdxMode : IdxModes) |
2607 | setIndexedModeAction(IdxMode, VT, Shift: IMAB_Load, Action); |
2608 | } |
2609 | |
2610 | void setIndexedLoadAction(ArrayRef<unsigned> IdxModes, ArrayRef<MVT> VTs, |
2611 | LegalizeAction Action) { |
2612 | for (auto VT : VTs) |
2613 | setIndexedLoadAction(IdxModes, VT, Action); |
2614 | } |
2615 | |
2616 | /// Indicate that the specified indexed store does or does not work with the |
2617 | /// specified type and indicate what to do about it. |
2618 | /// |
2619 | /// NOTE: All indexed mode stores are initialized to Expand in |
2620 | /// TargetLowering.cpp |
2621 | void setIndexedStoreAction(ArrayRef<unsigned> IdxModes, MVT VT, |
2622 | LegalizeAction Action) { |
2623 | for (auto IdxMode : IdxModes) |
2624 | setIndexedModeAction(IdxMode, VT, Shift: IMAB_Store, Action); |
2625 | } |
2626 | |
2627 | void setIndexedStoreAction(ArrayRef<unsigned> IdxModes, ArrayRef<MVT> VTs, |
2628 | LegalizeAction Action) { |
2629 | for (auto VT : VTs) |
2630 | setIndexedStoreAction(IdxModes, VT, Action); |
2631 | } |
2632 | |
2633 | /// Indicate that the specified indexed masked load does or does not work with |
2634 | /// the specified type and indicate what to do about it. |
2635 | /// |
2636 | /// NOTE: All indexed mode masked loads are initialized to Expand in |
2637 | /// TargetLowering.cpp |
2638 | void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT, |
2639 | LegalizeAction Action) { |
2640 | setIndexedModeAction(IdxMode, VT, Shift: IMAB_MaskedLoad, Action); |
2641 | } |
2642 | |
2643 | /// Indicate that the specified indexed masked store does or does not work |
2644 | /// with the specified type and indicate what to do about it. |
2645 | /// |
2646 | /// NOTE: All indexed mode masked stores are initialized to Expand in |
2647 | /// TargetLowering.cpp |
2648 | void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT, |
2649 | LegalizeAction Action) { |
2650 | setIndexedModeAction(IdxMode, VT, Shift: IMAB_MaskedStore, Action); |
2651 | } |
2652 | |
  /// Indicate that the specified condition code is or isn't supported on the
  /// target and indicate what to do about it.
  void setCondCodeAction(ArrayRef<ISD::CondCode> CCs, MVT VT,
                         LegalizeAction Action) {
    for (auto CC : CCs) {
      assert(VT.isValid() && (unsigned)CC < std::size(CondCodeActions) &&
             "Table isn't big enough!" );
      assert((unsigned)Action < 0x10 && "too many bits for bitfield array" );
      /// The lower 3 bits of the SimpleTy index into Nth 4bit set from the
      /// 32-bit value and the upper 29 bits index into the second dimension of
      /// the array to select what 32-bit value to use.
      uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
      CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift);
      CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift;
    }
  }
  // Convenience overload: apply the same action to several value types.
  void setCondCodeAction(ArrayRef<ISD::CondCode> CCs, ArrayRef<MVT> VTs,
                         LegalizeAction Action) {
    for (auto VT : VTs)
      setCondCodeAction(CCs, VT, Action);
  }
2674 | |
2675 | /// If Opc/OrigVT is specified as being promoted, the promotion code defaults |
2676 | /// to trying a larger integer/fp until it can find one that works. If that |
2677 | /// default is insufficient, this method can be used by the target to override |
2678 | /// the default. |
2679 | void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { |
2680 | PromoteToType[std::make_pair(x&: Opc, y&: OrigVT.SimpleTy)] = DestVT.SimpleTy; |
2681 | } |
2682 | |
2683 | /// Convenience method to set an operation to Promote and specify the type |
2684 | /// in a single call. |
2685 | void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { |
2686 | setOperationAction(Op: Opc, VT: OrigVT, Action: Promote); |
2687 | AddPromotedToType(Opc, OrigVT, DestVT); |
2688 | } |
2689 | void setOperationPromotedToType(ArrayRef<unsigned> Ops, MVT OrigVT, |
2690 | MVT DestVT) { |
2691 | for (auto Op : Ops) { |
2692 | setOperationAction(Op, VT: OrigVT, Action: Promote); |
2693 | AddPromotedToType(Opc: Op, OrigVT, DestVT); |
2694 | } |
2695 | } |
2696 | |
/// Targets should invoke this method for each target independent node that
/// they want to provide a custom DAG combiner for by implementing the
/// PerformDAGCombine virtual method.
void setTargetDAGCombine(ArrayRef<ISD::NodeType> NTs) {
  for (auto NT : NTs) {
    assert(unsigned(NT >> 3) < std::size(TargetDAGCombineArray));
    // The set of interesting opcodes is stored as a packed bit-vector:
    // each array element holds flags for eight node types, so NT>>3 picks
    // the element and NT&7 picks the bit within it.
    TargetDAGCombineArray[NT >> 3] |= 1 << (NT & 7);
  }
}
2706 | |
/// Set the target's minimum function alignment.
void setMinFunctionAlignment(Align Alignment) {
  MinFunctionAlignment = Alignment;
}

/// Set the target's preferred function alignment. This should be set if
/// there is a performance benefit to higher-than-minimum alignment
void setPrefFunctionAlignment(Align Alignment) {
  PrefFunctionAlignment = Alignment;
}

/// Set the target's preferred loop alignment. Default alignment is one, it
/// means the target does not care about loop alignment. The target may also
/// override getPrefLoopAlignment to provide per-loop values.
void setPrefLoopAlignment(Align Alignment) { PrefLoopAlignment = Alignment; }
/// Set the cap on bytes emitted when realizing the preferred alignment.
// NOTE(review): presumably bounds alignment padding — confirm at the users
// of MaxBytesForAlignment.
void setMaxBytesForAlignment(unsigned MaxBytes) {
  MaxBytesForAlignment = MaxBytes;
}

/// Set the minimum stack alignment of an argument.
void setMinStackArgumentAlignment(Align Alignment) {
  MinStackArgumentAlignment = Alignment;
}

/// Set the maximum atomic operation size supported by the
/// backend. Atomic operations greater than this size (as well as
/// ones that are not naturally aligned), will be expanded by
/// AtomicExpandPass into an __atomic_* library call.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits) {
  MaxAtomicSizeInBitsSupported = SizeInBits;
}

/// Set the size in bits of the maximum div/rem the backend supports.
/// Larger operations will be expanded by ExpandLargeDivRem.
void setMaxDivRemBitWidthSupported(unsigned SizeInBits) {
  MaxDivRemBitWidthSupported = SizeInBits;
}

/// Set the size in bits of the maximum fp convert the backend supports.
/// Larger operations will be expanded by ExpandLargeFPConvert.
void setMaxLargeFPConvertBitWidthSupported(unsigned SizeInBits) {
  MaxLargeFPConvertBitWidthSupported = SizeInBits;
}

/// Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setMinCmpXchgSizeInBits(unsigned SizeInBits) {
  MinCmpXchgSizeInBits = SizeInBits;
}

/// Sets whether unaligned atomic operations are supported.
void setSupportsUnalignedAtomics(bool UnalignedSupported) {
  SupportsUnalignedAtomics = UnalignedSupported;
}
2760 | |
2761 | public: |
2762 | //===--------------------------------------------------------------------===// |
2763 | // Addressing mode description hooks (used by LSR etc). |
2764 | // |
2765 | |
/// CodeGenPrepare sinks address calculations into the same BB as Load/Store
/// instructions reading the address. This allows as much computation as
/// possible to be done in the address mode for that operand. This hook lets
/// targets also pass back when this should be done on intrinsics which
/// load/store.
virtual bool getAddrModeArguments(IntrinsicInst * /*I*/,
                                  SmallVectorImpl<Value*> &/*Ops*/,
                                  Type *&/*AccessTy*/) const {
  // Default: no intrinsic exposes address-mode operands.
  return false;
}
2776 | |
/// This represents an addressing mode of:
///    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale
/// If BaseGV is null, there is no BaseGV.
/// If BaseOffs is zero, there is no base offset.
/// If HasBaseReg is false, there is no base register.
/// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with
/// no scale.
/// If ScalableOffset is zero, there is no scalable offset.
struct AddrMode {
  GlobalValue *BaseGV = nullptr;  // Optional global base; null if absent.
  int64_t BaseOffs = 0;           // Constant offset; 0 if absent.
  bool HasBaseReg = false;        // True when a base register participates.
  int64_t Scale = 0;              // Multiplier for ScaleReg; 0 = no ScaleReg.
  int64_t ScalableOffset = 0;     // Offset scaled by vscale; 0 if absent.
  AddrMode() = default;
};
2793 | |
/// Return true if the addressing mode represented by AM is legal for this
/// target, for a load/store of the specified type.
///
/// The type may be VoidTy, in which case only return true if the addressing
/// mode is legal for a load/store of any legal type. TODO: Handle
/// pre/postinc as well.
///
/// If the address space cannot be determined, it will be -1.
///
/// \p I, when non-null, supplies the memory instruction being queried.
// NOTE(review): exact use of \p I is defined by target overriders — confirm.
///
/// TODO: Remove default argument
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                                   Type *Ty, unsigned AddrSpace,
                                   Instruction *I = nullptr) const;
2807 | |
/// Returns true if the targets addressing mode can target thread local
/// storage (TLS).
virtual bool addressingModeSupportsTLS(const GlobalValue &) const {
  return false;
}

/// Return the preferred common base offset.
virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset,
                                               int64_t MaxOffset) const {
  // Default: no preference.
  return 0;
}

/// Return true if the specified immediate is legal icmp immediate, that is
/// the target has icmp instructions which can compare a register against the
/// immediate without having to materialize the immediate into a register.
virtual bool isLegalICmpImmediate(int64_t) const {
  return true;
}

/// Return true if the specified immediate is legal add immediate, that is the
/// target has add instructions which can add a register with the immediate
/// without having to materialize the immediate into a register.
virtual bool isLegalAddImmediate(int64_t) const {
  return true;
}

/// Return true if adding the specified scalable immediate is legal, that is
/// the target has add instructions which can add a register with the
/// immediate (multiplied by vscale) without having to materialize the
/// immediate into a register.
virtual bool isLegalAddScalableImmediate(int64_t) const { return false; }

/// Return true if the specified immediate is legal for the value input of a
/// store instruction.
virtual bool isLegalStoreImmediate(int64_t Value) const {
  // Default implementation assumes that at least 0 works since it is likely
  // that a zero register exists or a zero immediate is allowed.
  return Value == 0;
}
2847 | |
/// Return true if it's significantly cheaper to shift a vector by a uniform
/// scalar than by an amount which will vary across each lane. On x86 before
/// AVX2 for example, there is a "psllw" instruction for the former case, but
/// no simple instruction for a general "a << b" operation on vectors.
/// This should also apply to lowering for vector funnel shifts (rotates).
virtual bool isVectorShiftByScalarCheap(Type *Ty) const {
  return false;
}

/// Given a shuffle vector SVI representing a vector splat, return a new
/// scalar type of size equal to SVI's scalar type if the new type is more
/// profitable. Returns nullptr otherwise. For example under MVE float splats
/// are converted to integer to prevent the need to move from SPR to GPR
/// registers.
virtual Type* shouldConvertSplatType(ShuffleVectorInst* SVI) const {
  // Default: no conversion is beneficial.
  return nullptr;
}

/// Given a set in interconnected phis of type 'From' that are loaded/stored
/// or bitcast to type 'To', return true if the set should be converted to
/// 'To'.
virtual bool shouldConvertPhiType(Type *From, Type *To) const {
  // Default: allow conversion between any pair of scalar int/fp types.
  return (From->isIntegerTy() || From->isFloatingPointTy()) &&
         (To->isIntegerTy() || To->isFloatingPointTy());
}
2873 | |
2874 | /// Returns true if the opcode is a commutative binary operation. |
2875 | virtual bool isCommutativeBinOp(unsigned Opcode) const { |
2876 | // FIXME: This should get its info from the td file. |
2877 | switch (Opcode) { |
2878 | case ISD::ADD: |
2879 | case ISD::SMIN: |
2880 | case ISD::SMAX: |
2881 | case ISD::UMIN: |
2882 | case ISD::UMAX: |
2883 | case ISD::MUL: |
2884 | case ISD::MULHU: |
2885 | case ISD::MULHS: |
2886 | case ISD::SMUL_LOHI: |
2887 | case ISD::UMUL_LOHI: |
2888 | case ISD::FADD: |
2889 | case ISD::FMUL: |
2890 | case ISD::AND: |
2891 | case ISD::OR: |
2892 | case ISD::XOR: |
2893 | case ISD::SADDO: |
2894 | case ISD::UADDO: |
2895 | case ISD::ADDC: |
2896 | case ISD::ADDE: |
2897 | case ISD::SADDSAT: |
2898 | case ISD::UADDSAT: |
2899 | case ISD::FMINNUM: |
2900 | case ISD::FMAXNUM: |
2901 | case ISD::FMINNUM_IEEE: |
2902 | case ISD::FMAXNUM_IEEE: |
2903 | case ISD::FMINIMUM: |
2904 | case ISD::FMAXIMUM: |
2905 | case ISD::AVGFLOORS: |
2906 | case ISD::AVGFLOORU: |
2907 | case ISD::AVGCEILS: |
2908 | case ISD::AVGCEILU: |
2909 | case ISD::ABDS: |
2910 | case ISD::ABDU: |
2911 | return true; |
2912 | default: return false; |
2913 | } |
2914 | } |
2915 | |
2916 | /// Return true if the node is a math/logic binary operator. |
2917 | virtual bool isBinOp(unsigned Opcode) const { |
2918 | // A commutative binop must be a binop. |
2919 | if (isCommutativeBinOp(Opcode)) |
2920 | return true; |
2921 | // These are non-commutative binops. |
2922 | switch (Opcode) { |
2923 | case ISD::SUB: |
2924 | case ISD::SHL: |
2925 | case ISD::SRL: |
2926 | case ISD::SRA: |
2927 | case ISD::ROTL: |
2928 | case ISD::ROTR: |
2929 | case ISD::SDIV: |
2930 | case ISD::UDIV: |
2931 | case ISD::SREM: |
2932 | case ISD::UREM: |
2933 | case ISD::SSUBSAT: |
2934 | case ISD::USUBSAT: |
2935 | case ISD::FSUB: |
2936 | case ISD::FDIV: |
2937 | case ISD::FREM: |
2938 | return true; |
2939 | default: |
2940 | return false; |
2941 | } |
2942 | } |
2943 | |
2944 | /// Return true if it's free to truncate a value of type FromTy to type |
2945 | /// ToTy. e.g. On x86 it's free to truncate a i32 value in register EAX to i16 |
2946 | /// by referencing its sub-register AX. |
2947 | /// Targets must return false when FromTy <= ToTy. |
2948 | virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const { |
2949 | return false; |
2950 | } |
2951 | |
2952 | /// Return true if a truncation from FromTy to ToTy is permitted when deciding |
2953 | /// whether a call is in tail position. Typically this means that both results |
2954 | /// would be assigned to the same register or stack slot, but it could mean |
2955 | /// the target performs adequate checks of its own before proceeding with the |
2956 | /// tail call. Targets must return false when FromTy <= ToTy. |
2957 | virtual bool allowTruncateForTailCall(Type *FromTy, Type *ToTy) const { |
2958 | return false; |
2959 | } |
2960 | |
2961 | virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const { return false; } |
2962 | virtual bool isTruncateFree(LLT FromTy, LLT ToTy, const DataLayout &DL, |
2963 | LLVMContext &Ctx) const { |
2964 | return isTruncateFree(FromVT: getApproximateEVTForLLT(Ty: FromTy, DL, Ctx), |
2965 | ToVT: getApproximateEVTForLLT(Ty: ToTy, DL, Ctx)); |
2966 | } |
2967 | |
2968 | /// Return true if truncating the specific node Val to type VT2 is free. |
2969 | virtual bool isTruncateFree(SDValue Val, EVT VT2) const { |
2970 | // Fallback to type matching. |
2971 | return isTruncateFree(FromVT: Val.getValueType(), ToVT: VT2); |
2972 | } |
2973 | |
2974 | virtual bool isProfitableToHoist(Instruction *I) const { return true; } |
2975 | |
2976 | /// Return true if the extension represented by \p I is free. |
2977 | /// Unlikely the is[Z|FP]ExtFree family which is based on types, |
2978 | /// this method can use the context provided by \p I to decide |
2979 | /// whether or not \p I is free. |
2980 | /// This method extends the behavior of the is[Z|FP]ExtFree family. |
2981 | /// In other words, if is[Z|FP]Free returns true, then this method |
2982 | /// returns true as well. The converse is not true. |
2983 | /// The target can perform the adequate checks by overriding isExtFreeImpl. |
2984 | /// \pre \p I must be a sign, zero, or fp extension. |
2985 | bool isExtFree(const Instruction *I) const { |
2986 | switch (I->getOpcode()) { |
2987 | case Instruction::FPExt: |
2988 | if (isFPExtFree(DestVT: EVT::getEVT(Ty: I->getType()), |
2989 | SrcVT: EVT::getEVT(Ty: I->getOperand(i: 0)->getType()))) |
2990 | return true; |
2991 | break; |
2992 | case Instruction::ZExt: |
2993 | if (isZExtFree(FromTy: I->getOperand(i: 0)->getType(), ToTy: I->getType())) |
2994 | return true; |
2995 | break; |
2996 | case Instruction::SExt: |
2997 | break; |
2998 | default: |
2999 | llvm_unreachable("Instruction is not an extension" ); |
3000 | } |
3001 | return isExtFreeImpl(I); |
3002 | } |
3003 | |
3004 | /// Return true if \p Load and \p Ext can form an ExtLoad. |
3005 | /// For example, in AArch64 |
3006 | /// %L = load i8, i8* %ptr |
3007 | /// %E = zext i8 %L to i32 |
3008 | /// can be lowered into one load instruction |
3009 | /// ldrb w0, [x0] |
3010 | bool isExtLoad(const LoadInst *Load, const Instruction *Ext, |
3011 | const DataLayout &DL) const { |
3012 | EVT VT = getValueType(DL, Ty: Ext->getType()); |
3013 | EVT LoadVT = getValueType(DL, Ty: Load->getType()); |
3014 | |
3015 | // If the load has other users and the truncate is not free, the ext |
3016 | // probably isn't free. |
3017 | if (!Load->hasOneUse() && (isTypeLegal(VT: LoadVT) || !isTypeLegal(VT)) && |
3018 | !isTruncateFree(FromTy: Ext->getType(), ToTy: Load->getType())) |
3019 | return false; |
3020 | |
3021 | // Check whether the target supports casts folded into loads. |
3022 | unsigned LType; |
3023 | if (isa<ZExtInst>(Val: Ext)) |
3024 | LType = ISD::ZEXTLOAD; |
3025 | else { |
3026 | assert(isa<SExtInst>(Ext) && "Unexpected ext type!" ); |
3027 | LType = ISD::SEXTLOAD; |
3028 | } |
3029 | |
3030 | return isLoadExtLegal(ExtType: LType, ValVT: VT, MemVT: LoadVT); |
3031 | } |
3032 | |
3033 | /// Return true if any actual instruction that defines a value of type FromTy |
3034 | /// implicitly zero-extends the value to ToTy in the result register. |
3035 | /// |
3036 | /// The function should return true when it is likely that the truncate can |
3037 | /// be freely folded with an instruction defining a value of FromTy. If |
3038 | /// the defining instruction is unknown (because you're looking at a |
3039 | /// function argument, PHI, etc.) then the target may require an |
3040 | /// explicit truncate, which is not necessarily free, but this function |
3041 | /// does not deal with those cases. |
3042 | /// Targets must return false when FromTy >= ToTy. |
3043 | virtual bool isZExtFree(Type *FromTy, Type *ToTy) const { |
3044 | return false; |
3045 | } |
3046 | |
3047 | virtual bool isZExtFree(EVT FromTy, EVT ToTy) const { return false; } |
3048 | virtual bool isZExtFree(LLT FromTy, LLT ToTy, const DataLayout &DL, |
3049 | LLVMContext &Ctx) const { |
3050 | return isZExtFree(FromTy: getApproximateEVTForLLT(Ty: FromTy, DL, Ctx), |
3051 | ToTy: getApproximateEVTForLLT(Ty: ToTy, DL, Ctx)); |
3052 | } |
3053 | |
3054 | /// Return true if zero-extending the specific node Val to type VT2 is free |
3055 | /// (either because it's implicitly zero-extended such as ARM ldrb / ldrh or |
3056 | /// because it's folded such as X86 zero-extending loads). |
3057 | virtual bool isZExtFree(SDValue Val, EVT VT2) const { |
3058 | return isZExtFree(FromTy: Val.getValueType(), ToTy: VT2); |
3059 | } |
3060 | |
/// Return true if sign-extension from FromTy to ToTy is cheaper than
/// zero-extension.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const {
  return false;
}

/// Return true if this constant should be sign extended when promoting to
/// a larger type.
virtual bool signExtendConstant(const ConstantInt *C) const { return false; }

/// Return true if sinking I's operands to the same basic block as I is
/// profitable, e.g. because the operands can be folded into a target
/// instruction during instruction selection. After calling the function
/// \p Ops contains the Uses to sink ordered by dominance (dominating users
/// come first).
virtual bool shouldSinkOperands(Instruction *I,
                                SmallVectorImpl<Use *> &Ops) const {
  // Conservative default: never sink.
  return false;
}
3080 | |
/// Try to optimize extending or truncating conversion instructions (like
/// zext, trunc, fptoui, uitofp) for the target.
virtual bool
optimizeExtendOrTruncateConversion(Instruction *I, Loop *L,
                                   const TargetTransformInfo &TTI) const {
  // Default: no target-specific optimization applied.
  return false;
}

/// Return true if the target supplies and combines to a paired load
/// two loaded values of type LoadedType next to each other in memory.
/// RequiredAlignment gives the minimal alignment constraints that must be met
/// to be able to select this paired load.
///
/// This information is *not* used to generate actual paired loads, but it is
/// used to generate a sequence of loads that is easier to combine into a
/// paired load.
/// For instance, something like this:
///   a = load i64* addr
///   b = trunc i64 a to i32
///   c = lshr i64 a, 32
///   d = trunc i64 c to i32
/// will be optimized into:
///   b = load i32* addr1
///   d = load i32* addr2
/// Where addr1 = addr2 +/- sizeof(i32).
///
/// In other words, unless the target performs a post-isel load combining,
/// this information should not be provided because it will generate more
/// loads.
virtual bool hasPairedLoad(EVT /*LoadedType*/,
                           Align & /*RequiredAlignment*/) const {
  return false;
}

/// Return true if the target has a vector blend instruction.
virtual bool hasVectorBlend() const { return false; }
3117 | |
/// Get the maximum supported factor for interleaved memory accesses.
/// Default to be the minimum interleave factor: 2.
virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; }

/// Lower an interleaved load to target specific intrinsics. Return
/// true on success.
///
/// \p LI is the vector load instruction.
/// \p Shuffles is the shufflevector list to DE-interleave the loaded vector.
/// \p Indices is the corresponding indices for each shufflevector.
/// \p Factor is the interleave factor.
virtual bool lowerInterleavedLoad(LoadInst *LI,
                                  ArrayRef<ShuffleVectorInst *> Shuffles,
                                  ArrayRef<unsigned> Indices,
                                  unsigned Factor) const {
  // Default: target does not lower interleaved loads specially.
  return false;
}

/// Lower an interleaved store to target specific intrinsics. Return
/// true on success.
///
/// \p SI is the vector store instruction.
/// \p SVI is the shufflevector to RE-interleave the stored vector.
/// \p Factor is the interleave factor.
virtual bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                                   unsigned Factor) const {
  return false;
}

/// Lower a deinterleave intrinsic to a target specific load intrinsic.
/// Return true on success. Currently only supports
/// llvm.experimental.vector.deinterleave2
///
/// \p DI is the deinterleave intrinsic.
/// \p LI is the accompanying load instruction
virtual bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
                                              LoadInst *LI) const {
  return false;
}

/// Lower an interleave intrinsic to a target specific store intrinsic.
/// Return true on success. Currently only supports
/// llvm.experimental.vector.interleave2
///
/// \p II is the interleave intrinsic.
/// \p SI is the accompanying store instruction
virtual bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
                                             StoreInst *SI) const {
  return false;
}
3168 | |
/// Return true if an fpext operation is free (for instance, because
/// single-precision floating-point numbers are implicitly extended to
/// double-precision).
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const {
  assert(SrcVT.isFloatingPoint() && DestVT.isFloatingPoint() &&
         "invalid fpext types" );
  return false;
}

/// Return true if an fpext operation input to an \p Opcode operation is free
/// (for instance, because half-precision floating-point numbers are
/// implicitly extended to float-precision) for an FMA instruction.
/// GlobalISel (MachineInstr/LLT) variant.
virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode,
                             LLT DestTy, LLT SrcTy) const {
  return false;
}

/// Return true if an fpext operation input to an \p Opcode operation is free
/// (for instance, because half-precision floating-point numbers are
/// implicitly extended to float-precision) for an FMA instruction.
/// SelectionDAG (EVT) variant; defaults to the generic isFPExtFree query.
virtual bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode,
                             EVT DestVT, EVT SrcVT) const {
  assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
         "invalid fpext types" );
  return isFPExtFree(DestVT, SrcVT);
}
3195 | |
/// Return true if folding a vector load into ExtVal (a sign, zero, or any
/// extend node) is profitable.
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const { return false; }

/// Return true if an fneg operation is free to the point where it is never
/// worthwhile to replace it with a bitwise operation.
virtual bool isFNegFree(EVT VT) const {
  assert(VT.isFloatingPoint());
  return false;
}

/// Return true if an fabs operation is free to the point where it is never
/// worthwhile to replace it with a bitwise operation.
virtual bool isFAbsFree(EVT VT) const {
  assert(VT.isFloatingPoint());
  return false;
}

/// Return true if an FMA operation is faster than a pair of fmul and fadd
/// instructions. fmuladd intrinsics will be expanded to FMAs when this method
/// returns true, otherwise fmuladd is expanded to fmul + fadd.
///
/// NOTE: This may be called before legalization on types for which FMAs are
/// not legal, but should return true if those types will eventually legalize
/// to types that support FMAs. After legalization, it will only be called on
/// types that support FMAs (via Legal or Custom actions)
virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                        EVT) const {
  return false;
}

/// Return true if an FMA operation is faster than a pair of fmul and fadd
/// instructions. fmuladd intrinsics will be expanded to FMAs when this method
/// returns true, otherwise fmuladd is expanded to fmul + fadd.
///
/// NOTE: This may be called before legalization on types for which FMAs are
/// not legal, but should return true if those types will eventually legalize
/// to types that support FMAs. After legalization, it will only be called on
/// types that support FMAs (via Legal or Custom actions)
/// GlobalISel (LLT) variant.
virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                        LLT) const {
  return false;
}

/// IR version
virtual bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *) const {
  return false;
}
3244 | |
3245 | /// Returns true if \p MI can be combined with another instruction to |
3246 | /// form TargetOpcode::G_FMAD. \p N may be an TargetOpcode::G_FADD, |
3247 | /// TargetOpcode::G_FSUB, or an TargetOpcode::G_FMUL which will be |
3248 | /// distributed into an fadd/fsub. |
3249 | virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const { |
3250 | assert((MI.getOpcode() == TargetOpcode::G_FADD || |
3251 | MI.getOpcode() == TargetOpcode::G_FSUB || |
3252 | MI.getOpcode() == TargetOpcode::G_FMUL) && |
3253 | "unexpected node in FMAD forming combine" ); |
3254 | switch (Ty.getScalarSizeInBits()) { |
3255 | case 16: |
3256 | return isOperationLegal(TargetOpcode::G_FMAD, MVT::f16); |
3257 | case 32: |
3258 | return isOperationLegal(TargetOpcode::G_FMAD, MVT::f32); |
3259 | case 64: |
3260 | return isOperationLegal(TargetOpcode::G_FMAD, MVT::f64); |
3261 | default: |
3262 | break; |
3263 | } |
3264 | |
3265 | return false; |
3266 | } |
3267 | |
3268 | /// Returns true if be combined with to form an ISD::FMAD. \p N may be an |
3269 | /// ISD::FADD, ISD::FSUB, or an ISD::FMUL which will be distributed into an |
3270 | /// fadd/fsub. |
3271 | virtual bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const { |
3272 | assert((N->getOpcode() == ISD::FADD || N->getOpcode() == ISD::FSUB || |
3273 | N->getOpcode() == ISD::FMUL) && |
3274 | "unexpected node in FMAD forming combine" ); |
3275 | return isOperationLegal(Op: ISD::FMAD, VT: N->getValueType(ResNo: 0)); |
3276 | } |
3277 | |
// Return true when the decision to generate FMA's (or FMS, FMLA etc) rather
// than FMUL and ADD is delegated to the machine combiner.
virtual bool generateFMAsInMachineCombiner(EVT VT,
                                           CodeGenOptLevel OptLevel) const {
  return false;
}

/// Return true if it's profitable to narrow operations of type SrcVT to
/// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not from
/// i32 to i16.
virtual bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
  return false;
}

/// Return true if pulling a binary operation into a select with an identity
/// constant is profitable. This is the inverse of an IR transform.
/// Example: X + (Cond ? Y : 0) --> Cond ? (X + Y) : X
virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
                                                  EVT VT) const {
  return false;
}

/// Return true if it is beneficial to convert a load of a constant to
/// just the constant itself.
/// On some targets it might be more efficient to use a combination of
/// arithmetic instructions to materialize the constant instead of loading it
/// from a constant pool.
virtual bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                               Type *Ty) const {
  // Default: keep the constant-pool load.
  return false;
}
3309 | |
3310 | /// Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type |
3311 | /// from this source type with this index. This is needed because |
3312 | /// EXTRACT_SUBVECTOR usually has custom lowering that depends on the index of |
3313 | /// the first element, and only the target knows which lowering is cheap. |
3314 | virtual bool (EVT ResVT, EVT SrcVT, |
3315 | unsigned Index) const { |
3316 | return false; |
3317 | } |
3318 | |
/// Try to convert an extract element of a vector binary operation into an
/// extract element followed by a scalar operation.
virtual bool shouldScalarizeBinop(SDValue VecOp) const {
  // Default: keep the vector op intact.
  return false;
}
3324 | |
3325 | /// Return true if extraction of a scalar element from the given vector type |
3326 | /// at the given index is cheap. For example, if scalar operations occur on |
3327 | /// the same register file as vector operations, then an extract element may |
3328 | /// be a sub-register rename rather than an actual instruction. |
3329 | virtual bool (EVT VT, unsigned Index) const { |
3330 | return false; |
3331 | } |
3332 | |
3333 | /// Try to convert math with an overflow comparison into the corresponding DAG |
3334 | /// node operation. Targets may want to override this independently of whether |
3335 | /// the operation is legal/custom for the given type because it may obscure |
3336 | /// matching of other patterns. |
3337 | virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT, |
3338 | bool MathUsed) const { |
3339 | // TODO: The default logic is inherited from code in CodeGenPrepare. |
3340 | // The opcode should not make a difference by default? |
3341 | if (Opcode != ISD::UADDO) |
3342 | return false; |
3343 | |
3344 | // Allow the transform as long as we have an integer type that is not |
3345 | // obviously illegal and unsupported and if the math result is used |
3346 | // besides the overflow check. On some targets (e.g. SPARC), it is |
3347 | // not profitable to form on overflow op if the math result has no |
3348 | // concrete users. |
3349 | if (VT.isVector()) |
3350 | return false; |
3351 | return MathUsed && (VT.isSimple() || !isOperationExpand(Op: Opcode, VT)); |
3352 | } |
3353 | |
// Return true if it is profitable to use a scalar input to a BUILD_VECTOR
// even if the vector itself has multiple uses.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const {
  return false;
}

// Return true if CodeGenPrepare should consider splitting large offset of a
// GEP to make the GEP fit into the addressing mode and can be sunk into the
// same blocks of its users.
virtual bool shouldConsiderGEPOffsetSplit() const { return false; }

/// Return true if creating a shift of the type by the given
/// amount is not profitable.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const {
  // Default: shifts are assumed acceptable.
  return false;
}
3370 | |
3371 | // Should we fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) |
3372 | // A) where y has a single bit set? |
3373 | virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, |
3374 | const APInt &AndMask) const { |
3375 | unsigned ShCt = AndMask.getBitWidth() - 1; |
3376 | return !shouldAvoidTransformToShift(VT, Amount: ShCt); |
3377 | } |
3378 | |
/// Does this target require the clearing of high-order bits in a register
/// passed to the fp16 to fp conversion library function.
virtual bool shouldKeepZExtForFP16Conv() const { return false; }

/// Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT
/// from min(max(fptoi)) saturation patterns.
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const {
  // Default: follow the legality of the saturating conversion itself.
  return isOperationLegalOrCustom(Op, VT);
}

/// Does this target support complex deinterleaving
virtual bool isComplexDeinterleavingSupported() const { return false; }

/// Does this target support complex deinterleaving with the given operation
/// and type
virtual bool isComplexDeinterleavingOperationSupported(
    ComplexDeinterleavingOperation Operation, Type *Ty) const {
  return false;
}
3398 | |
3399 | /// Create the IR node for the given complex deinterleaving operation. |
3400 | /// If one cannot be created using all the given inputs, nullptr should be |
3401 | /// returned. |
3402 | virtual Value *createComplexDeinterleavingIR( |
3403 | IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, |
3404 | ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, |
3405 | Value *Accumulator = nullptr) const { |
3406 | return nullptr; |
3407 | } |
3408 | |
3409 | //===--------------------------------------------------------------------===// |
3410 | // Runtime Library hooks |
3411 | // |
3412 | |
3413 | /// Rename the default libcall routine name for the specified libcall. |
3414 | void setLibcallName(RTLIB::Libcall Call, const char *Name) { |
3415 | LibcallRoutineNames[Call] = Name; |
3416 | } |
3417 | void setLibcallName(ArrayRef<RTLIB::Libcall> Calls, const char *Name) { |
3418 | for (auto Call : Calls) |
3419 | setLibcallName(Call, Name); |
3420 | } |
3421 | |
3422 | /// Get the libcall routine name for the specified libcall. |
3423 | const char *getLibcallName(RTLIB::Libcall Call) const { |
3424 | return LibcallRoutineNames[Call]; |
3425 | } |
3426 | |
3427 | /// Override the default CondCode to be used to test the result of the |
3428 | /// comparison libcall against zero. |
3429 | void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC) { |
3430 | CmpLibcallCCs[Call] = CC; |
3431 | } |
3432 | |
3433 | /// Get the CondCode that's to be used to test the result of the comparison |
3434 | /// libcall against zero. |
3435 | ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const { |
3436 | return CmpLibcallCCs[Call]; |
3437 | } |
3438 | |
3439 | /// Set the CallingConv that should be used for the specified libcall. |
3440 | void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) { |
3441 | LibcallCallingConvs[Call] = CC; |
3442 | } |
3443 | |
3444 | /// Get the CallingConv that should be used for the specified libcall. |
3445 | CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const { |
3446 | return LibcallCallingConvs[Call]; |
3447 | } |
3448 | |
3449 | /// Execute target specific actions to finalize target lowering. |
3450 | /// This is used to set extra flags in MachineFrameInformation and freezing |
3451 | /// the set of reserved registers. |
3452 | /// The default implementation just freezes the set of reserved registers. |
3453 | virtual void finalizeLowering(MachineFunction &MF) const; |
3454 | |
3455 | //===----------------------------------------------------------------------===// |
3456 | // GlobalISel Hooks |
3457 | //===----------------------------------------------------------------------===// |
3458 | /// Check whether or not \p MI needs to be moved close to its uses. |
3459 | virtual bool shouldLocalize(const MachineInstr &MI, const TargetTransformInfo *TTI) const; |
3460 | |
3461 | |
3462 | private: |
3463 | const TargetMachine &TM; |
3464 | |
3465 | /// Tells the code generator that the target has multiple (allocatable) |
3466 | /// condition registers that can be used to store the results of comparisons |
3467 | /// for use by selects and conditional branches. With multiple condition |
3468 | /// registers, the code generator will not aggressively sink comparisons into |
3469 | /// the blocks of their users. |
3470 | bool HasMultipleConditionRegisters; |
3471 | |
/// Tells the code generator that the target has BitExtract instructions.
/// The code generator will aggressively sink "shift"s into the blocks of
/// their users if the users will generate "and" instructions which can be
/// combined with "shift" to BitExtract instructions.
/// Fixed: the declaration was missing its identifier (`bool ;`), which is a
/// syntax error; the member is named HasExtractBitsInsn upstream.
bool HasExtractBitsInsn;
3477 | |
3478 | /// Tells the code generator to bypass slow divide or remainder |
3479 | /// instructions. For example, BypassSlowDivWidths[32,8] tells the code |
3480 | /// generator to bypass 32-bit integer div/rem with an 8-bit unsigned integer |
3481 | /// div/rem when the operands are positive and less than 256. |
3482 | DenseMap <unsigned int, unsigned int> BypassSlowDivWidths; |
3483 | |
3484 | /// Tells the code generator that it shouldn't generate extra flow control |
3485 | /// instructions and should attempt to combine flow control instructions via |
3486 | /// predication. |
3487 | bool JumpIsExpensive; |
3488 | |
3489 | /// Information about the contents of the high-bits in boolean values held in |
3490 | /// a type wider than i1. See getBooleanContents. |
3491 | BooleanContent BooleanContents; |
3492 | |
3493 | /// Information about the contents of the high-bits in boolean values held in |
3494 | /// a type wider than i1. See getBooleanContents. |
3495 | BooleanContent BooleanFloatContents; |
3496 | |
3497 | /// Information about the contents of the high-bits in boolean vector values |
3498 | /// when the element type is wider than i1. See getBooleanContents. |
3499 | BooleanContent BooleanVectorContents; |
3500 | |
3501 | /// The target scheduling preference: shortest possible total cycles or lowest |
3502 | /// register usage. |
3503 | Sched::Preference SchedPreferenceInfo; |
3504 | |
3505 | /// The minimum alignment that any argument on the stack needs to have. |
3506 | Align MinStackArgumentAlignment; |
3507 | |
3508 | /// The minimum function alignment (used when optimizing for size, and to |
3509 | /// prevent explicitly provided alignment from leading to incorrect code). |
3510 | Align MinFunctionAlignment; |
3511 | |
3512 | /// The preferred function alignment (used when alignment unspecified and |
3513 | /// optimizing for speed). |
3514 | Align PrefFunctionAlignment; |
3515 | |
/// The preferred loop alignment (in log2, not in bytes).
3517 | Align PrefLoopAlignment; |
3518 | /// The maximum amount of bytes permitted to be emitted for alignment. |
3519 | unsigned MaxBytesForAlignment; |
3520 | |
3521 | /// Size in bits of the maximum atomics size the backend supports. |
3522 | /// Accesses larger than this will be expanded by AtomicExpandPass. |
3523 | unsigned MaxAtomicSizeInBitsSupported; |
3524 | |
3525 | /// Size in bits of the maximum div/rem size the backend supports. |
3526 | /// Larger operations will be expanded by ExpandLargeDivRem. |
3527 | unsigned MaxDivRemBitWidthSupported; |
3528 | |
/// Size in bits of the largest fp convert the backend
3530 | /// supports. Larger operations will be expanded by ExpandLargeFPConvert. |
3531 | unsigned MaxLargeFPConvertBitWidthSupported; |
3532 | |
3533 | /// Size in bits of the minimum cmpxchg or ll/sc operation the |
3534 | /// backend supports. |
3535 | unsigned MinCmpXchgSizeInBits; |
3536 | |
3537 | /// This indicates if the target supports unaligned atomic operations. |
3538 | bool SupportsUnalignedAtomics; |
3539 | |
3540 | /// If set to a physical register, this specifies the register that |
3541 | /// llvm.savestack/llvm.restorestack should save and restore. |
3542 | Register StackPointerRegisterToSaveRestore; |
3543 | |
3544 | /// This indicates the default register class to use for each ValueType the |
3545 | /// target supports natively. |
3546 | const TargetRegisterClass *RegClassForVT[MVT::VALUETYPE_SIZE]; |
3547 | uint16_t NumRegistersForVT[MVT::VALUETYPE_SIZE]; |
3548 | MVT RegisterTypeForVT[MVT::VALUETYPE_SIZE]; |
3549 | |
3550 | /// This indicates the "representative" register class to use for each |
3551 | /// ValueType the target supports natively. This information is used by the |
3552 | /// scheduler to track register pressure. By default, the representative |
3553 | /// register class is the largest legal super-reg register class of the |
3554 | /// register class of the specified type. e.g. On x86, i8, i16, and i32's |
3555 | /// representative class would be GR32. |
3556 | const TargetRegisterClass *RepRegClassForVT[MVT::VALUETYPE_SIZE] = {0}; |
3557 | |
3558 | /// This indicates the "cost" of the "representative" register class for each |
3559 | /// ValueType. The cost is used by the scheduler to approximate register |
3560 | /// pressure. |
3561 | uint8_t RepRegClassCostForVT[MVT::VALUETYPE_SIZE]; |
3562 | |
3563 | /// For any value types we are promoting or expanding, this contains the value |
3564 | /// type that we are changing to. For Expanded types, this contains one step |
3565 | /// of the expand (e.g. i64 -> i32), even if there are multiple steps required |
3566 | /// (e.g. i64 -> i16). For types natively supported by the system, this holds |
3567 | /// the same type (e.g. i32 -> i32). |
3568 | MVT TransformToType[MVT::VALUETYPE_SIZE]; |
3569 | |
3570 | /// For each operation and each value type, keep a LegalizeAction that |
3571 | /// indicates how instruction selection should deal with the operation. Most |
3572 | /// operations are Legal (aka, supported natively by the target), but |
3573 | /// operations that are not should be described. Note that operations on |
3574 | /// non-legal value types are not described here. |
3575 | LegalizeAction OpActions[MVT::VALUETYPE_SIZE][ISD::BUILTIN_OP_END]; |
3576 | |
3577 | /// For each load extension type and each value type, keep a LegalizeAction |
3578 | /// that indicates how instruction selection should deal with a load of a |
3579 | /// specific value type and extension type. Uses 4-bits to store the action |
3580 | /// for each of the 4 load ext types. |
3581 | uint16_t LoadExtActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE]; |
3582 | |
3583 | /// Similar to LoadExtActions, but for atomic loads. Only Legal or Expand |
3584 | /// (default) values are supported. |
3585 | uint16_t AtomicLoadExtActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE]; |
3586 | |
3587 | /// For each value type pair keep a LegalizeAction that indicates whether a |
3588 | /// truncating store of a specific value type and truncating type is legal. |
3589 | LegalizeAction TruncStoreActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE]; |
3590 | |
3591 | /// For each indexed mode and each value type, keep a quad of LegalizeAction |
3592 | /// that indicates how instruction selection should deal with the load / |
3593 | /// store / maskedload / maskedstore. |
3594 | /// |
3595 | /// The first dimension is the value_type for the reference. The second |
3596 | /// dimension represents the various modes for load store. |
3597 | uint16_t IndexedModeActions[MVT::VALUETYPE_SIZE][ISD::LAST_INDEXED_MODE]; |
3598 | |
3599 | /// For each condition code (ISD::CondCode) keep a LegalizeAction that |
3600 | /// indicates how instruction selection should deal with the condition code. |
3601 | /// |
3602 | /// Because each CC action takes up 4 bits, we need to have the array size be |
3603 | /// large enough to fit all of the value types. This can be done by rounding |
3604 | /// up the MVT::VALUETYPE_SIZE value to the next multiple of 8. |
3605 | uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::VALUETYPE_SIZE + 7) / 8]; |
3606 | |
3607 | ValueTypeActionImpl ValueTypeActions; |
3608 | |
3609 | private: |
3610 | /// Targets can specify ISD nodes that they would like PerformDAGCombine |
3611 | /// callbacks for by calling setTargetDAGCombine(), which sets a bit in this |
3612 | /// array. |
3613 | unsigned char |
3614 | TargetDAGCombineArray[(ISD::BUILTIN_OP_END+CHAR_BIT-1)/CHAR_BIT]; |
3615 | |
3616 | /// For operations that must be promoted to a specific type, this holds the |
3617 | /// destination type. This map should be sparse, so don't hold it as an |
3618 | /// array. |
3619 | /// |
3620 | /// Targets add entries to this map with AddPromotedToType(..), clients access |
3621 | /// this with getTypeToPromoteTo(..). |
3622 | std::map<std::pair<unsigned, MVT::SimpleValueType>, MVT::SimpleValueType> |
3623 | PromoteToType; |
3624 | |
/// Stores the name of each libcall.
3626 | const char *LibcallRoutineNames[RTLIB::UNKNOWN_LIBCALL + 1]; |
3627 | |
3628 | /// The ISD::CondCode that should be used to test the result of each of the |
3629 | /// comparison libcall against zero. |
3630 | ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL]; |
3631 | |
3632 | /// Stores the CallingConv that should be used for each libcall. |
3633 | CallingConv::ID LibcallCallingConvs[RTLIB::UNKNOWN_LIBCALL]; |
3634 | |
3635 | /// Set default libcall names and calling conventions. |
3636 | void InitLibcalls(const Triple &TT); |
3637 | |
/// The bits of IndexedModeActions used to store the legalisation actions
/// We store the data as | ML | MS | L | S | each taking 4 bits.
enum IndexedModeActionsBits {
  IMAB_Store = 0,       // Bit offset of the store action nibble (S).
  IMAB_Load = 4,        // Bit offset of the load action nibble (L).
  IMAB_MaskedStore = 8, // Bit offset of the masked-store action nibble (MS).
  IMAB_MaskedLoad = 12  // Bit offset of the masked-load action nibble (ML).
};
3646 | |
/// Store \p Action into one 4-bit nibble of the packed IndexedModeActions
/// table. \p Shift selects the nibble and is one of the IMAB_* offsets.
void setIndexedModeAction(unsigned IdxMode, MVT VT, unsigned Shift,
                          LegalizeAction Action) {
  assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE &&
         (unsigned)Action < 0xf && "Table isn't big enough!" );
  unsigned Ty = (unsigned)VT.SimpleTy;
  // Clear the old nibble first, then OR in the new action at that position.
  IndexedModeActions[Ty][IdxMode] &= ~(0xf << Shift);
  IndexedModeActions[Ty][IdxMode] |= ((uint16_t)Action) << Shift;
}
3655 | |
/// Read the LegalizeAction from one 4-bit nibble of the packed
/// IndexedModeActions table. \p Shift is one of the IMAB_* offsets.
LegalizeAction getIndexedModeAction(unsigned IdxMode, MVT VT,
                                    unsigned Shift) const {
  assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() &&
         "Table isn't big enough!" );
  unsigned Ty = (unsigned)VT.SimpleTy;
  return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] >> Shift) & 0xf);
}
3663 | |
3664 | protected: |
3665 | /// Return true if the extension represented by \p I is free. |
3666 | /// \pre \p I is a sign, zero, or fp extension and |
3667 | /// is[Z|FP]ExtFree of the related types is not true. |
3668 | virtual bool isExtFreeImpl(const Instruction *I) const { return false; } |
3669 | |
3670 | /// Depth that GatherAllAliases should continue looking for chain |
3671 | /// dependencies when trying to find a more preferable chain. As an |
3672 | /// approximation, this should be more than the number of consecutive stores |
3673 | /// expected to be merged. |
3674 | unsigned GatherAllAliasesMaxDepth; |
3675 | |
3676 | /// \brief Specify maximum number of store instructions per memset call. |
3677 | /// |
3678 | /// When lowering \@llvm.memset this field specifies the maximum number of |
3679 | /// store operations that may be substituted for the call to memset. Targets |
3680 | /// must set this value based on the cost threshold for that target. Targets |
3681 | /// should assume that the memset will be done using as many of the largest |
3682 | /// store operations first, followed by smaller ones, if necessary, per |
3683 | /// alignment restrictions. For example, storing 9 bytes on a 32-bit machine |
3684 | /// with 16-bit alignment would result in four 2-byte stores and one 1-byte |
3685 | /// store. This only applies to setting a constant array of a constant size. |
3686 | unsigned MaxStoresPerMemset; |
3687 | /// Likewise for functions with the OptSize attribute. |
3688 | unsigned MaxStoresPerMemsetOptSize; |
3689 | |
3690 | /// \brief Specify maximum number of store instructions per memcpy call. |
3691 | /// |
3692 | /// When lowering \@llvm.memcpy this field specifies the maximum number of |
3693 | /// store operations that may be substituted for a call to memcpy. Targets |
3694 | /// must set this value based on the cost threshold for that target. Targets |
3695 | /// should assume that the memcpy will be done using as many of the largest |
3696 | /// store operations first, followed by smaller ones, if necessary, per |
3697 | /// alignment restrictions. For example, storing 7 bytes on a 32-bit machine |
3698 | /// with 32-bit alignment would result in one 4-byte store, a one 2-byte store |
3699 | /// and one 1-byte store. This only applies to copying a constant array of |
3700 | /// constant size. |
3701 | unsigned MaxStoresPerMemcpy; |
3702 | /// Likewise for functions with the OptSize attribute. |
3703 | unsigned MaxStoresPerMemcpyOptSize; |
3704 | /// \brief Specify max number of store instructions to glue in inlined memcpy. |
3705 | /// |
3706 | /// When memcpy is inlined based on MaxStoresPerMemcpy, specify maximum number |
3707 | /// of store instructions to keep together. This helps in pairing and |
3708 | // vectorization later on. |
3709 | unsigned MaxGluedStoresPerMemcpy = 0; |
3710 | |
3711 | /// \brief Specify maximum number of load instructions per memcmp call. |
3712 | /// |
3713 | /// When lowering \@llvm.memcmp this field specifies the maximum number of |
3714 | /// pairs of load operations that may be substituted for a call to memcmp. |
3715 | /// Targets must set this value based on the cost threshold for that target. |
3716 | /// Targets should assume that the memcmp will be done using as many of the |
3717 | /// largest load operations first, followed by smaller ones, if necessary, per |
3718 | /// alignment restrictions. For example, loading 7 bytes on a 32-bit machine |
3719 | /// with 32-bit alignment would result in one 4-byte load, a one 2-byte load |
3720 | /// and one 1-byte load. This only applies to copying a constant array of |
3721 | /// constant size. |
3722 | unsigned MaxLoadsPerMemcmp; |
3723 | /// Likewise for functions with the OptSize attribute. |
3724 | unsigned MaxLoadsPerMemcmpOptSize; |
3725 | |
3726 | /// \brief Specify maximum number of store instructions per memmove call. |
3727 | /// |
3728 | /// When lowering \@llvm.memmove this field specifies the maximum number of |
3729 | /// store instructions that may be substituted for a call to memmove. Targets |
3730 | /// must set this value based on the cost threshold for that target. Targets |
3731 | /// should assume that the memmove will be done using as many of the largest |
3732 | /// store operations first, followed by smaller ones, if necessary, per |
3733 | /// alignment restrictions. For example, moving 9 bytes on a 32-bit machine |
3734 | /// with 8-bit alignment would result in nine 1-byte stores. This only |
3735 | /// applies to copying a constant array of constant size. |
3736 | unsigned MaxStoresPerMemmove; |
3737 | /// Likewise for functions with the OptSize attribute. |
3738 | unsigned MaxStoresPerMemmoveOptSize; |
3739 | |
3740 | /// Tells the code generator that select is more expensive than a branch if |
3741 | /// the branch is usually predicted right. |
3742 | bool PredictableSelectIsExpensive; |
3743 | |
3744 | /// \see enableExtLdPromotion. |
3745 | bool EnableExtLdPromotion; |
3746 | |
3747 | /// Return true if the value types that can be represented by the specified |
3748 | /// register class are all legal. |
3749 | bool isLegalRC(const TargetRegisterInfo &TRI, |
3750 | const TargetRegisterClass &RC) const; |
3751 | |
/// Replace/modify any TargetFrameIndex operands with a target-dependent
3753 | /// sequence of memory operands that is recognized by PrologEpilogInserter. |
3754 | MachineBasicBlock *emitPatchPoint(MachineInstr &MI, |
3755 | MachineBasicBlock *MBB) const; |
3756 | |
3757 | bool IsStrictFPEnabled; |
3758 | }; |
3759 | |
3760 | /// This class defines information used to lower LLVM code to legal SelectionDAG |
3761 | /// operators that the target instruction selector can accept natively. |
3762 | /// |
3763 | /// This class also defines callbacks that targets must implement to lower |
3764 | /// target-specific constructs to SelectionDAG operators. |
3765 | class TargetLowering : public TargetLoweringBase { |
3766 | public: |
3767 | struct DAGCombinerInfo; |
3768 | struct MakeLibCallOptions; |
3769 | |
3770 | TargetLowering(const TargetLowering &) = delete; |
3771 | TargetLowering &operator=(const TargetLowering &) = delete; |
3772 | |
3773 | explicit TargetLowering(const TargetMachine &TM); |
3774 | |
3775 | bool isPositionIndependent() const; |
3776 | |
3777 | virtual bool isSDNodeSourceOfDivergence(const SDNode *N, |
3778 | FunctionLoweringInfo *FLI, |
3779 | UniformityInfo *UA) const { |
3780 | return false; |
3781 | } |
3782 | |
3783 | // Lets target to control the following reassociation of operands: (op (op x, |
3784 | // c1), y) -> (op (op x, y), c1) where N0 is (op x, c1) and N1 is y. By |
3785 | // default consider profitable any case where N0 has single use. This |
3786 | // behavior reflects the condition replaced by this target hook call in the |
3787 | // DAGCombiner. Any particular target can implement its own heuristic to |
3788 | // restrict common combiner. |
3789 | virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, |
3790 | SDValue N1) const { |
3791 | return N0.hasOneUse(); |
3792 | } |
3793 | |
3794 | // Lets target to control the following reassociation of operands: (op (op x, |
3795 | // c1), y) -> (op (op x, y), c1) where N0 is (op x, c1) and N1 is y. By |
3796 | // default consider profitable any case where N0 has single use. This |
3797 | // behavior reflects the condition replaced by this target hook call in the |
3798 | // combiner. Any particular target can implement its own heuristic to |
3799 | // restrict common combiner. |
3800 | virtual bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0, |
3801 | Register N1) const { |
3802 | return MRI.hasOneNonDBGUse(RegNo: N0); |
3803 | } |
3804 | |
3805 | virtual bool isSDNodeAlwaysUniform(const SDNode * N) const { |
3806 | return false; |
3807 | } |
3808 | |
3809 | /// Returns true by value, base pointer and offset pointer and addressing mode |
3810 | /// by reference if the node's address can be legally represented as |
3811 | /// pre-indexed load / store address. |
3812 | virtual bool getPreIndexedAddressParts(SDNode * /*N*/, SDValue &/*Base*/, |
3813 | SDValue &/*Offset*/, |
3814 | ISD::MemIndexedMode &/*AM*/, |
3815 | SelectionDAG &/*DAG*/) const { |
3816 | return false; |
3817 | } |
3818 | |
3819 | /// Returns true by value, base pointer and offset pointer and addressing mode |
3820 | /// by reference if this node can be combined with a load / store to form a |
3821 | /// post-indexed load / store. |
3822 | virtual bool getPostIndexedAddressParts(SDNode * /*N*/, SDNode * /*Op*/, |
3823 | SDValue &/*Base*/, |
3824 | SDValue &/*Offset*/, |
3825 | ISD::MemIndexedMode &/*AM*/, |
3826 | SelectionDAG &/*DAG*/) const { |
3827 | return false; |
3828 | } |
3829 | |
3830 | /// Returns true if the specified base+offset is a legal indexed addressing |
3831 | /// mode for this target. \p MI is the load or store instruction that is being |
3832 | /// considered for transformation. |
3833 | virtual bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset, |
3834 | bool IsPre, MachineRegisterInfo &MRI) const { |
3835 | return false; |
3836 | } |
3837 | |
3838 | /// Return the entry encoding for a jump table in the current function. The |
3839 | /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. |
3840 | virtual unsigned getJumpTableEncoding() const; |
3841 | |
3842 | virtual const MCExpr * |
3843 | LowerCustomJumpTableEntry(const MachineJumpTableInfo * /*MJTI*/, |
3844 | const MachineBasicBlock * /*MBB*/, unsigned /*uid*/, |
3845 | MCContext &/*Ctx*/) const { |
3846 | llvm_unreachable("Need to implement this hook if target has custom JTIs" ); |
3847 | } |
3848 | |
3849 | /// Returns relocation base for the given PIC jumptable. |
3850 | virtual SDValue getPICJumpTableRelocBase(SDValue Table, |
3851 | SelectionDAG &DAG) const; |
3852 | |
3853 | /// This returns the relocation base for the given PIC jumptable, the same as |
3854 | /// getPICJumpTableRelocBase, but as an MCExpr. |
3855 | virtual const MCExpr * |
3856 | getPICJumpTableRelocBaseExpr(const MachineFunction *MF, |
3857 | unsigned JTI, MCContext &Ctx) const; |
3858 | |
3859 | /// Return true if folding a constant offset with the given GlobalAddress is |
3860 | /// legal. It is frequently not legal in PIC relocation models. |
3861 | virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; |
3862 | |
3863 | /// On x86, return true if the operand with index OpNo is a CALL or JUMP |
3864 | /// instruction, which can use either a memory constraint or an address |
3865 | /// constraint. -fasm-blocks "__asm call foo" lowers to |
3866 | /// call void asm sideeffect inteldialect "call ${0:P}", "*m..." |
3867 | /// |
3868 | /// This function is used by a hack to choose the address constraint, |
3869 | /// lowering to a direct call. |
3870 | virtual bool |
3871 | isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs, |
3872 | unsigned OpNo) const { |
3873 | return false; |
3874 | } |
3875 | |
3876 | bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, |
3877 | SDValue &Chain) const; |
3878 | |
3879 | void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, |
3880 | SDValue &NewRHS, ISD::CondCode &CCCode, |
3881 | const SDLoc &DL, const SDValue OldLHS, |
3882 | const SDValue OldRHS) const; |
3883 | |
3884 | void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, |
3885 | SDValue &NewRHS, ISD::CondCode &CCCode, |
3886 | const SDLoc &DL, const SDValue OldLHS, |
3887 | const SDValue OldRHS, SDValue &Chain, |
3888 | bool IsSignaling = false) const; |
3889 | |
3890 | /// Returns a pair of (return value, chain). |
3891 | /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC. |
3892 | std::pair<SDValue, SDValue> makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, |
3893 | EVT RetVT, ArrayRef<SDValue> Ops, |
3894 | MakeLibCallOptions CallOptions, |
3895 | const SDLoc &dl, |
3896 | SDValue Chain = SDValue()) const; |
3897 | |
3898 | /// Check whether parameters to a call that are passed in callee saved |
3899 | /// registers are the same as from the calling function. This needs to be |
3900 | /// checked for tail call eligibility. |
3901 | bool parametersInCSRMatch(const MachineRegisterInfo &MRI, |
3902 | const uint32_t *CallerPreservedMask, |
3903 | const SmallVectorImpl<CCValAssign> &ArgLocs, |
3904 | const SmallVectorImpl<SDValue> &OutVals) const; |
3905 | |
3906 | //===--------------------------------------------------------------------===// |
3907 | // TargetLowering Optimization Methods |
3908 | // |
3909 | |
3910 | /// A convenience struct that encapsulates a DAG, and two SDValues for |
3911 | /// returning information from TargetLowering to its clients that want to |
3912 | /// combine. |
3913 | struct TargetLoweringOpt { |
3914 | SelectionDAG &DAG; |
3915 | bool LegalTys; |
3916 | bool LegalOps; |
3917 | SDValue Old; |
3918 | SDValue New; |
3919 | |
3920 | explicit TargetLoweringOpt(SelectionDAG &InDAG, |
3921 | bool LT, bool LO) : |
3922 | DAG(InDAG), LegalTys(LT), LegalOps(LO) {} |
3923 | |
3924 | bool LegalTypes() const { return LegalTys; } |
3925 | bool LegalOperations() const { return LegalOps; } |
3926 | |
3927 | bool CombineTo(SDValue O, SDValue N) { |
3928 | Old = O; |
3929 | New = N; |
3930 | return true; |
3931 | } |
3932 | }; |
3933 | |
3934 | /// Determines the optimal series of memory ops to replace the memset / memcpy. |
3935 | /// Return true if the number of memory ops is below the threshold (Limit). |
3936 | /// Note that this is always the case when Limit is ~0. |
3937 | /// It returns the types of the sequence of memory ops to perform |
3938 | /// memset / memcpy by reference. |
3939 | virtual bool |
3940 | findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit, |
3941 | const MemOp &Op, unsigned DstAS, unsigned SrcAS, |
3942 | const AttributeList &FuncAttributes) const; |
3943 | |
3944 | /// Check to see if the specified operand of the specified instruction is a |
3945 | /// constant integer. If so, check to see if there are any bits set in the |
3946 | /// constant that are not demanded. If so, shrink the constant and return |
3947 | /// true. |
3948 | bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, |
3949 | const APInt &DemandedElts, |
3950 | TargetLoweringOpt &TLO) const; |
3951 | |
3952 | /// Helper wrapper around ShrinkDemandedConstant, demanding all elements. |
3953 | bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, |
3954 | TargetLoweringOpt &TLO) const; |
3955 | |
3956 | // Target hook to do target-specific const optimization, which is called by |
3957 | // ShrinkDemandedConstant. This function should return true if the target |
3958 | // doesn't want ShrinkDemandedConstant to further optimize the constant. |
3959 | virtual bool targetShrinkDemandedConstant(SDValue Op, |
3960 | const APInt &DemandedBits, |
3961 | const APInt &DemandedElts, |
3962 | TargetLoweringOpt &TLO) const { |
3963 | return false; |
3964 | } |
3965 | |
3966 | /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. |
3967 | /// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast, |
3968 | /// but it could be generalized for targets with other types of implicit |
3969 | /// widening casts. |
3970 | bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, |
3971 | const APInt &DemandedBits, |
3972 | TargetLoweringOpt &TLO) const; |
3973 | |
3974 | /// Look at Op. At this point, we know that only the DemandedBits bits of the |
3975 | /// result of Op are ever used downstream. If we can use this information to |
3976 | /// simplify Op, create a new simplified DAG node and return true, returning |
3977 | /// the original and new nodes in Old and New. Otherwise, analyze the |
3978 | /// expression and return a mask of KnownOne and KnownZero bits for the |
3979 | /// expression (used to simplify the caller). The KnownZero/One bits may only |
3980 | /// be accurate for those bits in the Demanded masks. |
3981 | /// \p AssumeSingleUse When this parameter is true, this function will |
3982 | /// attempt to simplify \p Op even if there are multiple uses. |
3983 | /// Callers are responsible for correctly updating the DAG based on the |
3984 | /// results of this function, because simply replacing TLO.Old |
3985 | /// with TLO.New will be incorrect when this parameter is true and TLO.Old |
3986 | /// has multiple uses. |
  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                            const APInt &DemandedElts, KnownBits &Known,
                            TargetLoweringOpt &TLO, unsigned Depth = 0,
                            bool AssumeSingleUse = false) const;

  /// Helper wrapper around SimplifyDemandedBits, demanding all elements.
  /// Adds Op back to the worklist upon success.
  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                            KnownBits &Known, TargetLoweringOpt &TLO,
                            unsigned Depth = 0,
                            bool AssumeSingleUse = false) const;

  /// Helper wrapper around SimplifyDemandedBits, demanding all elements.
  /// Adds Op back to the worklist upon success.
  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                            DAGCombinerInfo &DCI) const;

  /// Helper wrapper around SimplifyDemandedBits, limited to the given
  /// demanded elements. Adds Op back to the worklist upon success.
  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                            const APInt &DemandedElts,
                            DAGCombinerInfo &DCI) const;

  /// More limited version of SimplifyDemandedBits that can be used to "look
  /// through" ops that don't contribute to the DemandedBits/DemandedElts -
  /// bitwise ops etc.
  SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          const APInt &DemandedElts,
                                          SelectionDAG &DAG,
                                          unsigned Depth = 0) const;

  /// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all
  /// elements.
  SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          SelectionDAG &DAG,
                                          unsigned Depth = 0) const;

  /// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all
  /// bits from only some vector elements.
  SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op,
                                                const APInt &DemandedElts,
                                                SelectionDAG &DAG,
                                                unsigned Depth = 0) const;

  /// Look at Vector Op. At this point, we know that only the DemandedElts
  /// elements of the result of Op are ever used downstream. If we can use
  /// this information to simplify Op, create a new simplified DAG node and
  /// return true, storing the original and new nodes in TLO.
  /// Otherwise, analyze the expression and return a mask of KnownUndef and
  /// KnownZero elements for the expression (used to simplify the caller).
  /// The KnownUndef/Zero elements may only be accurate for those bits
  /// in the DemandedEltMask.
  /// \p AssumeSingleUse When this parameter is true, this function will
  /// attempt to simplify \p Op even if there are multiple uses.
  /// Callers are responsible for correctly updating the DAG based on the
  /// results of this function, because simply replacing TLO.Old
  /// with TLO.New will be incorrect when this parameter is true and TLO.Old
  /// has multiple uses.
  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask,
                                  APInt &KnownUndef, APInt &KnownZero,
                                  TargetLoweringOpt &TLO, unsigned Depth = 0,
                                  bool AssumeSingleUse = false) const;

  /// Helper wrapper around SimplifyDemandedVectorElts.
  /// Adds Op back to the worklist upon success.
  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
                                  DAGCombinerInfo &DCI) const;
4054 | |
  /// Return true if the target supports simplifying demanded vector elements
  /// by converting them to undefs. The default implementation always allows
  /// the simplification.
  virtual bool
  shouldSimplifyDemandedVectorElts(SDValue Op,
                                   const TargetLoweringOpt &TLO) const {
    return true;
  }
4062 | |
  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts
  /// argument allows us to only collect the known bits that are shared by the
  /// requested vector elements.
  virtual void computeKnownBitsForTargetNode(const SDValue Op,
                                             KnownBits &Known,
                                             const APInt &DemandedElts,
                                             const SelectionDAG &DAG,
                                             unsigned Depth = 0) const;

  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts
  /// argument allows us to only collect the known bits that are shared by the
  /// requested vector elements. This is for GISel.
  virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis,
                                              Register R, KnownBits &Known,
                                              const APInt &DemandedElts,
                                              const MachineRegisterInfo &MRI,
                                              unsigned Depth = 0) const;

  /// Determine the known alignment for the pointer value \p R. This can
  /// typically be inferred from the number of low known 0 bits. However, for a
  /// pointer with a non-integral address space, the alignment value may be
  /// independent from the known low bits.
  virtual Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis,
                                                Register R,
                                                const MachineRegisterInfo &MRI,
                                                unsigned Depth = 0) const;

  /// Determine which of the bits of FrameIndex \p FIOp are known to be 0.
  /// Default implementation computes low bits based on alignment
  /// information. This should preserve known bits passed into it.
  virtual void computeKnownBitsForFrameIndex(int FIOp,
                                             KnownBits &Known,
                                             const MachineFunction &MF) const;

  /// This method can be implemented by targets that want to expose additional
  /// information about sign bits to the DAG Combiner. The DemandedElts
  /// argument allows us to only collect the minimum sign bits that are shared
  /// by the requested vector elements.
  virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                                   const APInt &DemandedElts,
                                                   const SelectionDAG &DAG,
                                                   unsigned Depth = 0) const;

  /// This method can be implemented by targets that want to expose additional
  /// information about sign bits to GlobalISel combiners. The DemandedElts
  /// argument allows us to only collect the minimum sign bits that are shared
  /// by the requested vector elements.
  virtual unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis,
                                                    Register R,
                                                    const APInt &DemandedElts,
                                                    const MachineRegisterInfo &MRI,
                                                    unsigned Depth = 0) const;

  /// Attempt to simplify any target nodes based on the demanded vector
  /// elements, returning true on success. Otherwise, analyze the expression and
  /// return a mask of KnownUndef and KnownZero elements for the expression
  /// (used to simplify the caller). The KnownUndef/Zero elements may only be
  /// accurate for those bits in the DemandedMask.
  virtual bool SimplifyDemandedVectorEltsForTargetNode(
      SDValue Op, const APInt &DemandedElts, APInt &KnownUndef,
      APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const;

  /// Attempt to simplify any target nodes based on the demanded bits/elts,
  /// returning true on success. Otherwise, analyze the
  /// expression and return a mask of KnownOne and KnownZero bits for the
  /// expression (used to simplify the caller). The KnownZero/One bits may only
  /// be accurate for those bits in the Demanded masks.
  virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                                 const APInt &DemandedBits,
                                                 const APInt &DemandedElts,
                                                 KnownBits &Known,
                                                 TargetLoweringOpt &TLO,
                                                 unsigned Depth = 0) const;

  /// More limited version of SimplifyDemandedBits that can be used to "look
  /// through" ops that don't contribute to the DemandedBits/DemandedElts -
  /// bitwise ops etc.
  virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
      SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
      SelectionDAG &DAG, unsigned Depth) const;

  /// Return true if this function can prove that \p Op is never poison
  /// and, if \p PoisonOnly is false, does not have undef bits. The DemandedElts
  /// argument limits the check to the requested vector elements.
  virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(
      SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
      bool PoisonOnly, unsigned Depth) const;

  /// Return true if Op can create undef or poison from non-undef & non-poison
  /// operands. The DemandedElts argument limits the check to the requested
  /// vector elements.
  virtual bool
  canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts,
                                      const SelectionDAG &DAG, bool PoisonOnly,
                                      bool ConsiderFlags, unsigned Depth) const;

  /// Tries to build a legal vector shuffle using the provided parameters
  /// or equivalent variations. The Mask argument may be modified as the
  /// function tries different variations.
  /// Returns an empty SDValue if the operation fails.
  SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
                                  SDValue N1, MutableArrayRef<int> Mask,
                                  SelectionDAG &DAG) const;

  /// This method returns the constant pool value that will be loaded by LD.
  /// NOTE: You must check for implicit extensions of the constant by LD.
  virtual const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const;

  /// If \p SNaN is false, \returns true if \p Op is known to never be any
  /// NaN. If \p SNaN is true, returns true if \p Op is known to never be a
  /// signaling NaN.
  virtual bool isKnownNeverNaNForTargetNode(SDValue Op,
                                            const SelectionDAG &DAG,
                                            bool SNaN = false,
                                            unsigned Depth = 0) const;

  /// Return true if vector \p Op has the same value across all \p DemandedElts,
  /// indicating any elements which may be undef in the output \p UndefElts.
  virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
                                         APInt &UndefElts,
                                         const SelectionDAG &DAG,
                                         unsigned Depth = 0) const;
4187 | |
4188 | /// Returns true if the given Opc is considered a canonical constant for the |
4189 | /// target, which should not be transformed back into a BUILD_VECTOR. |
4190 | virtual bool isTargetCanonicalConstantNode(SDValue Op) const { |
4191 | return Op.getOpcode() == ISD::SPLAT_VECTOR || |
4192 | Op.getOpcode() == ISD::SPLAT_VECTOR_PARTS; |
4193 | } |
4194 | |
4195 | struct DAGCombinerInfo { |
4196 | void *DC; // The DAG Combiner object. |
4197 | CombineLevel Level; |
4198 | bool CalledByLegalizer; |
4199 | |
4200 | public: |
4201 | SelectionDAG &DAG; |
4202 | |
4203 | DAGCombinerInfo(SelectionDAG &dag, CombineLevel level, bool cl, void *dc) |
4204 | : DC(dc), Level(level), CalledByLegalizer(cl), DAG(dag) {} |
4205 | |
4206 | bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; } |
4207 | bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; } |
4208 | bool isAfterLegalizeDAG() const { return Level >= AfterLegalizeDAG; } |
4209 | CombineLevel getDAGCombineLevel() { return Level; } |
4210 | bool isCalledByLegalizer() const { return CalledByLegalizer; } |
4211 | |
4212 | void AddToWorklist(SDNode *N); |
4213 | SDValue CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo = true); |
4214 | SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true); |
4215 | SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true); |
4216 | |
4217 | bool recursivelyDeleteUnusedNodes(SDNode *N); |
4218 | |
4219 | void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO); |
4220 | }; |
4221 | |
  /// Return true if \p N is a constant or constant vector equal to the true
  /// value from getBooleanContents().
  bool isConstTrueVal(SDValue N) const;

  /// Return true if \p N is a constant or constant vector equal to the false
  /// value from getBooleanContents().
  bool isConstFalseVal(SDValue N) const;

  /// Return true if \p N is a True value when extended to \p VT.
  bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const;

  /// Try to simplify a setcc built with the specified operands and cc. If it is
  /// unable to simplify it, return a null SDValue.
  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                        bool foldBooleans, DAGCombinerInfo &DCI,
                        const SDLoc &dl) const;

  // For targets which wrap address, unwrap for analysis.
  virtual SDValue unwrapAddress(SDValue N) const { return N; }

  /// Returns true (and the GlobalValue and the offset) if the node is a
  /// GlobalAddress + offset.
  virtual bool
  isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const;

  /// This method will be invoked for all target nodes and for any
  /// target-independent nodes that the target has registered with invoke it
  /// for.
  ///
  /// The semantics are as follows:
  /// Return Value:
  ///   SDValue.Val == 0 - No change was made
  ///   SDValue.Val == N - N was replaced, is dead, and is already handled.
  ///   otherwise        - N should be replaced by the returned Operand.
  ///
  /// In addition, methods provided by DAGCombinerInfo may be used to perform
  /// more complex transformations.
  ///
  virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
4261 | |
  /// Return true if it is profitable to move this shift by a constant amount
  /// through its operand, adjusting any immediate operands as necessary to
  /// preserve semantics. This transformation may not be desirable if it
  /// disrupts a particularly auspicious target-specific tree (e.g. bitfield
  /// extraction in AArch64). By default, it returns true.
  ///
  /// @param N the shift node
  /// @param Level the current DAGCombine legalization level.
  virtual bool isDesirableToCommuteWithShift(const SDNode *N,
                                             CombineLevel Level) const {
    return true;
  }
4274 | |
  /// GlobalISel - return true if it is profitable to move this shift by a
  /// constant amount through its operand, adjusting any immediate operands as
  /// necessary to preserve semantics. This transformation may not be desirable
  /// if it disrupts a particularly auspicious target-specific tree (e.g.
  /// bitfield extraction in AArch64). By default, it returns true.
  ///
  /// @param MI the shift instruction
  /// @param IsAfterLegal true if running after legalization.
  virtual bool isDesirableToCommuteWithShift(const MachineInstr &MI,
                                             bool IsAfterLegal) const {
    return true;
  }
4287 | |
  /// GlobalISel - return true if it's profitable to perform the combine:
  ///   shl ([sza]ext x), y => zext (shl x, y)
  /// By default, it returns true.
  virtual bool isDesirableToPullExtFromShl(const MachineInstr &MI) const {
    return true;
  }
4293 | |
  // Return AndOrSETCCFoldKind::{AddAnd, ABS} if it is desirable to try and
  // optimize LogicOp(SETCC0, SETCC1). An example (what is implemented as of
  // writing this) is:
  //    With C as a power of 2 and C != 0 and C != INT_MIN:
  //    AddAnd:
  //      (icmp eq A, C) | (icmp eq A, -C)
  //            -> (icmp eq and(add(A, C), ~(C + C)), 0)
  //      (icmp ne A, C) & (icmp ne A, -C)
  //            -> (icmp ne and(add(A, C), ~(C + C)), 0)
  //    ABS:
  //      (icmp eq A, C) | (icmp eq A, -C)
  //            -> (icmp eq Abs(A), C)
  //      (icmp ne A, C) & (icmp ne A, -C)
  //            -> (icmp ne Abs(A), C)
  //
  // @param LogicOp the logic op
  // @param SETCC0 the first of the SETCC nodes
  // @param SETCC1 the second of the SETCC nodes
  virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(
      const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const {
    return AndOrSETCCFoldKind::None;
  }
4316 | |
  /// Return true if it is profitable to combine an XOR of a logical shift
  /// to create a logical shift of NOT. This transformation may not be desirable
  /// if it disrupts a particularly auspicious target-specific tree (e.g.
  /// BIC on ARM/AArch64). By default, it returns true.
  virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const {
    return true;
  }

  /// Return true if the target has native support for the specified value type
  /// and it is 'desirable' to use the type for the given node type. e.g. On x86
  /// i16 is legal, but undesirable since i16 instruction encodings are longer
  /// and some i16 instructions are slow.
  virtual bool isTypeDesirableForOp(unsigned /*Opc*/, EVT VT) const {
    // By default, assume all legal types are desirable.
    return isTypeLegal(VT);
  }

  /// Return true if it is profitable for dag combiner to transform a floating
  /// point op of specified opcode to an equivalent op of an integer
  /// type. e.g. f32 load -> i32 load can be profitable on ARM.
  virtual bool isDesirableToTransformToIntegerOp(unsigned /*Opc*/,
                                                 EVT /*VT*/) const {
    return false;
  }

  /// This method queries the target whether it is beneficial for dag combiner
  /// to promote the specified node. If true, it should return the desired
  /// promotion type by reference.
  virtual bool IsDesirableToPromoteOp(SDValue /*Op*/, EVT &/*PVT*/) const {
    return false;
  }

  /// Return true if the target supports swifterror attribute. It optimizes
  /// loads and stores to reading and writing a specific register.
  /// Disabled by default.
  virtual bool supportSwiftError() const {
    return false;
  }

  /// Return true if the target supports that a subset of CSRs for the given
  /// machine function is handled explicitly via copies.
  /// Disabled by default.
  virtual bool supportSplitCSR(MachineFunction *MF) const {
    return false;
  }
4360 | |
  /// Return true if the target supports kcfi operand bundles.
  /// Disabled by default.
  virtual bool supportKCFIBundles() const { return false; }

  /// Perform necessary initialization to handle a subset of CSRs explicitly
  /// via copies. This function is called at the beginning of instruction
  /// selection. Targets enabling split-CSR handling must override this; the
  /// default implementation aborts.
  virtual void initializeSplitCSR(MachineBasicBlock *Entry) const {
    llvm_unreachable("Not Implemented" );
  }

  /// Insert explicit copies in entry and exit blocks. We copy a subset of
  /// CSRs to virtual registers in the entry block, and copy them back to
  /// physical registers in the exit blocks. This function is called at the end
  /// of instruction selection. Targets enabling split-CSR handling must
  /// override this; the default implementation aborts.
  virtual void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
    llvm_unreachable("Not Implemented" );
  }
4380 | |
  /// Return the newly negated expression if the cost is not expensive and
  /// set the cost in \p Cost to indicate that if it is cheaper or neutral to
  /// do the negation. Returns an empty SDValue if no negated form could be
  /// produced.
  virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                       bool LegalOps, bool OptForSize,
                                       NegatibleCost &Cost,
                                       unsigned Depth = 0) const;
4388 | |
4389 | SDValue getCheaperOrNeutralNegatedExpression( |
4390 | SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, |
4391 | const NegatibleCost CostThreshold = NegatibleCost::Neutral, |
4392 | unsigned Depth = 0) const { |
4393 | NegatibleCost Cost = NegatibleCost::Expensive; |
4394 | SDValue Neg = |
4395 | getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth); |
4396 | if (!Neg) |
4397 | return SDValue(); |
4398 | |
4399 | if (Cost <= CostThreshold) |
4400 | return Neg; |
4401 | |
4402 | // Remove the new created node to avoid the side effect to the DAG. |
4403 | if (Neg->use_empty()) |
4404 | DAG.RemoveDeadNode(N: Neg.getNode()); |
4405 | return SDValue(); |
4406 | } |
4407 | |
4408 | /// This is the helper function to return the newly negated expression only |
4409 | /// when the cost is cheaper. |
4410 | SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, |
4411 | bool LegalOps, bool OptForSize, |
4412 | unsigned Depth = 0) const { |
4413 | return getCheaperOrNeutralNegatedExpression(Op, DAG, LegalOps, OptForSize, |
4414 | CostThreshold: NegatibleCost::Cheaper, Depth); |
4415 | } |
4416 | |
4417 | /// This is the helper function to return the newly negated expression if |
4418 | /// the cost is not expensive. |
4419 | SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, |
4420 | bool OptForSize, unsigned Depth = 0) const { |
4421 | NegatibleCost Cost = NegatibleCost::Expensive; |
4422 | return getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth); |
4423 | } |
4424 | |
4425 | //===--------------------------------------------------------------------===// |
4426 | // Lowering methods - These methods must be implemented by targets so that |
4427 | // the SelectionDAGBuilder code knows how to lower these. |
4428 | // |
4429 | |
4430 | /// Target-specific splitting of values into parts that fit a register |
4431 | /// storing a legal type |
4432 | virtual bool splitValueIntoRegisterParts( |
4433 | SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, |
4434 | unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const { |
4435 | return false; |
4436 | } |
4437 | |
  /// Allows the target to handle physreg-carried dependency
  /// in a target-specific way. Used from the ScheduleDAGSDNodes to decide
  /// whether to add the edge to the dependency graph.
  /// Def - input: Selection DAG node defining the physical register
  /// User - input: Selection DAG node using the physical register
  /// Op - input: Number of User operand
  /// PhysReg - inout: set to the physical register if the edge is
  /// necessary, unchanged otherwise
  /// Cost - inout: physical register copy cost.
  /// Returns 'true' if the edge is necessary, 'false' otherwise.
  virtual bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
                                         const TargetRegisterInfo *TRI,
                                         const TargetInstrInfo *TII,
                                         unsigned &PhysReg, int &Cost) const {
    return false;
  }
4454 | |
  /// Target-specific combining of register parts into its original value.
  /// Returns an empty SDValue (the default) to use the generic joining logic.
  virtual SDValue
  joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL,
                             const SDValue *Parts, unsigned NumParts,
                             MVT PartVT, EVT ValueVT,
                             std::optional<CallingConv::ID> CC) const {
    return SDValue();
  }

  /// This hook must be implemented to lower the incoming (formal) arguments,
  /// described by the Ins array, into the specified DAG. The implementation
  /// should fill in the InVals array with legal-type argument values, and
  /// return the resulting token chain value.
  virtual SDValue LowerFormalArguments(
      SDValue /*Chain*/, CallingConv::ID /*CallConv*/, bool /*isVarArg*/,
      const SmallVectorImpl<ISD::InputArg> & /*Ins*/, const SDLoc & /*dl*/,
      SelectionDAG & /*DAG*/, SmallVectorImpl<SDValue> & /*InVals*/) const {
    llvm_unreachable("Not Implemented" );
  }
4474 | |
4475 | /// This structure contains all information that is necessary for lowering |
4476 | /// calls. It is passed to TLI::LowerCallTo when the SelectionDAG builder |
4477 | /// needs to lower a call, and targets will see this struct in their LowerCall |
4478 | /// implementation. |
4479 | struct CallLoweringInfo { |
4480 | SDValue Chain; |
4481 | Type *RetTy = nullptr; |
4482 | bool RetSExt : 1; |
4483 | bool RetZExt : 1; |
4484 | bool IsVarArg : 1; |
4485 | bool IsInReg : 1; |
4486 | bool DoesNotReturn : 1; |
4487 | bool IsReturnValueUsed : 1; |
4488 | bool IsConvergent : 1; |
4489 | bool IsPatchPoint : 1; |
4490 | bool IsPreallocated : 1; |
4491 | bool NoMerge : 1; |
4492 | |
4493 | // IsTailCall should be modified by implementations of |
4494 | // TargetLowering::LowerCall that perform tail call conversions. |
4495 | bool IsTailCall = false; |
4496 | |
4497 | // Is Call lowering done post SelectionDAG type legalization. |
4498 | bool IsPostTypeLegalization = false; |
4499 | |
4500 | unsigned NumFixedArgs = -1; |
4501 | CallingConv::ID CallConv = CallingConv::C; |
4502 | SDValue Callee; |
4503 | ArgListTy Args; |
4504 | SelectionDAG &DAG; |
4505 | SDLoc DL; |
4506 | const CallBase *CB = nullptr; |
4507 | SmallVector<ISD::OutputArg, 32> Outs; |
4508 | SmallVector<SDValue, 32> OutVals; |
4509 | SmallVector<ISD::InputArg, 32> Ins; |
4510 | SmallVector<SDValue, 4> InVals; |
4511 | const ConstantInt *CFIType = nullptr; |
4512 | SDValue ConvergenceControlToken; |
4513 | |
4514 | CallLoweringInfo(SelectionDAG &DAG) |
4515 | : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false), |
4516 | DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false), |
4517 | IsPatchPoint(false), IsPreallocated(false), NoMerge(false), |
4518 | DAG(DAG) {} |
4519 | |
4520 | CallLoweringInfo &setDebugLoc(const SDLoc &dl) { |
4521 | DL = dl; |
4522 | return *this; |
4523 | } |
4524 | |
4525 | CallLoweringInfo &setChain(SDValue InChain) { |
4526 | Chain = InChain; |
4527 | return *this; |
4528 | } |
4529 | |
4530 | // setCallee with target/module-specific attributes |
4531 | CallLoweringInfo &setLibCallee(CallingConv::ID CC, Type *ResultType, |
4532 | SDValue Target, ArgListTy &&ArgsList) { |
4533 | RetTy = ResultType; |
4534 | Callee = Target; |
4535 | CallConv = CC; |
4536 | NumFixedArgs = ArgsList.size(); |
4537 | Args = std::move(ArgsList); |
4538 | |
4539 | DAG.getTargetLoweringInfo().markLibCallAttributes( |
4540 | MF: &(DAG.getMachineFunction()), CC, Args); |
4541 | return *this; |
4542 | } |
4543 | |
4544 | CallLoweringInfo &setCallee(CallingConv::ID CC, Type *ResultType, |
4545 | SDValue Target, ArgListTy &&ArgsList, |
4546 | AttributeSet ResultAttrs = {}) { |
4547 | RetTy = ResultType; |
4548 | IsInReg = ResultAttrs.hasAttribute(Attribute::InReg); |
4549 | RetSExt = ResultAttrs.hasAttribute(Attribute::SExt); |
4550 | RetZExt = ResultAttrs.hasAttribute(Attribute::ZExt); |
4551 | NoMerge = ResultAttrs.hasAttribute(Attribute::NoMerge); |
4552 | |
4553 | Callee = Target; |
4554 | CallConv = CC; |
4555 | NumFixedArgs = ArgsList.size(); |
4556 | Args = std::move(ArgsList); |
4557 | return *this; |
4558 | } |
4559 | |
4560 | CallLoweringInfo &setCallee(Type *ResultType, FunctionType *FTy, |
4561 | SDValue Target, ArgListTy &&ArgsList, |
4562 | const CallBase &Call) { |
4563 | RetTy = ResultType; |
4564 | |
4565 | IsInReg = Call.hasRetAttr(Attribute::InReg); |
4566 | DoesNotReturn = |
4567 | Call.doesNotReturn() || |
4568 | (!isa<InvokeInst>(Val: Call) && isa<UnreachableInst>(Val: Call.getNextNode())); |
4569 | IsVarArg = FTy->isVarArg(); |
4570 | IsReturnValueUsed = !Call.use_empty(); |
4571 | RetSExt = Call.hasRetAttr(Attribute::SExt); |
4572 | RetZExt = Call.hasRetAttr(Attribute::ZExt); |
4573 | NoMerge = Call.hasFnAttr(Attribute::NoMerge); |
4574 | |
4575 | Callee = Target; |
4576 | |
4577 | CallConv = Call.getCallingConv(); |
4578 | NumFixedArgs = FTy->getNumParams(); |
4579 | Args = std::move(ArgsList); |
4580 | |
4581 | CB = &Call; |
4582 | |
4583 | return *this; |
4584 | } |
4585 | |
4586 | CallLoweringInfo &setInRegister(bool Value = true) { |
4587 | IsInReg = Value; |
4588 | return *this; |
4589 | } |
4590 | |
4591 | CallLoweringInfo &setNoReturn(bool Value = true) { |
4592 | DoesNotReturn = Value; |
4593 | return *this; |
4594 | } |
4595 | |
4596 | CallLoweringInfo &setVarArg(bool Value = true) { |
4597 | IsVarArg = Value; |
4598 | return *this; |
4599 | } |
4600 | |
4601 | CallLoweringInfo &setTailCall(bool Value = true) { |
4602 | IsTailCall = Value; |
4603 | return *this; |
4604 | } |
4605 | |
4606 | CallLoweringInfo &setDiscardResult(bool Value = true) { |
4607 | IsReturnValueUsed = !Value; |
4608 | return *this; |
4609 | } |
4610 | |
4611 | CallLoweringInfo &setConvergent(bool Value = true) { |
4612 | IsConvergent = Value; |
4613 | return *this; |
4614 | } |
4615 | |
4616 | CallLoweringInfo &setSExtResult(bool Value = true) { |
4617 | RetSExt = Value; |
4618 | return *this; |
4619 | } |
4620 | |
4621 | CallLoweringInfo &setZExtResult(bool Value = true) { |
4622 | RetZExt = Value; |
4623 | return *this; |
4624 | } |
4625 | |
4626 | CallLoweringInfo &setIsPatchPoint(bool Value = true) { |
4627 | IsPatchPoint = Value; |
4628 | return *this; |
4629 | } |
4630 | |
4631 | CallLoweringInfo &setIsPreallocated(bool Value = true) { |
4632 | IsPreallocated = Value; |
4633 | return *this; |
4634 | } |
4635 | |
4636 | CallLoweringInfo &setIsPostTypeLegalization(bool Value=true) { |
4637 | IsPostTypeLegalization = Value; |
4638 | return *this; |
4639 | } |
4640 | |
4641 | CallLoweringInfo &setCFIType(const ConstantInt *Type) { |
4642 | CFIType = Type; |
4643 | return *this; |
4644 | } |
4645 | |
4646 | CallLoweringInfo &setConvergenceControlToken(SDValue Token) { |
4647 | ConvergenceControlToken = Token; |
4648 | return *this; |
4649 | } |
4650 | |
4651 | ArgListTy &getArgs() { |
4652 | return Args; |
4653 | } |
4654 | }; |
4655 | |
  /// This structure is used to pass arguments to the makeLibCall function.
  /// All setters are chainable and default the corresponding flag to true.
  struct MakeLibCallOptions {
    // By passing type list before soften to makeLibCall, the target hook
    // shouldExtendTypeInLibCall can get the original type before soften.
    ArrayRef<EVT> OpsVTBeforeSoften;
    EVT RetVTBeforeSoften;
    bool IsSExt : 1;
    bool DoesNotReturn : 1;
    bool IsReturnValueUsed : 1;
    bool IsPostTypeLegalization : 1;
    bool IsSoften : 1;

    MakeLibCallOptions()
        : IsSExt(false), DoesNotReturn(false), IsReturnValueUsed(true),
          IsPostTypeLegalization(false), IsSoften(false) {}

    MakeLibCallOptions &setSExt(bool Value = true) {
      IsSExt = Value;
      return *this;
    }

    MakeLibCallOptions &setNoReturn(bool Value = true) {
      DoesNotReturn = Value;
      return *this;
    }

    // Note: inverted — passing true marks the return value as unused.
    MakeLibCallOptions &setDiscardResult(bool Value = true) {
      IsReturnValueUsed = !Value;
      return *this;
    }

    MakeLibCallOptions &setIsPostTypeLegalization(bool Value = true) {
      IsPostTypeLegalization = Value;
      return *this;
    }

    /// Record the pre-softening operand/return types (see comment on
    /// OpsVTBeforeSoften above) and mark this call as softened.
    MakeLibCallOptions &setTypeListBeforeSoften(ArrayRef<EVT> OpsVT, EVT RetVT,
                                                bool Value = true) {
      OpsVTBeforeSoften = OpsVT;
      RetVTBeforeSoften = RetVT;
      IsSoften = Value;
      return *this;
    }
  };
4700 | |
  /// This function lowers an abstract call to a function into an actual call.
  /// This returns a pair of operands. The first element is the return value
  /// for the function (if RetTy is not VoidTy). The second element is the
  /// outgoing token chain. It calls LowerCall to do the actual lowering.
  std::pair<SDValue, SDValue> LowerCallTo(CallLoweringInfo &CLI) const;

  /// This hook must be implemented to lower calls into the specified
  /// DAG. The outgoing arguments to the call are described by the Outs array,
  /// and the values to be returned by the call are described by the Ins
  /// array. The implementation should fill in the InVals array with legal-type
  /// return values from the call, and return the resulting token chain value.
  virtual SDValue
  LowerCall(CallLoweringInfo &/*CLI*/,
            SmallVectorImpl<SDValue> &/*InVals*/) const {
    llvm_unreachable("Not Implemented" );
  }

  /// Target-specific cleanup for formal ByVal parameters. The default
  /// implementation does nothing.
  virtual void HandleByVal(CCState *, unsigned &, Align) const {}
4720 | |
4721 | /// This hook should be implemented to check whether the return values |
4722 | /// described by the Outs array can fit into the return registers. If false |
4723 | /// is returned, an sret-demotion is performed. |
4724 | virtual bool CanLowerReturn(CallingConv::ID /*CallConv*/, |
4725 | MachineFunction &/*MF*/, bool /*isVarArg*/, |
4726 | const SmallVectorImpl<ISD::OutputArg> &/*Outs*/, |
4727 | LLVMContext &/*Context*/) const |
4728 | { |
4729 | // Return true by default to get preexisting behavior. |
4730 | return true; |
4731 | } |
4732 | |
4733 | /// This hook must be implemented to lower outgoing return values, described |
4734 | /// by the Outs array, into the specified DAG. The implementation should |
4735 | /// return the resulting token chain value. |
4736 | virtual SDValue LowerReturn(SDValue /*Chain*/, CallingConv::ID /*CallConv*/, |
4737 | bool /*isVarArg*/, |
4738 | const SmallVectorImpl<ISD::OutputArg> & /*Outs*/, |
4739 | const SmallVectorImpl<SDValue> & /*OutVals*/, |
4740 | const SDLoc & /*dl*/, |
4741 | SelectionDAG & /*DAG*/) const { |
4742 | llvm_unreachable("Not Implemented" ); |
4743 | } |
4744 | |
4745 | /// Return true if result of the specified node is used by a return node |
4746 | /// only. It also compute and return the input chain for the tail call. |
4747 | /// |
4748 | /// This is used to determine whether it is possible to codegen a libcall as |
4749 | /// tail call at legalization time. |
4750 | virtual bool isUsedByReturnOnly(SDNode *, SDValue &/*Chain*/) const { |
4751 | return false; |
4752 | } |
4753 | |
4754 | /// Return true if the target may be able emit the call instruction as a tail |
4755 | /// call. This is used by optimization passes to determine if it's profitable |
4756 | /// to duplicate return instructions to enable tailcall optimization. |
4757 | virtual bool mayBeEmittedAsTailCall(const CallInst *) const { |
4758 | return false; |
4759 | } |
4760 | |
4761 | /// Return the builtin name for the __builtin___clear_cache intrinsic |
4762 | /// Default is to invoke the clear cache library call |
4763 | virtual const char * getClearCacheBuiltinName() const { |
4764 | return "__clear_cache" ; |
4765 | } |
4766 | |
4767 | /// Return the register ID of the name passed in. Used by named register |
4768 | /// global variables extension. There is no target-independent behaviour |
4769 | /// so the default action is to bail. |
4770 | virtual Register getRegisterByName(const char* RegName, LLT Ty, |
4771 | const MachineFunction &MF) const { |
4772 | report_fatal_error(reason: "Named registers not implemented for this target" ); |
4773 | } |
4774 | |
4775 | /// Return the type that should be used to zero or sign extend a |
4776 | /// zeroext/signext integer return value. FIXME: Some C calling conventions |
4777 | /// require the return type to be promoted, but this is not true all the time, |
4778 | /// e.g. i1/i8/i16 on x86/x86_64. It is also not necessary for non-C calling |
4779 | /// conventions. The frontend should handle this and include all of the |
4780 | /// necessary information. |
4781 | virtual EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, |
4782 | ISD::NodeType /*ExtendKind*/) const { |
4783 | EVT MinVT = getRegisterType(MVT::i32); |
4784 | return VT.bitsLT(VT: MinVT) ? MinVT : VT; |
4785 | } |
4786 | |
4787 | /// For some targets, an LLVM struct type must be broken down into multiple |
4788 | /// simple types, but the calling convention specifies that the entire struct |
4789 | /// must be passed in a block of consecutive registers. |
4790 | virtual bool |
4791 | functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, |
4792 | bool isVarArg, |
4793 | const DataLayout &DL) const { |
4794 | return false; |
4795 | } |
4796 | |
4797 | /// For most targets, an LLVM type must be broken down into multiple |
4798 | /// smaller types. Usually the halves are ordered according to the endianness |
4799 | /// but for some platform that would break. So this method will default to |
4800 | /// matching the endianness but can be overridden. |
4801 | virtual bool |
4802 | shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const { |
4803 | return DL.isLittleEndian(); |
4804 | } |
4805 | |
4806 | /// Returns a 0 terminated array of registers that can be safely used as |
4807 | /// scratch registers. |
4808 | virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const { |
4809 | return nullptr; |
4810 | } |
4811 | |
4812 | /// Returns a 0 terminated array of rounding control registers that can be |
4813 | /// attached into strict FP call. |
4814 | virtual ArrayRef<MCPhysReg> getRoundingControlRegisters() const { |
4815 | return ArrayRef<MCPhysReg>(); |
4816 | } |
4817 | |
4818 | /// This callback is used to prepare for a volatile or atomic load. |
4819 | /// It takes a chain node as input and returns the chain for the load itself. |
4820 | /// |
4821 | /// Having a callback like this is necessary for targets like SystemZ, |
4822 | /// which allows a CPU to reuse the result of a previous load indefinitely, |
4823 | /// even if a cache-coherent store is performed by another CPU. The default |
4824 | /// implementation does nothing. |
4825 | virtual SDValue prepareVolatileOrAtomicLoad(SDValue Chain, const SDLoc &DL, |
4826 | SelectionDAG &DAG) const { |
4827 | return Chain; |
4828 | } |
4829 | |
/// This callback is invoked by the type legalizer to legalize nodes with an
/// illegal operand type but legal result types. It replaces the
/// LowerOperation callback in the type legalizer. The reason we cannot do
/// away with LowerOperation entirely is that LegalizeDAG isn't yet ready to
/// use this callback.
///
/// TODO: Consider merging with ReplaceNodeResults.
///
/// The target places new result values for the node in Results (their number
/// and types must exactly match those of the original return values of
/// the node), or leaves Results empty, which indicates that the node is not
/// to be custom lowered after all.
/// The default implementation calls LowerOperation.
virtual void LowerOperationWrapper(SDNode *N,
                                   SmallVectorImpl<SDValue> &Results,
                                   SelectionDAG &DAG) const;
4846 | |
/// This callback is invoked for operations that are unsupported by the
/// target, which are registered to use 'custom' lowering, and whose defined
/// values are all legal. If the target has no operations that require custom
/// lowering, it need not implement this. The default implementation of this
/// aborts.
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
4853 | |
4854 | /// This callback is invoked when a node result type is illegal for the |
4855 | /// target, and the operation was registered to use 'custom' lowering for that |
4856 | /// result type. The target places new result values for the node in Results |
4857 | /// (their number and types must exactly match those of the original return |
4858 | /// values of the node), or leaves Results empty, which indicates that the |
4859 | /// node is not to be custom lowered after all. |
4860 | /// |
4861 | /// If the target has no operations that require custom lowering, it need not |
4862 | /// implement this. The default implementation aborts. |
4863 | virtual void ReplaceNodeResults(SDNode * /*N*/, |
4864 | SmallVectorImpl<SDValue> &/*Results*/, |
4865 | SelectionDAG &/*DAG*/) const { |
4866 | llvm_unreachable("ReplaceNodeResults not implemented for this target!" ); |
4867 | } |
4868 | |
/// This method returns the name of a target-specific DAG node.
virtual const char *getTargetNodeName(unsigned Opcode) const;
4871 | |
4872 | /// This method returns a target specific FastISel object, or null if the |
4873 | /// target does not support "fast" ISel. |
4874 | virtual FastISel *createFastISel(FunctionLoweringInfo &, |
4875 | const TargetLibraryInfo *) const { |
4876 | return nullptr; |
4877 | } |
4878 | |
/// Check that the depth argument of a return-address intrinsic lowering is a
/// constant.
// NOTE(review): exact checking/diagnostic behavior is defined out-of-line;
// confirm in TargetLowering.cpp.
bool verifyReturnAddressArgumentIsConstant(SDValue Op,
                                           SelectionDAG &DAG) const;
4881 | |
4882 | #ifndef NDEBUG |
4883 | /// Check the given SDNode. Aborts if it is invalid. |
4884 | virtual void verifyTargetSDNode(const SDNode *N) const {}; |
4885 | #endif |
4886 | |
4887 | //===--------------------------------------------------------------------===// |
4888 | // Inline Asm Support hooks |
4889 | // |
4890 | |
4891 | /// This hook allows the target to expand an inline asm call to be explicit |
4892 | /// llvm code if it wants to. This is useful for turning simple inline asms |
4893 | /// into LLVM intrinsics, which gives the compiler more information about the |
4894 | /// behavior of the code. |
4895 | virtual bool ExpandInlineAsm(CallInst *) const { |
4896 | return false; |
4897 | } |
4898 | |
/// Kinds of inline-asm operand constraints.
enum ConstraintType {
  C_Register,      // Constraint represents specific register(s).
  C_RegisterClass, // Constraint represents any of register(s) in class.
  C_Memory,        // Memory constraint.
  C_Address,       // Address constraint.
  C_Immediate,     // Requires an immediate.
  C_Other,         // Something else.
  C_Unknown        // Unsupported constraint.
};
4908 | |
/// Weights used to rank how well an operand matches an inline-asm
/// constraint; higher is a better match, CW_Invalid means no match.
enum ConstraintWeight {
  // Generic weights.
  CW_Invalid = -1, // No match.
  CW_Okay = 0,     // Acceptable.
  CW_Good = 1,     // Good weight.
  CW_Better = 2,   // Better weight.
  CW_Best = 3,     // Best weight.

  // Well-known weights.
  CW_SpecificReg = CW_Okay, // Specific register operands.
  CW_Register = CW_Good,    // Register operands.
  CW_Memory = CW_Better,    // Memory operands.
  CW_Constant = CW_Best,    // Constant operand.
  CW_Default = CW_Okay      // Default or don't know type.
};
4924 | |
/// This contains information for each constraint that we are lowering.
struct AsmOperandInfo : public InlineAsm::ConstraintInfo {
  /// This contains the actual string for the code, like "m". TargetLowering
  /// picks the 'best' code from ConstraintInfo::Codes that most closely
  /// matches the operand.
  std::string ConstraintCode;

  /// Information about the constraint code, e.g. Register, RegisterClass,
  /// Memory, Other, Unknown.
  TargetLowering::ConstraintType ConstraintType = TargetLowering::C_Unknown;

  /// If this is the result output operand or a clobber, this is null,
  /// otherwise it is the incoming operand to the CallInst. This gets
  /// modified as the asm is processed.
  Value *CallOperandVal = nullptr;

  /// The ValueType for the operand value.
  MVT ConstraintVT = MVT::Other;

  /// Copy constructor for copying from a ConstraintInfo.
  AsmOperandInfo(InlineAsm::ConstraintInfo Info)
      : InlineAsm::ConstraintInfo(std::move(Info)) {}

  /// Return true if this is an input operand that is a matching constraint
  /// like "4".
  bool isMatchingInputConstraint() const;

  /// If this is an input matching constraint, this method returns the output
  /// operand it matches.
  unsigned getMatchedOperand() const;
};
4956 | |
/// A list of per-operand constraint descriptions for one inline-asm call.
using AsmOperandInfoVector = std::vector<AsmOperandInfo>;

/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values. If this returns an empty vector, and if the constraint
/// string itself isn't empty, there was an error parsing.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL,
                                              const TargetRegisterInfo *TRI,
                                              const CallBase &Call) const;

/// Examine constraint type and operand type and determine a weight value.
/// The operand object must already have been set up with the operand type.
virtual ConstraintWeight getMultipleConstraintMatchWeight(
    AsmOperandInfo &info, int maIndex) const;

/// Examine constraint string and operand type and determine a weight value.
/// The operand object must already have been set up with the operand type.
virtual ConstraintWeight getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const;

/// Determines the constraint code and constraint type to use for the specific
/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
/// If the actual operand being passed in is available, it can be passed in as
/// Op, otherwise an empty SDValue can be passed.
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo,
                                    SDValue Op,
                                    SelectionDAG *DAG = nullptr) const;

/// Given a constraint, return the type of constraint it is for this target.
virtual ConstraintType getConstraintType(StringRef Constraint) const;

using ConstraintPair = std::pair<StringRef, TargetLowering::ConstraintType>;
using ConstraintGroup = SmallVector<ConstraintPair>;
/// Given an OpInfo with a list of constraint codes as strings, return a
/// sorted vector of pairs of constraint codes and their types in priority of
/// what we'd prefer to lower them as. This may contain immediates that
/// cannot be lowered, but it is meant to be a machine-agnostic order of
/// preferences.
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const;

/// Given a physical register constraint (e.g. {edx}), return the register
/// number and the register class for the register.
///
/// Given a register class constraint, like 'r', if this corresponds directly
/// to an LLVM register class, return a register of 0 and the register class
/// pointer.
///
/// This should only be used for C_Register constraints. On error, this
/// returns a register number of 0 and a null register class pointer.
virtual std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                             StringRef Constraint, MVT VT) const;
5009 | |
5010 | virtual InlineAsm::ConstraintCode |
5011 | getInlineAsmMemConstraint(StringRef ConstraintCode) const { |
5012 | if (ConstraintCode == "m" ) |
5013 | return InlineAsm::ConstraintCode::m; |
5014 | if (ConstraintCode == "o" ) |
5015 | return InlineAsm::ConstraintCode::o; |
5016 | if (ConstraintCode == "X" ) |
5017 | return InlineAsm::ConstraintCode::X; |
5018 | if (ConstraintCode == "p" ) |
5019 | return InlineAsm::ConstraintCode::p; |
5020 | return InlineAsm::ConstraintCode::Unknown; |
5021 | } |
5022 | |
/// Try to replace an X constraint, which matches anything, with another that
/// has more specific requirements based on the type of the corresponding
/// operand. This returns null if there is no replacement to make.
virtual const char *LowerXConstraint(EVT ConstraintVT) const;

/// Lower the specified operand into the Ops vector. If it is invalid, don't
/// add anything to Ops.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                          std::vector<SDValue> &Ops,
                                          SelectionDAG &DAG) const;

/// Lower custom output constraints. If invalid, return SDValue().
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue,
                                            const SDLoc &DL,
                                            const AsmOperandInfo &OpInfo,
                                            SelectionDAG &DAG) const;

/// Targets may override this function to collect operands from the CallInst
/// and for example, lower them into the SelectionDAG operands.
virtual void CollectTargetIntrinsicOperands(const CallInst &I,
                                            SmallVectorImpl<SDValue> &Ops,
                                            SelectionDAG &DAG) const;
5045 | |
5046 | //===--------------------------------------------------------------------===// |
5047 | // Div utility functions |
5048 | // |
5049 | |
/// Build a DAG expansion for an SDIV node with a constant divisor, recording
/// any newly created nodes in \p Created.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
                  SmallVectorImpl<SDNode *> &Created) const;
/// Build a DAG expansion for a UDIV node with a constant divisor, recording
/// any newly created nodes in \p Created.
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
                  SmallVectorImpl<SDNode *> &Created) const;
// Build sdiv by power-of-2 with conditional move instructions.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor,
                              SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) const;

/// Targets may override this function to provide custom SDIV lowering for
/// power-of-2 denominators. If the target returns an empty SDValue, LLVM
/// assumes SDIV is expensive and replaces it with a series of other integer
/// operations.
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                              SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) const;

/// Targets may override this function to provide custom SREM lowering for
/// power-of-2 denominators. If the target returns an empty SDValue, LLVM
/// assumes SREM is expensive and replaces it with a series of other integer
/// operations.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor,
                              SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) const;
5074 | |
5075 | /// Indicate whether this target prefers to combine FDIVs with the same |
5076 | /// divisor. If the transform should never be done, return zero. If the |
5077 | /// transform should be done, return the minimum number of divisor uses |
5078 | /// that must exist. |
5079 | virtual unsigned combineRepeatedFPDivisors() const { |
5080 | return 0; |
5081 | } |
5082 | |
5083 | /// Hooks for building estimates in place of slower divisions and square |
5084 | /// roots. |
5085 | |
5086 | /// Return either a square root or its reciprocal estimate value for the input |
5087 | /// operand. |
5088 | /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or |
5089 | /// 'Enabled' as set by a potential default override attribute. |
5090 | /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson |
5091 | /// refinement iterations required to generate a sufficient (though not |
5092 | /// necessarily IEEE-754 compliant) estimate is returned in that parameter. |
5093 | /// The boolean UseOneConstNR output is used to select a Newton-Raphson |
5094 | /// algorithm implementation that uses either one or two constants. |
5095 | /// The boolean Reciprocal is used to select whether the estimate is for the |
5096 | /// square root of the input operand or the reciprocal of its square root. |
5097 | /// A target may choose to implement its own refinement within this function. |
5098 | /// If that's true, then return '0' as the number of RefinementSteps to avoid |
5099 | /// any further refinement of the estimate. |
5100 | /// An empty SDValue return means no estimate sequence can be created. |
5101 | virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, |
5102 | int Enabled, int &RefinementSteps, |
5103 | bool &UseOneConstNR, bool Reciprocal) const { |
5104 | return SDValue(); |
5105 | } |
5106 | |
/// Try to convert the fminnum/fmaxnum to a compare/select sequence. This is
/// required for correctness since InstCombine might have canonicalized a
/// fcmp+select sequence to a FMINNUM/FMAXNUM intrinsic. If we were to fall
/// through to the default expansion/softening to libcall, we might introduce
/// a link-time dependency on libm into a file that originally did not have
/// one.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const;
5113 | |
5114 | /// Return a reciprocal estimate value for the input operand. |
5115 | /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or |
5116 | /// 'Enabled' as set by a potential default override attribute. |
5117 | /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson |
5118 | /// refinement iterations required to generate a sufficient (though not |
5119 | /// necessarily IEEE-754 compliant) estimate is returned in that parameter. |
5120 | /// A target may choose to implement its own refinement within this function. |
5121 | /// If that's true, then return '0' as the number of RefinementSteps to avoid |
5122 | /// any further refinement of the estimate. |
5123 | /// An empty SDValue return means no estimate sequence can be created. |
5124 | virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, |
5125 | int Enabled, int &RefinementSteps) const { |
5126 | return SDValue(); |
5127 | } |
5128 | |
/// Return a target-dependent comparison result if the input operand is
/// suitable for use with a square root estimate calculation. For example, the
/// comparison may check if the operand is NAN, INF, zero, normal, etc. The
/// result should be used as the condition operand for a select or branch.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
                                 const DenormalMode &Mode) const;
5135 | |
5136 | /// Return a target-dependent result if the input operand is not suitable for |
5137 | /// use with a square root estimate calculation. |
5138 | virtual SDValue getSqrtResultForDenormInput(SDValue Operand, |
5139 | SelectionDAG &DAG) const { |
5140 | return DAG.getConstantFP(Val: 0.0, DL: SDLoc(Operand), VT: Operand.getValueType()); |
5141 | } |
5142 | |
5143 | //===--------------------------------------------------------------------===// |
5144 | // Legalization utility functions |
5145 | // |
5146 | |
/// Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes,
/// respectively, each computing an n/2-bit part of the result.
/// \param Result A vector that will be filled with the parts of the result
///        in little-endian order.
/// \param LL Low bits of the LHS of the MUL. You can use this parameter
///        if you want to control how low bits are extracted from the LHS.
/// \param LH High bits of the LHS of the MUL. See LL for meaning.
/// \param RL Low bits of the RHS of the MUL. See LL for meaning.
/// \param RH High bits of the RHS of the MUL. See LL for meaning.
/// \returns true if the node has been expanded, false if it has not.
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS,
                    SDValue RHS, SmallVectorImpl<SDValue> &Result, EVT HiLoVT,
                    SelectionDAG &DAG, MulExpansionKind Kind,
                    SDValue LL = SDValue(), SDValue LH = SDValue(),
                    SDValue RL = SDValue(), SDValue RH = SDValue()) const;

/// Expand a MUL into two nodes. One that computes the high bits of
/// the result and one that computes the low bits.
/// \param HiLoVT The value type to use for the Lo and Hi nodes.
/// \param LL Low bits of the LHS of the MUL. You can use this parameter
///        if you want to control how low bits are extracted from the LHS.
/// \param LH High bits of the LHS of the MUL. See LL for meaning.
/// \param RL Low bits of the RHS of the MUL. See LL for meaning.
/// \param RH High bits of the RHS of the MUL. See LL for meaning.
/// \returns true if the node has been expanded, false if it has not.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
               SelectionDAG &DAG, MulExpansionKind Kind,
               SDValue LL = SDValue(), SDValue LH = SDValue(),
               SDValue RL = SDValue(), SDValue RH = SDValue()) const;

/// Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit
/// urem by constant and other arithmetic ops. The n/2-bit urem by constant
/// will be expanded by DAGCombiner. This is not possible for all constant
/// divisors.
/// \param N Node to expand
/// \param Result A vector that will be filled with the lo and high parts of
///        the results. For *DIVREM, this will be the quotient parts followed
///        by the remainder parts.
/// \param HiLoVT The value type to use for the Lo and Hi parts. Should be
///        half of VT.
/// \param LL Low bits of the LHS of the operation. You can use this
///        parameter if you want to control how low bits are extracted from
///        the LHS.
/// \param LH High bits of the LHS of the operation. See LL for meaning.
/// \returns true if the node has been expanded, false if it has not.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl<SDValue> &Result,
                            EVT HiLoVT, SelectionDAG &DAG,
                            SDValue LL = SDValue(),
                            SDValue LH = SDValue()) const;
5196 | |
/// Expand funnel shift.
/// \param N Node to expand
/// \returns The expansion if successful, SDValue() otherwise
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const;

/// Expand rotations.
/// \param N Node to expand
/// \param AllowVectorOps expand vector rotate, this should only be performed
///        if the legalization is happening outside of LegalizeVectorOps
/// \returns The expansion if successful, SDValue() otherwise
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const;

/// Expand shift-by-parts.
/// \param N Node to expand
/// \param Lo lower-output-part after conversion
/// \param Hi upper-output-part after conversion
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi,
                      SelectionDAG &DAG) const;

/// Expand float to SINT conversion.
/// \param N Node to expand
/// \param Result output after conversion
/// \returns True, if the expansion was successful, false otherwise
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;

/// Expand float to UINT conversion.
/// \param N Node to expand
/// \param Result output after conversion
/// \param Chain output chain after conversion
/// \returns True, if the expansion was successful, false otherwise
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain,
                      SelectionDAG &DAG) const;

/// Expand UINT to FP conversion.
/// \param N Node to expand
/// \param Result output after conversion
/// \param Chain output chain after conversion
/// \returns True, if the expansion was successful, false otherwise
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain,
                      SelectionDAG &DAG) const;
5237 | |
/// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const;

/// Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
/// \param N Node to expand
/// \returns The expansion result
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const;

/// Truncate Op to ResultVT. If the result is exact, leave it alone. If it is
/// not exact, force the result to be odd.
/// \param ResultVT The type of result.
/// \param Op The value to round.
/// \returns The expansion result
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL,
                                SelectionDAG &DAG) const;

/// Expand round(fp) to fp conversion.
/// \param Node Node to expand
/// \returns The expansion result
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const;

/// Expand check for floating point class.
/// \param ResultVT The type of intrinsic call result.
/// \param Op The tested value.
/// \param Test The test to perform.
/// \param Flags The optimization flags.
/// \returns The expansion result or SDValue() if it fails.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test,
                         SDNodeFlags Flags, const SDLoc &DL,
                         SelectionDAG &DAG) const;
5268 | |
/// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes,
/// vector nodes can only succeed if all operations are legal/custom.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const;

/// Expand VP_CTPOP nodes.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const;

/// Expand CTLZ/CTLZ_ZERO_UNDEF nodes. Expands vector/scalar CTLZ nodes,
/// vector nodes can only succeed if all operations are legal/custom.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const;

/// Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const;

/// Expand CTTZ via Table Lookup.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                        SDValue Op, unsigned NumBitsPerElt) const;

/// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes,
/// vector nodes can only succeed if all operations are legal/custom.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const;

/// Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const;
5306 | |
/// Expand ABS nodes. Expands vector/scalar ABS nodes,
/// vector nodes can only succeed if all operations are legal/custom.
/// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size))
/// \param N Node to expand
/// \param IsNegative indicate negated abs
/// \returns The expansion result or SDValue() if it fails.
SDValue expandABS(SDNode *N, SelectionDAG &DAG,
                  bool IsNegative = false) const;

/// Expand ABDS/ABDU nodes. Expands vector/scalar ABDS/ABDU nodes.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const;

/// Expand BSWAP nodes. Expands scalar/vector BSWAP nodes with i16/i32/i64
/// scalar types. Returns SDValue() if expand fails.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const;

/// Expand VP_BSWAP nodes. Expands VP_BSWAP nodes with i16/i32/i64 scalar
/// types.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const;

/// Expand BITREVERSE nodes. Expands scalar/vector BITREVERSE nodes.
/// Returns SDValue() if expand fails.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const;

/// Expand VP_BITREVERSE nodes. Expands VP_BITREVERSE nodes with
/// i8/i16/i32/i64 scalar types.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const;
5342 | |
/// Turn load of vector type into a load of the individual elements.
/// \param LD load to expand
/// \returns BUILD_VECTOR and TokenFactor nodes.
std::pair<SDValue, SDValue> scalarizeVectorLoad(LoadSDNode *LD,
                                                SelectionDAG &DAG) const;

/// Turn a store of a vector type into stores of the individual elements.
/// \param ST Store with a vector value type
/// \returns TokenFactor of the individual store chains.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const;
5353 | |
/// Expands an unaligned load to 2 half-size loads for an integer, and
/// possibly more for vectors.
/// \param LD load to expand
/// \returns The value of the expanded load and its chain.
std::pair<SDValue, SDValue> expandUnalignedLoad(LoadSDNode *LD,
                                                SelectionDAG &DAG) const;

/// Expands an unaligned store to 2 half-size stores for integer values, and
/// possibly more for vectors.
/// \param ST store to expand
/// \returns The chain of the expanded stores.
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const;
5362 | |
/// Increments memory address \p Addr according to the type of the value
/// \p DataVT that should be stored. If the data is stored in compressed
/// form, the memory address should be incremented according to the number of
/// the stored elements. This number is equal to the number of '1's bits
/// in the \p Mask.
/// \p DataVT is a vector type. \p Mask is a vector value.
/// \p DataVT and \p Mask have the same number of vector elements.
/// \returns The incremented address.
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL,
                               EVT DataVT, SelectionDAG &DAG,
                               bool IsCompressedMemory) const;
5373 | |
/// Get a pointer to vector element \p Idx located in memory for a vector of
/// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out of
/// bounds the returned pointer is unspecified, but will be within the vector
/// bounds.
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
                                SDValue Index) const;

/// Get a pointer to a sub-vector of type \p SubVecVT at index \p Idx located
/// in memory for a vector of type \p VecVT starting at a base address of
/// \p VecPtr. If \p Idx plus the size of \p SubVecVT is out of bounds the
/// returned pointer is unspecified, but the value returned will be such that
/// the entire subvector would be within the vector bounds.
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
                               EVT SubVecVT, SDValue Index) const;
5388 | |
/// Method for building the DAG expansion of ISD::[US][MIN|MAX]. This
/// method accepts integers as its arguments.
/// \param Node Node to expand
/// \returns The expansion result.
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const;

/// Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT. This
/// method accepts integers as its arguments.
/// \param Node Node to expand
/// \returns The expansion result.
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const;

/// Method for building the DAG expansion of ISD::[US]SHLSAT. This
/// method accepts integers as its arguments.
/// \param Node Node to expand
/// \returns The expansion result.
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const;

/// Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT]. This
/// method accepts integers as its arguments.
/// \param Node Node to expand
/// \returns The expansion result.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const;
5404 | |
/// Method for building the DAG expansion of ISD::[US]DIVFIX[SAT]. This
/// method accepts integers as its arguments.
/// Note: This method may fail if the division could not be performed
/// within the type. Clients must retry with a wider type if this happens.
/// \param Opcode The ISD::[US]DIVFIX[SAT] opcode to expand
/// \param Scale The fixed-point scale of the operands
/// \returns The expansion result or SDValue() if it fails.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
                            SDValue LHS, SDValue RHS,
                            unsigned Scale, SelectionDAG &DAG) const;
5412 | |
/// Method for building the DAG expansion of ISD::U(ADD|SUB)O. Expansion
/// always succeeds and populates the Result and Overflow arguments.
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow,
                    SelectionDAG &DAG) const;

/// Method for building the DAG expansion of ISD::S(ADD|SUB)O. Expansion
/// always succeeds and populates the Result and Overflow arguments.
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow,
                    SelectionDAG &DAG) const;

/// Method for building the DAG expansion of ISD::[US]MULO. Returns whether
/// expansion was successful and populates the Result and Overflow arguments.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow,
                SelectionDAG &DAG) const;
5427 | |
/// forceExpandWideMUL - Unconditionally expand a MUL into either a libcall or
/// brute force via a wide multiplication. The expansion works by
/// attempting to do a multiplication on a wider type twice the size of the
/// original operands. LL and LH represent the lower and upper halves of the
/// first operand. RL and RH represent the lower and upper halves of the
/// second operand. The upper and lower halves of the result are stored in Lo
/// and Hi.
/// \param Signed whether the multiplication is signed
/// \param WideVT the wider type, twice the size of the original operands
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed,
                        EVT WideVT, const SDValue LL, const SDValue LH,
                        const SDValue RL, const SDValue RH, SDValue &Lo,
                        SDValue &Hi) const;

/// Same as above, but creates the upper halves of each operand by
/// sign/zero-extending the operands.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed,
                        const SDValue LHS, const SDValue RHS, SDValue &Lo,
                        SDValue &Hi) const;
5445 | |
/// Expand a VECREDUCE_* into an explicit calculation.
/// \param Node Node to expand
/// \returns The expansion result.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const;

/// Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
/// \param Node Node to expand
/// \returns The expansion result.
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const;

/// Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
/// On success the expanded value is stored in \p Result.
/// \returns true if the expansion was successful.
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const;
5456 | |
/// Method for building the DAG expansion of ISD::VECTOR_SPLICE. This
/// method accepts vectors as its arguments.
/// \param Node Node to expand
/// \returns The expansion result.
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const;
5460 | |
/// Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC
/// on the current target. A VP_SETCC will additionally be given a Mask
/// and/or EVL not equal to SDValue().
///
/// If the SETCC has been legalized using AND / OR, then the legalized node
/// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert
/// will be set to false. This will also hold if the VP_SETCC has been
/// legalized using VP_AND / VP_OR.
///
/// If the SETCC / VP_SETCC has been legalized by using
/// getSetCCSwappedOperands(), then the values of LHS and RHS will be
/// swapped, CC will be set to the new condition, and NeedInvert will be set
/// to false.
///
/// If the SETCC / VP_SETCC has been legalized using the inverse condcode,
/// then LHS and RHS will be unchanged, CC will set to the inverted condcode,
/// and NeedInvert will be set to true. The caller must invert the result of
/// the SETCC with SelectionDAG::getLogicalNOT() or take equivalent action to
/// swap the effect of a true/false result.
///
/// \param Mask VP mask operand, or SDValue() for a plain SETCC.
/// \param EVL VP explicit vector length operand, or SDValue() for a plain
/// SETCC.
/// \returns true if the SETCC / VP_SETCC has been legalized, false if it
/// hasn't.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS,
                           SDValue &RHS, SDValue &CC, SDValue Mask,
                           SDValue EVL, bool &NeedInvert, const SDLoc &dl,
                           SDValue &Chain, bool IsSignaling = false) const;
5487 | |
5488 | //===--------------------------------------------------------------------===// |
5489 | // Instruction Emitting Hooks |
5490 | // |
5491 | |
/// This method should be implemented by targets that mark instructions with
/// the 'usesCustomInserter' flag. These instructions are special in various
/// ways, which require special support to insert. The specified MachineInstr
/// is created but not inserted into any basic blocks, and this method is
/// called to expand it into a sequence of instructions, potentially also
/// creating new basic blocks and control flow.
/// As long as the returned basic block is different (i.e., we created a new
/// one), the custom inserter is free to modify the rest of \p MBB.
/// \param MI The instruction to expand
/// \param MBB The block the instruction conceptually belongs to
/// \returns The basic block in which control flow resumes.
virtual MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;
5502 | |
/// This method should be implemented by targets that mark instructions with
/// the 'hasPostISelHook' flag. These instructions must be adjusted after
/// instruction selection by target hooks. e.g. To fill in optional defs for
/// ARM 's' setting instructions.
/// \param MI The machine instruction to adjust
/// \param Node The SelectionDAG node the instruction was selected from
virtual void AdjustInstrPostInstrSelection(MachineInstr &MI,
                                           SDNode *Node) const;
5509 | |
5510 | /// If this function returns true, SelectionDAGBuilder emits a |
5511 | /// LOAD_STACK_GUARD node when it is lowering Intrinsic::stackprotector. |
5512 | virtual bool useLoadStackGuardNode() const { |
5513 | return false; |
5514 | } |
5515 | |
5516 | virtual SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, |
5517 | const SDLoc &DL) const { |
5518 | llvm_unreachable("not implemented for this target" ); |
5519 | } |
5520 | |
/// Lower TLS global address SDNode for target independent emulated TLS model.
/// \param GA The TLS global address to lower
/// \returns The lowered node.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                        SelectionDAG &DAG) const;

/// Expands target specific indirect branch for the case of JumpTable
/// expansion.
/// \param Value The value of the jump table entry
/// \param Addr The branch target address
/// \param JTI The jump table index
/// \returns The expanded indirect branch node.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
                                       SDValue Addr, int JTI,
                                       SelectionDAG &DAG) const;
5530 | |
/// seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits)))
/// If we're comparing for equality to zero and isCtlzFast is true, expose the
/// fact that this can be implemented as a ctlz/srl pair, so that the dag
/// combiner can fold the new nodes.
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const;

/// Return true if `X & Y eq/ne 0` is preferable to `X & Y ne/eq Y`.
virtual bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode, EVT) const {
  return true;
}
5541 | |
private:
// Attempt to fold a setcc whose LHS is an AND into a simpler setcc;
// returns SDValue() when no fold applies.
SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                         const SDLoc &DL, DAGCombinerInfo &DCI) const;
// Attempt to fold a setcc over a binary operator into a simpler setcc;
// returns SDValue() when no fold applies.
SDValue foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                           const SDLoc &DL, DAGCombinerInfo &DCI) const;

// Optimize a setcc that performs a signed truncation check.
// NOTE(review): semantics inferred from the name — confirm in the
// implementation.
SDValue optimizeSetCCOfSignedTruncationCheck(EVT SCCVT, SDValue N0,
                                             SDValue N1, ISD::CondCode Cond,
                                             DAGCombinerInfo &DCI,
                                             const SDLoc &DL) const;

// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
SDValue optimizeSetCCByHoistingAndByConstFromLogicalShift(
    EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
    DAGCombinerInfo &DCI, const SDLoc &DL) const;
5557 | |
// Helpers that try to rewrite a (setcc (urem X, C), CompTarget, Cond) into a
// cheaper equivalent node sequence. The "prepare" variant appends any newly
// created nodes to \p Created so the DAG combiner can revisit them; the
// "build" variant is the DAGCombiner entry point. Both return SDValue() on
// failure.
SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
                          SDValue CompTargetNode, ISD::CondCode Cond,
                          DAGCombinerInfo &DCI, const SDLoc &DL,
                          SmallVectorImpl<SDNode *> &Created) const;
SDValue buildUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode,
                        ISD::CondCode Cond, DAGCombinerInfo &DCI,
                        const SDLoc &DL) const;

// Signed counterparts of the UREM equality-fold helpers above, operating on
// (setcc (srem X, C), CompTarget, Cond).
SDValue prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
                          SDValue CompTargetNode, ISD::CondCode Cond,
                          DAGCombinerInfo &DCI, const SDLoc &DL,
                          SmallVectorImpl<SDNode *> &Created) const;
SDValue buildSREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode,
                        ISD::CondCode Cond, DAGCombinerInfo &DCI,
                        const SDLoc &DL) const;
5573 | }; |
5574 | |
/// Given an LLVM IR type and return type attributes, compute the return value
/// EVTs and flags, and optionally also the offsets, if the return value is
/// being lowered to memory.
/// \param CC The calling convention of the function being lowered.
/// \param ReturnType The IR-level return type.
/// \param attr Attribute list carrying the return value attributes.
/// \param Outs [out] Receives the computed return value descriptions.
/// \param TLI Target lowering info used to legalize the return type.
/// \param DL The module data layout.
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr,
                   SmallVectorImpl<ISD::OutputArg> &Outs,
                   const TargetLowering &TLI, const DataLayout &DL);
5581 | |
5582 | } // end namespace llvm |
5583 | |
5584 | #endif // LLVM_CODEGEN_TARGETLOWERING_H |
5585 | |