//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include <optional>
#include <utility>

namespace llvm {

class Function;

/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
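/// The defaults here are deliberately conservative: most legality queries
/// answer "no" and most cost queries return a nominal cheap cost, so a
/// derived class only needs to override the hooks its target can answer
/// more precisely.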
class TargetTransformInfoImplBase {
protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) = default;
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  const DataLayout &getDataLayout() const { return DL; }

  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands, Type *AccessType,
                             TTI::TargetCostKind CostKind) const {
    // In the basic model, we just assume that all-constant GEPs will be folded
    // into their uses via addressing modes.
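    // For example, a GEP whose index operands are all constants is TCC_Free,
    // while `getelementptr i32, ptr %p, i64 %i` (variable index) costs
    // TCC_Basic.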
    for (const Value *Operand : Operands)
      if (!isa<Constant>(Operand))
        return TTI::TCC_Basic;

    return TTI::TCC_Free;
  }

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) const {
    (void)PSI;
    (void)BFI;
    JTSize = 0;
    return SI.getNumCases();
  }

  unsigned getInliningThresholdMultiplier() const { return 1; }
  unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const { return 8; }
  unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const {
    return 8;
  }
  unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }
  unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const {
    return 0;
  }

  int getInlinerVectorBonusPercent() const { return 150; }

  InstructionCost getMemcpyCost(const Instruction *I) const {
    return TTI::TCC_Expensive;
  }

  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const {
    return 64;
  }

  // Although this default value is arbitrary, it is not random. It is assumed
  // that a condition that evaluates the same way by a higher percentage than
  // this is best represented as control flow. Therefore, the default value N
  // should be set such that the win from N% correct executions is greater than
  // the loss from (100 - N)% mispredicted executions for the majority of
  // intended targets.
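  // For example, with the 99/100 default below, a branch counts as
  // predictable only if it is expected to go the same way in at least 99% of
  // executions; anything less biased is a candidate for branchless lowering.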
  BranchProbability getPredictableBranchThreshold() const {
    return BranchProbability(99, 100);
  }

  bool hasBranchDivergence(const Function *F = nullptr) const { return false; }

  bool isSourceOfDivergence(const Value *V) const { return false; }

  bool isAlwaysUniform(const Value *V) const { return false; }

  bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
    return false;
  }

  bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const {
    return true;
  }

  unsigned getFlatAddressSpace() const { return -1; }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const {
    return false;
  }

  bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
    return AS == 0;
  }

  unsigned getAssumedAddrSpace(const Value *V) const { return -1; }

  bool isSingleThreaded() const { return false; }

  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const {
    return std::make_pair(nullptr, -1);
  }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const {
    return nullptr;
  }

  bool isLoweredToCall(const Function *F) const {
    assert(F && "A concrete function must be provided to this routine.");

    // FIXME: These should almost certainly not be handled here, and instead
    // handled with the help of TLI or the target itself. This was largely
    // ported from existing analysis heuristics here so that such refactorings
    // can take place in the future.

    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
        Name == "fmin" || Name == "fminf" || Name == "fminl" ||
        Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
        Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
        Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
      return false;

    // These are all likely to be optimized into something smaller.
    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
        Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
        Name == "llabs")
      return false;

    return true;
  }

  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const {
    return false;
  }

  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const { return false; }

  TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const {
    return TailFoldingStyle::DataWithoutLaneMask;
  }

  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const {
    return std::nullopt;
  }

  std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const {
    return std::nullopt;
  }

  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const {
    return std::nullopt;
  }

  void getUnrollingPreferences(Loop *, ScalarEvolution &,
                               TTI::UnrollingPreferences &,
                               OptimizationRemarkEmitter *) const {}

  void getPeelingPreferences(Loop *, ScalarEvolution &,
                             TTI::PeelingPreferences &) const {}

  bool isLegalAddImmediate(int64_t Imm) const { return false; }

  bool isLegalICmpImmediate(int64_t Imm) const { return false; }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I = nullptr) const {
    // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
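    // That is, [reg] and [reg + reg] are accepted (Scale of 0 or 1, no
    // offset, no global base), while e.g. [reg + 4] or [reg + 2*reg] are not.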
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }

  bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const {
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }

  bool isNumRegsMajorCostOfLSR() const { return true; }

  bool shouldFoldTerminatingConditionAfterLSR() const { return false; }

  bool isProfitableLSRChainElement(Instruction *I) const { return false; }

  bool canMacroFuseCmp() const { return false; }

  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) const {
    return false;
  }

  TTI::AddressingModeKind
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const {
    return TTI::AMK_None;
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalNTStore(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory stores are available for stores
    // that are aligned and have a size that is a power of 2.
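    // For example, a 16-byte <4 x i32> store qualifies only at alignment 16
    // or more, and a 12-byte type never qualifies (12 is not a power of 2).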
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory loads are available for loads that
    // are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
    return false;
  }

  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return false;
  }

  bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const {
    return false;
  }

  bool forceScalarizeMaskedScatter(VectorType *DataType,
                                   Align Alignment) const {
    return false;
  }

  bool isLegalMaskedCompressStore(Type *DataType) const { return false; }

  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                       const SmallBitVector &OpcodeMask) const {
    return false;
  }

  bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }

  bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const {
    return false;
  }

  bool enableOrderedReductions() const { return false; }

  bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
    return false;
  }

  bool prefersVectorizedAddressing() const { return true; }

  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) const {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                              AddrSpace))
      return 0;
    return -1;
  }

  bool LSRWithInstrQueries() const { return false; }

  bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }

  bool isProfitableToHoist(Instruction *I) const { return true; }

  bool useAA() const { return false; }

  bool isTypeLegal(Type *Ty) const { return false; }

  unsigned getRegUsageForType(Type *Ty) const { return 1; }

  bool shouldBuildLookupTables() const { return true; }

  bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; }

  bool shouldBuildRelLookupTables() const { return false; }

  bool useColdCCForColdCall(Function &F) const { return false; }

  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
                                           TTI::TargetCostKind CostKind) const {
    return 0;
  }

  InstructionCost
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) const {
    return 0;
  }

  bool supportsEfficientVectorElementLoadStore() const { return false; }

  bool supportsTailCalls() const { return true; }

  bool supportsTailCallFor(const CallBase *CB) const {
    return supportsTailCalls();
  }

  bool enableAggressiveInterleaving(bool LoopHasReductions) const {
    return false;
  }

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const {
    return {};
  }

  bool enableSelectOptimize() const { return true; }

  bool shouldTreatInstructionLikeSelect(const Instruction *I) {
    // If the select is a logical-and/logical-or then it is better treated as
    // an and/or by the backend.
    using namespace llvm::PatternMatch;
    return isa<SelectInst>(I) &&
           !match(I, m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
                                 m_LogicalOr(m_Value(), m_Value())));
  }

  bool enableInterleavedAccessVectorization() const { return false; }

  bool enableMaskedInterleavedAccessVectorization() const { return false; }

  bool isFPVectorizationPotentiallyUnsafe() const { return false; }

  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, Align Alignment,
                                      unsigned *Fast) const {
    return false;
  }

  TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
    return TTI::PSK_Software;
  }

  bool haveFastSqrt(Type *Ty) const { return false; }

  bool isExpensiveToSpeculativelyExecute(const Instruction *I) { return true; }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }

  InstructionCost getFPOpCost(Type *Ty) const {
    return TargetTransformInfo::TCC_Basic;
  }

  InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                        const APInt &Imm, Type *Ty) const {
    return 0;
  }

  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Basic;
  }

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) const {
    return TTI::TCC_Free;
  }

  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Free;
  }

  bool preferToKeepConstantsAttached(const Instruction &Inst,
                                     const Function &Fn) const {
    return false;
  }

  unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }

  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
    return Vector ? 1 : 0;
  }

  const char *getRegisterClassName(unsigned ClassID) const {
    switch (ClassID) {
    default:
      return "Generic::Unknown Register Class";
    case 0:
      return "Generic::ScalarRC";
    case 1:
      return "Generic::VectorRC";
    }
  }

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }

  unsigned getMinVectorRegisterBitWidth() const { return 128; }

  std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
  std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; }
  bool isVScaleKnownToBeAPowerOfTwo() const { return false; }

  bool
  shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {
    return false;
  }

  ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
    return ElementCount::get(0, IsScalable);
  }

  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }
  unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const { return VF; }

  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }

  unsigned getCacheLineSize() const { return 0; }
  std::optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      [[fallthrough]];
    case TargetTransformInfo::CacheLevel::L2D:
      return std::nullopt;
    }
    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  std::optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      [[fallthrough]];
    case TargetTransformInfo::CacheLevel::L2D:
      return std::nullopt;
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  std::optional<unsigned> getMinPageSize() const { return {}; }

  unsigned getPrefetchDistance() const { return 0; }
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches, bool HasCall) const {
    return 1;
  }
  unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
  bool enableWritePrefetching() const { return false; }
  bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }

  unsigned getMaxInterleaveFactor(ElementCount VF) const { return 1; }

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info,
      ArrayRef<const Value *> Args,
      const Instruction *CxtI = nullptr) const {
    // Widenable conditions will eventually lower into constants, so some
    // operations with them will be trivially optimized away.
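    // For example, `and i1 %c, %wc`, where %wc is produced by
    // @llvm.experimental.widenable.condition, is treated as free below.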
    auto IsWidenableCondition = [](const Value *V) {
      if (auto *II = dyn_cast<IntrinsicInst>(V))
        if (II->getIntrinsicID() == Intrinsic::experimental_widenable_condition)
          return true;
      return false;
    };
    // FIXME: A number of transformation tests seem to require these values
    // which seems a little odd for how arbitrary they are.
    switch (Opcode) {
    default:
      break;
    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      // FIXME: Unlikely to be true for CodeSize.
      return TTI::TCC_Expensive;
    case Instruction::And:
    case Instruction::Or:
      if (any_of(Args, IsWidenableCondition))
        return TTI::TCC_Free;
      break;
    }

    // Assume a 3cy latency for fp arithmetic ops.
    if (CostKind == TTI::TCK_Latency)
      if (Ty->getScalarType()->isFloatingPointTy())
        return 3;

    return 1;
  }

  InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
                                  unsigned Opcode1,
                                  const SmallBitVector &OpcodeMask,
                                  TTI::TargetCostKind CostKind) const {
    return InstructionCost::getInvalid();
  }

  InstructionCost
  getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
                 TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
                 ArrayRef<const Value *> Args = std::nullopt) const {
    return 1;
  }

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I) const {
    switch (Opcode) {
    default:
      break;
    case Instruction::IntToPtr: {
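      // e.g. on a target with 64-bit pointers and a legal i64,
      // `inttoptr i64 %x to ptr` is assumed to fold away and costs nothing.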
      unsigned SrcSize = Src->getScalarSizeInBits();
      if (DL.isLegalInteger(SrcSize) &&
          SrcSize <= DL.getPointerTypeSizeInBits(Dst))
        return 0;
      break;
    }
    case Instruction::PtrToInt: {
      unsigned DstSize = Dst->getScalarSizeInBits();
      if (DL.isLegalInteger(DstSize) &&
          DstSize >= DL.getPointerTypeSizeInBits(Src))
        return 0;
      break;
    }
    case Instruction::BitCast:
      if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return 0;
      break;
    case Instruction::Trunc: {
      // trunc to a native type is free (assuming the target has compare and
      // shift-right of the same width).
      TypeSize DstSize = DL.getTypeSizeInBits(Dst);
      if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedValue()))
        return 0;
      break;
    }
    }
    return 1;
  }

  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy,
                                           unsigned Index) const {
    return 1;
  }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) const {
    // A phi would be free, unless we're costing the throughput because it
    // will require a register.
    if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
      return 0;
    return 1;
  }

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I) const {
    return 1;
  }

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0,
                                     Value *Op1) const {
    return 1;
  }

  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index) const {
    return 1;
  }

  unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                                     const APInt &DemandedDstElts,
                                     TTI::TargetCostKind CostKind) {
    return 1;
  }

  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                  unsigned AddressSpace,
                                  TTI::TargetCostKind CostKind,
                                  TTI::OperandValueInfo OpInfo,
                                  const Instruction *I) const {
    return 1;
  }

  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                    unsigned AddressSpace,
                                    TTI::TargetCostKind CostKind,
                                    const Instruction *I) const {
    return 1;
  }

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) const {
    return 1;
  }

  InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) const {
    return InstructionCost::getInvalid();
  }

  unsigned getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond, bool UseMaskForGaps) const {
    return 1;
  }

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind) const {
    switch (ICA.getID()) {
    default:
      break;
    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
    case Intrinsic::arithmetic_fence:
    case Intrinsic::dbg_assign:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::experimental_noalias_scope_decl:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_align:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_subfn_addr:
    case Intrinsic::threadlocal_address:
    case Intrinsic::experimental_widenable_condition:
    case Intrinsic::ssa_copy:
      // These intrinsics don't actually represent code after lowering.
      return 0;
    }
    return 1;
  }

  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                   ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) const {
    return 1;
  }

  // Assume that we have a register of the right size for the type.
  unsigned getNumberOfParts(Type *Tp) const { return 1; }

  InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                            const SCEV *) const {
    return 0;
  }

  InstructionCost getArithmeticReductionCost(unsigned, VectorType *,
                                             std::optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind) const {
    return 1;
  }

  InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *,
                                         FastMathFlags,
                                         TTI::TargetCostKind) const {
    return 1;
  }

  InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
                                           Type *ResTy, VectorType *Ty,
                                           FastMathFlags FMF,
                                           TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
                                         VectorType *Ty,
                                         TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
    return 0;
  }

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const {
    return false;
  }

  unsigned getAtomicMemIntrinsicMaxElementSize() const {
    // Note for overrides: You must ensure for all element unordered-atomic
    // memory intrinsics that all power-of-2 element sizes up to, and
    // including, the return value of this method have a corresponding
    // runtime lib call. These runtime lib call definitions can be found
    // in RuntimeLibcalls.h
    return 0;
  }

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const {
    return nullptr;
  }

  Type *
  getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                            unsigned SrcAddrSpace, unsigned DestAddrSpace,
                            unsigned SrcAlign, unsigned DestAlign,
                            std::optional<uint32_t> AtomicElementSize) const {
    return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
                             : Type::getInt8Ty(Context);
  }

  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicCpySize) const {
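    // With no atomic element size the residual is lowered byte by byte:
    // e.g. RemainingBytes == 3 appends three i8 types to OpsOut.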
    unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
    Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
    for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
      OpsOut.push_back(OpType);
  }

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
                                unsigned DefaultCallPenalty) const {
    return DefaultCallPenalty;
  }

  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
                          const DataLayout &DL) const {
    return false;
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
                           const DataLayout &DL) const {
    return false;
  }

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }

  bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }

  bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }

  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const {
    return true;
  }

  bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; }

  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const {
    return VF;
  }

  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const {
    return VF;
  }

  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const {
    return false;
  }

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const {
    return false;
  }

  bool preferEpilogueVectorization() const {
    return true;
  }

  bool shouldExpandReduction(const IntrinsicInst *II) const { return true; }

  unsigned getGISelRematGlobalCost() const { return 1; }

  unsigned getMinTripCountTailFoldingThreshold() const { return 0; }

  bool supportsScalableVectors() const { return false; }

  bool enableScalableVectorization() const { return false; }

  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
                             Align Alignment) const {
    return false;
  }

  TargetTransformInfo::VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const {
    return TargetTransformInfo::VPLegalization(
        /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard,
        /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
  }

  bool hasArmWideBranch(bool) const { return false; }

  unsigned getMaxNumArgs() const { return UINT_MAX; }

protected:
  // Obtain the minimum required size to hold the value (without the sign).
  // In case of a vector it returns the min required size for one element.
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      // In case of a vector we need to pick the max between the min
      // required sizes of the elements.
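      // For example, for <2 x i16> <i16 3, i16 -8> the per-element minima are
      // 2 and 3 bits, so the result is 3 with isSigned == true.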
      auto *VT = cast<FixedVectorType>(Val->getType());

      // Assume unsigned elements
      isSigned = false;

      // The max required size is the size of the vector element type
      unsigned MaxRequiredSize =
          VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // Get the element min required size.
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getSignificantBits() - 1;
          // In case one element is signed then all the vector is signed.
          isSigned |= signedElement;
          // Save the max required bit size between all the elements.
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          // not an int constant element
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getSignificantBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    isSigned = false;
    return Val->getType()->getScalarSizeInBits();
  }

  bool isStridedAccess(const SCEV *Ptr) const {
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
  }

  const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
                                            const SCEV *Ptr) const {
    if (!isStridedAccess(Ptr))
      return nullptr;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
    return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
  }

  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                       int64_t MergeDistance) const {
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
    if (!Step)
      return false;
    APInt StrideVal = Step->getAPInt();
    if (StrideVal.getBitWidth() > 64)
      return false;
    // FIXME: Need to take absolute value for negative stride case.
    return StrideVal.getSExtValue() < MergeDistance;
  }
};

/// CRTP base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
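///
/// Calls that a target may want to specialize are dispatched through
/// static_cast<T *>(this), so overriding a hook in the derived class also
/// changes the behavior of the generic helpers below. A minimal sketch,
/// where MyTargetTTIImpl is a hypothetical target class (not part of this
/// file):
///
/// \code
///   class MyTargetTTIImpl
///       : public TargetTransformInfoImplCRTPBase<MyTargetTTIImpl> {
///   public:
///     explicit MyTargetTTIImpl(const DataLayout &DL)
///         : TargetTransformInfoImplCRTPBase(DL) {}
///     // getGEPCost and getInstructionCost below will now consult this
///     // override when checking addressing-mode legality.
///     bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
///                                int64_t BaseOffset, bool HasBaseReg,
///                                int64_t Scale, unsigned AddrSpace,
///                                Instruction *I = nullptr) const {
///       return !BaseGV && BaseOffset >= -4096 && BaseOffset < 4096 &&
///              Scale == 0;
///     }
///   };
/// \endcode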
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
  typedef TargetTransformInfoImplBase BaseT;

protected:
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

public:
  using BaseT::getGEPCost;

  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands, Type *AccessType,
                             TTI::TargetCostKind CostKind) {
    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);
    int64_t Scale = 0;

    auto GTI = gep_type_begin(PointeeType, Operands);
    Type *TargetType = nullptr;

    // Handle the case where the GEP instruction has a single operand,
    // the basis, therefore TargetType is a nullptr.
    if (Operands.empty())
      return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;

    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
      TargetType = GTI.getIndexedType();
      // We assume that the cost of Scalar GEP with constant index and the
      // cost of Vector GEP with splat constant index are the same.
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // For structures the index is always splat or scalar constant
        assert(ConstIdx && "Unexpected GEP index");
        uint64_t Field = ConstIdx->getZExtValue();
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
      } else {
        // If this operand is a scalable type, bail out early.
        // TODO: Make isLegalAddressingMode TypeSize aware.
        if (TargetType->isScalableTy())
          return TTI::TCC_Basic;
        int64_t ElementSize =
            GTI.getSequentialElementStride(DL).getFixedValue();
        if (ConstIdx) {
          BaseOffset +=
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
        } else {
          // Needs scale register.
          if (Scale != 0)
            // No addressing mode takes two scale registers.
            return TTI::TCC_Basic;
          Scale = ElementSize;
        }
      }
    }

    // If we haven't been provided a hint, use the target type for now.
    //
    // TODO: Take a look at potentially removing this: This is *slightly* wrong
    // as it's possible to have a GEP with a foldable target type but a memory
    // access that isn't foldable. For example, this load isn't foldable on
    // RISC-V:
    //
    // %p = getelementptr i32, ptr %base, i32 42
    // %x = load <2 x i32>, ptr %p
    if (!AccessType)
      AccessType = TargetType;

    // If the final address of the GEP is a legal addressing mode for the given
    // access type, then we can fold it into its users.
    if (static_cast<T *>(this)->isLegalAddressingMode(
            AccessType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
            Ptr->getType()->getPointerAddressSpace()))
      return TTI::TCC_Free;

    // TODO: Instead of returning TCC_Basic here, we should use
    // getArithmeticInstrCost. Or better yet, provide a hook to let the target
    // model it.
    return TTI::TCC_Basic;
  }

  InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                       const Value *Base,
                                       const TTI::PointersChainInfo &Info,
                                       Type *AccessTy,
                                       TTI::TargetCostKind CostKind) {
    InstructionCost Cost = TTI::TCC_Free;
    // In the basic model we take into account GEP instructions only (although
    // an alloca instruction, a value, constants and/or constant expressions,
    // PHIs, bitcasts ... whatever is allowed to be used as a pointer may also
    // appear here). Typically, if Base is not a GEP instruction and all the
    // pointers are relative to the same base address, all the rest are either
    // GEP instructions, PHIs, bitcasts or constants. When we have the same
    // base, we just calculate the cost of each non-Base GEP as an ADD
    // operation if any of its indices is a non-constant.
    // If there are no known dependencies between the pointers, the cost is
    // calculated as a sum of costs of the GEP instructions.
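    // For example, with a shared base %b, `gep i8, ptr %b, i64 8` is free
    // (all-constant indices), while `gep i8, ptr %b, i64 %i` costs one Add.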
    for (const Value *V : Ptrs) {
      const auto *GEP = dyn_cast<GetElementPtrInst>(V);
      if (!GEP)
        continue;
      if (Info.isSameBase() && V != Base) {
        if (GEP->hasAllConstantIndices())
          continue;
        Cost += static_cast<T *>(this)->getArithmeticInstrCost(
            Instruction::Add, GEP->getType(), CostKind,
            {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None},
            std::nullopt);
      } else {
        SmallVector<const Value *> Indices(GEP->indices());
        Cost += static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
                                                   GEP->getPointerOperand(),
                                                   Indices, AccessTy, CostKind);
      }
    }
    return Cost;
  }

  InstructionCost getInstructionCost(const User *U,
                                     ArrayRef<const Value *> Operands,
                                     TTI::TargetCostKind CostKind) {
    using namespace llvm::PatternMatch;

    auto *TargetTTI = static_cast<T *>(this);
    // Handle non-intrinsic calls, invokes, and callbr.
    // FIXME: Unlikely to be true for anything but CodeSize.
    auto *CB = dyn_cast<CallBase>(U);
    if (CB && !isa<IntrinsicInst>(U)) {
      if (const Function *F = CB->getCalledFunction()) {
        if (!TargetTTI->isLoweredToCall(F))
          return TTI::TCC_Basic; // Give a basic cost if it will be lowered

        return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
      }
      // For indirect or other calls, scale cost by number of arguments.
      return TTI::TCC_Basic * (CB->arg_size() + 1);
    }

    Type *Ty = U->getType();
    unsigned Opcode = Operator::getOpcode(U);
    auto *I = dyn_cast<Instruction>(U);
    switch (Opcode) {
    default:
      break;
    case Instruction::Call: {
      assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
      auto *Intrinsic = cast<IntrinsicInst>(U);
      IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
      return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
    }
    case Instruction::Br:
    case Instruction::Ret:
    case Instruction::PHI:
    case Instruction::Switch:
      return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
    case Instruction::ExtractValue:
    case Instruction::Freeze:
      return TTI::TCC_Free;
    case Instruction::Alloca:
      if (cast<AllocaInst>(U)->isStaticAlloca())
        return TTI::TCC_Free;
      break;
    case Instruction::GetElementPtr: {
      const auto *GEP = cast<GEPOperator>(U);
      Type *AccessType = nullptr;
      // For now, only provide the AccessType in the simple case where the GEP
      // only has one user.
      if (GEP->hasOneUser() && I)
        AccessType = I->user_back()->getAccessType();

      return TargetTTI->getGEPCost(GEP->getSourceElementType(),
                                   Operands.front(), Operands.drop_front(),
                                   AccessType, CostKind);
    }
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::FNeg: {
      const TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(Operands[0]);
      TTI::OperandValueInfo Op2Info;
      if (Opcode != Instruction::FNeg)
        Op2Info = TTI::getOperandInfo(Operands[1]);
      return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                               Op2Info, Operands, I);
    }
    case Instruction::IntToPtr:
    case Instruction::PtrToInt:
    case Instruction::SIToFP:
    case Instruction::UIToFP:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::Trunc:
    case Instruction::FPTrunc:
    case Instruction::BitCast:
    case Instruction::FPExt:
    case Instruction::SExt:
    case Instruction::ZExt:
    case Instruction::AddrSpaceCast: {
      Type *OpTy = Operands[0]->getType();
      return TargetTTI->getCastInstrCost(
          Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
    }
    case Instruction::Store: {
      auto *SI = cast<StoreInst>(U);
      Type *ValTy = Operands[0]->getType();
      TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(Operands[0]);
      return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
                                        SI->getPointerAddressSpace(), CostKind,
                                        OpInfo, I);
    }
    case Instruction::Load: {
      // FIXME: Arbitrary cost which could come from the backend.
      if (CostKind == TTI::TCK_Latency)
        return 4;
      auto *LI = cast<LoadInst>(U);
      Type *LoadType = U->getType();
      // If there is a non-register sized type, the cost estimation may expand
      // it to be several instructions to load into multiple registers on the
      // target. But, if the only use of the load is a trunc instruction to a
      // register sized type, the instruction selector can combine these
      // instructions to be a single load. So, in this case, we use the
      // destination type of the trunc instruction rather than the load to
      // accurately estimate the cost of this load instruction.
      if (CostKind == TTI::TCK_CodeSize && LI->hasOneUse() &&
          !LoadType->isVectorTy()) {
        if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
          LoadType = TI->getDestTy();
      }
      return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
                                        LI->getPointerAddressSpace(), CostKind,
                                        {TTI::OK_AnyValue, TTI::OP_None}, I);
    }
    case Instruction::Select: {
      const Value *Op0, *Op1;
      if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) ||
          match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
        // select x, y, false --> x & y
        // select x, true, y --> x | y
        const auto Op1Info = TTI::getOperandInfo(Op0);
        const auto Op2Info = TTI::getOperandInfo(Op1);
        assert(Op0->getType()->getScalarSizeInBits() == 1 &&
               Op1->getType()->getScalarSizeInBits() == 1);

        SmallVector<const Value *, 2> Operands{Op0, Op1};
        return TargetTTI->getArithmeticInstrCost(
            match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
            CostKind, Op1Info, Op2Info, Operands, I);
      }
      Type *CondTy = Operands[0]->getType();
      return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
                                           CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::ICmp:
    case Instruction::FCmp: {
      Type *ValTy = Operands[0]->getType();
      // TODO: Also handle ICmp/FCmp constant expressions.
      return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
                                           I ? cast<CmpInst>(I)->getPredicate()
                                             : CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::InsertElement: {
      auto *IE = dyn_cast<InsertElementInst>(U);
      if (!IE)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(Operands[2]))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
    }
    case Instruction::ShuffleVector: {
      auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
      if (!Shuffle)
        return TTI::TCC_Basic; // FIXME

      auto *VecTy = cast<VectorType>(U->getType());
      auto *VecSrcTy = cast<VectorType>(Operands[0]->getType());
      int NumSubElts, SubIndex;

      if (Shuffle->changesLength()) {
        // Treat a 'subvector widening' as a free shuffle.
        if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
          return 0;

        if (Shuffle->isExtractSubvectorMask(SubIndex))
          return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
                                           Shuffle->getShuffleMask(), CostKind,
                                           SubIndex, VecTy, Operands);

        if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
          return TargetTTI->getShuffleCost(
              TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(),
              CostKind, SubIndex,
              FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
              Operands);

        int ReplicationFactor, VF;
        if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
          APInt DemandedDstElts =
              APInt::getZero(Shuffle->getShuffleMask().size());
          for (auto I : enumerate(Shuffle->getShuffleMask())) {
            if (I.value() != PoisonMaskElem)
              DemandedDstElts.setBit(I.index());
          }
          return TargetTTI->getReplicationShuffleCost(
              VecSrcTy->getElementType(), ReplicationFactor, VF,
              DemandedDstElts, CostKind);
        }

        return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
      }

      if (Shuffle->isIdentity())
        return 0;

      if (Shuffle->isReverse())
        return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isSelect())
        return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isTranspose())
        return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isZeroEltSplat())
        return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isSingleSource())
        return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
        return TargetTTI->getShuffleCost(
            TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), CostKind,
            SubIndex, FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
            Operands);

      if (Shuffle->isSplice(SubIndex))
        return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy,
                                         Shuffle->getShuffleMask(), CostKind,
                                         SubIndex, nullptr, Operands);

      return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy,
                                       Shuffle->getShuffleMask(), CostKind, 0,
                                       nullptr, Operands);
    }
    case Instruction::ExtractElement: {
      auto *EEI = dyn_cast<ExtractElementInst>(U);
      if (!EEI)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(Operands[1]))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      Type *DstTy = Operands[0]->getType();
      return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
    }
    }

    // By default, just classify everything as 'basic' or -1 to represent that
    // we don't know the throughput cost.
    return CostKind == TTI::TCK_RecipThroughput ? -1 : TTI::TCC_Basic;
  }

  bool isExpensiveToSpeculativelyExecute(const Instruction *I) {
    auto *TargetTTI = static_cast<T *>(this);
    SmallVector<const Value *, 4> Ops(I->operand_values());
    InstructionCost Cost = TargetTTI->getInstructionCost(
        I, Ops, TargetTransformInfo::TCK_SizeAndLatency);
    return Cost >= TargetTransformInfo::TCC_Expensive;
  }
};
} // namespace llvm

#endif