//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
15#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
16
17#include "llvm/Analysis/ScalarEvolutionExpressions.h"
18#include "llvm/Analysis/TargetTransformInfo.h"
19#include "llvm/Analysis/VectorUtils.h"
20#include "llvm/IR/DataLayout.h"
21#include "llvm/IR/GetElementPtrTypeIterator.h"
22#include "llvm/IR/IntrinsicInst.h"
23#include "llvm/IR/Operator.h"
24#include "llvm/IR/PatternMatch.h"
25#include <optional>
26#include <utility>
27
28namespace llvm {
29
30class Function;
31
32/// Base class for use as a mix-in that aids implementing
33/// a TargetTransformInfo-compatible class.
34class TargetTransformInfoImplBase {
35protected:
36 typedef TargetTransformInfo TTI;
37
38 const DataLayout &DL;
39
40 explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}
41
42public:
43 // Provide value semantics. MSVC requires that we spell all of these out.
44 TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) = default;
45 TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}
46
47 const DataLayout &getDataLayout() const { return DL; }
48
49 InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
50 ArrayRef<const Value *> Operands, Type *AccessType,
51 TTI::TargetCostKind CostKind) const {
52 // In the basic model, we just assume that all-constant GEPs will be folded
53 // into their uses via addressing modes.
54 for (const Value *Operand : Operands)
55 if (!isa<Constant>(Val: Operand))
56 return TTI::TCC_Basic;
57
58 return TTI::TCC_Free;
59 }
60
61 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
62 unsigned &JTSize,
63 ProfileSummaryInfo *PSI,
64 BlockFrequencyInfo *BFI) const {
65 (void)PSI;
66 (void)BFI;
67 JTSize = 0;
68 return SI.getNumCases();
69 }
70
71 unsigned getInliningThresholdMultiplier() const { return 1; }
72 unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const { return 8; }
73 unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const {
74 return 8;
75 }
76 unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }
77 unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const {
78 return 0;
79 };
80
81 int getInlinerVectorBonusPercent() const { return 150; }
82
83 InstructionCost getMemcpyCost(const Instruction *I) const {
84 return TTI::TCC_Expensive;
85 }
86
87 uint64_t getMaxMemIntrinsicInlineSizeThreshold() const {
88 return 64;
89 }
90
91 // Although this default value is arbitrary, it is not random. It is assumed
92 // that a condition that evaluates the same way by a higher percentage than
93 // this is best represented as control flow. Therefore, the default value N
94 // should be set such that the win from N% correct executions is greater than
95 // the loss from (100 - N)% mispredicted executions for the majority of
96 // intended targets.
97 BranchProbability getPredictableBranchThreshold() const {
98 return BranchProbability(99, 100);
99 }
100
101 bool hasBranchDivergence(const Function *F = nullptr) const { return false; }
102
103 bool isSourceOfDivergence(const Value *V) const { return false; }
104
105 bool isAlwaysUniform(const Value *V) const { return false; }
106
107 bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
108 return false;
109 }
110
111 bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const {
112 return true;
113 }
114
115 unsigned getFlatAddressSpace() const { return -1; }
116
117 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
118 Intrinsic::ID IID) const {
119 return false;
120 }
121
122 bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
123 bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
124 return AS == 0;
125 };
126
127 unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
128
129 bool isSingleThreaded() const { return false; }
130
131 std::pair<const Value *, unsigned>
132 getPredicatedAddrSpace(const Value *V) const {
133 return std::make_pair(x: nullptr, y: -1);
134 }
135
136 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
137 Value *NewV) const {
138 return nullptr;
139 }
140
141 bool isLoweredToCall(const Function *F) const {
142 assert(F && "A concrete function must be provided to this routine.");
143
144 // FIXME: These should almost certainly not be handled here, and instead
145 // handled with the help of TLI or the target itself. This was largely
146 // ported from existing analysis heuristics here so that such refactorings
147 // can take place in the future.
148
149 if (F->isIntrinsic())
150 return false;
151
152 if (F->hasLocalLinkage() || !F->hasName())
153 return true;
154
155 StringRef Name = F->getName();
156
157 // These will all likely lower to a single selection DAG node.
158 if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
159 Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
160 Name == "fmin" || Name == "fminf" || Name == "fminl" ||
161 Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
162 Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
163 Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
164 return false;
165
166 // These are all likely to be optimized into something smaller.
167 if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
168 Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
169 Name == "floorf" || Name == "ceil" || Name == "round" ||
170 Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
171 Name == "llabs")
172 return false;
173
174 return true;
175 }
176
177 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
178 AssumptionCache &AC, TargetLibraryInfo *LibInfo,
179 HardwareLoopInfo &HWLoopInfo) const {
180 return false;
181 }
182
183 bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const { return false; }
184
185 TailFoldingStyle
186 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const {
187 return TailFoldingStyle::DataWithoutLaneMask;
188 }
189
190 std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
191 IntrinsicInst &II) const {
192 return std::nullopt;
193 }
194
195 std::optional<Value *>
196 simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
197 APInt DemandedMask, KnownBits &Known,
198 bool &KnownBitsComputed) const {
199 return std::nullopt;
200 }
201
202 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
203 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
204 APInt &UndefElts2, APInt &UndefElts3,
205 std::function<void(Instruction *, unsigned, APInt, APInt &)>
206 SimplifyAndSetOp) const {
207 return std::nullopt;
208 }
209
210 void getUnrollingPreferences(Loop *, ScalarEvolution &,
211 TTI::UnrollingPreferences &,
212 OptimizationRemarkEmitter *) const {}
213
214 void getPeelingPreferences(Loop *, ScalarEvolution &,
215 TTI::PeelingPreferences &) const {}
216
217 bool isLegalAddImmediate(int64_t Imm) const { return false; }
218
219 bool isLegalICmpImmediate(int64_t Imm) const { return false; }
220
221 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
222 bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
223 Instruction *I = nullptr) const {
224 // Guess that only reg and reg+reg addressing is allowed. This heuristic is
225 // taken from the implementation of LSR.
226 return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
227 }
228
229 bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const {
230 return std::tie(args: C1.NumRegs, args: C1.AddRecCost, args: C1.NumIVMuls, args: C1.NumBaseAdds,
231 args: C1.ScaleCost, args: C1.ImmCost, args: C1.SetupCost) <
232 std::tie(args: C2.NumRegs, args: C2.AddRecCost, args: C2.NumIVMuls, args: C2.NumBaseAdds,
233 args: C2.ScaleCost, args: C2.ImmCost, args: C2.SetupCost);
234 }
235
236 bool isNumRegsMajorCostOfLSR() const { return true; }
237
238 bool shouldFoldTerminatingConditionAfterLSR() const { return false; }
239
240 bool isProfitableLSRChainElement(Instruction *I) const { return false; }
241
242 bool canMacroFuseCmp() const { return false; }
243
244 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
245 DominatorTree *DT, AssumptionCache *AC,
246 TargetLibraryInfo *LibInfo) const {
247 return false;
248 }
249
250 TTI::AddressingModeKind
251 getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const {
252 return TTI::AMK_None;
253 }
254
255 bool isLegalMaskedStore(Type *DataType, Align Alignment) const {
256 return false;
257 }
258
259 bool isLegalMaskedLoad(Type *DataType, Align Alignment) const {
260 return false;
261 }
262
263 bool isLegalNTStore(Type *DataType, Align Alignment) const {
264 // By default, assume nontemporal memory stores are available for stores
265 // that are aligned and have a size that is a power of 2.
266 unsigned DataSize = DL.getTypeStoreSize(Ty: DataType);
267 return Alignment >= DataSize && isPowerOf2_32(Value: DataSize);
268 }
269
270 bool isLegalNTLoad(Type *DataType, Align Alignment) const {
271 // By default, assume nontemporal memory loads are available for loads that
272 // are aligned and have a size that is a power of 2.
273 unsigned DataSize = DL.getTypeStoreSize(Ty: DataType);
274 return Alignment >= DataSize && isPowerOf2_32(Value: DataSize);
275 }
276
277 bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
278 return false;
279 }
280
281 bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
282 return false;
283 }
284
285 bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
286 return false;
287 }
288
289 bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const {
290 return false;
291 }
292
293 bool forceScalarizeMaskedScatter(VectorType *DataType,
294 Align Alignment) const {
295 return false;
296 }
297
298 bool isLegalMaskedCompressStore(Type *DataType) const { return false; }
299
300 bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
301 const SmallBitVector &OpcodeMask) const {
302 return false;
303 }
304
305 bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }
306
307 bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const {
308 return false;
309 }
310
311 bool enableOrderedReductions() const { return false; }
312
313 bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }
314
315 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
316 return false;
317 }
318
319 bool prefersVectorizedAddressing() const { return true; }
320
321 InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
322 int64_t BaseOffset, bool HasBaseReg,
323 int64_t Scale,
324 unsigned AddrSpace) const {
325 // Guess that all legal addressing mode are free.
326 if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
327 AddrSpace))
328 return 0;
329 return -1;
330 }
331
332 bool LSRWithInstrQueries() const { return false; }
333
334 bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }
335
336 bool isProfitableToHoist(Instruction *I) const { return true; }
337
338 bool useAA() const { return false; }
339
340 bool isTypeLegal(Type *Ty) const { return false; }
341
342 unsigned getRegUsageForType(Type *Ty) const { return 1; }
343
344 bool shouldBuildLookupTables() const { return true; }
345
346 bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; }
347
348 bool shouldBuildRelLookupTables() const { return false; }
349
350 bool useColdCCForColdCall(Function &F) const { return false; }
351
352 InstructionCost getScalarizationOverhead(VectorType *Ty,
353 const APInt &DemandedElts,
354 bool Insert, bool Extract,
355 TTI::TargetCostKind CostKind) const {
356 return 0;
357 }
358
359 InstructionCost
360 getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
361 ArrayRef<Type *> Tys,
362 TTI::TargetCostKind CostKind) const {
363 return 0;
364 }
365
366 bool supportsEfficientVectorElementLoadStore() const { return false; }
367
368 bool supportsTailCalls() const { return true; }
369
370 bool supportsTailCallFor(const CallBase *CB) const {
371 return supportsTailCalls();
372 }
373
374 bool enableAggressiveInterleaving(bool LoopHasReductions) const {
375 return false;
376 }
377
378 TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
379 bool IsZeroCmp) const {
380 return {};
381 }
382
383 bool enableSelectOptimize() const { return true; }
384
385 bool shouldTreatInstructionLikeSelect(const Instruction *I) {
386 // If the select is a logical-and/logical-or then it is better treated as a
387 // and/or by the backend.
388 using namespace llvm::PatternMatch;
389 return isa<SelectInst>(Val: I) &&
390 !match(V: I, P: m_CombineOr(L: m_LogicalAnd(L: m_Value(), R: m_Value()),
391 R: m_LogicalOr(L: m_Value(), R: m_Value())));
392 }
393
394 bool enableInterleavedAccessVectorization() const { return false; }
395
396 bool enableMaskedInterleavedAccessVectorization() const { return false; }
397
398 bool isFPVectorizationPotentiallyUnsafe() const { return false; }
399
400 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
401 unsigned AddressSpace, Align Alignment,
402 unsigned *Fast) const {
403 return false;
404 }
405
406 TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
407 return TTI::PSK_Software;
408 }
409
410 bool haveFastSqrt(Type *Ty) const { return false; }
411
412 bool isExpensiveToSpeculativelyExecute(const Instruction *I) { return true; }
413
414 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }
415
416 InstructionCost getFPOpCost(Type *Ty) const {
417 return TargetTransformInfo::TCC_Basic;
418 }
419
420 InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
421 const APInt &Imm, Type *Ty) const {
422 return 0;
423 }
424
425 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
426 TTI::TargetCostKind CostKind) const {
427 return TTI::TCC_Basic;
428 }
429
430 InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
431 const APInt &Imm, Type *Ty,
432 TTI::TargetCostKind CostKind,
433 Instruction *Inst = nullptr) const {
434 return TTI::TCC_Free;
435 }
436
437 InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
438 const APInt &Imm, Type *Ty,
439 TTI::TargetCostKind CostKind) const {
440 return TTI::TCC_Free;
441 }
442
443 bool preferToKeepConstantsAttached(const Instruction &Inst,
444 const Function &Fn) const {
445 return false;
446 }
447
448 unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }
449
450 unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
451 return Vector ? 1 : 0;
452 };
453
454 const char *getRegisterClassName(unsigned ClassID) const {
455 switch (ClassID) {
456 default:
457 return "Generic::Unknown Register Class";
458 case 0:
459 return "Generic::ScalarRC";
460 case 1:
461 return "Generic::VectorRC";
462 }
463 }
464
465 TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
466 return TypeSize::getFixed(ExactSize: 32);
467 }
468
469 unsigned getMinVectorRegisterBitWidth() const { return 128; }
470
471 std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
472 std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; }
473 bool isVScaleKnownToBeAPowerOfTwo() const { return false; }
474
475 bool
476 shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {
477 return false;
478 }
479
480 ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
481 return ElementCount::get(MinVal: 0, Scalable: IsScalable);
482 }
483
484 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }
485 unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const { return VF; }
486
487 bool shouldConsiderAddressTypePromotion(
488 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
489 AllowPromotionWithoutCommonHeader = false;
490 return false;
491 }
492
493 unsigned getCacheLineSize() const { return 0; }
494 std::optional<unsigned>
495 getCacheSize(TargetTransformInfo::CacheLevel Level) const {
496 switch (Level) {
497 case TargetTransformInfo::CacheLevel::L1D:
498 [[fallthrough]];
499 case TargetTransformInfo::CacheLevel::L2D:
500 return std::nullopt;
501 }
502 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
503 }
504
505 std::optional<unsigned>
506 getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
507 switch (Level) {
508 case TargetTransformInfo::CacheLevel::L1D:
509 [[fallthrough]];
510 case TargetTransformInfo::CacheLevel::L2D:
511 return std::nullopt;
512 }
513
514 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
515 }
516
517 std::optional<unsigned> getMinPageSize() const { return {}; }
518
519 unsigned getPrefetchDistance() const { return 0; }
520 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
521 unsigned NumStridedMemAccesses,
522 unsigned NumPrefetches, bool HasCall) const {
523 return 1;
524 }
525 unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
526 bool enableWritePrefetching() const { return false; }
527 bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }
528
529 unsigned getMaxInterleaveFactor(ElementCount VF) const { return 1; }
530
531 InstructionCost getArithmeticInstrCost(
532 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
533 TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info,
534 ArrayRef<const Value *> Args,
535 const Instruction *CxtI = nullptr) const {
536 // Widenable conditions will eventually lower into constants, so some
537 // operations with them will be trivially optimized away.
538 auto IsWidenableCondition = [](const Value *V) {
539 if (auto *II = dyn_cast<IntrinsicInst>(Val: V))
540 if (II->getIntrinsicID() == Intrinsic::experimental_widenable_condition)
541 return true;
542 return false;
543 };
544 // FIXME: A number of transformation tests seem to require these values
545 // which seems a little odd for how arbitary there are.
546 switch (Opcode) {
547 default:
548 break;
549 case Instruction::FDiv:
550 case Instruction::FRem:
551 case Instruction::SDiv:
552 case Instruction::SRem:
553 case Instruction::UDiv:
554 case Instruction::URem:
555 // FIXME: Unlikely to be true for CodeSize.
556 return TTI::TCC_Expensive;
557 case Instruction::And:
558 case Instruction::Or:
559 if (any_of(Range&: Args, P: IsWidenableCondition))
560 return TTI::TCC_Free;
561 break;
562 }
563
564 // Assume a 3cy latency for fp arithmetic ops.
565 if (CostKind == TTI::TCK_Latency)
566 if (Ty->getScalarType()->isFloatingPointTy())
567 return 3;
568
569 return 1;
570 }
571
572 InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
573 unsigned Opcode1,
574 const SmallBitVector &OpcodeMask,
575 TTI::TargetCostKind CostKind) const {
576 return InstructionCost::getInvalid();
577 }
578
579 InstructionCost
580 getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
581 TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
582 ArrayRef<const Value *> Args = std::nullopt) const {
583 return 1;
584 }
585
586 InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
587 TTI::CastContextHint CCH,
588 TTI::TargetCostKind CostKind,
589 const Instruction *I) const {
590 switch (Opcode) {
591 default:
592 break;
593 case Instruction::IntToPtr: {
594 unsigned SrcSize = Src->getScalarSizeInBits();
595 if (DL.isLegalInteger(Width: SrcSize) &&
596 SrcSize <= DL.getPointerTypeSizeInBits(Dst))
597 return 0;
598 break;
599 }
600 case Instruction::PtrToInt: {
601 unsigned DstSize = Dst->getScalarSizeInBits();
602 if (DL.isLegalInteger(Width: DstSize) &&
603 DstSize >= DL.getPointerTypeSizeInBits(Src))
604 return 0;
605 break;
606 }
607 case Instruction::BitCast:
608 if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
609 // Identity and pointer-to-pointer casts are free.
610 return 0;
611 break;
612 case Instruction::Trunc: {
613 // trunc to a native type is free (assuming the target has compare and
614 // shift-right of the same width).
615 TypeSize DstSize = DL.getTypeSizeInBits(Ty: Dst);
616 if (!DstSize.isScalable() && DL.isLegalInteger(Width: DstSize.getFixedValue()))
617 return 0;
618 break;
619 }
620 }
621 return 1;
622 }
623
624 InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
625 VectorType *VecTy,
626 unsigned Index) const {
627 return 1;
628 }
629
630 InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
631 const Instruction *I = nullptr) const {
632 // A phi would be free, unless we're costing the throughput because it
633 // will require a register.
634 if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
635 return 0;
636 return 1;
637 }
638
639 InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
640 CmpInst::Predicate VecPred,
641 TTI::TargetCostKind CostKind,
642 const Instruction *I) const {
643 return 1;
644 }
645
646 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
647 TTI::TargetCostKind CostKind,
648 unsigned Index, Value *Op0,
649 Value *Op1) const {
650 return 1;
651 }
652
653 InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
654 TTI::TargetCostKind CostKind,
655 unsigned Index) const {
656 return 1;
657 }
658
659 unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
660 const APInt &DemandedDstElts,
661 TTI::TargetCostKind CostKind) {
662 return 1;
663 }
664
665 InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
666 unsigned AddressSpace,
667 TTI::TargetCostKind CostKind,
668 TTI::OperandValueInfo OpInfo,
669 const Instruction *I) const {
670 return 1;
671 }
672
673 InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
674 unsigned AddressSpace,
675 TTI::TargetCostKind CostKind,
676 const Instruction *I) const {
677 return 1;
678 }
679
680 InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
681 Align Alignment, unsigned AddressSpace,
682 TTI::TargetCostKind CostKind) const {
683 return 1;
684 }
685
686 InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
687 const Value *Ptr, bool VariableMask,
688 Align Alignment,
689 TTI::TargetCostKind CostKind,
690 const Instruction *I = nullptr) const {
691 return 1;
692 }
693
694 InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
695 const Value *Ptr, bool VariableMask,
696 Align Alignment,
697 TTI::TargetCostKind CostKind,
698 const Instruction *I = nullptr) const {
699 return InstructionCost::getInvalid();
700 }
701
702 unsigned getInterleavedMemoryOpCost(
703 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
704 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
705 bool UseMaskForCond, bool UseMaskForGaps) const {
706 return 1;
707 }
708
709 InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
710 TTI::TargetCostKind CostKind) const {
711 switch (ICA.getID()) {
712 default:
713 break;
714 case Intrinsic::annotation:
715 case Intrinsic::assume:
716 case Intrinsic::sideeffect:
717 case Intrinsic::pseudoprobe:
718 case Intrinsic::arithmetic_fence:
719 case Intrinsic::dbg_assign:
720 case Intrinsic::dbg_declare:
721 case Intrinsic::dbg_value:
722 case Intrinsic::dbg_label:
723 case Intrinsic::invariant_start:
724 case Intrinsic::invariant_end:
725 case Intrinsic::launder_invariant_group:
726 case Intrinsic::strip_invariant_group:
727 case Intrinsic::is_constant:
728 case Intrinsic::lifetime_start:
729 case Intrinsic::lifetime_end:
730 case Intrinsic::experimental_noalias_scope_decl:
731 case Intrinsic::objectsize:
732 case Intrinsic::ptr_annotation:
733 case Intrinsic::var_annotation:
734 case Intrinsic::experimental_gc_result:
735 case Intrinsic::experimental_gc_relocate:
736 case Intrinsic::coro_alloc:
737 case Intrinsic::coro_begin:
738 case Intrinsic::coro_free:
739 case Intrinsic::coro_end:
740 case Intrinsic::coro_frame:
741 case Intrinsic::coro_size:
742 case Intrinsic::coro_align:
743 case Intrinsic::coro_suspend:
744 case Intrinsic::coro_subfn_addr:
745 case Intrinsic::threadlocal_address:
746 case Intrinsic::experimental_widenable_condition:
747 case Intrinsic::ssa_copy:
748 // These intrinsics don't actually represent code after lowering.
749 return 0;
750 }
751 return 1;
752 }
753
754 InstructionCost getCallInstrCost(Function *F, Type *RetTy,
755 ArrayRef<Type *> Tys,
756 TTI::TargetCostKind CostKind) const {
757 return 1;
758 }
759
760 // Assume that we have a register of the right size for the type.
761 unsigned getNumberOfParts(Type *Tp) const { return 1; }
762
763 InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *,
764 const SCEV *) const {
765 return 0;
766 }
767
768 InstructionCost getArithmeticReductionCost(unsigned, VectorType *,
769 std::optional<FastMathFlags> FMF,
770 TTI::TargetCostKind) const {
771 return 1;
772 }
773
774 InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *,
775 FastMathFlags,
776 TTI::TargetCostKind) const {
777 return 1;
778 }
779
780 InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
781 Type *ResTy, VectorType *Ty,
782 FastMathFlags FMF,
783 TTI::TargetCostKind CostKind) const {
784 return 1;
785 }
786
787 InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
788 VectorType *Ty,
789 TTI::TargetCostKind CostKind) const {
790 return 1;
791 }
792
793 InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
794 return 0;
795 }
796
797 bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const {
798 return false;
799 }
800
801 unsigned getAtomicMemIntrinsicMaxElementSize() const {
802 // Note for overrides: You must ensure for all element unordered-atomic
803 // memory intrinsics that all power-of-2 element sizes up to, and
804 // including, the return value of this method have a corresponding
805 // runtime lib call. These runtime lib call definitions can be found
806 // in RuntimeLibcalls.h
807 return 0;
808 }
809
810 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
811 Type *ExpectedType) const {
812 return nullptr;
813 }
814
815 Type *
816 getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
817 unsigned SrcAddrSpace, unsigned DestAddrSpace,
818 unsigned SrcAlign, unsigned DestAlign,
819 std::optional<uint32_t> AtomicElementSize) const {
820 return AtomicElementSize ? Type::getIntNTy(C&: Context, N: *AtomicElementSize * 8)
821 : Type::getInt8Ty(C&: Context);
822 }
823
824 void getMemcpyLoopResidualLoweringType(
825 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
826 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
827 unsigned SrcAlign, unsigned DestAlign,
828 std::optional<uint32_t> AtomicCpySize) const {
829 unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
830 Type *OpType = Type::getIntNTy(C&: Context, N: OpSizeInBytes * 8);
831 for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
832 OpsOut.push_back(Elt: OpType);
833 }
834
835 bool areInlineCompatible(const Function *Caller,
836 const Function *Callee) const {
837 return (Caller->getFnAttribute(Kind: "target-cpu") ==
838 Callee->getFnAttribute(Kind: "target-cpu")) &&
839 (Caller->getFnAttribute(Kind: "target-features") ==
840 Callee->getFnAttribute(Kind: "target-features"));
841 }
842
843 unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
844 unsigned DefaultCallPenalty) const {
845 return DefaultCallPenalty;
846 }
847
848 bool areTypesABICompatible(const Function *Caller, const Function *Callee,
849 const ArrayRef<Type *> &Types) const {
850 return (Caller->getFnAttribute(Kind: "target-cpu") ==
851 Callee->getFnAttribute(Kind: "target-cpu")) &&
852 (Caller->getFnAttribute(Kind: "target-features") ==
853 Callee->getFnAttribute(Kind: "target-features"));
854 }
855
856 bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
857 const DataLayout &DL) const {
858 return false;
859 }
860
861 bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
862 const DataLayout &DL) const {
863 return false;
864 }
865
866 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }
867
868 bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }
869
870 bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }
871
872 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
873 unsigned AddrSpace) const {
874 return true;
875 }
876
877 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
878 unsigned AddrSpace) const {
879 return true;
880 }
881
882 bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
883 ElementCount VF) const {
884 return true;
885 }
886
887 bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; }
888
889 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
890 unsigned ChainSizeInBytes,
891 VectorType *VecTy) const {
892 return VF;
893 }
894
895 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
896 unsigned ChainSizeInBytes,
897 VectorType *VecTy) const {
898 return VF;
899 }
900
901 bool preferInLoopReduction(unsigned Opcode, Type *Ty,
902 TTI::ReductionFlags Flags) const {
903 return false;
904 }
905
906 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
907 TTI::ReductionFlags Flags) const {
908 return false;
909 }
910
911 bool preferEpilogueVectorization() const {
912 return true;
913 }
914
915 bool shouldExpandReduction(const IntrinsicInst *II) const { return true; }
916
917 unsigned getGISelRematGlobalCost() const { return 1; }
918
919 unsigned getMinTripCountTailFoldingThreshold() const { return 0; }
920
921 bool supportsScalableVectors() const { return false; }
922
923 bool enableScalableVectorization() const { return false; }
924
925 bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
926 Align Alignment) const {
927 return false;
928 }
929
930 TargetTransformInfo::VPLegalization
931 getVPLegalizationStrategy(const VPIntrinsic &PI) const {
932 return TargetTransformInfo::VPLegalization(
933 /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard,
934 /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
935 }
936
937 bool hasArmWideBranch(bool) const { return false; }
938
939 unsigned getMaxNumArgs() const { return UINT_MAX; }
940
941protected:
942 // Obtain the minimum required size to hold the value (without the sign)
943 // In case of a vector it returns the min required size for one element.
944 unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
945 if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
946 const auto *VectorValue = cast<Constant>(Val);
947
948 // In case of a vector need to pick the max between the min
949 // required size for each element
950 auto *VT = cast<FixedVectorType>(Val: Val->getType());
951
952 // Assume unsigned elements
953 isSigned = false;
954
955 // The max required size is the size of the vector element type
956 unsigned MaxRequiredSize =
957 VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();
958
959 unsigned MinRequiredSize = 0;
960 for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
961 if (auto *IntElement =
962 dyn_cast<ConstantInt>(Val: VectorValue->getAggregateElement(Elt: i))) {
963 bool signedElement = IntElement->getValue().isNegative();
964 // Get the element min required size.
965 unsigned ElementMinRequiredSize =
966 IntElement->getValue().getSignificantBits() - 1;
967 // In case one element is signed then all the vector is signed.
968 isSigned |= signedElement;
969 // Save the max required bit size between all the elements.
970 MinRequiredSize = std::max(a: MinRequiredSize, b: ElementMinRequiredSize);
971 } else {
972 // not an int constant element
973 return MaxRequiredSize;
974 }
975 }
976 return MinRequiredSize;
977 }
978
979 if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
980 isSigned = CI->getValue().isNegative();
981 return CI->getValue().getSignificantBits() - 1;
982 }
983
984 if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
985 isSigned = true;
986 return Cast->getSrcTy()->getScalarSizeInBits() - 1;
987 }
988
989 if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
990 isSigned = false;
991 return Cast->getSrcTy()->getScalarSizeInBits();
992 }
993
994 isSigned = false;
995 return Val->getType()->getScalarSizeInBits();
996 }
997
998 bool isStridedAccess(const SCEV *Ptr) const {
999 return Ptr && isa<SCEVAddRecExpr>(Val: Ptr);
1000 }
1001
1002 const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
1003 const SCEV *Ptr) const {
1004 if (!isStridedAccess(Ptr))
1005 return nullptr;
1006 const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Val: Ptr);
1007 return dyn_cast<SCEVConstant>(Val: AddRec->getStepRecurrence(SE&: *SE));
1008 }
1009
1010 bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
1011 int64_t MergeDistance) const {
1012 const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
1013 if (!Step)
1014 return false;
1015 APInt StrideVal = Step->getAPInt();
1016 if (StrideVal.getBitWidth() > 64)
1017 return false;
1018 // FIXME: Need to take absolute value for negative stride case.
1019 return StrideVal.getSExtValue() < MergeDistance;
1020 }
1021};
1022
1023/// CRTP base class for use as a mix-in that aids implementing
1024/// a TargetTransformInfo-compatible class.
1025template <typename T>
1026class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
1027private:
1028 typedef TargetTransformInfoImplBase BaseT;
1029
1030protected:
1031 explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}
1032
1033public:
1034 using BaseT::getGEPCost;
1035
1036 InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
1037 ArrayRef<const Value *> Operands, Type *AccessType,
1038 TTI::TargetCostKind CostKind) {
1039 assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
1040 auto *BaseGV = dyn_cast<GlobalValue>(Val: Ptr->stripPointerCasts());
1041 bool HasBaseReg = (BaseGV == nullptr);
1042
1043 auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
1044 APInt BaseOffset(PtrSizeBits, 0);
1045 int64_t Scale = 0;
1046
1047 auto GTI = gep_type_begin(Op0: PointeeType, A: Operands);
1048 Type *TargetType = nullptr;
1049
1050 // Handle the case where the GEP instruction has a single operand,
1051 // the basis, therefore TargetType is a nullptr.
1052 if (Operands.empty())
1053 return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;
1054
1055 for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
1056 TargetType = GTI.getIndexedType();
1057 // We assume that the cost of Scalar GEP with constant index and the
1058 // cost of Vector GEP with splat constant index are the same.
1059 const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(Val: *I);
1060 if (!ConstIdx)
1061 if (auto Splat = getSplatValue(V: *I))
1062 ConstIdx = dyn_cast<ConstantInt>(Val: Splat);
1063 if (StructType *STy = GTI.getStructTypeOrNull()) {
1064 // For structures the index is always splat or scalar constant
1065 assert(ConstIdx && "Unexpected GEP index");
1066 uint64_t Field = ConstIdx->getZExtValue();
1067 BaseOffset += DL.getStructLayout(Ty: STy)->getElementOffset(Field);
1068 } else {
1069 // If this operand is a scalable type, bail out early.
1070 // TODO: Make isLegalAddressingMode TypeSize aware.
1071 if (TargetType->isScalableTy())
1072 return TTI::TCC_Basic;
1073 int64_t ElementSize =
1074 GTI.getSequentialElementStride(DL).getFixedValue();
1075 if (ConstIdx) {
1076 BaseOffset +=
1077 ConstIdx->getValue().sextOrTrunc(width: PtrSizeBits) * ElementSize;
1078 } else {
1079 // Needs scale register.
1080 if (Scale != 0)
1081 // No addressing mode takes two scale registers.
1082 return TTI::TCC_Basic;
1083 Scale = ElementSize;
1084 }
1085 }
1086 }
1087
1088 // If we haven't been provided a hint, use the target type for now.
1089 //
1090 // TODO: Take a look at potentially removing this: This is *slightly* wrong
1091 // as it's possible to have a GEP with a foldable target type but a memory
1092 // access that isn't foldable. For example, this load isn't foldable on
1093 // RISC-V:
1094 //
1095 // %p = getelementptr i32, ptr %base, i32 42
1096 // %x = load <2 x i32>, ptr %p
1097 if (!AccessType)
1098 AccessType = TargetType;
1099
1100 // If the final address of the GEP is a legal addressing mode for the given
1101 // access type, then we can fold it into its users.
1102 if (static_cast<T *>(this)->isLegalAddressingMode(
1103 AccessType, const_cast<GlobalValue *>(BaseGV),
1104 BaseOffset.sextOrTrunc(width: 64).getSExtValue(), HasBaseReg, Scale,
1105 Ptr->getType()->getPointerAddressSpace()))
1106 return TTI::TCC_Free;
1107
1108 // TODO: Instead of returning TCC_Basic here, we should use
1109 // getArithmeticInstrCost. Or better yet, provide a hook to let the target
1110 // model it.
1111 return TTI::TCC_Basic;
1112 }
1113
1114 InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
1115 const Value *Base,
1116 const TTI::PointersChainInfo &Info,
1117 Type *AccessTy,
1118 TTI::TargetCostKind CostKind) {
1119 InstructionCost Cost = TTI::TCC_Free;
1120 // In the basic model we take into account GEP instructions only
1121 // (although here can come alloca instruction, a value, constants and/or
1122 // constant expressions, PHIs, bitcasts ... whatever allowed to be used as a
1123 // pointer). Typically, if Base is a not a GEP-instruction and all the
1124 // pointers are relative to the same base address, all the rest are
1125 // either GEP instructions, PHIs, bitcasts or constants. When we have same
1126 // base, we just calculate cost of each non-Base GEP as an ADD operation if
1127 // any their index is a non-const.
1128 // If no known dependecies between the pointers cost is calculated as a sum
1129 // of costs of GEP instructions.
1130 for (const Value *V : Ptrs) {
1131 const auto *GEP = dyn_cast<GetElementPtrInst>(Val: V);
1132 if (!GEP)
1133 continue;
1134 if (Info.isSameBase() && V != Base) {
1135 if (GEP->hasAllConstantIndices())
1136 continue;
1137 Cost += static_cast<T *>(this)->getArithmeticInstrCost(
1138 Instruction::Add, GEP->getType(), CostKind,
1139 {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None},
1140 std::nullopt);
1141 } else {
1142 SmallVector<const Value *> Indices(GEP->indices());
1143 Cost += static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
1144 GEP->getPointerOperand(),
1145 Indices, AccessTy, CostKind);
1146 }
1147 }
1148 return Cost;
1149 }
1150
1151 InstructionCost getInstructionCost(const User *U,
1152 ArrayRef<const Value *> Operands,
1153 TTI::TargetCostKind CostKind) {
1154 using namespace llvm::PatternMatch;
1155
1156 auto *TargetTTI = static_cast<T *>(this);
1157 // Handle non-intrinsic calls, invokes, and callbr.
1158 // FIXME: Unlikely to be true for anything but CodeSize.
1159 auto *CB = dyn_cast<CallBase>(Val: U);
1160 if (CB && !isa<IntrinsicInst>(Val: U)) {
1161 if (const Function *F = CB->getCalledFunction()) {
1162 if (!TargetTTI->isLoweredToCall(F))
1163 return TTI::TCC_Basic; // Give a basic cost if it will be lowered
1164
1165 return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
1166 }
1167 // For indirect or other calls, scale cost by number of arguments.
1168 return TTI::TCC_Basic * (CB->arg_size() + 1);
1169 }
1170
1171 Type *Ty = U->getType();
1172 unsigned Opcode = Operator::getOpcode(U);
1173 auto *I = dyn_cast<Instruction>(Val: U);
1174 switch (Opcode) {
1175 default:
1176 break;
1177 case Instruction::Call: {
1178 assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
1179 auto *Intrinsic = cast<IntrinsicInst>(Val: U);
1180 IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
1181 return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
1182 }
1183 case Instruction::Br:
1184 case Instruction::Ret:
1185 case Instruction::PHI:
1186 case Instruction::Switch:
1187 return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
1188 case Instruction::ExtractValue:
1189 case Instruction::Freeze:
1190 return TTI::TCC_Free;
1191 case Instruction::Alloca:
1192 if (cast<AllocaInst>(Val: U)->isStaticAlloca())
1193 return TTI::TCC_Free;
1194 break;
1195 case Instruction::GetElementPtr: {
1196 const auto *GEP = cast<GEPOperator>(Val: U);
1197 Type *AccessType = nullptr;
1198 // For now, only provide the AccessType in the simple case where the GEP
1199 // only has one user.
1200 if (GEP->hasOneUser() && I)
1201 AccessType = I->user_back()->getAccessType();
1202
1203 return TargetTTI->getGEPCost(GEP->getSourceElementType(),
1204 Operands.front(), Operands.drop_front(),
1205 AccessType, CostKind);
1206 }
1207 case Instruction::Add:
1208 case Instruction::FAdd:
1209 case Instruction::Sub:
1210 case Instruction::FSub:
1211 case Instruction::Mul:
1212 case Instruction::FMul:
1213 case Instruction::UDiv:
1214 case Instruction::SDiv:
1215 case Instruction::FDiv:
1216 case Instruction::URem:
1217 case Instruction::SRem:
1218 case Instruction::FRem:
1219 case Instruction::Shl:
1220 case Instruction::LShr:
1221 case Instruction::AShr:
1222 case Instruction::And:
1223 case Instruction::Or:
1224 case Instruction::Xor:
1225 case Instruction::FNeg: {
1226 const TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(V: Operands[0]);
1227 TTI::OperandValueInfo Op2Info;
1228 if (Opcode != Instruction::FNeg)
1229 Op2Info = TTI::getOperandInfo(V: Operands[1]);
1230 return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
1231 Op2Info, Operands, I);
1232 }
1233 case Instruction::IntToPtr:
1234 case Instruction::PtrToInt:
1235 case Instruction::SIToFP:
1236 case Instruction::UIToFP:
1237 case Instruction::FPToUI:
1238 case Instruction::FPToSI:
1239 case Instruction::Trunc:
1240 case Instruction::FPTrunc:
1241 case Instruction::BitCast:
1242 case Instruction::FPExt:
1243 case Instruction::SExt:
1244 case Instruction::ZExt:
1245 case Instruction::AddrSpaceCast: {
1246 Type *OpTy = Operands[0]->getType();
1247 return TargetTTI->getCastInstrCost(
1248 Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
1249 }
1250 case Instruction::Store: {
1251 auto *SI = cast<StoreInst>(Val: U);
1252 Type *ValTy = Operands[0]->getType();
1253 TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(V: Operands[0]);
1254 return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
1255 SI->getPointerAddressSpace(), CostKind,
1256 OpInfo, I);
1257 }
1258 case Instruction::Load: {
1259 // FIXME: Arbitary cost which could come from the backend.
1260 if (CostKind == TTI::TCK_Latency)
1261 return 4;
1262 auto *LI = cast<LoadInst>(Val: U);
1263 Type *LoadType = U->getType();
1264 // If there is a non-register sized type, the cost estimation may expand
1265 // it to be several instructions to load into multiple registers on the
1266 // target. But, if the only use of the load is a trunc instruction to a
1267 // register sized type, the instruction selector can combine these
1268 // instructions to be a single load. So, in this case, we use the
1269 // destination type of the trunc instruction rather than the load to
1270 // accurately estimate the cost of this load instruction.
1271 if (CostKind == TTI::TCK_CodeSize && LI->hasOneUse() &&
1272 !LoadType->isVectorTy()) {
1273 if (const TruncInst *TI = dyn_cast<TruncInst>(Val: *LI->user_begin()))
1274 LoadType = TI->getDestTy();
1275 }
1276 return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
1277 LI->getPointerAddressSpace(), CostKind,
1278 {TTI::OK_AnyValue, TTI::OP_None}, I);
1279 }
1280 case Instruction::Select: {
1281 const Value *Op0, *Op1;
1282 if (match(V: U, P: m_LogicalAnd(L: m_Value(V&: Op0), R: m_Value(V&: Op1))) ||
1283 match(V: U, P: m_LogicalOr(L: m_Value(V&: Op0), R: m_Value(V&: Op1)))) {
1284 // select x, y, false --> x & y
1285 // select x, true, y --> x | y
1286 const auto Op1Info = TTI::getOperandInfo(V: Op0);
1287 const auto Op2Info = TTI::getOperandInfo(V: Op1);
1288 assert(Op0->getType()->getScalarSizeInBits() == 1 &&
1289 Op1->getType()->getScalarSizeInBits() == 1);
1290
1291 SmallVector<const Value *, 2> Operands{Op0, Op1};
1292 return TargetTTI->getArithmeticInstrCost(
1293 match(V: U, P: m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
1294 CostKind, Op1Info, Op2Info, Operands, I);
1295 }
1296 Type *CondTy = Operands[0]->getType();
1297 return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
1298 CmpInst::BAD_ICMP_PREDICATE,
1299 CostKind, I);
1300 }
1301 case Instruction::ICmp:
1302 case Instruction::FCmp: {
1303 Type *ValTy = Operands[0]->getType();
1304 // TODO: Also handle ICmp/FCmp constant expressions.
1305 return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
1306 I ? cast<CmpInst>(Val: I)->getPredicate()
1307 : CmpInst::BAD_ICMP_PREDICATE,
1308 CostKind, I);
1309 }
1310 case Instruction::InsertElement: {
1311 auto *IE = dyn_cast<InsertElementInst>(Val: U);
1312 if (!IE)
1313 return TTI::TCC_Basic; // FIXME
1314 unsigned Idx = -1;
1315 if (auto *CI = dyn_cast<ConstantInt>(Val: Operands[2]))
1316 if (CI->getValue().getActiveBits() <= 32)
1317 Idx = CI->getZExtValue();
1318 return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
1319 }
1320 case Instruction::ShuffleVector: {
1321 auto *Shuffle = dyn_cast<ShuffleVectorInst>(Val: U);
1322 if (!Shuffle)
1323 return TTI::TCC_Basic; // FIXME
1324
1325 auto *VecTy = cast<VectorType>(Val: U->getType());
1326 auto *VecSrcTy = cast<VectorType>(Val: Operands[0]->getType());
1327 int NumSubElts, SubIndex;
1328
1329 if (Shuffle->changesLength()) {
1330 // Treat a 'subvector widening' as a free shuffle.
1331 if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
1332 return 0;
1333
1334 if (Shuffle->isExtractSubvectorMask(Index&: SubIndex))
1335 return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
1336 Shuffle->getShuffleMask(), CostKind,
1337 SubIndex, VecTy, Operands);
1338
1339 if (Shuffle->isInsertSubvectorMask(NumSubElts, Index&: SubIndex))
1340 return TargetTTI->getShuffleCost(
1341 TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(),
1342 CostKind, SubIndex,
1343 FixedVectorType::get(ElementType: VecTy->getScalarType(), NumElts: NumSubElts),
1344 Operands);
1345
1346 int ReplicationFactor, VF;
1347 if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
1348 APInt DemandedDstElts =
1349 APInt::getZero(numBits: Shuffle->getShuffleMask().size());
1350 for (auto I : enumerate(First: Shuffle->getShuffleMask())) {
1351 if (I.value() != PoisonMaskElem)
1352 DemandedDstElts.setBit(I.index());
1353 }
1354 return TargetTTI->getReplicationShuffleCost(
1355 VecSrcTy->getElementType(), ReplicationFactor, VF,
1356 DemandedDstElts, CostKind);
1357 }
1358
1359 return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
1360 }
1361
1362 if (Shuffle->isIdentity())
1363 return 0;
1364
1365 if (Shuffle->isReverse())
1366 return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy,
1367 Shuffle->getShuffleMask(), CostKind, 0,
1368 nullptr, Operands);
1369
1370 if (Shuffle->isSelect())
1371 return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy,
1372 Shuffle->getShuffleMask(), CostKind, 0,
1373 nullptr, Operands);
1374
1375 if (Shuffle->isTranspose())
1376 return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy,
1377 Shuffle->getShuffleMask(), CostKind, 0,
1378 nullptr, Operands);
1379
1380 if (Shuffle->isZeroEltSplat())
1381 return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy,
1382 Shuffle->getShuffleMask(), CostKind, 0,
1383 nullptr, Operands);
1384
1385 if (Shuffle->isSingleSource())
1386 return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
1387 Shuffle->getShuffleMask(), CostKind, 0,
1388 nullptr, Operands);
1389
1390 if (Shuffle->isInsertSubvectorMask(NumSubElts, Index&: SubIndex))
1391 return TargetTTI->getShuffleCost(
1392 TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), CostKind,
1393 SubIndex, FixedVectorType::get(ElementType: VecTy->getScalarType(), NumElts: NumSubElts),
1394 Operands);
1395
1396 if (Shuffle->isSplice(Index&: SubIndex))
1397 return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy,
1398 Shuffle->getShuffleMask(), CostKind,
1399 SubIndex, nullptr, Operands);
1400
1401 return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy,
1402 Shuffle->getShuffleMask(), CostKind, 0,
1403 nullptr, Operands);
1404 }
1405 case Instruction::ExtractElement: {
1406 auto *EEI = dyn_cast<ExtractElementInst>(Val: U);
1407 if (!EEI)
1408 return TTI::TCC_Basic; // FIXME
1409 unsigned Idx = -1;
1410 if (auto *CI = dyn_cast<ConstantInt>(Val: Operands[1]))
1411 if (CI->getValue().getActiveBits() <= 32)
1412 Idx = CI->getZExtValue();
1413 Type *DstTy = Operands[0]->getType();
1414 return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
1415 }
1416 }
1417
1418 // By default, just classify everything as 'basic' or -1 to represent that
1419 // don't know the throughput cost.
1420 return CostKind == TTI::TCK_RecipThroughput ? -1 : TTI::TCC_Basic;
1421 }
1422
1423 bool isExpensiveToSpeculativelyExecute(const Instruction *I) {
1424 auto *TargetTTI = static_cast<T *>(this);
1425 SmallVector<const Value *, 4> Ops(I->operand_values());
1426 InstructionCost Cost = TargetTTI->getInstructionCost(
1427 I, Ops, TargetTransformInfo::TCK_SizeAndLatency);
1428 return Cost >= TargetTransformInfo::TCC_Expensive;
1429 }
1430};
1431} // namespace llvm
1432
1433#endif
1434

source code of llvm/include/llvm/Analysis/TargetTransformInfoImpl.h