//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include <optional>
#include <utility>

namespace llvm {

class Function;

/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
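/// The defaults here are deliberately conservative: most legality queries
/// answer "no" and most cost queries return a nominal cheap cost, so a
/// derived class only needs to override the hooks its target can answer
/// more precisely.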
class TargetTransformInfoImplBase {
protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) = default;
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  const DataLayout &getDataLayout() const { return DL; }

  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands, Type *AccessType,
                             TTI::TargetCostKind CostKind) const {
    // In the basic model, we just assume that all-constant GEPs will be folded
    // into their uses via addressing modes.
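    // For example, a GEP whose index operands are all constants is TCC_Free,
    // while `getelementptr i32, ptr %p, i64 %i` (variable index) costs
    // TCC_Basic.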
    for (const Value *Operand : Operands)
      if (!isa<Constant>(Operand))
        return TTI::TCC_Basic;

    return TTI::TCC_Free;
  }

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) const {
    (void)PSI;
    (void)BFI;
    JTSize = 0;
    return SI.getNumCases();
  }

  unsigned getInliningThresholdMultiplier() const { return 1; }
  unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const { return 8; }
  unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const {
    return 8;
  }
  unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }
  unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const {
    return 0;
  }

  int getInlinerVectorBonusPercent() const { return 150; }

  InstructionCost getMemcpyCost(const Instruction *I) const {
    return TTI::TCC_Expensive;
  }

  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const {
    return 64;
  }

  // Although this default value is arbitrary, it is not random. It is assumed
  // that a condition that evaluates the same way by a higher percentage than
  // this is best represented as control flow. Therefore, the default value N
  // should be set such that the win from N% correct executions is greater than
  // the loss from (100 - N)% mispredicted executions for the majority of
  // intended targets.
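  // For example, with the 99/100 default below, a branch counts as
  // predictable only if it is expected to go the same way in at least 99% of
  // executions; anything less biased is a candidate for branchless lowering.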
  BranchProbability getPredictableBranchThreshold() const {
    return BranchProbability(99, 100);
  }

  bool hasBranchDivergence(const Function *F = nullptr) const { return false; }

  bool isSourceOfDivergence(const Value *V) const { return false; }

  bool isAlwaysUniform(const Value *V) const { return false; }

  bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
    return false;
  }

  bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const {
    return true;
  }

  unsigned getFlatAddressSpace() const { return -1; }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const {
    return false;
  }

  bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
    return AS == 0;
  }

  unsigned getAssumedAddrSpace(const Value *V) const { return -1; }

  bool isSingleThreaded() const { return false; }

  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const {
    return std::make_pair(nullptr, -1);
  }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const {
    return nullptr;
  }

  bool isLoweredToCall(const Function *F) const {
    assert(F && "A concrete function must be provided to this routine.");

    // FIXME: These should almost certainly not be handled here, and instead
    // handled with the help of TLI or the target itself. This was largely
    // ported from existing analysis heuristics here so that such refactorings
    // can take place in the future.

    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
        Name == "fmin" || Name == "fminf" || Name == "fminl" ||
        Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
        Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
        Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
      return false;

    // These are all likely to be optimized into something smaller.
    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
        Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
        Name == "llabs")
      return false;

    return true;
  }

  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const {
    return false;
  }

  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const { return false; }

  TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const {
    return TailFoldingStyle::DataWithoutLaneMask;
  }

  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const {
    return std::nullopt;
  }

  std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const {
    return std::nullopt;
  }

  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const {
    return std::nullopt;
  }

  void getUnrollingPreferences(Loop *, ScalarEvolution &,
                               TTI::UnrollingPreferences &,
                               OptimizationRemarkEmitter *) const {}

  void getPeelingPreferences(Loop *, ScalarEvolution &,
                             TTI::PeelingPreferences &) const {}

  bool isLegalAddImmediate(int64_t Imm) const { return false; }

  bool isLegalICmpImmediate(int64_t Imm) const { return false; }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I = nullptr) const {
    // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
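    // That is, [reg] and [reg + reg] are accepted (Scale of 0 or 1, no
    // offset, no global base), while e.g. [reg + 4] or [reg + 2*reg] are not.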
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }

  bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const {
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }

  bool isNumRegsMajorCostOfLSR() const { return true; }

  bool shouldFoldTerminatingConditionAfterLSR() const { return false; }

  bool isProfitableLSRChainElement(Instruction *I) const { return false; }

  bool canMacroFuseCmp() const { return false; }

  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) const {
    return false;
  }

  TTI::AddressingModeKind
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const {
    return TTI::AMK_None;
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalNTStore(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory stores are available for stores
    // that are aligned and have a size that is a power of 2.
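    // For example, a 16-byte <4 x i32> store qualifies only at alignment 16
    // or more, and a 12-byte type never qualifies (12 is not a power of 2).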
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory loads are available for loads that
    // are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
    return false;
  }

  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return false;
  }

  bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const {
    return false;
  }

  bool forceScalarizeMaskedScatter(VectorType *DataType,
                                   Align Alignment) const {
    return false;
  }

  bool isLegalMaskedCompressStore(Type *DataType) const { return false; }

  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                       const SmallBitVector &OpcodeMask) const {
    return false;
  }

  bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }

  bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const {
    return false;
  }

  bool enableOrderedReductions() const { return false; }

  bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
    return false;
  }

  bool prefersVectorizedAddressing() const { return true; }

  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) const {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                              AddrSpace))
      return 0;
    return -1;
  }

  bool LSRWithInstrQueries() const { return false; }

  bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }

  bool isProfitableToHoist(Instruction *I) const { return true; }

  bool useAA() const { return false; }

  bool isTypeLegal(Type *Ty) const { return false; }

  unsigned getRegUsageForType(Type *Ty) const { return 1; }

  bool shouldBuildLookupTables() const { return true; }

  bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; }

  bool shouldBuildRelLookupTables() const { return false; }

  bool useColdCCForColdCall(Function &F) const { return false; }

  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
                                           TTI::TargetCostKind CostKind) const {
    return 0;
  }

  InstructionCost
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) const {
    return 0;
  }

  bool supportsEfficientVectorElementLoadStore() const { return false; }

  bool supportsTailCalls() const { return true; }

  bool supportsTailCallFor(const CallBase *CB) const {
    return supportsTailCalls();
  }

  bool enableAggressiveInterleaving(bool LoopHasReductions) const {
    return false;
  }

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const {
    return {};
  }

  bool enableSelectOptimize() const { return true; }

  bool shouldTreatInstructionLikeSelect(const Instruction *I) {
    // If the select is a logical-and/logical-or then it is better treated as
    // an and/or by the backend.
    using namespace llvm::PatternMatch;
    return isa<SelectInst>(I) &&
           !match(I, m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
                                 m_LogicalOr(m_Value(), m_Value())));
  }

  bool enableInterleavedAccessVectorization() const { return false; }

  bool enableMaskedInterleavedAccessVectorization() const { return false; }

  bool isFPVectorizationPotentiallyUnsafe() const { return false; }

  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, Align Alignment,
                                      unsigned *Fast) const {
    return false;
  }

  TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
    return TTI::PSK_Software;
  }

  bool haveFastSqrt(Type *Ty) const { return false; }

  bool isExpensiveToSpeculativelyExecute(const Instruction *I) { return true; }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }

  InstructionCost getFPOpCost(Type *Ty) const {
    return TargetTransformInfo::TCC_Basic;
  }

  InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                        const APInt &Imm, Type *Ty) const {
    return 0;
  }

  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Basic;
  }

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) const {
    return TTI::TCC_Free;
  }

  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Free;
  }

  bool preferToKeepConstantsAttached(const Instruction &Inst,
                                     const Function &Fn) const {
    return false;
  }

  unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }

  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
    return Vector ? 1 : 0;
  }

  const char *getRegisterClassName(unsigned ClassID) const {
    switch (ClassID) {
    default:
      return "Generic::Unknown Register Class";
    case 0:
      return "Generic::ScalarRC";
    case 1:
      return "Generic::VectorRC";
    }
  }

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }

  unsigned getMinVectorRegisterBitWidth() const { return 128; }

  std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
  std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; }
  bool isVScaleKnownToBeAPowerOfTwo() const { return false; }

  bool
  shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {
    return false;
  }

  ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
    return ElementCount::get(0, IsScalable);
  }

  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }
  unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const { return VF; }

  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }

  unsigned getCacheLineSize() const { return 0; }
  std::optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      [[fallthrough]];
    case TargetTransformInfo::CacheLevel::L2D:
      return std::nullopt;
    }
    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  std::optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      [[fallthrough]];
    case TargetTransformInfo::CacheLevel::L2D:
      return std::nullopt;
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  std::optional<unsigned> getMinPageSize() const { return {}; }

  unsigned getPrefetchDistance() const { return 0; }
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches, bool HasCall) const {
    return 1;
  }
  unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
  bool enableWritePrefetching() const { return false; }
  bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }

  unsigned getMaxInterleaveFactor(ElementCount VF) const { return 1; }

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info,
      ArrayRef<const Value *> Args,
      const Instruction *CxtI = nullptr) const {
    // Widenable conditions will eventually lower into constants, so some
    // operations with them will be trivially optimized away.
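    // For example, `and i1 %c, %wc`, where %wc is produced by
    // @llvm.experimental.widenable.condition, is treated as free below.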
    auto IsWidenableCondition = [](const Value *V) {
      if (auto *II = dyn_cast<IntrinsicInst>(V))
        if (II->getIntrinsicID() == Intrinsic::experimental_widenable_condition)
          return true;
      return false;
    };
    // FIXME: A number of transformation tests seem to require these values
    // which seems a little odd for how arbitrary they are.
    switch (Opcode) {
    default:
      break;
    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      // FIXME: Unlikely to be true for CodeSize.
      return TTI::TCC_Expensive;
    case Instruction::And:
    case Instruction::Or:
      if (any_of(Args, IsWidenableCondition))
        return TTI::TCC_Free;
      break;
    }

    // Assume a 3cy latency for fp arithmetic ops.
    if (CostKind == TTI::TCK_Latency)
      if (Ty->getScalarType()->isFloatingPointTy())
        return 3;

    return 1;
  }

  InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
                                  unsigned Opcode1,
                                  const SmallBitVector &OpcodeMask,
                                  TTI::TargetCostKind CostKind) const {
    return InstructionCost::getInvalid();
  }

  InstructionCost
  getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
                 TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
                 ArrayRef<const Value *> Args = std::nullopt) const {
    return 1;
  }

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I) const {
    switch (Opcode) {
    default:
      break;
    case Instruction::IntToPtr: {
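      // e.g. on a target with 64-bit pointers and a legal i64,
      // `inttoptr i64 %x to ptr` is assumed to fold away and costs nothing.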
      unsigned SrcSize = Src->getScalarSizeInBits();
      if (DL.isLegalInteger(SrcSize) &&
          SrcSize <= DL.getPointerTypeSizeInBits(Dst))
        return 0;
      break;
    }
    case Instruction::PtrToInt: {
      unsigned DstSize = Dst->getScalarSizeInBits();
      if (DL.isLegalInteger(DstSize) &&
          DstSize >= DL.getPointerTypeSizeInBits(Src))
        return 0;
      break;
    }
    case Instruction::BitCast:
      if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return 0;
      break;
    case Instruction::Trunc: {
      // trunc to a native type is free (assuming the target has compare and
      // shift-right of the same width).
      TypeSize DstSize = DL.getTypeSizeInBits(Dst);
      if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedValue()))
        return 0;
      break;
    }
    }
    return 1;
  }

  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy,
                                           unsigned Index) const {
    return 1;
  }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) const {
    // A phi would be free, unless we're costing the throughput because it
    // will require a register.
    if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
      return 0;
    return 1;
  }

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I) const {
    return 1;
  }

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0,
                                     Value *Op1) const {
    return 1;
  }

  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index) const {
    return 1;
  }

  unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                                     const APInt &DemandedDstElts,
                                     TTI::TargetCostKind CostKind) {
    return 1;
  }

  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                  unsigned AddressSpace,
                                  TTI::TargetCostKind CostKind,
                                  TTI::OperandValueInfo OpInfo,
                                  const Instruction *I) const {
    return 1;
  }

  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                    unsigned AddressSpace,
                                    TTI::TargetCostKind CostKind,
                                    const Instruction *I) const {
    return 1;
  }

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) const {
    return 1;
  }

  InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) const {
    return InstructionCost::getInvalid();
  }

  unsigned getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond, bool UseMaskForGaps) const {
    return 1;
  }

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind) const {
    switch (ICA.getID()) {
    default:
      break;
    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
    case Intrinsic::arithmetic_fence:
    case Intrinsic::dbg_assign:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::experimental_noalias_scope_decl:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_align:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_subfn_addr:
    case Intrinsic::threadlocal_address:
    case Intrinsic::experimental_widenable_condition:
    case Intrinsic::ssa_copy:
      // These intrinsics don't actually represent code after lowering.
      return 0;
    }
    return 1;
  }

  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                   ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) const {
    return 1;
  }

  // Assume that we have a register of the right size for the type.
  unsigned getNumberOfParts(Type *Tp) const { return 1; }

  InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                            const SCEV *) const {
    return 0;
  }

  InstructionCost getArithmeticReductionCost(unsigned, VectorType *,
                                             std::optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind) const {
    return 1;
  }

  InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *,
                                         FastMathFlags,
                                         TTI::TargetCostKind) const {
    return 1;
  }

  InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
                                           Type *ResTy, VectorType *Ty,
                                           FastMathFlags FMF,
                                           TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
                                         VectorType *Ty,
                                         TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
    return 0;
  }

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const {
    return false;
  }

  unsigned getAtomicMemIntrinsicMaxElementSize() const {
    // Note for overrides: You must ensure for all element unordered-atomic
    // memory intrinsics that all power-of-2 element sizes up to, and
    // including, the return value of this method have a corresponding
    // runtime lib call. These runtime lib call definitions can be found
    // in RuntimeLibcalls.h
    return 0;
  }

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const {
    return nullptr;
  }

  Type *
  getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                            unsigned SrcAddrSpace, unsigned DestAddrSpace,
                            unsigned SrcAlign, unsigned DestAlign,
                            std::optional<uint32_t> AtomicElementSize) const {
    return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
                             : Type::getInt8Ty(Context);
  }

  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicCpySize) const {
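    // With no atomic element size the residual is lowered byte by byte:
    // e.g. RemainingBytes == 3 appends three i8 types to OpsOut.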
    unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
    Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
    for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
      OpsOut.push_back(OpType);
  }

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
                                unsigned DefaultCallPenalty) const {
    return DefaultCallPenalty;
  }

  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
                          const DataLayout &DL) const {
    return false;
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
                           const DataLayout &DL) const {
    return false;
  }

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }

  bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }

  bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }

  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const {
    return true;
  }

  bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; }

  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const {
    return VF;
  }

  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const {
    return VF;
  }

  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const {
    return false;
  }

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const {
    return false;
  }

  bool preferEpilogueVectorization() const {
    return true;
  }

  bool shouldExpandReduction(const IntrinsicInst *II) const { return true; }

  unsigned getGISelRematGlobalCost() const { return 1; }

  unsigned getMinTripCountTailFoldingThreshold() const { return 0; }

  bool supportsScalableVectors() const { return false; }

  bool enableScalableVectorization() const { return false; }

  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
                             Align Alignment) const {
    return false;
  }

  TargetTransformInfo::VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const {
    return TargetTransformInfo::VPLegalization(
        /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard,
        /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
  }

  bool hasArmWideBranch(bool) const { return false; }

  unsigned getMaxNumArgs() const { return UINT_MAX; }

protected:
  // Obtain the minimum required size to hold the value (without the sign).
  // In case of a vector it returns the min required size for one element.
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      // In case of a vector we need to pick the max between the min
      // required sizes of the elements.
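      // For example, for <2 x i16> <i16 3, i16 -8> the per-element minima are
      // 2 and 3 bits, so the result is 3 with isSigned == true.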
      auto *VT = cast<FixedVectorType>(Val->getType());

      // Assume unsigned elements
      isSigned = false;

      // The max required size is the size of the vector element type
      unsigned MaxRequiredSize =
          VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // Get the element min required size.
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getSignificantBits() - 1;
          // In case one element is signed then all the vector is signed.
          isSigned |= signedElement;
          // Save the max required bit size between all the elements.
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          // not an int constant element
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getSignificantBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    isSigned = false;
    return Val->getType()->getScalarSizeInBits();
  }

  bool isStridedAccess(const SCEV *Ptr) const {
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
  }

  const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
                                            const SCEV *Ptr) const {
    if (!isStridedAccess(Ptr))
      return nullptr;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
    return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
  }

  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                       int64_t MergeDistance) const {
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
    if (!Step)
      return false;
    APInt StrideVal = Step->getAPInt();
    if (StrideVal.getBitWidth() > 64)
      return false;
    // FIXME: Need to take absolute value for negative stride case.
    return StrideVal.getSExtValue() < MergeDistance;
  }
};

/// CRTP base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
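///
/// Calls that a target may want to specialize are dispatched through
/// static_cast<T *>(this), so overriding a hook in the derived class also
/// changes the behavior of the generic helpers below. A minimal sketch,
/// where MyTargetTTIImpl is a hypothetical target class (not part of this
/// file):
///
/// \code
///   class MyTargetTTIImpl
///       : public TargetTransformInfoImplCRTPBase<MyTargetTTIImpl> {
///   public:
///     explicit MyTargetTTIImpl(const DataLayout &DL)
///         : TargetTransformInfoImplCRTPBase(DL) {}
///     // getGEPCost and getInstructionCost below will now consult this
///     // override when checking addressing-mode legality.
///     bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
///                                int64_t BaseOffset, bool HasBaseReg,
///                                int64_t Scale, unsigned AddrSpace,
///                                Instruction *I = nullptr) const {
///       return !BaseGV && BaseOffset >= -4096 && BaseOffset < 4096 &&
///              Scale == 0;
///     }
///   };
/// \endcode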
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
  typedef TargetTransformInfoImplBase BaseT;

protected:
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

public:
  using BaseT::getGEPCost;

  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands, Type *AccessType,
                             TTI::TargetCostKind CostKind) {
    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);
    int64_t Scale = 0;

    auto GTI = gep_type_begin(PointeeType, Operands);
    Type *TargetType = nullptr;

    // Handle the case where the GEP instruction has a single operand,
    // the basis, therefore TargetType is a nullptr.
    if (Operands.empty())
      return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;

    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
      TargetType = GTI.getIndexedType();
      // We assume that the cost of Scalar GEP with constant index and the
      // cost of Vector GEP with splat constant index are the same.
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // For structures the index is always splat or scalar constant
        assert(ConstIdx && "Unexpected GEP index");
        uint64_t Field = ConstIdx->getZExtValue();
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
      } else {
        // If this operand is a scalable type, bail out early.
        // TODO: Make isLegalAddressingMode TypeSize aware.
        if (TargetType->isScalableTy())
          return TTI::TCC_Basic;
        int64_t ElementSize =
            GTI.getSequentialElementStride(DL).getFixedValue();
        if (ConstIdx) {
          BaseOffset +=
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
        } else {
          // Needs scale register.
          if (Scale != 0)
            // No addressing mode takes two scale registers.
            return TTI::TCC_Basic;
          Scale = ElementSize;
        }
      }
    }

    // If we haven't been provided a hint, use the target type for now.
    //
    // TODO: Take a look at potentially removing this: This is *slightly* wrong
    // as it's possible to have a GEP with a foldable target type but a memory
    // access that isn't foldable. For example, this load isn't foldable on
    // RISC-V:
    //
    // %p = getelementptr i32, ptr %base, i32 42
    // %x = load <2 x i32>, ptr %p
    if (!AccessType)
      AccessType = TargetType;

    // If the final address of the GEP is a legal addressing mode for the given
    // access type, then we can fold it into its users.
    if (static_cast<T *>(this)->isLegalAddressingMode(
            AccessType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
            Ptr->getType()->getPointerAddressSpace()))
      return TTI::TCC_Free;

    // TODO: Instead of returning TCC_Basic here, we should use
    // getArithmeticInstrCost. Or better yet, provide a hook to let the target
    // model it.
    return TTI::TCC_Basic;
  }

  InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                       const Value *Base,
                                       const TTI::PointersChainInfo &Info,
                                       Type *AccessTy,
                                       TTI::TargetCostKind CostKind) {
    InstructionCost Cost = TTI::TCC_Free;
    // In the basic model we take into account GEP instructions only (although
    // an alloca instruction, a value, constants and/or constant expressions,
    // PHIs, bitcasts ... whatever is allowed to be used as a pointer may also
    // appear here). Typically, if Base is not a GEP instruction and all the
    // pointers are relative to the same base address, all the rest are either
    // GEP instructions, PHIs, bitcasts or constants. When we have the same
    // base, we just calculate the cost of each non-Base GEP as an ADD
    // operation if any of its indices is a non-constant.
    // If there are no known dependencies between the pointers, the cost is
    // calculated as a sum of costs of the GEP instructions.
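    // For example, with a shared base %b, `gep i8, ptr %b, i64 8` is free
    // (all-constant indices), while `gep i8, ptr %b, i64 %i` costs one Add.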
    for (const Value *V : Ptrs) {
      const auto *GEP = dyn_cast<GetElementPtrInst>(V);
      if (!GEP)
        continue;
      if (Info.isSameBase() && V != Base) {
        if (GEP->hasAllConstantIndices())
          continue;
        Cost += static_cast<T *>(this)->getArithmeticInstrCost(
            Instruction::Add, GEP->getType(), CostKind,
            {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None},
            std::nullopt);
      } else {
        SmallVector<const Value *> Indices(GEP->indices());
        Cost += static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
                                                   GEP->getPointerOperand(),
                                                   Indices, AccessTy, CostKind);
      }
    }
    return Cost;
  }

  InstructionCost getInstructionCost(const User *U,
                                     ArrayRef<const Value *> Operands,
                                     TTI::TargetCostKind CostKind) {
    using namespace llvm::PatternMatch;

    auto *TargetTTI = static_cast<T *>(this);
    // Handle non-intrinsic calls, invokes, and callbr.
    // FIXME: Unlikely to be true for anything but CodeSize.
    auto *CB = dyn_cast<CallBase>(U);
    if (CB && !isa<IntrinsicInst>(U)) {
      if (const Function *F = CB->getCalledFunction()) {
        if (!TargetTTI->isLoweredToCall(F))
          return TTI::TCC_Basic; // Give a basic cost if it will be lowered

        return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
      }
      // For indirect or other calls, scale cost by number of arguments.
      return TTI::TCC_Basic * (CB->arg_size() + 1);
    }

    Type *Ty = U->getType();
    unsigned Opcode = Operator::getOpcode(U);
    auto *I = dyn_cast<Instruction>(U);
    switch (Opcode) {
    default:
      break;
    case Instruction::Call: {
      assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
      auto *Intrinsic = cast<IntrinsicInst>(U);
      IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
      return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
    }
    case Instruction::Br:
    case Instruction::Ret:
    case Instruction::PHI:
    case Instruction::Switch:
      return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
    case Instruction::ExtractValue:
    case Instruction::Freeze:
      return TTI::TCC_Free;
    case Instruction::Alloca:
      if (cast<AllocaInst>(U)->isStaticAlloca())
        return TTI::TCC_Free;
      break;
    case Instruction::GetElementPtr: {
      const auto *GEP = cast<GEPOperator>(U);
      Type *AccessType = nullptr;
      // For now, only provide the AccessType in the simple case where the GEP
      // only has one user.
      if (GEP->hasOneUser() && I)
        AccessType = I->user_back()->getAccessType();

      return TargetTTI->getGEPCost(GEP->getSourceElementType(),
                                   Operands.front(), Operands.drop_front(),
                                   AccessType, CostKind);
    }
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::FNeg: {
      const TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(Operands[0]);
      TTI::OperandValueInfo Op2Info;
      if (Opcode != Instruction::FNeg)
        Op2Info = TTI::getOperandInfo(Operands[1]);
      return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                               Op2Info, Operands, I);
    }
    case Instruction::IntToPtr:
    case Instruction::PtrToInt:
    case Instruction::SIToFP:
    case Instruction::UIToFP:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::Trunc:
    case Instruction::FPTrunc:
    case Instruction::BitCast:
    case Instruction::FPExt:
    case Instruction::SExt:
    case Instruction::ZExt:
    case Instruction::AddrSpaceCast: {
      Type *OpTy = Operands[0]->getType();
      return TargetTTI->getCastInstrCost(
          Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
    }
    case Instruction::Store: {
      auto *SI = cast<StoreInst>(U);
      Type *ValTy = Operands[0]->getType();
      TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(Operands[0]);
      return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
                                        SI->getPointerAddressSpace(), CostKind,
                                        OpInfo, I);
    }
    case Instruction::Load: {
      // FIXME: Arbitrary cost which could come from the backend.
      if (CostKind == TTI::TCK_Latency)
        return 4;
      auto *LI = cast<LoadInst>(U);
      Type *LoadType = U->getType();
      // If there is a non-register sized type, the cost estimation may expand
      // it to be several instructions to load into multiple registers on the
      // target. But, if the only use of the load is a trunc instruction to a
      // register sized type, the instruction selector can combine these
      // instructions to be a single load. So, in this case, we use the
      // destination type of the trunc instruction rather than the load to
      // accurately estimate the cost of this load instruction.
      if (CostKind == TTI::TCK_CodeSize && LI->hasOneUse() &&
          !LoadType->isVectorTy()) {
        if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
          LoadType = TI->getDestTy();
      }
      return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
                                        LI->getPointerAddressSpace(), CostKind,
                                        {TTI::OK_AnyValue, TTI::OP_None}, I);
    }
    case Instruction::Select: {
      const Value *Op0, *Op1;
      if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) ||
          match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
        // select x, y, false --> x & y
        // select x, true, y --> x | y
        const auto Op1Info = TTI::getOperandInfo(Op0);
        const auto Op2Info = TTI::getOperandInfo(Op1);
        assert(Op0->getType()->getScalarSizeInBits() == 1 &&
               Op1->getType()->getScalarSizeInBits() == 1);

        SmallVector<const Value *, 2> Operands{Op0, Op1};
        return TargetTTI->getArithmeticInstrCost(
            match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
            CostKind, Op1Info, Op2Info, Operands, I);
      }
      Type *CondTy = Operands[0]->getType();
      return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
                                           CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::ICmp:
    case Instruction::FCmp: {
      Type *ValTy = Operands[0]->getType();
      // TODO: Also handle ICmp/FCmp constant expressions.
      return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
                                           I ? cast<CmpInst>(I)->getPredicate()
                                             : CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::InsertElement: {
      auto *IE = dyn_cast<InsertElementInst>(U);
      if (!IE)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(Operands[2]))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
    }
    case Instruction::ShuffleVector: {
      auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
      if (!Shuffle)
        return TTI::TCC_Basic; // FIXME

      auto *VecTy = cast<VectorType>(U->getType());
      auto *VecSrcTy = cast<VectorType>(Operands[0]->getType());
      int NumSubElts, SubIndex;

      if (Shuffle->changesLength()) {
        // Treat a 'subvector widening' as a free shuffle.
        if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
          return 0;

        if (Shuffle->isExtractSubvectorMask(SubIndex))
          return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
                                           Shuffle->getShuffleMask(), CostKind,
                                           SubIndex, VecTy, Operands);

        if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
          return TargetTTI->getShuffleCost(
              TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(),
              CostKind, SubIndex,
              FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
              Operands);

        int ReplicationFactor, VF;
        if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
          APInt DemandedDstElts =
              APInt::getZero(Shuffle->getShuffleMask().size());
          for (auto I : enumerate(Shuffle->getShuffleMask())) {
            if (I.value() != PoisonMaskElem)
              DemandedDstElts.setBit(I.index());
          }
          return TargetTTI->getReplicationShuffleCost(
              VecSrcTy->getElementType(), ReplicationFactor, VF,
              DemandedDstElts, CostKind);
        }

        return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
      }

      if (Shuffle->isIdentity())
        return 0;

      if (Shuffle->isReverse())
        return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isSelect())
        return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isTranspose())
        return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isZeroEltSplat())
        return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isSingleSource())
        return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
        return TargetTTI->getShuffleCost(
            TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), CostKind,
            SubIndex, FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
            Operands);

      if (Shuffle->isSplice(SubIndex))
        return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy,
                                         Shuffle->getShuffleMask(), CostKind,
                                         SubIndex, nullptr, Operands);

      return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy,
                                       Shuffle->getShuffleMask(), CostKind, 0,
                                       nullptr, Operands);
    }
    case Instruction::ExtractElement: {
      auto *EEI = dyn_cast<ExtractElementInst>(U);
      if (!EEI)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(Operands[1]))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      Type *DstTy = Operands[0]->getType();
      return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
    }
    }

    // By default, just classify everything as 'basic' or -1 to represent that
    // we don't know the throughput cost.
    return CostKind == TTI::TCK_RecipThroughput ? -1 : TTI::TCC_Basic;
  }

  bool isExpensiveToSpeculativelyExecute(const Instruction *I) {
    auto *TargetTTI = static_cast<T *>(this);
    SmallVector<const Value *, 4> Ops(I->operand_values());
    InstructionCost Cost = TargetTTI->getInstructionCost(
        I, Ops, TargetTransformInfo::TCK_SizeAndLatency);
    return Cost >= TargetTransformInfo::TCC_Expensive;
  }
};
} // namespace llvm

#endif