//===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines the WebAssembly-specific TargetTransformInfo
/// implementation.
///
//===----------------------------------------------------------------------===//

#include "WebAssemblyTargetTransformInfo.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/Support/Debug.h"
using namespace llvm;

#define DEBUG_TYPE "wasmtti"
22 | TargetTransformInfo::PopcntSupportKind |
23 | WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const { |
24 | assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2" ); |
25 | return TargetTransformInfo::PSK_FastHardware; |
26 | } |
27 | |
28 | unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const { |
29 | unsigned Result = BaseT::getNumberOfRegisters(ClassID); |
30 | |
31 | // For SIMD, use at least 16 registers, as a rough guess. |
32 | bool Vector = (ClassID == 1); |
33 | if (Vector) |
34 | Result = std::max(a: Result, b: 16u); |
35 | |
36 | return Result; |
37 | } |
38 | |
39 | TypeSize WebAssemblyTTIImpl::getRegisterBitWidth( |
40 | TargetTransformInfo::RegisterKind K) const { |
41 | switch (K) { |
42 | case TargetTransformInfo::RGK_Scalar: |
43 | return TypeSize::getFixed(ExactSize: 64); |
44 | case TargetTransformInfo::RGK_FixedWidthVector: |
45 | return TypeSize::getFixed(ExactSize: getST()->hasSIMD128() ? 128 : 64); |
46 | case TargetTransformInfo::RGK_ScalableVector: |
47 | return TypeSize::getScalable(MinimumSize: 0); |
48 | } |
49 | |
50 | llvm_unreachable("Unsupported register kind" ); |
51 | } |
52 | |
53 | InstructionCost WebAssemblyTTIImpl::getArithmeticInstrCost( |
54 | unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, |
55 | TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, |
56 | ArrayRef<const Value *> Args, |
57 | const Instruction *CxtI) { |
58 | |
59 | InstructionCost Cost = |
60 | BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost( |
61 | Opcode, Ty, CostKind, Opd1Info: Op1Info, Opd2Info: Op2Info); |
62 | |
63 | if (auto *VTy = dyn_cast<VectorType>(Val: Ty)) { |
64 | switch (Opcode) { |
65 | case Instruction::LShr: |
66 | case Instruction::AShr: |
67 | case Instruction::Shl: |
68 | // SIMD128's shifts currently only accept a scalar shift count. For each |
69 | // element, we'll need to extract, op, insert. The following is a rough |
70 | // approximation. |
71 | if (!Op2Info.isUniform()) |
72 | Cost = |
73 | cast<FixedVectorType>(Val: VTy)->getNumElements() * |
74 | (TargetTransformInfo::TCC_Basic + |
75 | getArithmeticInstrCost(Opcode, Ty: VTy->getElementType(), CostKind) + |
76 | TargetTransformInfo::TCC_Basic); |
77 | break; |
78 | } |
79 | } |
80 | return Cost; |
81 | } |
82 | |
83 | InstructionCost |
84 | WebAssemblyTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, |
85 | TTI::TargetCostKind CostKind, |
86 | unsigned Index, Value *Op0, Value *Op1) { |
87 | InstructionCost Cost = BasicTTIImplBase::getVectorInstrCost( |
88 | Opcode, Val, CostKind, Index, Op0, Op1); |
89 | |
90 | // SIMD128's insert/extract currently only take constant indices. |
91 | if (Index == -1u) |
92 | return Cost + 25 * TargetTransformInfo::TCC_Expensive; |
93 | |
94 | return Cost; |
95 | } |
96 | |
97 | bool WebAssemblyTTIImpl::areInlineCompatible(const Function *Caller, |
98 | const Function *Callee) const { |
99 | // Allow inlining only when the Callee has a subset of the Caller's |
100 | // features. In principle, we should be able to inline regardless of any |
101 | // features because WebAssembly supports features at module granularity, not |
102 | // function granularity, but without this restriction it would be possible for |
103 | // a module to "forget" about features if all the functions that used them |
104 | // were inlined. |
105 | const TargetMachine &TM = getTLI()->getTargetMachine(); |
106 | |
107 | const FeatureBitset &CallerBits = |
108 | TM.getSubtargetImpl(*Caller)->getFeatureBits(); |
109 | const FeatureBitset &CalleeBits = |
110 | TM.getSubtargetImpl(*Callee)->getFeatureBits(); |
111 | |
112 | return (CallerBits & CalleeBits) == CalleeBits; |
113 | } |
114 | |
115 | void WebAssemblyTTIImpl::( |
116 | Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, |
117 | OptimizationRemarkEmitter *ORE) const { |
118 | // Scan the loop: don't unroll loops with calls. This is a standard approach |
119 | // for most (all?) targets. |
120 | for (BasicBlock *BB : L->blocks()) |
121 | for (Instruction &I : *BB) |
122 | if (isa<CallInst>(Val: I) || isa<InvokeInst>(Val: I)) |
123 | if (const Function *F = cast<CallBase>(Val&: I).getCalledFunction()) |
124 | if (isLoweredToCall(F)) |
125 | return; |
126 | |
127 | // The chosen threshold is within the range of 'LoopMicroOpBufferSize' of |
128 | // the various microarchitectures that use the BasicTTI implementation and |
129 | // has been selected through heuristics across multiple cores and runtimes. |
130 | UP.Partial = UP.Runtime = UP.UpperBound = true; |
131 | UP.PartialThreshold = 30; |
132 | |
133 | // Avoid unrolling when optimizing for size. |
134 | UP.OptSizeThreshold = 0; |
135 | UP.PartialOptSizeThreshold = 0; |
136 | |
137 | // Set number of instructions optimized when "back edge" |
138 | // becomes "fall through" to default value of 2. |
139 | UP.BEInsns = 2; |
140 | } |
141 | |
142 | bool WebAssemblyTTIImpl::supportsTailCalls() const { |
143 | return getST()->hasTailCall(); |
144 | } |
145 | |