1 | //===- FunctionSpecialization.h - Function Specialization -----------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Overview: |
10 | // --------- |
11 | // Function Specialization is a transformation which propagates the constant |
12 | // parameters of a function call from the caller to the callee. It is part of |
13 | // the Inter-Procedural Sparse Conditional Constant Propagation (IPSCCP) pass. |
14 | // The transformation runs iteratively a number of times which is controlled |
15 | // by the option `funcspec-max-iters`. Running it multiple times is needed |
16 | // for specializing recursive functions, but also exposes new opportunities |
17 | // arising from specializations which return constant values or contain calls |
18 | // which can be specialized. |
19 | // |
20 | // Function Specialization supports propagating constant parameters like |
21 | // function pointers, literal constants and addresses of global variables. |
22 | // By propagating function pointers, indirect calls become direct calls. This |
23 | // exposes inlining opportunities which we would have otherwise missed. That's |
24 | // why function specialization is run before the inliner in the optimization |
25 | // pipeline; that is by design. |
26 | // |
27 | // Cost Model: |
28 | // ----------- |
29 | // The cost model facilitates a utility for estimating the specialization bonus |
30 | // from propagating a constant argument. This is the InstCostVisitor, a class |
31 | // that inherits from the InstVisitor. The bonus itself is expressed as codesize |
32 | // and latency savings. Codesize savings means the amount of code that becomes |
33 | // dead in the specialization from propagating the constant, whereas latency |
34 | // savings represents the cycles we are saving from replacing instructions with |
35 | // constant values. The InstCostVisitor overrides a set of `visit*` methods to |
36 | // be able to handle different types of instructions. These attempt to constant- |
37 | // fold the instruction in which case a constant is returned and propagated |
38 | // further. |
39 | // |
40 | // Function pointers are not handled by the InstCostVisitor. They are treated |
41 | // separately as they could expose inlining opportunities via indirect call |
42 | // promotion. The inlining bonus contributes to the total specialization score. |
43 | // |
44 | // For a specialization to be profitable its bonus needs to exceed a minimum |
45 | // threshold. There are three options for controlling the threshold which are |
46 | // expressed as percentages of the original function size: |
47 | // * funcspec-min-codesize-savings |
48 | // * funcspec-min-latency-savings |
49 | // * funcspec-min-inlining-bonus |
50 | // There's also an option for controlling the codesize growth from recursive |
51 | // specializations. That is `funcspec-max-codesize-growth`. |
52 | // |
53 | // Once we have all the potential specializations with their score we need to |
54 | // choose the best ones, which fit in the module specialization budget. That |
55 | // is controlled by the option `funcspec-max-clones`. To find the best `NSpec` |
56 | // specializations we use a max-heap. For more details refer to D139346. |
57 | // |
58 | // Ideas: |
59 | // ------ |
60 | // - With a function specialization attribute for arguments, we could have |
61 | // a direct way to steer function specialization, avoiding the cost-model, |
62 | // and thus control compile-times / code-size. |
63 | // |
64 | // - Perhaps a post-inlining function specialization pass could be more |
65 | // aggressive on literal constants. |
66 | // |
67 | // References: |
68 | // ----------- |
69 | // 2021 LLVM Dev Mtg “Introducing function specialisation, and can we enable |
70 | // it by default?”, https://www.youtube.com/watch?v=zJiCjeXgV5Q |
71 | // |
72 | //===----------------------------------------------------------------------===// |
73 | |
74 | #ifndef LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H |
75 | #define LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H |
76 | |
77 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
78 | #include "llvm/Analysis/CodeMetrics.h" |
79 | #include "llvm/Analysis/InlineCost.h" |
80 | #include "llvm/Analysis/TargetTransformInfo.h" |
81 | #include "llvm/IR/InstVisitor.h" |
82 | #include "llvm/Transforms/Scalar/SCCP.h" |
83 | #include "llvm/Transforms/Utils/Cloning.h" |
84 | #include "llvm/Transforms/Utils/SCCPSolver.h" |
85 | #include "llvm/Transforms/Utils/SizeOpts.h" |
86 | |
87 | namespace llvm { |
88 | // Map of potential specializations for each function. The FunctionSpecializer |
89 | // keeps the discovered specialisation opportunities for the module in a single |
90 | // vector, where the specialisations of each function form a contiguous range. |
91 | // This map's value is the beginning and the end of that range. |
92 | using SpecMap = DenseMap<Function *, std::pair<unsigned, unsigned>>; |
93 | |
94 | // Just a shorter abbreviation to improve indentation. |
95 | using Cost = InstructionCost; |
96 | |
97 | // Map of known constants found during the specialization bonus estimation. |
98 | using ConstMap = DenseMap<Value *, Constant *>; |
99 | |
100 | // Specialization signature, used to uniquely designate a specialization within |
101 | // a function. |
102 | struct SpecSig { |
103 | // Hashing support, used to distinguish between ordinary, empty, or tombstone |
104 | // keys. |
105 | unsigned Key = 0; |
106 | SmallVector<ArgInfo, 4> Args; |
107 | |
108 | bool operator==(const SpecSig &Other) const { |
109 | if (Key != Other.Key) |
110 | return false; |
111 | return Args == Other.Args; |
112 | } |
113 | |
114 | friend hash_code hash_value(const SpecSig &S) { |
115 | return hash_combine(args: hash_value(value: S.Key), |
116 | args: hash_combine_range(first: S.Args.begin(), last: S.Args.end())); |
117 | } |
118 | }; |
119 | |
120 | // Specialization instance. |
121 | struct Spec { |
122 | // Original function. |
123 | Function *F; |
124 | |
125 | // Cloned function, a specialized version of the original one. |
126 | Function *Clone = nullptr; |
127 | |
128 | // Specialization signature. |
129 | SpecSig Sig; |
130 | |
131 | // Profitability of the specialization. |
132 | unsigned Score; |
133 | |
134 | // List of call sites, matching this specialization. |
135 | SmallVector<CallBase *> CallSites; |
136 | |
137 | Spec(Function *F, const SpecSig &S, unsigned Score) |
138 | : F(F), Sig(S), Score(Score) {} |
139 | Spec(Function *F, const SpecSig &&S, unsigned Score) |
140 | : F(F), Sig(S), Score(Score) {} |
141 | }; |
142 | |
143 | struct Bonus { |
144 | unsigned CodeSize = 0; |
145 | unsigned Latency = 0; |
146 | |
147 | Bonus() = default; |
148 | |
149 | Bonus(Cost CodeSize, Cost Latency) { |
150 | int64_t Sz = *CodeSize.getValue(); |
151 | int64_t Ltc = *Latency.getValue(); |
152 | |
153 | assert(Sz >= 0 && Ltc >= 0 && "CodeSize and Latency cannot be negative" ); |
154 | // It is safe to down cast since we know the arguments |
155 | // cannot be negative and Cost is of type int64_t. |
156 | this->CodeSize = static_cast<unsigned>(Sz); |
157 | this->Latency = static_cast<unsigned>(Ltc); |
158 | } |
159 | |
160 | Bonus &operator+=(const Bonus RHS) { |
161 | CodeSize += RHS.CodeSize; |
162 | Latency += RHS.Latency; |
163 | return *this; |
164 | } |
165 | |
166 | Bonus operator+(const Bonus RHS) const { |
167 | return Bonus(CodeSize + RHS.CodeSize, Latency + RHS.Latency); |
168 | } |
169 | |
170 | bool operator==(const Bonus RHS) const { |
171 | return CodeSize == RHS.CodeSize && Latency == RHS.Latency; |
172 | } |
173 | }; |
174 | |
175 | class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> { |
176 | const DataLayout &DL; |
177 | BlockFrequencyInfo &BFI; |
178 | TargetTransformInfo &TTI; |
179 | SCCPSolver &Solver; |
180 | |
181 | ConstMap KnownConstants; |
182 | // Basic blocks known to be unreachable after constant propagation. |
183 | DenseSet<BasicBlock *> DeadBlocks; |
184 | // PHI nodes we have visited before. |
185 | DenseSet<Instruction *> VisitedPHIs; |
186 | // PHI nodes we have visited once without successfully constant folding them. |
187 | // Once the InstCostVisitor has processed all the specialization arguments, |
188 | // it should be possible to determine whether those PHIs can be folded |
189 | // (some of their incoming values may have become constant or dead). |
190 | SmallVector<Instruction *> PendingPHIs; |
191 | |
192 | ConstMap::iterator LastVisited; |
193 | |
194 | public: |
195 | InstCostVisitor(const DataLayout &DL, BlockFrequencyInfo &BFI, |
196 | TargetTransformInfo &TTI, SCCPSolver &Solver) |
197 | : DL(DL), BFI(BFI), TTI(TTI), Solver(Solver) {} |
198 | |
199 | bool isBlockExecutable(BasicBlock *BB) { |
200 | return Solver.isBlockExecutable(BB) && !DeadBlocks.contains(V: BB); |
201 | } |
202 | |
203 | Bonus getSpecializationBonus(Argument *A, Constant *C); |
204 | |
205 | Bonus getBonusFromPendingPHIs(); |
206 | |
207 | private: |
208 | friend class InstVisitor<InstCostVisitor, Constant *>; |
209 | |
210 | static bool canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ, |
211 | DenseSet<BasicBlock *> &DeadBlocks); |
212 | |
213 | Bonus getUserBonus(Instruction *User, Value *Use = nullptr, |
214 | Constant *C = nullptr); |
215 | |
216 | Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList); |
217 | Cost estimateSwitchInst(SwitchInst &I); |
218 | Cost estimateBranchInst(BranchInst &I); |
219 | |
220 | // Transitively Incoming Values (TIV) is a set of Values that can "feed" a |
221 | // value to the initial PHI-node. It is defined like this: |
222 | // |
223 | // * the initial PHI-node belongs to TIV. |
224 | // |
225 | // * for every PHI-node in TIV, its operands belong to TIV |
226 | // |
227 | // If TIV for the initial PHI-node (P) contains more than one constant or a |
228 | // value that is not a PHI-node, then P cannot be folded to a constant. |
229 | // |
230 | // As soon as we detect these cases, we bail, without constructing the |
231 | // full TIV. |
232 | // Otherwise P can be folded to the one constant in TIV. |
233 | bool discoverTransitivelyIncomingValues(Constant *Const, PHINode *Root, |
234 | DenseSet<PHINode *> &TransitivePHIs); |
235 | |
236 | Constant *visitInstruction(Instruction &I) { return nullptr; } |
237 | Constant *visitPHINode(PHINode &I); |
238 | Constant *visitFreezeInst(FreezeInst &I); |
239 | Constant *visitCallBase(CallBase &I); |
240 | Constant *visitLoadInst(LoadInst &I); |
241 | Constant *visitGetElementPtrInst(GetElementPtrInst &I); |
242 | Constant *visitSelectInst(SelectInst &I); |
243 | Constant *visitCastInst(CastInst &I); |
244 | Constant *visitCmpInst(CmpInst &I); |
245 | Constant *visitUnaryOperator(UnaryOperator &I); |
246 | Constant *visitBinaryOperator(BinaryOperator &I); |
247 | }; |
248 | |
249 | class FunctionSpecializer { |
250 | |
251 | /// The IPSCCP Solver. |
252 | SCCPSolver &Solver; |
253 | |
254 | Module &M; |
255 | |
256 | /// Analysis manager, needed to invalidate analyses. |
257 | FunctionAnalysisManager *FAM; |
258 | |
259 | /// Analyses used to help determine if a function should be specialized. |
260 | std::function<BlockFrequencyInfo &(Function &)> GetBFI; |
261 | std::function<const TargetLibraryInfo &(Function &)> GetTLI; |
262 | std::function<TargetTransformInfo &(Function &)> GetTTI; |
263 | std::function<AssumptionCache &(Function &)> GetAC; |
264 | |
265 | SmallPtrSet<Function *, 32> Specializations; |
266 | SmallPtrSet<Function *, 32> FullySpecialized; |
267 | DenseMap<Function *, CodeMetrics> FunctionMetrics; |
268 | DenseMap<Function *, unsigned> FunctionGrowth; |
269 | unsigned NGlobals = 0; |
270 | |
271 | public: |
272 | FunctionSpecializer( |
273 | SCCPSolver &Solver, Module &M, FunctionAnalysisManager *FAM, |
274 | std::function<BlockFrequencyInfo &(Function &)> GetBFI, |
275 | std::function<const TargetLibraryInfo &(Function &)> GetTLI, |
276 | std::function<TargetTransformInfo &(Function &)> GetTTI, |
277 | std::function<AssumptionCache &(Function &)> GetAC) |
278 | : Solver(Solver), M(M), FAM(FAM), GetBFI(GetBFI), GetTLI(GetTLI), |
279 | GetTTI(GetTTI), GetAC(GetAC) {} |
280 | |
281 | ~FunctionSpecializer(); |
282 | |
283 | bool run(); |
284 | |
285 | InstCostVisitor getInstCostVisitorFor(Function *F) { |
286 | auto &BFI = GetBFI(*F); |
287 | auto &TTI = GetTTI(*F); |
288 | return InstCostVisitor(M.getDataLayout(), BFI, TTI, Solver); |
289 | } |
290 | |
291 | private: |
292 | Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call); |
293 | |
294 | /// A constant stack value is an AllocaInst that has a single constant |
295 | /// value stored to it. Return this constant if such an alloca stack value |
296 | /// is a function argument. |
297 | Constant *getConstantStackValue(CallInst *Call, Value *Val); |
298 | |
299 | /// See if there are any new constant values for the callers of \p F via |
300 | /// stack variables and promote them to global variables. |
301 | void promoteConstantStackValues(Function *F); |
302 | |
303 | /// Clean up fully specialized functions. |
304 | void removeDeadFunctions(); |
305 | |
306 | /// Remove any ssa_copy intrinsics that may have been introduced. |
307 | void cleanUpSSA(); |
308 | |
309 | /// @brief Find potential specialization opportunities. |
310 | /// @param F Function to specialize |
311 | /// @param FuncSize Cost of specializing a function. |
312 | /// @param AllSpecs A vector to add potential specializations to. |
313 | /// @param SM A map for a function's specialisation range |
314 | /// @return True, if any potential specializations were found |
315 | bool findSpecializations(Function *F, unsigned FuncSize, |
316 | SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM); |
317 | |
318 | /// Compute the inlining bonus for replacing argument \p A with constant \p C. |
319 | unsigned getInliningBonus(Argument *A, Constant *C); |
320 | |
321 | bool isCandidateFunction(Function *F); |
322 | |
323 | /// @brief Create a specialization of \p F and prime the SCCPSolver |
324 | /// @param F Function to specialize |
325 | /// @param S Which specialization to create |
326 | /// @return The new, cloned function |
327 | Function *createSpecialization(Function *F, const SpecSig &S); |
328 | |
329 | /// Determine if it is possible to specialise the function for constant values |
330 | /// of the formal parameter \p A. |
331 | bool isArgumentInteresting(Argument *A); |
332 | |
333 | /// Check if the value \p V (an actual argument) is a constant or can only |
334 | /// have a constant value. Return that constant. |
335 | Constant *getCandidateConstant(Value *V); |
336 | |
337 | /// @brief Find and update calls to \p F, which match a specialization |
338 | /// @param F Orginal function |
339 | /// @param Begin Start of a range of possibly matching specialisations |
340 | /// @param End End of a range (exclusive) of possibly matching specialisations |
341 | void updateCallSites(Function *F, const Spec *Begin, const Spec *End); |
342 | }; |
343 | } // namespace llvm |
344 | |
345 | #endif // LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H |
346 | |