1//===- FunctionSpecialization.h - Function Specialization -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Overview:
10// ---------
11// Function Specialization is a transformation which propagates the constant
12// parameters of a function call from the caller to the callee. It is part of
13// the Inter-Procedural Sparse Conditional Constant Propagation (IPSCCP) pass.
14// The transformation runs iteratively a number of times which is controlled
15// by the option `funcspec-max-iters`. Running it multiple times is needed
16// for specializing recursive functions, but also exposes new opportunities
17// arising from specializations which return constant values or contain calls
18// which can be specialized.
19//
20// Function Specialization supports propagating constant parameters like
21// function pointers, literal constants and addresses of global variables.
22// By propagating function pointers, indirect calls become direct calls. This
23// exposes inlining opportunities which we would have otherwise missed. That's
24// why function specialization is run before the inliner in the optimization
25// pipeline; that is by design.
26//
27// Cost Model:
28// -----------
29// The cost model facilitates a utility for estimating the specialization bonus
30// from propagating a constant argument. This is the InstCostVisitor, a class
31// that inherits from the InstVisitor. The bonus itself is expressed as codesize
32// and latency savings. Codesize savings means the amount of code that becomes
33// dead in the specialization from propagating the constant, whereas latency
34// savings represents the cycles we are saving from replacing instructions with
35// constant values. The InstCostVisitor overrides a set of `visit*` methods to
36// be able to handle different types of instructions. These attempt to constant-
37// fold the instruction in which case a constant is returned and propagated
38// further.
39//
40// Function pointers are not handled by the InstCostVisitor. They are treated
41// separately as they could expose inlining opportunities via indirect call
42// promotion. The inlining bonus contributes to the total specialization score.
43//
44// For a specialization to be profitable its bonus needs to exceed a minimum
45// threshold. There are three options for controlling the threshold which are
46// expressed as percentages of the original function size:
47// * funcspec-min-codesize-savings
48// * funcspec-min-latency-savings
49// * funcspec-min-inlining-bonus
50// There's also an option for controlling the codesize growth from recursive
51// specializations. That is `funcspec-max-codesize-growth`.
52//
53// Once we have all the potential specializations with their score we need to
54// choose the best ones, which fit in the module specialization budget. That
55// is controlled by the option `funcspec-max-clones`. To find the best `NSpec`
56// specializations we use a max-heap. For more details refer to D139346.
57//
58// Ideas:
59// ------
60// - With a function specialization attribute for arguments, we could have
61// a direct way to steer function specialization, avoiding the cost-model,
62// and thus control compile-times / code-size.
63//
64// - Perhaps a post-inlining function specialization pass could be more
65// aggressive on literal constants.
66//
67// References:
68// -----------
69// 2021 LLVM Dev Mtg “Introducing function specialisation, and can we enable
70// it by default?”, https://www.youtube.com/watch?v=zJiCjeXgV5Q
71//
72//===----------------------------------------------------------------------===//
73
74#ifndef LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H
75#define LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H
76
77#include "llvm/Analysis/BlockFrequencyInfo.h"
78#include "llvm/Analysis/CodeMetrics.h"
79#include "llvm/Analysis/InlineCost.h"
80#include "llvm/Analysis/TargetTransformInfo.h"
81#include "llvm/IR/InstVisitor.h"
82#include "llvm/Transforms/Scalar/SCCP.h"
83#include "llvm/Transforms/Utils/Cloning.h"
84#include "llvm/Transforms/Utils/SCCPSolver.h"
85#include "llvm/Transforms/Utils/SizeOpts.h"
86
87namespace llvm {
88// Map of potential specializations for each function. The FunctionSpecializer
89// keeps the discovered specialisation opportunities for the module in a single
90// vector, where the specialisations of each function form a contiguous range.
91// This map's value is the beginning and the end of that range.
92using SpecMap = DenseMap<Function *, std::pair<unsigned, unsigned>>;
93
94// Just a shorter abbreviation to improve indentation.
95using Cost = InstructionCost;
96
97// Map of known constants found during the specialization bonus estimation.
98using ConstMap = DenseMap<Value *, Constant *>;
99
100// Specialization signature, used to uniquely designate a specialization within
101// a function.
102struct SpecSig {
103 // Hashing support, used to distinguish between ordinary, empty, or tombstone
104 // keys.
105 unsigned Key = 0;
106 SmallVector<ArgInfo, 4> Args;
107
108 bool operator==(const SpecSig &Other) const {
109 if (Key != Other.Key)
110 return false;
111 return Args == Other.Args;
112 }
113
114 friend hash_code hash_value(const SpecSig &S) {
115 return hash_combine(args: hash_value(value: S.Key),
116 args: hash_combine_range(first: S.Args.begin(), last: S.Args.end()));
117 }
118};
119
120// Specialization instance.
121struct Spec {
122 // Original function.
123 Function *F;
124
125 // Cloned function, a specialized version of the original one.
126 Function *Clone = nullptr;
127
128 // Specialization signature.
129 SpecSig Sig;
130
131 // Profitability of the specialization.
132 unsigned Score;
133
134 // List of call sites, matching this specialization.
135 SmallVector<CallBase *> CallSites;
136
137 Spec(Function *F, const SpecSig &S, unsigned Score)
138 : F(F), Sig(S), Score(Score) {}
139 Spec(Function *F, const SpecSig &&S, unsigned Score)
140 : F(F), Sig(S), Score(Score) {}
141};
142
143struct Bonus {
144 unsigned CodeSize = 0;
145 unsigned Latency = 0;
146
147 Bonus() = default;
148
149 Bonus(Cost CodeSize, Cost Latency) {
150 int64_t Sz = *CodeSize.getValue();
151 int64_t Ltc = *Latency.getValue();
152
153 assert(Sz >= 0 && Ltc >= 0 && "CodeSize and Latency cannot be negative");
154 // It is safe to down cast since we know the arguments
155 // cannot be negative and Cost is of type int64_t.
156 this->CodeSize = static_cast<unsigned>(Sz);
157 this->Latency = static_cast<unsigned>(Ltc);
158 }
159
160 Bonus &operator+=(const Bonus RHS) {
161 CodeSize += RHS.CodeSize;
162 Latency += RHS.Latency;
163 return *this;
164 }
165
166 Bonus operator+(const Bonus RHS) const {
167 return Bonus(CodeSize + RHS.CodeSize, Latency + RHS.Latency);
168 }
169
170 bool operator==(const Bonus RHS) const {
171 return CodeSize == RHS.CodeSize && Latency == RHS.Latency;
172 }
173};
174
175class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
176 const DataLayout &DL;
177 BlockFrequencyInfo &BFI;
178 TargetTransformInfo &TTI;
179 SCCPSolver &Solver;
180
181 ConstMap KnownConstants;
182 // Basic blocks known to be unreachable after constant propagation.
183 DenseSet<BasicBlock *> DeadBlocks;
184 // PHI nodes we have visited before.
185 DenseSet<Instruction *> VisitedPHIs;
186 // PHI nodes we have visited once without successfully constant folding them.
187 // Once the InstCostVisitor has processed all the specialization arguments,
188 // it should be possible to determine whether those PHIs can be folded
189 // (some of their incoming values may have become constant or dead).
190 SmallVector<Instruction *> PendingPHIs;
191
192 ConstMap::iterator LastVisited;
193
194public:
195 InstCostVisitor(const DataLayout &DL, BlockFrequencyInfo &BFI,
196 TargetTransformInfo &TTI, SCCPSolver &Solver)
197 : DL(DL), BFI(BFI), TTI(TTI), Solver(Solver) {}
198
199 bool isBlockExecutable(BasicBlock *BB) {
200 return Solver.isBlockExecutable(BB) && !DeadBlocks.contains(V: BB);
201 }
202
203 Bonus getSpecializationBonus(Argument *A, Constant *C);
204
205 Bonus getBonusFromPendingPHIs();
206
207private:
208 friend class InstVisitor<InstCostVisitor, Constant *>;
209
210 static bool canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ,
211 DenseSet<BasicBlock *> &DeadBlocks);
212
213 Bonus getUserBonus(Instruction *User, Value *Use = nullptr,
214 Constant *C = nullptr);
215
216 Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList);
217 Cost estimateSwitchInst(SwitchInst &I);
218 Cost estimateBranchInst(BranchInst &I);
219
220 // Transitively Incoming Values (TIV) is a set of Values that can "feed" a
221 // value to the initial PHI-node. It is defined like this:
222 //
223 // * the initial PHI-node belongs to TIV.
224 //
225 // * for every PHI-node in TIV, its operands belong to TIV
226 //
227 // If TIV for the initial PHI-node (P) contains more than one constant or a
228 // value that is not a PHI-node, then P cannot be folded to a constant.
229 //
230 // As soon as we detect these cases, we bail, without constructing the
231 // full TIV.
232 // Otherwise P can be folded to the one constant in TIV.
233 bool discoverTransitivelyIncomingValues(Constant *Const, PHINode *Root,
234 DenseSet<PHINode *> &TransitivePHIs);
235
236 Constant *visitInstruction(Instruction &I) { return nullptr; }
237 Constant *visitPHINode(PHINode &I);
238 Constant *visitFreezeInst(FreezeInst &I);
239 Constant *visitCallBase(CallBase &I);
240 Constant *visitLoadInst(LoadInst &I);
241 Constant *visitGetElementPtrInst(GetElementPtrInst &I);
242 Constant *visitSelectInst(SelectInst &I);
243 Constant *visitCastInst(CastInst &I);
244 Constant *visitCmpInst(CmpInst &I);
245 Constant *visitUnaryOperator(UnaryOperator &I);
246 Constant *visitBinaryOperator(BinaryOperator &I);
247};
248
249class FunctionSpecializer {
250
251 /// The IPSCCP Solver.
252 SCCPSolver &Solver;
253
254 Module &M;
255
256 /// Analysis manager, needed to invalidate analyses.
257 FunctionAnalysisManager *FAM;
258
259 /// Analyses used to help determine if a function should be specialized.
260 std::function<BlockFrequencyInfo &(Function &)> GetBFI;
261 std::function<const TargetLibraryInfo &(Function &)> GetTLI;
262 std::function<TargetTransformInfo &(Function &)> GetTTI;
263 std::function<AssumptionCache &(Function &)> GetAC;
264
265 SmallPtrSet<Function *, 32> Specializations;
266 SmallPtrSet<Function *, 32> FullySpecialized;
267 DenseMap<Function *, CodeMetrics> FunctionMetrics;
268 DenseMap<Function *, unsigned> FunctionGrowth;
269 unsigned NGlobals = 0;
270
271public:
272 FunctionSpecializer(
273 SCCPSolver &Solver, Module &M, FunctionAnalysisManager *FAM,
274 std::function<BlockFrequencyInfo &(Function &)> GetBFI,
275 std::function<const TargetLibraryInfo &(Function &)> GetTLI,
276 std::function<TargetTransformInfo &(Function &)> GetTTI,
277 std::function<AssumptionCache &(Function &)> GetAC)
278 : Solver(Solver), M(M), FAM(FAM), GetBFI(GetBFI), GetTLI(GetTLI),
279 GetTTI(GetTTI), GetAC(GetAC) {}
280
281 ~FunctionSpecializer();
282
283 bool run();
284
285 InstCostVisitor getInstCostVisitorFor(Function *F) {
286 auto &BFI = GetBFI(*F);
287 auto &TTI = GetTTI(*F);
288 return InstCostVisitor(M.getDataLayout(), BFI, TTI, Solver);
289 }
290
291private:
292 Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call);
293
294 /// A constant stack value is an AllocaInst that has a single constant
295 /// value stored to it. Return this constant if such an alloca stack value
296 /// is a function argument.
297 Constant *getConstantStackValue(CallInst *Call, Value *Val);
298
299 /// See if there are any new constant values for the callers of \p F via
300 /// stack variables and promote them to global variables.
301 void promoteConstantStackValues(Function *F);
302
303 /// Clean up fully specialized functions.
304 void removeDeadFunctions();
305
306 /// Remove any ssa_copy intrinsics that may have been introduced.
307 void cleanUpSSA();
308
309 /// @brief Find potential specialization opportunities.
310 /// @param F Function to specialize
311 /// @param FuncSize Cost of specializing a function.
312 /// @param AllSpecs A vector to add potential specializations to.
313 /// @param SM A map for a function's specialisation range
314 /// @return True, if any potential specializations were found
315 bool findSpecializations(Function *F, unsigned FuncSize,
316 SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM);
317
318 /// Compute the inlining bonus for replacing argument \p A with constant \p C.
319 unsigned getInliningBonus(Argument *A, Constant *C);
320
321 bool isCandidateFunction(Function *F);
322
323 /// @brief Create a specialization of \p F and prime the SCCPSolver
324 /// @param F Function to specialize
325 /// @param S Which specialization to create
326 /// @return The new, cloned function
327 Function *createSpecialization(Function *F, const SpecSig &S);
328
329 /// Determine if it is possible to specialise the function for constant values
330 /// of the formal parameter \p A.
331 bool isArgumentInteresting(Argument *A);
332
333 /// Check if the value \p V (an actual argument) is a constant or can only
334 /// have a constant value. Return that constant.
335 Constant *getCandidateConstant(Value *V);
336
337 /// @brief Find and update calls to \p F, which match a specialization
338 /// @param F Orginal function
339 /// @param Begin Start of a range of possibly matching specialisations
340 /// @param End End of a range (exclusive) of possibly matching specialisations
341 void updateCallSites(Function *F, const Spec *Begin, const Spec *End);
342};
343} // namespace llvm
344
345#endif // LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H
346

source code of llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h