1 | //===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the transformation that promotes indirect calls to |
10 | // conditional direct calls when the indirect-call value profile metadata is |
11 | // available. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "llvm/ADT/ArrayRef.h" |
16 | #include "llvm/ADT/Statistic.h" |
17 | #include "llvm/ADT/StringRef.h" |
18 | #include "llvm/Analysis/IndirectCallPromotionAnalysis.h" |
19 | #include "llvm/Analysis/IndirectCallVisitor.h" |
20 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
21 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
22 | #include "llvm/IR/DiagnosticInfo.h" |
23 | #include "llvm/IR/Function.h" |
24 | #include "llvm/IR/InstrTypes.h" |
25 | #include "llvm/IR/Instructions.h" |
26 | #include "llvm/IR/LLVMContext.h" |
27 | #include "llvm/IR/MDBuilder.h" |
28 | #include "llvm/IR/PassManager.h" |
29 | #include "llvm/IR/ProfDataUtils.h" |
30 | #include "llvm/IR/Value.h" |
31 | #include "llvm/ProfileData/InstrProf.h" |
32 | #include "llvm/Support/Casting.h" |
33 | #include "llvm/Support/CommandLine.h" |
34 | #include "llvm/Support/Debug.h" |
35 | #include "llvm/Support/Error.h" |
36 | #include "llvm/Support/raw_ostream.h" |
37 | #include "llvm/Transforms/Instrumentation.h" |
38 | #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" |
39 | #include "llvm/Transforms/Utils/CallPromotionUtils.h" |
40 | #include <cassert> |
41 | #include <cstdint> |
42 | #include <memory> |
43 | #include <string> |
44 | #include <utility> |
45 | #include <vector> |
46 | |
47 | using namespace llvm; |
48 | |
49 | #define DEBUG_TYPE "pgo-icall-prom" |
50 | |
51 | STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions." ); |
52 | STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites." ); |
53 | |
54 | // Command line option to disable indirect-call promotion with the default as |
55 | // false. This is for debug purpose. |
56 | static cl::opt<bool> DisableICP("disable-icp" , cl::init(Val: false), cl::Hidden, |
57 | cl::desc("Disable indirect call promotion" )); |
58 | |
59 | // Set the cutoff value for the promotion. If the value is other than 0, we |
60 | // stop the transformation once the total number of promotions equals the cutoff |
61 | // value. |
62 | // For debug use only. |
63 | static cl::opt<unsigned> |
64 | ICPCutOff("icp-cutoff" , cl::init(Val: 0), cl::Hidden, |
65 | cl::desc("Max number of promotions for this compilation" )); |
66 | |
67 | // If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped. |
68 | // For debug use only. |
69 | static cl::opt<unsigned> |
70 | ICPCSSkip("icp-csskip" , cl::init(Val: 0), cl::Hidden, |
71 | cl::desc("Skip Callsite up to this number for this compilation" )); |
72 | |
73 | // Set if the pass is called in LTO optimization. The difference for LTO mode |
74 | // is the pass won't prefix the source module name to the internal linkage |
75 | // symbols. |
76 | static cl::opt<bool> ICPLTOMode("icp-lto" , cl::init(Val: false), cl::Hidden, |
77 | cl::desc("Run indirect-call promotion in LTO " |
78 | "mode" )); |
79 | |
80 | // Set if the pass is called in SamplePGO mode. The difference for SamplePGO |
81 | // mode is it will add prof metadatato the created direct call. |
82 | static cl::opt<bool> |
83 | ICPSamplePGOMode("icp-samplepgo" , cl::init(Val: false), cl::Hidden, |
84 | cl::desc("Run indirect-call promotion in SamplePGO mode" )); |
85 | |
86 | // If the option is set to true, only call instructions will be considered for |
87 | // transformation -- invoke instructions will be ignored. |
88 | static cl::opt<bool> |
89 | ICPCallOnly("icp-call-only" , cl::init(Val: false), cl::Hidden, |
90 | cl::desc("Run indirect-call promotion for call instructions " |
91 | "only" )); |
92 | |
93 | // If the option is set to true, only invoke instructions will be considered for |
94 | // transformation -- call instructions will be ignored. |
95 | static cl::opt<bool> ICPInvokeOnly("icp-invoke-only" , cl::init(Val: false), |
96 | cl::Hidden, |
97 | cl::desc("Run indirect-call promotion for " |
98 | "invoke instruction only" )); |
99 | |
100 | // Dump the function level IR if the transformation happened in this |
101 | // function. For debug use only. |
102 | static cl::opt<bool> |
103 | ICPDUMPAFTER("icp-dumpafter" , cl::init(Val: false), cl::Hidden, |
104 | cl::desc("Dump IR after transformation happens" )); |
105 | |
106 | namespace { |
107 | |
108 | // Promote indirect calls to conditional direct calls, keeping track of |
109 | // thresholds. |
110 | class IndirectCallPromoter { |
111 | private: |
112 | Function &F; |
113 | |
114 | // Symtab that maps indirect call profile values to function names and |
115 | // defines. |
116 | InstrProfSymtab *const Symtab; |
117 | |
118 | const bool SamplePGO; |
119 | |
120 | OptimizationRemarkEmitter &ORE; |
121 | |
122 | // A struct that records the direct target and it's call count. |
123 | struct PromotionCandidate { |
124 | Function *const TargetFunction; |
125 | const uint64_t Count; |
126 | |
127 | PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {} |
128 | }; |
129 | |
130 | // Check if the indirect-call call site should be promoted. Return the number |
131 | // of promotions. Inst is the candidate indirect call, ValueDataRef |
132 | // contains the array of value profile data for profiled targets, |
133 | // TotalCount is the total profiled count of call executions, and |
134 | // NumCandidates is the number of candidate entries in ValueDataRef. |
135 | std::vector<PromotionCandidate> getPromotionCandidatesForCallSite( |
136 | const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef, |
137 | uint64_t TotalCount, uint32_t NumCandidates); |
138 | |
139 | // Promote a list of targets for one indirect-call callsite. Return |
140 | // the number of promotions. |
141 | uint32_t tryToPromote(CallBase &CB, |
142 | const std::vector<PromotionCandidate> &Candidates, |
143 | uint64_t &TotalCount); |
144 | |
145 | public: |
146 | (Function &Func, InstrProfSymtab *Symtab, bool SamplePGO, |
147 | OptimizationRemarkEmitter &ORE) |
148 | : F(Func), Symtab(Symtab), SamplePGO(SamplePGO), ORE(ORE) {} |
149 | IndirectCallPromoter(const IndirectCallPromoter &) = delete; |
150 | IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete; |
151 | |
152 | bool processFunction(ProfileSummaryInfo *PSI); |
153 | }; |
154 | |
155 | } // end anonymous namespace |
156 | |
157 | // Indirect-call promotion heuristic. The direct targets are sorted based on |
158 | // the count. Stop at the first target that is not promoted. |
159 | std::vector<IndirectCallPromoter::PromotionCandidate> |
160 | IndirectCallPromoter::getPromotionCandidatesForCallSite( |
161 | const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef, |
162 | uint64_t TotalCount, uint32_t NumCandidates) { |
163 | std::vector<PromotionCandidate> Ret; |
164 | |
165 | LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << CB |
166 | << " Num_targets: " << ValueDataRef.size() |
167 | << " Num_candidates: " << NumCandidates << "\n" ); |
168 | NumOfPGOICallsites++; |
169 | if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) { |
170 | LLVM_DEBUG(dbgs() << " Skip: User options.\n" ); |
171 | return Ret; |
172 | } |
173 | |
174 | for (uint32_t I = 0; I < NumCandidates; I++) { |
175 | uint64_t Count = ValueDataRef[I].Count; |
176 | assert(Count <= TotalCount); |
177 | (void)TotalCount; |
178 | uint64_t Target = ValueDataRef[I].Value; |
179 | LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count |
180 | << " Target_func: " << Target << "\n" ); |
181 | |
182 | if (ICPInvokeOnly && isa<CallInst>(Val: CB)) { |
183 | LLVM_DEBUG(dbgs() << " Not promote: User options.\n" ); |
184 | ORE.emit(RemarkBuilder: [&]() { |
185 | return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions" , &CB) |
186 | << " Not promote: User options" ; |
187 | }); |
188 | break; |
189 | } |
190 | if (ICPCallOnly && isa<InvokeInst>(Val: CB)) { |
191 | LLVM_DEBUG(dbgs() << " Not promote: User option.\n" ); |
192 | ORE.emit(RemarkBuilder: [&]() { |
193 | return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions" , &CB) |
194 | << " Not promote: User options" ; |
195 | }); |
196 | break; |
197 | } |
198 | if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) { |
199 | LLVM_DEBUG(dbgs() << " Not promote: Cutoff reached.\n" ); |
200 | ORE.emit(RemarkBuilder: [&]() { |
201 | return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached" , &CB) |
202 | << " Not promote: Cutoff reached" ; |
203 | }); |
204 | break; |
205 | } |
206 | |
207 | // Don't promote if the symbol is not defined in the module. This avoids |
208 | // creating a reference to a symbol that doesn't exist in the module |
209 | // This can happen when we compile with a sample profile collected from |
210 | // one binary but used for another, which may have profiled targets that |
211 | // aren't used in the new binary. We might have a declaration initially in |
212 | // the case where the symbol is globally dead in the binary and removed by |
213 | // ThinLTO. |
214 | Function *TargetFunction = Symtab->getFunction(FuncMD5Hash: Target); |
215 | if (TargetFunction == nullptr || TargetFunction->isDeclaration()) { |
216 | LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n" ); |
217 | ORE.emit(RemarkBuilder: [&]() { |
218 | return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget" , &CB) |
219 | << "Cannot promote indirect call: target with md5sum " |
220 | << ore::NV("target md5sum" , Target) << " not found" ; |
221 | }); |
222 | break; |
223 | } |
224 | |
225 | const char *Reason = nullptr; |
226 | if (!isLegalToPromote(CB, Callee: TargetFunction, FailureReason: &Reason)) { |
227 | using namespace ore; |
228 | |
229 | ORE.emit(RemarkBuilder: [&]() { |
230 | return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote" , &CB) |
231 | << "Cannot promote indirect call to " |
232 | << NV("TargetFunction" , TargetFunction) << " with count of " |
233 | << NV("Count" , Count) << ": " << Reason; |
234 | }); |
235 | break; |
236 | } |
237 | |
238 | Ret.push_back(x: PromotionCandidate(TargetFunction, Count)); |
239 | TotalCount -= Count; |
240 | } |
241 | return Ret; |
242 | } |
243 | |
244 | CallBase &llvm::pgo::(CallBase &CB, Function *DirectCallee, |
245 | uint64_t Count, uint64_t TotalCount, |
246 | bool AttachProfToDirectCall, |
247 | OptimizationRemarkEmitter *ORE) { |
248 | |
249 | uint64_t ElseCount = TotalCount - Count; |
250 | uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount); |
251 | uint64_t Scale = calculateCountScale(MaxCount); |
252 | MDBuilder MDB(CB.getContext()); |
253 | MDNode *BranchWeights = MDB.createBranchWeights( |
254 | TrueWeight: scaleBranchCount(Count, Scale), FalseWeight: scaleBranchCount(Count: ElseCount, Scale)); |
255 | |
256 | CallBase &NewInst = |
257 | promoteCallWithIfThenElse(CB, Callee: DirectCallee, BranchWeights); |
258 | |
259 | if (AttachProfToDirectCall) { |
260 | setBranchWeights(I&: NewInst, Weights: {static_cast<uint32_t>(Count)}); |
261 | } |
262 | |
263 | using namespace ore; |
264 | |
265 | if (ORE) |
266 | ORE->emit(RemarkBuilder: [&]() { |
267 | return OptimizationRemark(DEBUG_TYPE, "Promoted" , &CB) |
268 | << "Promote indirect call to " << NV("DirectCallee" , DirectCallee) |
269 | << " with count " << NV("Count" , Count) << " out of " |
270 | << NV("TotalCount" , TotalCount); |
271 | }); |
272 | return NewInst; |
273 | } |
274 | |
275 | // Promote indirect-call to conditional direct-call for one callsite. |
276 | uint32_t IndirectCallPromoter::tryToPromote( |
277 | CallBase &CB, const std::vector<PromotionCandidate> &Candidates, |
278 | uint64_t &TotalCount) { |
279 | uint32_t NumPromoted = 0; |
280 | |
281 | for (const auto &C : Candidates) { |
282 | uint64_t Count = C.Count; |
283 | pgo::promoteIndirectCall(CB, DirectCallee: C.TargetFunction, Count, TotalCount, AttachProfToDirectCall: SamplePGO, |
284 | ORE: &ORE); |
285 | assert(TotalCount >= Count); |
286 | TotalCount -= Count; |
287 | NumOfPGOICallPromotion++; |
288 | NumPromoted++; |
289 | } |
290 | return NumPromoted; |
291 | } |
292 | |
293 | // Traverse all the indirect-call callsite and get the value profile |
294 | // annotation to perform indirect-call promotion. |
295 | bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) { |
296 | bool Changed = false; |
297 | ICallPromotionAnalysis ICallAnalysis; |
298 | for (auto *CB : findIndirectCalls(F)) { |
299 | uint32_t NumVals, NumCandidates; |
300 | uint64_t TotalCount; |
301 | auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction( |
302 | I: CB, NumVals, TotalCount, NumCandidates); |
303 | if (!NumCandidates || |
304 | (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(C: TotalCount))) |
305 | continue; |
306 | auto PromotionCandidates = getPromotionCandidatesForCallSite( |
307 | CB: *CB, ValueDataRef: ICallProfDataRef, TotalCount, NumCandidates); |
308 | uint32_t NumPromoted = tryToPromote(CB&: *CB, Candidates: PromotionCandidates, TotalCount); |
309 | if (NumPromoted == 0) |
310 | continue; |
311 | |
312 | Changed = true; |
313 | // Adjust the MD.prof metadata. First delete the old one. |
314 | CB->setMetadata(KindID: LLVMContext::MD_prof, Node: nullptr); |
315 | // If all promoted, we don't need the MD.prof metadata. |
316 | if (TotalCount == 0 || NumPromoted == NumVals) |
317 | continue; |
318 | // Otherwise we need update with the un-promoted records back. |
319 | annotateValueSite(M&: *F.getParent(), Inst&: *CB, VDs: ICallProfDataRef.slice(N: NumPromoted), |
320 | Sum: TotalCount, ValueKind: IPVK_IndirectCallTarget, MaxMDCount: NumCandidates); |
321 | } |
322 | return Changed; |
323 | } |
324 | |
325 | // A wrapper function that does the actual work. |
326 | static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO, |
327 | bool SamplePGO, ModuleAnalysisManager &MAM) { |
328 | if (DisableICP) |
329 | return false; |
330 | InstrProfSymtab Symtab; |
331 | if (Error E = Symtab.create(M, InLTO)) { |
332 | std::string SymtabFailure = toString(E: std::move(E)); |
333 | M.getContext().emitError(ErrorStr: "Failed to create symtab: " + SymtabFailure); |
334 | return false; |
335 | } |
336 | bool Changed = false; |
337 | for (auto &F : M) { |
338 | if (F.isDeclaration() || F.hasOptNone()) |
339 | continue; |
340 | |
341 | auto &FAM = |
342 | MAM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager(); |
343 | auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F); |
344 | |
345 | IndirectCallPromoter CallPromoter(F, &Symtab, SamplePGO, ORE); |
346 | bool FuncChanged = CallPromoter.processFunction(PSI); |
347 | if (ICPDUMPAFTER && FuncChanged) { |
348 | LLVM_DEBUG(dbgs() << "\n== IR Dump After ==" ; F.print(dbgs())); |
349 | LLVM_DEBUG(dbgs() << "\n" ); |
350 | } |
351 | Changed |= FuncChanged; |
352 | if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) { |
353 | LLVM_DEBUG(dbgs() << " Stop: Cutoff reached.\n" ); |
354 | break; |
355 | } |
356 | } |
357 | return Changed; |
358 | } |
359 | |
360 | PreservedAnalyses PGOIndirectCallPromotion::run(Module &M, |
361 | ModuleAnalysisManager &MAM) { |
362 | ProfileSummaryInfo *PSI = &MAM.getResult<ProfileSummaryAnalysis>(IR&: M); |
363 | |
364 | if (!promoteIndirectCalls(M, PSI, InLTO: InLTO | ICPLTOMode, |
365 | SamplePGO: SamplePGO | ICPSamplePGOMode, MAM)) |
366 | return PreservedAnalyses::all(); |
367 | |
368 | return PreservedAnalyses::none(); |
369 | } |
370 | |