1 | //===- llvm/Analysis/ProfileSummaryInfo.h - profile summary ---*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains a pass that provides access to profile summary |
10 | // information. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_ANALYSIS_PROFILESUMMARYINFO_H |
15 | #define LLVM_ANALYSIS_PROFILESUMMARYINFO_H |
16 | |
17 | #include "llvm/ADT/DenseMap.h" |
18 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
19 | #include "llvm/IR/Function.h" |
20 | #include "llvm/IR/Instructions.h" |
21 | #include "llvm/IR/PassManager.h" |
22 | #include "llvm/IR/ProfileSummary.h" |
23 | #include "llvm/Pass.h" |
24 | #include <memory> |
25 | #include <optional> |
26 | |
27 | namespace llvm { |
28 | class BasicBlock; |
29 | class CallBase; |
30 | class MachineFunction; |
31 | |
32 | /// Analysis providing profile information. |
33 | /// |
34 | /// This is an immutable analysis pass that provides ability to query global |
35 | /// (program-level) profile information. The main APIs are isHotCount and |
36 | /// isColdCount that tells whether a given profile count is considered hot/cold |
37 | /// based on the profile summary. This also provides convenience methods to |
38 | /// check whether a function is hot or cold. |
39 | |
40 | // FIXME: Provide convenience methods to determine hotness/coldness of other IR |
41 | // units. This would require making this depend on BFI. |
42 | class ProfileSummaryInfo { |
43 | private: |
44 | const Module *M; |
45 | std::unique_ptr<ProfileSummary> Summary; |
46 | void computeThresholds(); |
47 | // Count thresholds to answer isHotCount and isColdCount queries. |
48 | std::optional<uint64_t> HotCountThreshold, ColdCountThreshold; |
49 | // True if the working set size of the code is considered huge, |
50 | // because the number of profile counts required to reach the hot |
51 | // percentile is above a huge threshold. |
52 | std::optional<bool> HasHugeWorkingSetSize; |
53 | // True if the working set size of the code is considered large, |
54 | // because the number of profile counts required to reach the hot |
55 | // percentile is above a large threshold. |
56 | std::optional<bool> HasLargeWorkingSetSize; |
57 | // Compute the threshold for a given cutoff. |
58 | std::optional<uint64_t> computeThreshold(int PercentileCutoff) const; |
59 | // The map that caches the threshold values. The keys are the percentile |
60 | // cutoff values and the values are the corresponding threshold values. |
61 | mutable DenseMap<int, uint64_t> ThresholdCache; |
62 | |
63 | public: |
64 | ProfileSummaryInfo(const Module &M) : M(&M) { refresh(); } |
65 | ProfileSummaryInfo(ProfileSummaryInfo &&Arg) = default; |
66 | |
67 | /// If no summary is present, attempt to refresh. |
68 | void refresh(); |
69 | |
70 | /// Returns true if profile summary is available. |
71 | bool hasProfileSummary() const { return Summary != nullptr; } |
72 | |
73 | /// Returns true if module \c M has sample profile. |
74 | bool hasSampleProfile() const { |
75 | return hasProfileSummary() && |
76 | Summary->getKind() == ProfileSummary::PSK_Sample; |
77 | } |
78 | |
79 | /// Returns true if module \c M has instrumentation profile. |
80 | bool hasInstrumentationProfile() const { |
81 | return hasProfileSummary() && |
82 | Summary->getKind() == ProfileSummary::PSK_Instr; |
83 | } |
84 | |
85 | /// Returns true if module \c M has context sensitive instrumentation profile. |
86 | bool hasCSInstrumentationProfile() const { |
87 | return hasProfileSummary() && |
88 | Summary->getKind() == ProfileSummary::PSK_CSInstr; |
89 | } |
90 | |
91 | /// Handle the invalidation of this information. |
92 | /// |
93 | /// When used as a result of \c ProfileSummaryAnalysis this method will be |
94 | /// called when the module this was computed for changes. Since profile |
95 | /// summary is immutable after it is annotated on the module, we return false |
96 | /// here. |
97 | bool invalidate(Module &, const PreservedAnalyses &, |
98 | ModuleAnalysisManager::Invalidator &) { |
99 | return false; |
100 | } |
101 | |
102 | /// Returns the profile count for \p CallInst. |
103 | std::optional<uint64_t> getProfileCount(const CallBase &CallInst, |
104 | BlockFrequencyInfo *BFI, |
105 | bool AllowSynthetic = false) const; |
106 | /// Returns true if module \c M has partial-profile sample profile. |
107 | bool hasPartialSampleProfile() const; |
108 | /// Returns true if the working set size of the code is considered huge. |
109 | bool hasHugeWorkingSetSize() const; |
110 | /// Returns true if the working set size of the code is considered large. |
111 | bool hasLargeWorkingSetSize() const; |
112 | /// Returns true if \p F has hot function entry. If it returns false, it |
113 | /// either means it is not hot or it is unknown whether it is hot or not (for |
114 | /// example, no profile data is available). |
115 | template <typename FuncT> bool isFunctionEntryHot(const FuncT *F) const { |
116 | if (!F || !hasProfileSummary()) |
117 | return false; |
118 | std::optional<Function::ProfileCount> FunctionCount = getEntryCount(F); |
119 | // FIXME: The heuristic used below for determining hotness is based on |
120 | // preliminary SPEC tuning for inliner. This will eventually be a |
121 | // convenience method that calls isHotCount. |
122 | return FunctionCount && isHotCount(C: FunctionCount->getCount()); |
123 | } |
124 | |
125 | /// Returns true if \p F contains hot code. |
126 | template <typename FuncT, typename BFIT> |
127 | bool isFunctionHotInCallGraph(const FuncT *F, BFIT &BFI) const { |
128 | if (!F || !hasProfileSummary()) |
129 | return false; |
130 | if (auto FunctionCount = getEntryCount(F)) |
131 | if (isHotCount(C: FunctionCount->getCount())) |
132 | return true; |
133 | |
134 | if (auto TotalCallCount = getTotalCallCount(F)) |
135 | if (isHotCount(C: *TotalCallCount)) |
136 | return true; |
137 | |
138 | for (const auto &BB : *F) |
139 | if (isHotBlock(&BB, &BFI)) |
140 | return true; |
141 | return false; |
142 | } |
143 | /// Returns true if \p F has cold function entry. |
144 | bool isFunctionEntryCold(const Function *F) const; |
145 | /// Returns true if \p F contains only cold code. |
146 | template <typename FuncT, typename BFIT> |
147 | bool isFunctionColdInCallGraph(const FuncT *F, BFIT &BFI) const { |
148 | if (!F || !hasProfileSummary()) |
149 | return false; |
150 | if (auto FunctionCount = getEntryCount(F)) |
151 | if (!isColdCount(C: FunctionCount->getCount())) |
152 | return false; |
153 | |
154 | if (auto TotalCallCount = getTotalCallCount(F)) |
155 | if (!isColdCount(C: *TotalCallCount)) |
156 | return false; |
157 | |
158 | for (const auto &BB : *F) |
159 | if (!isColdBlock(&BB, &BFI)) |
160 | return false; |
161 | return true; |
162 | } |
163 | /// Returns true if the hotness of \p F is unknown. |
164 | bool isFunctionHotnessUnknown(const Function &F) const; |
165 | /// Returns true if \p F contains hot code with regard to a given hot |
166 | /// percentile cutoff value. |
167 | template <typename FuncT, typename BFIT> |
168 | bool isFunctionHotInCallGraphNthPercentile(int PercentileCutoff, |
169 | const FuncT *F, BFIT &BFI) const { |
170 | return isFunctionHotOrColdInCallGraphNthPercentile<true, FuncT, BFIT>( |
171 | PercentileCutoff, F, BFI); |
172 | } |
173 | /// Returns true if \p F contains cold code with regard to a given cold |
174 | /// percentile cutoff value. |
175 | template <typename FuncT, typename BFIT> |
176 | bool isFunctionColdInCallGraphNthPercentile(int PercentileCutoff, |
177 | const FuncT *F, BFIT &BFI) const { |
178 | return isFunctionHotOrColdInCallGraphNthPercentile<false, FuncT, BFIT>( |
179 | PercentileCutoff, F, BFI); |
180 | } |
181 | /// Returns true if count \p C is considered hot. |
182 | bool isHotCount(uint64_t C) const; |
183 | /// Returns true if count \p C is considered cold. |
184 | bool isColdCount(uint64_t C) const; |
185 | /// Returns true if count \p C is considered hot with regard to a given |
186 | /// hot percentile cutoff value. |
187 | /// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where |
188 | /// the first two digits are the whole part. E.g. 995000 for 99.5 percentile. |
189 | bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C) const; |
190 | /// Returns true if count \p C is considered cold with regard to a given |
191 | /// cold percentile cutoff value. |
192 | /// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where |
193 | /// the first two digits are the whole part. E.g. 995000 for 99.5 percentile. |
194 | bool isColdCountNthPercentile(int PercentileCutoff, uint64_t C) const; |
195 | |
196 | /// Returns true if BasicBlock \p BB is considered hot. |
197 | template <typename BBType, typename BFIT> |
198 | bool isHotBlock(const BBType *BB, BFIT *BFI) const { |
199 | auto Count = BFI->getBlockProfileCount(BB); |
200 | return Count && isHotCount(C: *Count); |
201 | } |
202 | |
203 | /// Returns true if BasicBlock \p BB is considered cold. |
204 | template <typename BBType, typename BFIT> |
205 | bool isColdBlock(const BBType *BB, BFIT *BFI) const { |
206 | auto Count = BFI->getBlockProfileCount(BB); |
207 | return Count && isColdCount(C: *Count); |
208 | } |
209 | |
210 | template <typename BFIT> |
211 | bool isColdBlock(BlockFrequency BlockFreq, const BFIT *BFI) const { |
212 | auto Count = BFI->getProfileCountFromFreq(BlockFreq); |
213 | return Count && isColdCount(C: *Count); |
214 | } |
215 | |
216 | template <typename BBType, typename BFIT> |
217 | bool isHotBlockNthPercentile(int PercentileCutoff, const BBType *BB, |
218 | BFIT *BFI) const { |
219 | return isHotOrColdBlockNthPercentile<true, BBType, BFIT>(PercentileCutoff, |
220 | BB, BFI); |
221 | } |
222 | |
223 | template <typename BFIT> |
224 | bool isHotBlockNthPercentile(int PercentileCutoff, BlockFrequency BlockFreq, |
225 | BFIT *BFI) const { |
226 | return isHotOrColdBlockNthPercentile<true, BFIT>(PercentileCutoff, |
227 | BlockFreq, BFI); |
228 | } |
229 | |
230 | /// Returns true if BasicBlock \p BB is considered cold with regard to a given |
231 | /// cold percentile cutoff value. |
232 | /// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where |
233 | /// the first two digits are the whole part. E.g. 995000 for 99.5 percentile. |
234 | template <typename BBType, typename BFIT> |
235 | bool isColdBlockNthPercentile(int PercentileCutoff, const BBType *BB, |
236 | BFIT *BFI) const { |
237 | return isHotOrColdBlockNthPercentile<false, BBType, BFIT>(PercentileCutoff, |
238 | BB, BFI); |
239 | } |
240 | template <typename BFIT> |
241 | bool isColdBlockNthPercentile(int PercentileCutoff, BlockFrequency BlockFreq, |
242 | BFIT *BFI) const { |
243 | return isHotOrColdBlockNthPercentile<false, BFIT>(PercentileCutoff, |
244 | BlockFreq, BFI); |
245 | } |
246 | /// Returns true if the call site \p CB is considered hot. |
247 | bool isHotCallSite(const CallBase &CB, BlockFrequencyInfo *BFI) const; |
248 | /// Returns true if call site \p CB is considered cold. |
249 | bool isColdCallSite(const CallBase &CB, BlockFrequencyInfo *BFI) const; |
250 | /// Returns HotCountThreshold if set. Recompute HotCountThreshold |
251 | /// if not set. |
252 | uint64_t getOrCompHotCountThreshold() const; |
253 | /// Returns ColdCountThreshold if set. Recompute HotCountThreshold |
254 | /// if not set. |
255 | uint64_t getOrCompColdCountThreshold() const; |
256 | /// Returns HotCountThreshold if set. |
257 | uint64_t getHotCountThreshold() const { |
258 | return HotCountThreshold.value_or(u: 0); |
259 | } |
260 | /// Returns ColdCountThreshold if set. |
261 | uint64_t getColdCountThreshold() const { |
262 | return ColdCountThreshold.value_or(u: 0); |
263 | } |
264 | |
265 | private: |
266 | template <typename FuncT> |
267 | std::optional<uint64_t> getTotalCallCount(const FuncT *F) const { |
268 | return std::nullopt; |
269 | } |
270 | |
271 | template <bool isHot, typename FuncT, typename BFIT> |
272 | bool isFunctionHotOrColdInCallGraphNthPercentile(int PercentileCutoff, |
273 | const FuncT *F, |
274 | BFIT &FI) const { |
275 | if (!F || !hasProfileSummary()) |
276 | return false; |
277 | if (auto FunctionCount = getEntryCount(F)) { |
278 | if (isHot && |
279 | isHotCountNthPercentile(PercentileCutoff, C: FunctionCount->getCount())) |
280 | return true; |
281 | if (!isHot && !isColdCountNthPercentile(PercentileCutoff, |
282 | C: FunctionCount->getCount())) |
283 | return false; |
284 | } |
285 | if (auto TotalCallCount = getTotalCallCount(F)) { |
286 | if (isHot && isHotCountNthPercentile(PercentileCutoff, C: *TotalCallCount)) |
287 | return true; |
288 | if (!isHot && |
289 | !isColdCountNthPercentile(PercentileCutoff, C: *TotalCallCount)) |
290 | return false; |
291 | } |
292 | for (const auto &BB : *F) { |
293 | if (isHot && isHotBlockNthPercentile(PercentileCutoff, &BB, &FI)) |
294 | return true; |
295 | if (!isHot && !isColdBlockNthPercentile(PercentileCutoff, &BB, &FI)) |
296 | return false; |
297 | } |
298 | return !isHot; |
299 | } |
300 | |
301 | template <bool isHot> |
302 | bool isHotOrColdCountNthPercentile(int PercentileCutoff, uint64_t C) const; |
303 | |
304 | template <bool isHot, typename BBType, typename BFIT> |
305 | bool isHotOrColdBlockNthPercentile(int PercentileCutoff, const BBType *BB, |
306 | BFIT *BFI) const { |
307 | auto Count = BFI->getBlockProfileCount(BB); |
308 | if (isHot) |
309 | return Count && isHotCountNthPercentile(PercentileCutoff, C: *Count); |
310 | else |
311 | return Count && isColdCountNthPercentile(PercentileCutoff, C: *Count); |
312 | } |
313 | |
314 | template <bool isHot, typename BFIT> |
315 | bool isHotOrColdBlockNthPercentile(int PercentileCutoff, |
316 | BlockFrequency BlockFreq, |
317 | BFIT *BFI) const { |
318 | auto Count = BFI->getProfileCountFromFreq(BlockFreq); |
319 | if (isHot) |
320 | return Count && isHotCountNthPercentile(PercentileCutoff, C: *Count); |
321 | else |
322 | return Count && isColdCountNthPercentile(PercentileCutoff, C: *Count); |
323 | } |
324 | |
325 | template <typename FuncT> |
326 | std::optional<Function::ProfileCount> getEntryCount(const FuncT *F) const { |
327 | return F->getEntryCount(); |
328 | } |
329 | }; |
330 | |
331 | template <> |
332 | inline std::optional<uint64_t> |
333 | ProfileSummaryInfo::getTotalCallCount<Function>(const Function *F) const { |
334 | if (!hasSampleProfile()) |
335 | return std::nullopt; |
336 | uint64_t TotalCallCount = 0; |
337 | for (const auto &BB : *F) |
338 | for (const auto &I : BB) |
339 | if (isa<CallInst>(Val: I) || isa<InvokeInst>(Val: I)) |
340 | if (auto CallCount = getProfileCount(CallInst: cast<CallBase>(Val: I), BFI: nullptr)) |
341 | TotalCallCount += *CallCount; |
342 | return TotalCallCount; |
343 | } |
344 | |
345 | // Declare template specialization for llvm::MachineFunction. Do not implement |
346 | // here, because we cannot include MachineFunction header here, that would break |
347 | // dependency rules. |
348 | template <> |
349 | std::optional<Function::ProfileCount> |
350 | ProfileSummaryInfo::getEntryCount<MachineFunction>( |
351 | const MachineFunction *F) const; |
352 | |
353 | /// An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo. |
354 | class ProfileSummaryInfoWrapperPass : public ImmutablePass { |
355 | std::unique_ptr<ProfileSummaryInfo> PSI; |
356 | |
357 | public: |
358 | static char ID; |
359 | ProfileSummaryInfoWrapperPass(); |
360 | |
361 | ProfileSummaryInfo &getPSI() { return *PSI; } |
362 | const ProfileSummaryInfo &getPSI() const { return *PSI; } |
363 | |
364 | bool doInitialization(Module &M) override; |
365 | bool doFinalization(Module &M) override; |
366 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
367 | AU.setPreservesAll(); |
368 | } |
369 | }; |
370 | |
371 | /// An analysis pass based on the new PM to deliver ProfileSummaryInfo. |
372 | class ProfileSummaryAnalysis |
373 | : public AnalysisInfoMixin<ProfileSummaryAnalysis> { |
374 | public: |
375 | typedef ProfileSummaryInfo Result; |
376 | |
377 | Result run(Module &M, ModuleAnalysisManager &); |
378 | |
379 | private: |
380 | friend AnalysisInfoMixin<ProfileSummaryAnalysis>; |
381 | static AnalysisKey Key; |
382 | }; |
383 | |
384 | /// Printer pass that uses \c ProfileSummaryAnalysis. |
385 | class ProfileSummaryPrinterPass |
386 | : public PassInfoMixin<ProfileSummaryPrinterPass> { |
387 | raw_ostream &OS; |
388 | |
389 | public: |
390 | explicit ProfileSummaryPrinterPass(raw_ostream &OS) : OS(OS) {} |
391 | PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); |
392 | static bool isRequired() { return true; } |
393 | }; |
394 | |
395 | } // end namespace llvm |
396 | |
397 | #endif |
398 | |