1//===-- ProfileGenerator.h - Profile Generator -----------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
10#define LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
11#include "CSPreInliner.h"
12#include "ErrorHandling.h"
13#include "PerfReader.h"
14#include "ProfiledBinary.h"
15#include "llvm/IR/DebugInfoMetadata.h"
16#include "llvm/ProfileData/SampleProfWriter.h"
17#include <memory>
18#include <unordered_set>
19
20using namespace llvm;
21using namespace sampleprof;
22
23namespace llvm {
24namespace sampleprof {
25
26using ProbeCounterMap =
27 std::unordered_map<const MCDecodedPseudoProbe *, uint64_t>;
28
29// This base class for profile generation of sample-based PGO. We reuse all
30// structures relating to function profiles and profile writers as seen in
31// /ProfileData/SampleProf.h.
32class ProfileGeneratorBase {
33
34public:
35 ProfileGeneratorBase(ProfiledBinary *Binary) : Binary(Binary){};
36 ProfileGeneratorBase(ProfiledBinary *Binary,
37 const ContextSampleCounterMap *Counters)
38 : Binary(Binary), SampleCounters(Counters){};
39 ProfileGeneratorBase(ProfiledBinary *Binary,
40 const SampleProfileMap &&Profiles)
41 : Binary(Binary), ProfileMap(std::move(Profiles)){};
42
43 virtual ~ProfileGeneratorBase() = default;
44 static std::unique_ptr<ProfileGeneratorBase>
45 create(ProfiledBinary *Binary, const ContextSampleCounterMap *Counters,
46 bool profileIsCS);
47 static std::unique_ptr<ProfileGeneratorBase>
48 create(ProfiledBinary *Binary, SampleProfileMap &ProfileMap,
49 bool profileIsCS);
50 virtual void generateProfile() = 0;
51 void write();
52
53 static uint32_t
54 getDuplicationFactor(unsigned Discriminator,
55 bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) {
56 return UseFSD ? 1
57 : llvm::DILocation::getDuplicationFactorFromDiscriminator(
58 D: Discriminator);
59 }
60
61 static uint32_t
62 getBaseDiscriminator(unsigned Discriminator,
63 bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) {
64 return UseFSD ? Discriminator
65 : DILocation::getBaseDiscriminatorFromDiscriminator(
66 D: Discriminator, /* IsFSDiscriminator */ IsFSDiscriminator: false);
67 }
68
69 static bool UseFSDiscriminator;
70
71protected:
72 // Use SampleProfileWriter to serialize profile map
73 void write(std::unique_ptr<SampleProfileWriter> Writer,
74 SampleProfileMap &ProfileMap);
75 /*
76 For each region boundary point, mark if it is begin or end (or both) of
77 the region. Boundary points are inclusive. Log the sample count as well
78 so we can use it when we compute the sample count of each disjoint region
79 later. Note that there might be multiple ranges with different sample
80 count that share same begin/end point. We need to accumulate the sample
81 count for the boundary point for such case, because for the example
82 below,
83
84 |<--100-->|
85 |<------200------>|
86 A B C
87
88 sample count for disjoint region [A,B] would be 300.
89 */
90 void findDisjointRanges(RangeSample &DisjointRanges,
91 const RangeSample &Ranges);
92
93 // Go through each address from range to extract the top frame probe by
94 // looking up in the Address2ProbeMap
95 void extractProbesFromRange(const RangeSample &RangeCounter,
96 ProbeCounterMap &ProbeCounter,
97 bool FindDisjointRanges = true);
98
99 // Helper function for updating body sample for a leaf location in
100 // FunctionProfile
101 void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile,
102 const SampleContextFrame &LeafLoc,
103 uint64_t Count);
104
105 void updateFunctionSamples();
106
107 void updateTotalSamples();
108
109 void updateCallsiteSamples();
110
111 void filterAmbiguousProfile(SampleProfileMap &Profiles);
112
113 bool filterAmbiguousProfile(FunctionSamples &FS);
114
115 StringRef getCalleeNameForAddress(uint64_t TargetAddress);
116
117 void computeSummaryAndThreshold(SampleProfileMap &ProfileMap);
118
119 void calculateAndShowDensity(const SampleProfileMap &Profiles);
120
121 double calculateDensity(const SampleProfileMap &Profiles,
122 uint64_t HotCntThreshold);
123
124 void showDensitySuggestion(double Density);
125
126 void collectProfiledFunctions();
127
128 bool collectFunctionsFromRawProfile(
129 std::unordered_set<const BinaryFunction *> &ProfiledFunctions);
130
131 // Collect profiled Functions for llvm sample profile input.
132 virtual bool collectFunctionsFromLLVMProfile(
133 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) = 0;
134
135 // List of function prefix to filter out.
136 static constexpr const char *FuncPrefixsToFilter[] = {"__cxx_global_var_init",
137 "__tls_init"};
138
139 // Thresholds from profile summary to answer isHotCount/isColdCount queries.
140 uint64_t HotCountThreshold;
141
142 uint64_t ColdCountThreshold;
143
144 ProfiledBinary *Binary = nullptr;
145
146 std::unique_ptr<ProfileSummary> Summary;
147
148 // Used by SampleProfileWriter
149 SampleProfileMap ProfileMap;
150
151 const ContextSampleCounterMap *SampleCounters = nullptr;
152};
153
154class ProfileGenerator : public ProfileGeneratorBase {
155
156public:
157 ProfileGenerator(ProfiledBinary *Binary,
158 const ContextSampleCounterMap *Counters)
159 : ProfileGeneratorBase(Binary, Counters){};
160 ProfileGenerator(ProfiledBinary *Binary, const SampleProfileMap &&Profiles)
161 : ProfileGeneratorBase(Binary, std::move(Profiles)){};
162 void generateProfile() override;
163
164private:
165 void generateLineNumBasedProfile();
166 void generateProbeBasedProfile();
167 RangeSample preprocessRangeCounter(const RangeSample &RangeCounter);
168 FunctionSamples &getTopLevelFunctionProfile(FunctionId FuncName);
169 // Helper function to get the leaf frame's FunctionProfile by traversing the
170 // inline stack and meanwhile it adds the total samples for each frame's
171 // function profile.
172 FunctionSamples &
173 getLeafProfileAndAddTotalSamples(const SampleContextFrameVector &FrameVec,
174 uint64_t Count);
175 void populateBodySamplesForAllFunctions(const RangeSample &RangeCounter);
176 void
177 populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters);
178 void
179 populateBodySamplesWithProbesForAllFunctions(const RangeSample &RangeCounter);
180 void populateBoundarySamplesWithProbesForAllFunctions(
181 const BranchSample &BranchCounters);
182 void postProcessProfiles();
183 void trimColdProfiles(const SampleProfileMap &Profiles,
184 uint64_t ColdCntThreshold);
185 bool collectFunctionsFromLLVMProfile(
186 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) override;
187};
188
189class CSProfileGenerator : public ProfileGeneratorBase {
190public:
191 CSProfileGenerator(ProfiledBinary *Binary,
192 const ContextSampleCounterMap *Counters)
193 : ProfileGeneratorBase(Binary, Counters){};
194 CSProfileGenerator(ProfiledBinary *Binary, SampleProfileMap &Profiles)
195 : ProfileGeneratorBase(Binary), ContextTracker(Profiles, nullptr){};
196 void generateProfile() override;
197
198 // Trim the context stack at a given depth.
199 template <typename T>
200 static void trimContext(SmallVectorImpl<T> &S, int Depth = MaxContextDepth) {
201 if (Depth < 0 || static_cast<size_t>(Depth) >= S.size())
202 return;
203 std::copy(S.begin() + S.size() - static_cast<size_t>(Depth), S.end(),
204 S.begin());
205 S.resize(Depth);
206 }
207
208 // Remove adjacent repeated context sequences up to a given sequence length,
209 // -1 means no size limit. Note that repeated sequences are identified based
210 // on the exact call site, this is finer granularity than function recursion.
211 template <typename T>
212 static void compressRecursionContext(SmallVectorImpl<T> &Context,
213 int32_t CSize = MaxCompressionSize) {
214 uint32_t I = 1;
215 uint32_t HS = static_cast<uint32_t>(Context.size() / 2);
216 uint32_t MaxDedupSize =
217 CSize == -1 ? HS : std::min(a: static_cast<uint32_t>(CSize), b: HS);
218 auto BeginIter = Context.begin();
219 // Use an in-place algorithm to save memory copy
220 // End indicates the end location of current iteration's data
221 uint32_t End = 0;
222 // Deduplicate from length 1 to the max possible size of a repeated
223 // sequence.
224 while (I <= MaxDedupSize) {
225 // This is a linear algorithm that deduplicates adjacent repeated
226 // sequences of size I. The deduplication detection runs on a sliding
227 // window whose size is 2*I and it keeps sliding the window to deduplicate
228 // the data inside. Once duplication is detected, deduplicate it by
229 // skipping the right half part of the window, otherwise just copy back
230 // the new one by appending them at the back of End pointer(for the next
231 // iteration).
232 //
233 // For example:
234 // Input: [a1, a2, b1, b2]
235 // (Added index to distinguish the same char, the origin is [a, a, b,
236 // b], the size of the dedup window is 2(I = 1) at the beginning)
237 //
238 // 1) The initial status is a dummy window[null, a1], then just copy the
239 // right half of the window(End = 0), then slide the window.
240 // Result: [a1], a2, b1, b2 (End points to the element right before ],
241 // after ] is the data of the previous iteration)
242 //
243 // 2) Next window is [a1, a2]. Since a1 == a2, then skip the right half of
244 // the window i.e the duplication happen. Only slide the window.
245 // Result: [a1], a2, b1, b2
246 //
247 // 3) Next window is [a2, b1], copy the right half of the window(b1 is
248 // new) to the End and slide the window.
249 // Result: [a1, b1], b1, b2
250 //
251 // 4) Next window is [b1, b2], same to 2), skip b2.
252 // Result: [a1, b1], b1, b2
253 // After resize, it will be [a, b]
254
255 // Use pointers like below to do comparison inside the window
256 // [a b c a b c]
257 // | | | | |
258 // LeftBoundary Left Right Left+I Right+I
259 // A duplication found if Left < LeftBoundry.
260
261 int32_t Right = I - 1;
262 End = I;
263 int32_t LeftBoundary = 0;
264 while (Right + I < Context.size()) {
265 // To avoids scanning a part of a sequence repeatedly, it finds out
266 // the common suffix of two hald in the window. The common suffix will
267 // serve as the common prefix of next possible pair of duplicate
268 // sequences. The non-common part will be ignored and never scanned
269 // again.
270
271 // For example.
272 // Input: [a, b1], c1, b2, c2
273 // I = 2
274 //
275 // 1) For the window [a, b1, c1, b2], non-common-suffix for the right
276 // part is 'c1', copy it and only slide the window 1 step.
277 // Result: [a, b1, c1], b2, c2
278 //
279 // 2) Next window is [b1, c1, b2, c2], so duplication happen.
280 // Result after resize: [a, b, c]
281
282 int32_t Left = Right;
283 while (Left >= LeftBoundary && Context[Left] == Context[Left + I]) {
284 // Find the longest suffix inside the window. When stops, Left points
285 // at the diverging point in the current sequence.
286 Left--;
287 }
288
289 bool DuplicationFound = (Left < LeftBoundary);
290 // Don't need to recheck the data before Right
291 LeftBoundary = Right + 1;
292 if (DuplicationFound) {
293 // Duplication found, skip right half of the window.
294 Right += I;
295 } else {
296 // Copy the non-common-suffix part of the adjacent sequence.
297 std::copy(BeginIter + Right + 1, BeginIter + Left + I + 1,
298 BeginIter + End);
299 End += Left + I - Right;
300 // Only slide the window by the size of non-common-suffix
301 Right = Left + I;
302 }
303 }
304 // Don't forget the remaining part that's not scanned.
305 std::copy(BeginIter + Right + 1, Context.end(), BeginIter + End);
306 End += Context.size() - Right - 1;
307 I++;
308 Context.resize(End);
309 MaxDedupSize = std::min(a: static_cast<uint32_t>(End / 2), b: MaxDedupSize);
310 }
311 }
312
313private:
314 void generateLineNumBasedProfile();
315
316 FunctionSamples *getOrCreateFunctionSamples(ContextTrieNode *ContextNode,
317 bool WasLeafInlined = false);
318
319 // Lookup or create ContextTrieNode for the context, FunctionSamples is
320 // created inside this function.
321 ContextTrieNode *getOrCreateContextNode(const SampleContextFrames Context,
322 bool WasLeafInlined = false);
323
324 // For profiled only functions, on-demand compute their inline context
325 // function byte size which is used by the pre-inliner.
326 void computeSizeForProfiledFunctions();
327 // Post processing for profiles before writing out, such as mermining
328 // and trimming cold profiles, running preinliner on profiles.
329 void postProcessProfiles();
330
331 void populateBodySamplesForFunction(FunctionSamples &FunctionProfile,
332 const RangeSample &RangeCounters);
333
334 void populateBoundarySamplesForFunction(ContextTrieNode *CallerNode,
335 const BranchSample &BranchCounters);
336
337 void populateInferredFunctionSamples(ContextTrieNode &Node);
338
339 void updateFunctionSamples();
340
341 void generateProbeBasedProfile();
342
343 // Fill in function body samples from probes
344 void populateBodySamplesWithProbes(const RangeSample &RangeCounter,
345 const AddrBasedCtxKey *CtxKey);
346 // Fill in boundary samples for a call probe
347 void populateBoundarySamplesWithProbes(const BranchSample &BranchCounter,
348 const AddrBasedCtxKey *CtxKey);
349
350 ContextTrieNode *
351 getContextNodeForLeafProbe(const AddrBasedCtxKey *CtxKey,
352 const MCDecodedPseudoProbe *LeafProbe);
353
354 // Helper function to get FunctionSamples for the leaf probe
355 FunctionSamples &
356 getFunctionProfileForLeafProbe(const AddrBasedCtxKey *CtxKey,
357 const MCDecodedPseudoProbe *LeafProbe);
358
359 void convertToProfileMap(ContextTrieNode &Node,
360 SampleContextFrameVector &Context);
361
362 void convertToProfileMap();
363
364 void computeSummaryAndThreshold();
365
366 bool collectFunctionsFromLLVMProfile(
367 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) override;
368
369 void initializeMissingFrameInferrer();
370
371 // Given an input `Context`, output `NewContext` with inferred missing tail
372 // call frames.
373 void inferMissingFrames(const SmallVectorImpl<uint64_t> &Context,
374 SmallVectorImpl<uint64_t> &NewContext);
375
376 ContextTrieNode &getRootContext() { return ContextTracker.getRootContext(); };
377
378 // The container for holding the FunctionSamples used by context trie.
379 std::list<FunctionSamples> FSamplesList;
380
381 // Underlying context table serves for sample profile writer.
382 std::unordered_set<SampleContextFrameVector, SampleContextFrameHash> Contexts;
383
384 SampleContextTracker ContextTracker;
385
386 bool IsProfileValidOnTrie = true;
387
388public:
389 // Deduplicate adjacent repeated context sequences up to a given sequence
390 // length. -1 means no size limit.
391 static int32_t MaxCompressionSize;
392 static int MaxContextDepth;
393};
394
395} // end namespace sampleprof
396} // end namespace llvm
397
398#endif
399

source code of llvm/tools/llvm-profgen/ProfileGenerator.h