1//===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8#include "ProfileGenerator.h"
9#include "ErrorHandling.h"
10#include "MissingFrameInferrer.h"
11#include "PerfReader.h"
12#include "ProfiledBinary.h"
13#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
14#include "llvm/ProfileData/ProfileCommon.h"
15#include <algorithm>
16#include <float.h>
17#include <unordered_set>
18#include <utility>
19
20cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
21 cl::Required,
22 cl::desc("Output profile file"));
23static cl::alias OutputA("o", cl::desc("Alias for --output"),
24 cl::aliasopt(OutputFilename));
25
26static cl::opt<SampleProfileFormat> OutputFormat(
27 "format", cl::desc("Format of output profile"), cl::init(Val: SPF_Ext_Binary),
28 cl::values(
29 clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
30 clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
31 clEnumValN(SPF_Text, "text", "Text encoding"),
32 clEnumValN(SPF_GCC, "gcc",
33 "GCC encoding (only meaningful for -sample)")));
34
35static cl::opt<bool> UseMD5(
36 "use-md5", cl::Hidden,
37 cl::desc("Use md5 to represent function names in the output profile (only "
38 "meaningful for -extbinary)"));
39
40static cl::opt<bool> PopulateProfileSymbolList(
41 "populate-profile-symbol-list", cl::init(Val: false), cl::Hidden,
42 cl::desc("Populate profile symbol list (only meaningful for -extbinary)"));
43
44static cl::opt<bool> FillZeroForAllFuncs(
45 "fill-zero-for-all-funcs", cl::init(Val: false), cl::Hidden,
46 cl::desc("Attribute all functions' range with zero count "
47 "even it's not hit by any samples."));
48
49static cl::opt<int32_t, true> RecursionCompression(
50 "compress-recursion",
51 cl::desc("Compressing recursion by deduplicating adjacent frame "
52 "sequences up to the specified size. -1 means no size limit."),
53 cl::Hidden,
54 cl::location(L&: llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
55
56static cl::opt<bool>
57 TrimColdProfile("trim-cold-profile",
58 cl::desc("If the total count of the profile is smaller "
59 "than threshold, it will be trimmed."));
60
61static cl::opt<bool> CSProfMergeColdContext(
62 "csprof-merge-cold-context", cl::init(Val: true),
63 cl::desc("If the total count of context profile is smaller than "
64 "the threshold, it will be merged into context-less base "
65 "profile."));
66
67static cl::opt<uint32_t> CSProfMaxColdContextDepth(
68 "csprof-max-cold-context-depth", cl::init(Val: 1),
69 cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
70 "context-less base profile"));
71
72static cl::opt<int, true> CSProfMaxContextDepth(
73 "csprof-max-context-depth",
74 cl::desc("Keep the last K contexts while merging profile. -1 means no "
75 "depth limit."),
76 cl::location(L&: llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
77
78static cl::opt<double> HotFunctionDensityThreshold(
79 "hot-function-density-threshold", llvm::cl::init(Val: 1000),
80 llvm::cl::desc(
81 "specify density threshold for hot functions (default: 1000)"),
82 llvm::cl::Optional);
83static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(Val: false),
84 llvm::cl::desc("show profile density details"),
85 llvm::cl::Optional);
86
87static cl::opt<bool> UpdateTotalSamples(
88 "update-total-samples", llvm::cl::init(Val: false),
89 llvm::cl::desc(
90 "Update total samples by accumulating all its body samples."),
91 llvm::cl::Optional);
92
93static cl::opt<bool> GenCSNestedProfile(
94 "gen-cs-nested-profile", cl::Hidden, cl::init(Val: true),
95 cl::desc("Generate nested function profiles for CSSPGO"));
96
97cl::opt<bool> InferMissingFrames(
98 "infer-missing-frames", llvm::cl::init(Val: true),
99 llvm::cl::desc(
100 "Infer missing call frames due to compiler tail call elimination."),
101 llvm::cl::Optional);
102
103using namespace llvm;
104using namespace sampleprof;
105
106namespace llvm {
107extern cl::opt<int> ProfileSummaryCutoffHot;
108extern cl::opt<bool> UseContextLessSummary;
109
110namespace sampleprof {
111
112// Initialize the MaxCompressionSize to -1 which means no size limit
113int32_t CSProfileGenerator::MaxCompressionSize = -1;
114
115int CSProfileGenerator::MaxContextDepth = -1;
116
117bool ProfileGeneratorBase::UseFSDiscriminator = false;
118
119std::unique_ptr<ProfileGeneratorBase>
120ProfileGeneratorBase::create(ProfiledBinary *Binary,
121 const ContextSampleCounterMap *SampleCounters,
122 bool ProfileIsCS) {
123 std::unique_ptr<ProfileGeneratorBase> Generator;
124 if (ProfileIsCS) {
125 Generator.reset(p: new CSProfileGenerator(Binary, SampleCounters));
126 } else {
127 Generator.reset(p: new ProfileGenerator(Binary, SampleCounters));
128 }
129 ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
130 FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
131
132 return Generator;
133}
134
135std::unique_ptr<ProfileGeneratorBase>
136ProfileGeneratorBase::create(ProfiledBinary *Binary, SampleProfileMap &Profiles,
137 bool ProfileIsCS) {
138 std::unique_ptr<ProfileGeneratorBase> Generator;
139 if (ProfileIsCS) {
140 Generator.reset(p: new CSProfileGenerator(Binary, Profiles));
141 } else {
142 Generator.reset(p: new ProfileGenerator(Binary, std::move(Profiles)));
143 }
144 ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
145 FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
146
147 return Generator;
148}
149
150void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer,
151 SampleProfileMap &ProfileMap) {
152 // Populate profile symbol list if extended binary format is used.
153 ProfileSymbolList SymbolList;
154
155 if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) {
156 Binary->populateSymbolListFromDWARF(SymbolList);
157 Writer->setProfileSymbolList(&SymbolList);
158 }
159
160 if (std::error_code EC = Writer->write(ProfileMap))
161 exitWithError(EC: std::move(EC));
162}
163
164void ProfileGeneratorBase::write() {
165 auto WriterOrErr = SampleProfileWriter::create(Filename: OutputFilename, Format: OutputFormat);
166 if (std::error_code EC = WriterOrErr.getError())
167 exitWithError(EC, Whence: OutputFilename);
168
169 if (UseMD5) {
170 if (OutputFormat != SPF_Ext_Binary)
171 WithColor::warning() << "-use-md5 is ignored. Specify "
172 "--format=extbinary to enable it\n";
173 else
174 WriterOrErr.get()->setUseMD5();
175 }
176
177 write(Writer: std::move(WriterOrErr.get()), ProfileMap);
178}
179
180void ProfileGeneratorBase::showDensitySuggestion(double Density) {
181 if (Density == 0.0)
182 WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
183 "set too low. Please check your command.\n";
184 else if (Density < HotFunctionDensityThreshold)
185 WithColor::warning()
186 << "Sample PGO is estimated to optimize better with "
187 << format(Fmt: "%.1f", Vals: HotFunctionDensityThreshold / Density)
188 << "x more samples. Please consider increasing sampling rate or "
189 "profiling for longer duration to get more samples.\n";
190
191 if (ShowDensity)
192 outs() << "Minimum profile density for hot functions with top "
193 << format(Fmt: "%.2f",
194 Vals: static_cast<double>(ProfileSummaryCutoffHot.getValue()) /
195 10000)
196 << "% total samples: " << format(Fmt: "%.1f", Vals: Density) << "\n";
197}
198
199bool ProfileGeneratorBase::filterAmbiguousProfile(FunctionSamples &FS) {
200 for (const auto &Prefix : FuncPrefixsToFilter) {
201 if (FS.getFuncName().starts_with(Prefix))
202 return true;
203 }
204
205 // Filter the function profiles for the inlinees. It's useful for fuzzy
206 // profile matching which flattens the profile and inlinees' samples are
207 // merged into top-level function.
208 for (auto &Callees :
209 const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) {
210 auto &CalleesMap = Callees.second;
211 for (auto I = CalleesMap.begin(); I != CalleesMap.end();) {
212 auto FS = I++;
213 if (filterAmbiguousProfile(FS&: FS->second))
214 CalleesMap.erase(position: FS);
215 }
216 }
217 return false;
218}
219
220// For built-in local initialization function such as __cxx_global_var_init,
221// __tls_init prefix function, there could be multiple versions of the functions
222// in the final binary. However, in the profile generation, we call
223// getCanonicalFnName to canonicalize the names which strips the suffixes.
224// Therefore, samples from different functions queries the same profile and the
225// samples are merged. As the functions are essentially different, entries of
226// the merged profile are ambiguous. In sample loader, the IR from one version
227// would be attributed towards a merged entries, which is inaccurate. Especially
228// for fuzzy profile matching, it gets multiple callsites(from different
229// function) but used to match one callsite, which misleads the matching and
230// causes a lot of false positives report. Hence, we want to filter them out
231// from the profile map during the profile generation time. The profiles are all
232// cold functions, it won't have perf impact.
233void ProfileGeneratorBase::filterAmbiguousProfile(SampleProfileMap &Profiles) {
234 for (auto I = ProfileMap.begin(); I != ProfileMap.end();) {
235 auto FS = I++;
236 if (filterAmbiguousProfile(FS&: FS->second))
237 ProfileMap.erase(It: FS);
238 }
239}
240
241double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles,
242 uint64_t HotCntThreshold) {
243 double Density = DBL_MAX;
244 std::vector<const FunctionSamples *> HotFuncs;
245 for (auto &I : Profiles) {
246 auto &FuncSamples = I.second;
247 if (FuncSamples.getTotalSamples() < HotCntThreshold)
248 continue;
249 HotFuncs.emplace_back(args: &FuncSamples);
250 }
251
252 for (auto *FuncSamples : HotFuncs) {
253 auto *Func = Binary->getBinaryFunction(FName: FuncSamples->getFunction());
254 if (!Func)
255 continue;
256 uint64_t FuncSize = Func->getFuncSize();
257 if (FuncSize == 0)
258 continue;
259 Density =
260 std::min(a: Density, b: static_cast<double>(FuncSamples->getTotalSamples()) /
261 FuncSize);
262 }
263
264 return Density == DBL_MAX ? 0.0 : Density;
265}
266
267void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges,
268 const RangeSample &Ranges) {
269
270 /*
271 Regions may overlap with each other. Using the boundary info, find all
272 disjoint ranges and their sample count. BoundaryPoint contains the count
273 multiple samples begin/end at this points.
274
275 |<--100-->| Sample1
276 |<------200------>| Sample2
277 A B C
278
279 In the example above,
280 Sample1 begins at A, ends at B, its value is 100.
281 Sample2 beings at A, ends at C, its value is 200.
282 For A, BeginCount is the sum of sample begins at A, which is 300 and no
283 samples ends at A, so EndCount is 0.
284 Then boundary points A, B, and C with begin/end counts are:
285 A: (300, 0)
286 B: (0, 100)
287 C: (0, 200)
288 */
289 struct BoundaryPoint {
290 // Sum of sample counts beginning at this point
291 uint64_t BeginCount = UINT64_MAX;
292 // Sum of sample counts ending at this point
293 uint64_t EndCount = UINT64_MAX;
294 // Is the begin point of a zero range.
295 bool IsZeroRangeBegin = false;
296 // Is the end point of a zero range.
297 bool IsZeroRangeEnd = false;
298
299 void addBeginCount(uint64_t Count) {
300 if (BeginCount == UINT64_MAX)
301 BeginCount = 0;
302 BeginCount += Count;
303 }
304
305 void addEndCount(uint64_t Count) {
306 if (EndCount == UINT64_MAX)
307 EndCount = 0;
308 EndCount += Count;
309 }
310 };
311
312 /*
313 For the above example. With boundary points, follwing logic finds two
314 disjoint region of
315
316 [A,B]: 300
317 [B+1,C]: 200
318
319 If there is a boundary point that both begin and end, the point itself
320 becomes a separate disjoint region. For example, if we have original
321 ranges of
322
323 |<--- 100 --->|
324 |<--- 200 --->|
325 A B C
326
327 there are three boundary points with their begin/end counts of
328
329 A: (100, 0)
330 B: (200, 100)
331 C: (0, 200)
332
333 the disjoint ranges would be
334
335 [A, B-1]: 100
336 [B, B]: 300
337 [B+1, C]: 200.
338
339 Example for zero value range:
340
341 |<--- 100 --->|
342 |<--- 200 --->|
343 |<--------------- 0 ----------------->|
344 A B C D E F
345
346 [A, B-1] : 0
347 [B, C] : 100
348 [C+1, D-1]: 0
349 [D, E] : 200
350 [E+1, F] : 0
351 */
352 std::map<uint64_t, BoundaryPoint> Boundaries;
353
354 for (const auto &Item : Ranges) {
355 assert(Item.first.first <= Item.first.second &&
356 "Invalid instruction range");
357 auto &BeginPoint = Boundaries[Item.first.first];
358 auto &EndPoint = Boundaries[Item.first.second];
359 uint64_t Count = Item.second;
360
361 BeginPoint.addBeginCount(Count);
362 EndPoint.addEndCount(Count);
363 if (Count == 0) {
364 BeginPoint.IsZeroRangeBegin = true;
365 EndPoint.IsZeroRangeEnd = true;
366 }
367 }
368
369 // Use UINT64_MAX to indicate there is no existing range between BeginAddress
370 // and the next valid address
371 uint64_t BeginAddress = UINT64_MAX;
372 int ZeroRangeDepth = 0;
373 uint64_t Count = 0;
374 for (const auto &Item : Boundaries) {
375 uint64_t Address = Item.first;
376 const BoundaryPoint &Point = Item.second;
377 if (Point.BeginCount != UINT64_MAX) {
378 if (BeginAddress != UINT64_MAX)
379 DisjointRanges[{BeginAddress, Address - 1}] = Count;
380 Count += Point.BeginCount;
381 BeginAddress = Address;
382 ZeroRangeDepth += Point.IsZeroRangeBegin;
383 }
384 if (Point.EndCount != UINT64_MAX) {
385 assert((BeginAddress != UINT64_MAX) &&
386 "First boundary point cannot be 'end' point");
387 DisjointRanges[{BeginAddress, Address}] = Count;
388 assert(Count >= Point.EndCount && "Mismatched live ranges");
389 Count -= Point.EndCount;
390 BeginAddress = Address + 1;
391 ZeroRangeDepth -= Point.IsZeroRangeEnd;
392 // If the remaining count is zero and it's no longer in a zero range, this
393 // means we consume all the ranges before, thus mark BeginAddress as
394 // UINT64_MAX. e.g. supposing we have two non-overlapping ranges:
395 // [<---- 10 ---->]
396 // [<---- 20 ---->]
397 // A B C D
398 // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't
399 // have the [B+1, C-1] zero range.
400 if (Count == 0 && ZeroRangeDepth == 0)
401 BeginAddress = UINT64_MAX;
402 }
403 }
404}
405
406void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
407 FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc,
408 uint64_t Count) {
409 // Use the maximum count of samples with same line location
410 uint32_t Discriminator = getBaseDiscriminator(Discriminator: LeafLoc.Location.Discriminator);
411
412 // Use duplication factor to compensated for loop unroll/vectorization.
413 // Note that this is only needed when we're taking MAX of the counts at
414 // the location instead of SUM.
415 Count *= getDuplicationFactor(Discriminator: LeafLoc.Location.Discriminator);
416
417 ErrorOr<uint64_t> R =
418 FunctionProfile.findSamplesAt(LineOffset: LeafLoc.Location.LineOffset, Discriminator);
419
420 uint64_t PreviousCount = R ? R.get() : 0;
421 if (PreviousCount <= Count) {
422 FunctionProfile.addBodySamples(LineOffset: LeafLoc.Location.LineOffset, Discriminator,
423 Num: Count - PreviousCount);
424 }
425}
426
427void ProfileGeneratorBase::updateTotalSamples() {
428 for (auto &Item : ProfileMap) {
429 FunctionSamples &FunctionProfile = Item.second;
430 FunctionProfile.updateTotalSamples();
431 }
432}
433
434void ProfileGeneratorBase::updateCallsiteSamples() {
435 for (auto &Item : ProfileMap) {
436 FunctionSamples &FunctionProfile = Item.second;
437 FunctionProfile.updateCallsiteSamples();
438 }
439}
440
441void ProfileGeneratorBase::updateFunctionSamples() {
442 updateCallsiteSamples();
443
444 if (UpdateTotalSamples)
445 updateTotalSamples();
446}
447
448void ProfileGeneratorBase::collectProfiledFunctions() {
449 std::unordered_set<const BinaryFunction *> ProfiledFunctions;
450 if (collectFunctionsFromRawProfile(ProfiledFunctions))
451 Binary->setProfiledFunctions(ProfiledFunctions);
452 else if (collectFunctionsFromLLVMProfile(ProfiledFunctions))
453 Binary->setProfiledFunctions(ProfiledFunctions);
454 else
455 llvm_unreachable("Unsupported input profile");
456}
457
458bool ProfileGeneratorBase::collectFunctionsFromRawProfile(
459 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
460 if (!SampleCounters)
461 return false;
462 // Go through all the stacks, ranges and branches in sample counters, use
463 // the start of the range to look up the function it belongs and record the
464 // function.
465 for (const auto &CI : *SampleCounters) {
466 if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(Val: CI.first.getPtr())) {
467 for (auto StackAddr : CtxKey->Context) {
468 if (FuncRange *FRange = Binary->findFuncRange(Address: StackAddr))
469 ProfiledFunctions.insert(x: FRange->Func);
470 }
471 }
472
473 for (auto Item : CI.second.RangeCounter) {
474 uint64_t StartAddress = Item.first.first;
475 if (FuncRange *FRange = Binary->findFuncRange(Address: StartAddress))
476 ProfiledFunctions.insert(x: FRange->Func);
477 }
478
479 for (auto Item : CI.second.BranchCounter) {
480 uint64_t SourceAddress = Item.first.first;
481 uint64_t TargetAddress = Item.first.second;
482 if (FuncRange *FRange = Binary->findFuncRange(Address: SourceAddress))
483 ProfiledFunctions.insert(x: FRange->Func);
484 if (FuncRange *FRange = Binary->findFuncRange(Address: TargetAddress))
485 ProfiledFunctions.insert(x: FRange->Func);
486 }
487 }
488 return true;
489}
490
491bool ProfileGenerator::collectFunctionsFromLLVMProfile(
492 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
493 for (const auto &FS : ProfileMap) {
494 if (auto *Func = Binary->getBinaryFunction(FName: FS.second.getFunction()))
495 ProfiledFunctions.insert(x: Func);
496 }
497 return true;
498}
499
500bool CSProfileGenerator::collectFunctionsFromLLVMProfile(
501 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
502 for (auto *Node : ContextTracker) {
503 if (!Node->getFuncName().empty())
504 if (auto *Func = Binary->getBinaryFunction(FName: Node->getFuncName()))
505 ProfiledFunctions.insert(x: Func);
506 }
507 return true;
508}
509
510FunctionSamples &
511ProfileGenerator::getTopLevelFunctionProfile(FunctionId FuncName) {
512 SampleContext Context(FuncName);
513 return ProfileMap.Create(Ctx: Context);
514}
515
516void ProfileGenerator::generateProfile() {
517 collectProfiledFunctions();
518
519 if (Binary->usePseudoProbes())
520 Binary->decodePseudoProbe();
521
522 if (SampleCounters) {
523 if (Binary->usePseudoProbes()) {
524 generateProbeBasedProfile();
525 } else {
526 generateLineNumBasedProfile();
527 }
528 }
529
530 postProcessProfiles();
531}
532
533void ProfileGenerator::postProcessProfiles() {
534 computeSummaryAndThreshold(ProfileMap);
535 trimColdProfiles(Profiles: ProfileMap, ColdCntThreshold: ColdCountThreshold);
536 filterAmbiguousProfile(Profiles&: ProfileMap);
537 calculateAndShowDensity(Profiles: ProfileMap);
538}
539
540void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
541 uint64_t ColdCntThreshold) {
542 if (!TrimColdProfile)
543 return;
544
545 // Move cold profiles into a tmp container.
546 std::vector<hash_code> ColdProfileHashes;
547 for (const auto &I : ProfileMap) {
548 if (I.second.getTotalSamples() < ColdCntThreshold)
549 ColdProfileHashes.emplace_back(args: I.first);
550 }
551
552 // Remove the cold profile from ProfileMap.
553 for (const auto &I : ColdProfileHashes)
554 ProfileMap.erase(Key: I);
555}
556
557void ProfileGenerator::generateLineNumBasedProfile() {
558 assert(SampleCounters->size() == 1 &&
559 "Must have one entry for profile generation.");
560 const SampleCounter &SC = SampleCounters->begin()->second;
561 // Fill in function body samples
562 populateBodySamplesForAllFunctions(RangeCounter: SC.RangeCounter);
563 // Fill in boundary sample counts as well as call site samples for calls
564 populateBoundarySamplesForAllFunctions(BranchCounters: SC.BranchCounter);
565
566 updateFunctionSamples();
567}
568
569void ProfileGenerator::generateProbeBasedProfile() {
570 assert(SampleCounters->size() == 1 &&
571 "Must have one entry for profile generation.");
572 // Enable pseudo probe functionalities in SampleProf
573 FunctionSamples::ProfileIsProbeBased = true;
574 const SampleCounter &SC = SampleCounters->begin()->second;
575 // Fill in function body samples
576 populateBodySamplesWithProbesForAllFunctions(RangeCounter: SC.RangeCounter);
577 // Fill in boundary sample counts as well as call site samples for calls
578 populateBoundarySamplesWithProbesForAllFunctions(BranchCounters: SC.BranchCounter);
579
580 updateFunctionSamples();
581}
582
583void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
584 const RangeSample &RangeCounter) {
585 ProbeCounterMap ProbeCounter;
586 // preprocessRangeCounter returns disjoint ranges, so no longer to redo it
587 // inside extractProbesFromRange.
588 extractProbesFromRange(RangeCounter: preprocessRangeCounter(RangeCounter), ProbeCounter,
589 FindDisjointRanges: false);
590
591 for (const auto &PI : ProbeCounter) {
592 const MCDecodedPseudoProbe *Probe = PI.first;
593 uint64_t Count = PI.second;
594 SampleContextFrameVector FrameVec;
595 Binary->getInlineContextForProbe(Probe, InlineContextStack&: FrameVec, IncludeLeaf: true);
596 FunctionSamples &FunctionProfile =
597 getLeafProfileAndAddTotalSamples(FrameVec, Count);
598 FunctionProfile.addBodySamples(LineOffset: Probe->getIndex(), Discriminator: Probe->getDiscriminator(),
599 Num: Count);
600 if (Probe->isEntry())
601 FunctionProfile.addHeadSamples(Num: Count);
602 }
603}
604
605void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions(
606 const BranchSample &BranchCounters) {
607 for (const auto &Entry : BranchCounters) {
608 uint64_t SourceAddress = Entry.first.first;
609 uint64_t TargetAddress = Entry.first.second;
610 uint64_t Count = Entry.second;
611 assert(Count != 0 && "Unexpected zero weight branch");
612
613 StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
614 if (CalleeName.size() == 0)
615 continue;
616
617 const MCDecodedPseudoProbe *CallProbe =
618 Binary->getCallProbeForAddr(Address: SourceAddress);
619 if (CallProbe == nullptr)
620 continue;
621
622 // Record called target sample and its count.
623 SampleContextFrameVector FrameVec;
624 Binary->getInlineContextForProbe(Probe: CallProbe, InlineContextStack&: FrameVec, IncludeLeaf: true);
625
626 if (!FrameVec.empty()) {
627 FunctionSamples &FunctionProfile =
628 getLeafProfileAndAddTotalSamples(FrameVec, Count: 0);
629 FunctionProfile.addCalledTargetSamples(
630 LineOffset: FrameVec.back().Location.LineOffset,
631 Discriminator: FrameVec.back().Location.Discriminator,
632 Func: FunctionId(CalleeName), Num: Count);
633 }
634 }
635}
636
637FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
638 const SampleContextFrameVector &FrameVec, uint64_t Count) {
639 // Get top level profile
640 FunctionSamples *FunctionProfile =
641 &getTopLevelFunctionProfile(FuncName: FrameVec[0].Func);
642 FunctionProfile->addTotalSamples(Num: Count);
643 if (Binary->usePseudoProbes()) {
644 const auto *FuncDesc = Binary->getFuncDescForGUID(
645 GUID: FunctionProfile->getFunction().getHashCode());
646 FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
647 }
648
649 for (size_t I = 1; I < FrameVec.size(); I++) {
650 LineLocation Callsite(
651 FrameVec[I - 1].Location.LineOffset,
652 getBaseDiscriminator(Discriminator: FrameVec[I - 1].Location.Discriminator));
653 FunctionSamplesMap &SamplesMap =
654 FunctionProfile->functionSamplesAt(Loc: Callsite);
655 auto Ret =
656 SamplesMap.emplace(args: FrameVec[I].Func, args: FunctionSamples());
657 if (Ret.second) {
658 SampleContext Context(FrameVec[I].Func);
659 Ret.first->second.setContext(Context);
660 }
661 FunctionProfile = &Ret.first->second;
662 FunctionProfile->addTotalSamples(Num: Count);
663 if (Binary->usePseudoProbes()) {
664 const auto *FuncDesc = Binary->getFuncDescForGUID(
665 GUID: FunctionProfile->getFunction().getHashCode());
666 FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
667 }
668 }
669
670 return *FunctionProfile;
671}
672
673RangeSample
674ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
675 RangeSample Ranges(RangeCounter.begin(), RangeCounter.end());
676 if (FillZeroForAllFuncs) {
677 for (auto &FuncI : Binary->getAllBinaryFunctions()) {
678 for (auto &R : FuncI.second.Ranges) {
679 Ranges[{R.first, R.second - 1}] += 0;
680 }
681 }
682 } else {
683 // For each range, we search for all ranges of the function it belongs to
684 // and initialize it with zero count, so it remains zero if doesn't hit any
685 // samples. This is to be consistent with compiler that interpret zero count
686 // as unexecuted(cold).
687 for (const auto &I : RangeCounter) {
688 uint64_t StartAddress = I.first.first;
689 for (const auto &Range : Binary->getRanges(Address: StartAddress))
690 Ranges[{Range.first, Range.second - 1}] += 0;
691 }
692 }
693 RangeSample DisjointRanges;
694 findDisjointRanges(DisjointRanges, Ranges);
695 return DisjointRanges;
696}
697
698void ProfileGenerator::populateBodySamplesForAllFunctions(
699 const RangeSample &RangeCounter) {
700 for (const auto &Range : preprocessRangeCounter(RangeCounter)) {
701 uint64_t RangeBegin = Range.first.first;
702 uint64_t RangeEnd = Range.first.second;
703 uint64_t Count = Range.second;
704
705 InstructionPointer IP(Binary, RangeBegin, true);
706 // Disjoint ranges may have range in the middle of two instr,
707 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
708 // can be Addr1+1 to Addr2-1. We should ignore such range.
709 if (IP.Address > RangeEnd)
710 continue;
711
712 do {
713 const SampleContextFrameVector FrameVec =
714 Binary->getFrameLocationStack(Address: IP.Address);
715 if (!FrameVec.empty()) {
716 // FIXME: As accumulating total count per instruction caused some
717 // regression, we changed to accumulate total count per byte as a
718 // workaround. Tuning hotness threshold on the compiler side might be
719 // necessary in the future.
720 FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(
721 FrameVec, Count: Count * Binary->getInstSize(Address: IP.Address));
722 updateBodySamplesforFunctionProfile(FunctionProfile, LeafLoc: FrameVec.back(),
723 Count);
724 }
725 } while (IP.advance() && IP.Address <= RangeEnd);
726 }
727}
728
729StringRef
730ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress) {
731 // Get the function range by branch target if it's a call branch.
732 auto *FRange = Binary->findFuncRangeForStartAddr(Address: TargetAddress);
733
734 // We won't accumulate sample count for a range whose start is not the real
735 // function entry such as outlined function or inner labels.
736 if (!FRange || !FRange->IsFuncEntry)
737 return StringRef();
738
739 return FunctionSamples::getCanonicalFnName(FnName: FRange->getFuncName());
740}
741
742void ProfileGenerator::populateBoundarySamplesForAllFunctions(
743 const BranchSample &BranchCounters) {
744 for (const auto &Entry : BranchCounters) {
745 uint64_t SourceAddress = Entry.first.first;
746 uint64_t TargetAddress = Entry.first.second;
747 uint64_t Count = Entry.second;
748 assert(Count != 0 && "Unexpected zero weight branch");
749
750 StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
751 if (CalleeName.size() == 0)
752 continue;
753 // Record called target sample and its count.
754 const SampleContextFrameVector &FrameVec =
755 Binary->getCachedFrameLocationStack(Address: SourceAddress);
756 if (!FrameVec.empty()) {
757 FunctionSamples &FunctionProfile =
758 getLeafProfileAndAddTotalSamples(FrameVec, Count: 0);
759 FunctionProfile.addCalledTargetSamples(
760 LineOffset: FrameVec.back().Location.LineOffset,
761 Discriminator: getBaseDiscriminator(Discriminator: FrameVec.back().Location.Discriminator),
762 Func: FunctionId(CalleeName), Num: Count);
763 }
764 // Add head samples for callee.
765 FunctionSamples &CalleeProfile =
766 getTopLevelFunctionProfile(FuncName: FunctionId(CalleeName));
767 CalleeProfile.addHeadSamples(Num: Count);
768 }
769}
770
771void ProfileGeneratorBase::calculateAndShowDensity(
772 const SampleProfileMap &Profiles) {
773 double Density = calculateDensity(Profiles, HotCntThreshold: HotCountThreshold);
774 showDensitySuggestion(Density);
775}
776
777FunctionSamples *
778CSProfileGenerator::getOrCreateFunctionSamples(ContextTrieNode *ContextNode,
779 bool WasLeafInlined) {
780 FunctionSamples *FProfile = ContextNode->getFunctionSamples();
781 if (!FProfile) {
782 FSamplesList.emplace_back();
783 FProfile = &FSamplesList.back();
784 FProfile->setFunction(ContextNode->getFuncName());
785 ContextNode->setFunctionSamples(FProfile);
786 }
787 // Update ContextWasInlined attribute for existing contexts.
788 // The current function can be called in two ways:
789 // - when processing a probe of the current frame
790 // - when processing the entry probe of an inlinee's frame, which
791 // is then used to update the callsite count of the current frame.
792 // The two can happen in any order, hence here we are making sure
793 // `ContextWasInlined` is always set as expected.
794 // TODO: Note that the former does not always happen if no probes of the
795 // current frame has samples, and if the latter happens, we could lose the
796 // attribute. This should be fixed.
797 if (WasLeafInlined)
798 FProfile->getContext().setAttribute(ContextWasInlined);
799 return FProfile;
800}
801
802ContextTrieNode *
803CSProfileGenerator::getOrCreateContextNode(const SampleContextFrames Context,
804 bool WasLeafInlined) {
805 ContextTrieNode *ContextNode =
806 ContextTracker.getOrCreateContextPath(Context, AllowCreate: true);
807 getOrCreateFunctionSamples(ContextNode, WasLeafInlined);
808 return ContextNode;
809}
810
811void CSProfileGenerator::generateProfile() {
812 FunctionSamples::ProfileIsCS = true;
813
814 collectProfiledFunctions();
815
816 if (Binary->usePseudoProbes()) {
817 Binary->decodePseudoProbe();
818 if (InferMissingFrames)
819 initializeMissingFrameInferrer();
820 }
821
822 if (SampleCounters) {
823 if (Binary->usePseudoProbes()) {
824 generateProbeBasedProfile();
825 } else {
826 generateLineNumBasedProfile();
827 }
828 }
829
830 if (Binary->getTrackFuncContextSize())
831 computeSizeForProfiledFunctions();
832
833 postProcessProfiles();
834}
835
836void CSProfileGenerator::initializeMissingFrameInferrer() {
837 Binary->getMissingContextInferrer()->initialize(SampleCounters);
838}
839
840void CSProfileGenerator::inferMissingFrames(
841 const SmallVectorImpl<uint64_t> &Context,
842 SmallVectorImpl<uint64_t> &NewContext) {
843 Binary->inferMissingFrames(Context, NewContext);
844}
845
846void CSProfileGenerator::computeSizeForProfiledFunctions() {
847 for (auto *Func : Binary->getProfiledFunctions())
848 Binary->computeInlinedContextSizeForFunc(Func);
849
850 // Flush the symbolizer to save memory.
851 Binary->flushSymbolizer();
852}
853
854void CSProfileGenerator::updateFunctionSamples() {
855 for (auto *Node : ContextTracker) {
856 FunctionSamples *FSamples = Node->getFunctionSamples();
857 if (FSamples) {
858 if (UpdateTotalSamples)
859 FSamples->updateTotalSamples();
860 FSamples->updateCallsiteSamples();
861 }
862 }
863}
864
865void CSProfileGenerator::generateLineNumBasedProfile() {
866 for (const auto &CI : *SampleCounters) {
867 const auto *CtxKey = cast<StringBasedCtxKey>(Val: CI.first.getPtr());
868
869 ContextTrieNode *ContextNode = &getRootContext();
870 // Sample context will be empty if the jump is an external-to-internal call
871 // pattern, the head samples should be added for the internal function.
872 if (!CtxKey->Context.empty()) {
873 // Get or create function profile for the range
874 ContextNode =
875 getOrCreateContextNode(Context: CtxKey->Context, WasLeafInlined: CtxKey->WasLeafInlined);
876 // Fill in function body samples
877 populateBodySamplesForFunction(FunctionProfile&: *ContextNode->getFunctionSamples(),
878 RangeCounters: CI.second.RangeCounter);
879 }
880 // Fill in boundary sample counts as well as call site samples for calls
881 populateBoundarySamplesForFunction(CallerNode: ContextNode, BranchCounters: CI.second.BranchCounter);
882 }
883 // Fill in call site value sample for inlined calls and also use context to
884 // infer missing samples. Since we don't have call count for inlined
885 // functions, we estimate it from inlinee's profile using the entry of the
886 // body sample.
887 populateInferredFunctionSamples(Node&: getRootContext());
888
889 updateFunctionSamples();
890}
891
892void CSProfileGenerator::populateBodySamplesForFunction(
893 FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) {
894 // Compute disjoint ranges first, so we can use MAX
895 // for calculating count for each location.
896 RangeSample Ranges;
897 findDisjointRanges(DisjointRanges&: Ranges, Ranges: RangeCounter);
898 for (const auto &Range : Ranges) {
899 uint64_t RangeBegin = Range.first.first;
900 uint64_t RangeEnd = Range.first.second;
901 uint64_t Count = Range.second;
902 // Disjoint ranges have introduce zero-filled gap that
903 // doesn't belong to current context, filter them out.
904 if (Count == 0)
905 continue;
906
907 InstructionPointer IP(Binary, RangeBegin, true);
908 // Disjoint ranges may have range in the middle of two instr,
909 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
910 // can be Addr1+1 to Addr2-1. We should ignore such range.
911 if (IP.Address > RangeEnd)
912 continue;
913
914 do {
915 auto LeafLoc = Binary->getInlineLeafFrameLoc(Address: IP.Address);
916 if (LeafLoc) {
917 // Recording body sample for this specific context
918 updateBodySamplesforFunctionProfile(FunctionProfile, LeafLoc: *LeafLoc, Count);
919 FunctionProfile.addTotalSamples(Num: Count);
920 }
921 } while (IP.advance() && IP.Address <= RangeEnd);
922 }
923}
924
925void CSProfileGenerator::populateBoundarySamplesForFunction(
926 ContextTrieNode *Node, const BranchSample &BranchCounters) {
927
928 for (const auto &Entry : BranchCounters) {
929 uint64_t SourceAddress = Entry.first.first;
930 uint64_t TargetAddress = Entry.first.second;
931 uint64_t Count = Entry.second;
932 assert(Count != 0 && "Unexpected zero weight branch");
933
934 StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
935 if (CalleeName.size() == 0)
936 continue;
937
938 ContextTrieNode *CallerNode = Node;
939 LineLocation CalleeCallSite(0, 0);
940 if (CallerNode != &getRootContext()) {
941 // Record called target sample and its count
942 auto LeafLoc = Binary->getInlineLeafFrameLoc(Address: SourceAddress);
943 if (LeafLoc) {
944 CallerNode->getFunctionSamples()->addCalledTargetSamples(
945 LineOffset: LeafLoc->Location.LineOffset,
946 Discriminator: getBaseDiscriminator(Discriminator: LeafLoc->Location.Discriminator),
947 Func: FunctionId(CalleeName),
948 Num: Count);
949 // Record head sample for called target(callee)
950 CalleeCallSite = LeafLoc->Location;
951 }
952 }
953
954 ContextTrieNode *CalleeNode =
955 CallerNode->getOrCreateChildContext(CallSite: CalleeCallSite,
956 ChildName: FunctionId(CalleeName));
957 FunctionSamples *CalleeProfile = getOrCreateFunctionSamples(ContextNode: CalleeNode);
958 CalleeProfile->addHeadSamples(Num: Count);
959 }
960}
961
962void CSProfileGenerator::populateInferredFunctionSamples(
963 ContextTrieNode &Node) {
964 // There is no call jmp sample between the inliner and inlinee, we need to use
965 // the inlinee's context to infer inliner's context, i.e. parent(inliner)'s
966 // sample depends on child(inlinee)'s sample, so traverse the tree in
967 // post-order.
968 for (auto &It : Node.getAllChildContext())
969 populateInferredFunctionSamples(Node&: It.second);
970
971 FunctionSamples *CalleeProfile = Node.getFunctionSamples();
972 if (!CalleeProfile)
973 return;
974 // If we already have head sample counts, we must have value profile
975 // for call sites added already. Skip to avoid double counting.
976 if (CalleeProfile->getHeadSamples())
977 return;
978 ContextTrieNode *CallerNode = Node.getParentContext();
979 // If we don't have context, nothing to do for caller's call site.
980 // This could happen for entry point function.
981 if (CallerNode == &getRootContext())
982 return;
983
984 LineLocation CallerLeafFrameLoc = Node.getCallSiteLoc();
985 FunctionSamples &CallerProfile = *getOrCreateFunctionSamples(ContextNode: CallerNode);
986 // Since we don't have call count for inlined functions, we
987 // estimate it from inlinee's profile using entry body sample.
988 uint64_t EstimatedCallCount = CalleeProfile->getHeadSamplesEstimate();
989 // If we don't have samples with location, use 1 to indicate live.
990 if (!EstimatedCallCount && !CalleeProfile->getBodySamples().size())
991 EstimatedCallCount = 1;
992 CallerProfile.addCalledTargetSamples(LineOffset: CallerLeafFrameLoc.LineOffset,
993 Discriminator: CallerLeafFrameLoc.Discriminator,
994 Func: Node.getFuncName(), Num: EstimatedCallCount);
995 CallerProfile.addBodySamples(LineOffset: CallerLeafFrameLoc.LineOffset,
996 Discriminator: CallerLeafFrameLoc.Discriminator,
997 Num: EstimatedCallCount);
998 CallerProfile.addTotalSamples(Num: EstimatedCallCount);
999}
1000
1001void CSProfileGenerator::convertToProfileMap(
1002 ContextTrieNode &Node, SampleContextFrameVector &Context) {
1003 FunctionSamples *FProfile = Node.getFunctionSamples();
1004 if (FProfile) {
1005 Context.emplace_back(Args: Node.getFuncName(), Args: LineLocation(0, 0));
1006 // Save the new context for future references.
1007 SampleContextFrames NewContext = *Contexts.insert(x: Context).first;
1008 auto Ret = ProfileMap.emplace(Args&: NewContext, Args: std::move(*FProfile));
1009 FunctionSamples &NewProfile = Ret.first->second;
1010 NewProfile.getContext().setContext(Context: NewContext);
1011 Context.pop_back();
1012 }
1013
1014 for (auto &It : Node.getAllChildContext()) {
1015 ContextTrieNode &ChildNode = It.second;
1016 Context.emplace_back(Args: Node.getFuncName(), Args: ChildNode.getCallSiteLoc());
1017 convertToProfileMap(Node&: ChildNode, Context);
1018 Context.pop_back();
1019 }
1020}
1021
1022void CSProfileGenerator::convertToProfileMap() {
1023 assert(ProfileMap.empty() &&
1024 "ProfileMap should be empty before converting from the trie");
1025 assert(IsProfileValidOnTrie &&
1026 "Do not convert the trie twice, it's already destroyed");
1027
1028 SampleContextFrameVector Context;
1029 for (auto &It : getRootContext().getAllChildContext())
1030 convertToProfileMap(Node&: It.second, Context);
1031
1032 IsProfileValidOnTrie = false;
1033}
1034
1035void CSProfileGenerator::postProcessProfiles() {
1036 // Compute hot/cold threshold based on profile. This will be used for cold
1037 // context profile merging/trimming.
1038 computeSummaryAndThreshold();
1039
1040 // Run global pre-inliner to adjust/merge context profile based on estimated
1041 // inline decisions.
1042 if (EnableCSPreInliner) {
1043 ContextTracker.populateFuncToCtxtMap();
1044 CSPreInliner(ContextTracker, *Binary, Summary.get()).run();
1045 // Turn off the profile merger by default unless it is explicitly enabled.
1046 if (!CSProfMergeColdContext.getNumOccurrences())
1047 CSProfMergeColdContext = false;
1048 }
1049
1050 convertToProfileMap();
1051
1052 // Trim and merge cold context profile using cold threshold above.
1053 if (TrimColdProfile || CSProfMergeColdContext) {
1054 SampleContextTrimmer(ProfileMap)
1055 .trimAndMergeColdContextProfiles(
1056 ColdCountThreshold: HotCountThreshold, TrimColdContext: TrimColdProfile, MergeColdContext: CSProfMergeColdContext,
1057 ColdContextFrameLength: CSProfMaxColdContextDepth, TrimBaseProfileOnly: EnableCSPreInliner);
1058 }
1059
1060 // Merge function samples of CS profile to calculate profile density.
1061 sampleprof::SampleProfileMap ContextLessProfiles;
1062 ProfileConverter::flattenProfile(InputProfiles: ProfileMap, OutputProfiles&: ContextLessProfiles, ProfileIsCS: true);
1063
1064 calculateAndShowDensity(Profiles: ContextLessProfiles);
1065 if (GenCSNestedProfile) {
1066 ProfileConverter CSConverter(ProfileMap);
1067 CSConverter.convertCSProfiles();
1068 FunctionSamples::ProfileIsCS = false;
1069 }
1070 filterAmbiguousProfile(Profiles&: ProfileMap);
1071}
1072
1073void ProfileGeneratorBase::computeSummaryAndThreshold(
1074 SampleProfileMap &Profiles) {
1075 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1076 Summary = Builder.computeSummaryForProfiles(Profiles);
1077 HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
1078 DS: (Summary->getDetailedSummary()));
1079 ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
1080 DS: (Summary->getDetailedSummary()));
1081}
1082
1083void CSProfileGenerator::computeSummaryAndThreshold() {
1084 // Always merge and use context-less profile map to compute summary.
1085 SampleProfileMap ContextLessProfiles;
1086 ContextTracker.createContextLessProfileMap(ContextLessProfiles);
1087
1088 // Set the flag below to avoid merging the profile again in
1089 // computeSummaryAndThreshold
1090 FunctionSamples::ProfileIsCS = false;
1091 assert(
1092 (!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) &&
1093 "Don't set --profile-summary-contextless to false for profile "
1094 "generation");
1095 ProfileGeneratorBase::computeSummaryAndThreshold(Profiles&: ContextLessProfiles);
1096 // Recover the old value.
1097 FunctionSamples::ProfileIsCS = true;
1098}
1099
1100void ProfileGeneratorBase::extractProbesFromRange(
1101 const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter,
1102 bool FindDisjointRanges) {
1103 const RangeSample *PRanges = &RangeCounter;
1104 RangeSample Ranges;
1105 if (FindDisjointRanges) {
1106 findDisjointRanges(DisjointRanges&: Ranges, Ranges: RangeCounter);
1107 PRanges = &Ranges;
1108 }
1109
1110 for (const auto &Range : *PRanges) {
1111 uint64_t RangeBegin = Range.first.first;
1112 uint64_t RangeEnd = Range.first.second;
1113 uint64_t Count = Range.second;
1114
1115 InstructionPointer IP(Binary, RangeBegin, true);
1116 // Disjoint ranges may have range in the middle of two instr,
1117 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
1118 // can be Addr1+1 to Addr2-1. We should ignore such range.
1119 if (IP.Address > RangeEnd)
1120 continue;
1121
1122 do {
1123 const AddressProbesMap &Address2ProbesMap =
1124 Binary->getAddress2ProbesMap();
1125 auto It = Address2ProbesMap.find(x: IP.Address);
1126 if (It != Address2ProbesMap.end()) {
1127 for (const auto &Probe : It->second) {
1128 ProbeCounter[&Probe] += Count;
1129 }
1130 }
1131 } while (IP.advance() && IP.Address <= RangeEnd);
1132 }
1133}
1134
1135static void extractPrefixContextStack(SampleContextFrameVector &ContextStack,
1136 const SmallVectorImpl<uint64_t> &AddrVec,
1137 ProfiledBinary *Binary) {
1138 SmallVector<const MCDecodedPseudoProbe *, 16> Probes;
1139 for (auto Address : reverse(C: AddrVec)) {
1140 const MCDecodedPseudoProbe *CallProbe =
1141 Binary->getCallProbeForAddr(Address);
1142 // These could be the cases when a probe is not found at a calliste. Cutting
1143 // off the context from here since the inliner will not know how to consume
1144 // a context with unknown callsites.
1145 // 1. for functions that are not sampled when
1146 // --decode-probe-for-profiled-functions-only is on.
1147 // 2. for a merged callsite. Callsite merging may cause the loss of original
1148 // probe IDs.
1149 // 3. for an external callsite.
1150 if (!CallProbe)
1151 break;
1152 Probes.push_back(Elt: CallProbe);
1153 }
1154
1155 std::reverse(first: Probes.begin(), last: Probes.end());
1156
1157 // Extract context stack for reusing, leaf context stack will be added
1158 // compressed while looking up function profile.
1159 for (const auto *P : Probes) {
1160 Binary->getInlineContextForProbe(Probe: P, InlineContextStack&: ContextStack, IncludeLeaf: true);
1161 }
1162}
1163
1164void CSProfileGenerator::generateProbeBasedProfile() {
1165 // Enable pseudo probe functionalities in SampleProf
1166 FunctionSamples::ProfileIsProbeBased = true;
1167 for (const auto &CI : *SampleCounters) {
1168 const AddrBasedCtxKey *CtxKey =
1169 dyn_cast<AddrBasedCtxKey>(Val: CI.first.getPtr());
1170 // Fill in function body samples from probes, also infer caller's samples
1171 // from callee's probe
1172 populateBodySamplesWithProbes(RangeCounter: CI.second.RangeCounter, CtxKey);
1173 // Fill in boundary samples for a call probe
1174 populateBoundarySamplesWithProbes(BranchCounter: CI.second.BranchCounter, CtxKey);
1175 }
1176}
1177
1178void CSProfileGenerator::populateBodySamplesWithProbes(
1179 const RangeSample &RangeCounter, const AddrBasedCtxKey *CtxKey) {
1180 ProbeCounterMap ProbeCounter;
1181 // Extract the top frame probes by looking up each address among the range in
1182 // the Address2ProbeMap
1183 extractProbesFromRange(RangeCounter, ProbeCounter);
1184 std::unordered_map<MCDecodedPseudoProbeInlineTree *,
1185 std::unordered_set<FunctionSamples *>>
1186 FrameSamples;
1187 for (const auto &PI : ProbeCounter) {
1188 const MCDecodedPseudoProbe *Probe = PI.first;
1189 uint64_t Count = PI.second;
1190 // Disjoint ranges have introduce zero-filled gap that
1191 // doesn't belong to current context, filter them out.
1192 if (!Probe->isBlock() || Count == 0)
1193 continue;
1194
1195 ContextTrieNode *ContextNode = getContextNodeForLeafProbe(CtxKey, LeafProbe: Probe);
1196 FunctionSamples &FunctionProfile = *ContextNode->getFunctionSamples();
1197 // Record the current frame and FunctionProfile whenever samples are
1198 // collected for non-danglie probes. This is for reporting all of the
1199 // zero count probes of the frame later.
1200 FrameSamples[Probe->getInlineTreeNode()].insert(x: &FunctionProfile);
1201 FunctionProfile.addBodySamples(LineOffset: Probe->getIndex(), Discriminator: Probe->getDiscriminator(),
1202 Num: Count);
1203 FunctionProfile.addTotalSamples(Num: Count);
1204 if (Probe->isEntry()) {
1205 FunctionProfile.addHeadSamples(Num: Count);
1206 // Look up for the caller's function profile
1207 const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
1208 ContextTrieNode *CallerNode = ContextNode->getParentContext();
1209 if (InlinerDesc != nullptr && CallerNode != &getRootContext()) {
1210 // Since the context id will be compressed, we have to use callee's
1211 // context id to infer caller's context id to ensure they share the
1212 // same context prefix.
1213 uint64_t CallerIndex = ContextNode->getCallSiteLoc().LineOffset;
1214 uint64_t CallerDiscriminator = ContextNode->getCallSiteLoc().Discriminator;
1215 assert(CallerIndex &&
1216 "Inferred caller's location index shouldn't be zero!");
1217 assert(!CallerDiscriminator &&
1218 "Callsite probe should not have a discriminator!");
1219 FunctionSamples &CallerProfile =
1220 *getOrCreateFunctionSamples(ContextNode: CallerNode);
1221 CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
1222 CallerProfile.addBodySamples(LineOffset: CallerIndex, Discriminator: CallerDiscriminator, Num: Count);
1223 CallerProfile.addTotalSamples(Num: Count);
1224 CallerProfile.addCalledTargetSamples(LineOffset: CallerIndex, Discriminator: CallerDiscriminator,
1225 Func: ContextNode->getFuncName(), Num: Count);
1226 }
1227 }
1228 }
1229
1230 // Assign zero count for remaining probes without sample hits to
1231 // differentiate from probes optimized away, of which the counts are unknown
1232 // and will be inferred by the compiler.
1233 for (auto &I : FrameSamples) {
1234 for (auto *FunctionProfile : I.second) {
1235 for (auto *Probe : I.first->getProbes()) {
1236 FunctionProfile->addBodySamples(LineOffset: Probe->getIndex(),
1237 Discriminator: Probe->getDiscriminator(), Num: 0);
1238 }
1239 }
1240 }
1241}
1242
1243void CSProfileGenerator::populateBoundarySamplesWithProbes(
1244 const BranchSample &BranchCounter, const AddrBasedCtxKey *CtxKey) {
1245 for (const auto &BI : BranchCounter) {
1246 uint64_t SourceAddress = BI.first.first;
1247 uint64_t TargetAddress = BI.first.second;
1248 uint64_t Count = BI.second;
1249 const MCDecodedPseudoProbe *CallProbe =
1250 Binary->getCallProbeForAddr(Address: SourceAddress);
1251 if (CallProbe == nullptr)
1252 continue;
1253 FunctionSamples &FunctionProfile =
1254 getFunctionProfileForLeafProbe(CtxKey, LeafProbe: CallProbe);
1255 FunctionProfile.addBodySamples(LineOffset: CallProbe->getIndex(), Discriminator: 0, Num: Count);
1256 FunctionProfile.addTotalSamples(Num: Count);
1257 StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
1258 if (CalleeName.size() == 0)
1259 continue;
1260 FunctionProfile.addCalledTargetSamples(LineOffset: CallProbe->getIndex(),
1261 Discriminator: CallProbe->getDiscriminator(),
1262 Func: FunctionId(CalleeName), Num: Count);
1263 }
1264}
1265
1266ContextTrieNode *CSProfileGenerator::getContextNodeForLeafProbe(
1267 const AddrBasedCtxKey *CtxKey, const MCDecodedPseudoProbe *LeafProbe) {
1268
1269 const SmallVectorImpl<uint64_t> *PContext = &CtxKey->Context;
1270 SmallVector<uint64_t, 16> NewContext;
1271
1272 if (InferMissingFrames) {
1273 SmallVector<uint64_t, 16> Context = CtxKey->Context;
1274 // Append leaf frame for a complete inference.
1275 Context.push_back(Elt: LeafProbe->getAddress());
1276 inferMissingFrames(Context, NewContext);
1277 // Pop out the leaf probe that was pushed in above.
1278 NewContext.pop_back();
1279 PContext = &NewContext;
1280 }
1281
1282 SampleContextFrameVector ContextStack;
1283 extractPrefixContextStack(ContextStack, AddrVec: *PContext, Binary);
1284
1285 // Explicitly copy the context for appending the leaf context
1286 SampleContextFrameVector NewContextStack(ContextStack.begin(),
1287 ContextStack.end());
1288 Binary->getInlineContextForProbe(Probe: LeafProbe, InlineContextStack&: NewContextStack, IncludeLeaf: true);
1289 // For leaf inlined context with the top frame, we should strip off the top
1290 // frame's probe id, like:
1291 // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
1292 auto LeafFrame = NewContextStack.back();
1293 LeafFrame.Location = LineLocation(0, 0);
1294 NewContextStack.pop_back();
1295 // Compress the context string except for the leaf frame
1296 CSProfileGenerator::compressRecursionContext(Context&: NewContextStack);
1297 CSProfileGenerator::trimContext(S&: NewContextStack);
1298 NewContextStack.push_back(Elt: LeafFrame);
1299
1300 const auto *FuncDesc = Binary->getFuncDescForGUID(GUID: LeafProbe->getGuid());
1301 bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite();
1302 ContextTrieNode *ContextNode =
1303 getOrCreateContextNode(Context: NewContextStack, WasLeafInlined);
1304 ContextNode->getFunctionSamples()->setFunctionHash(FuncDesc->FuncHash);
1305 return ContextNode;
1306}
1307
1308FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe(
1309 const AddrBasedCtxKey *CtxKey, const MCDecodedPseudoProbe *LeafProbe) {
1310 return *getContextNodeForLeafProbe(CtxKey, LeafProbe)->getFunctionSamples();
1311}
1312
1313} // end namespace sampleprof
1314} // end namespace llvm
1315

source code of llvm/tools/llvm-profgen/ProfileGenerator.cpp