1 | //===- SampleProf.h - Sampling profiling format support ---------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains common definitions used in the reading and writing of |
10 | // sample profile data. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_PROFILEDATA_SAMPLEPROF_H |
15 | #define LLVM_PROFILEDATA_SAMPLEPROF_H |
16 | |
17 | #include "llvm/ADT/DenseSet.h" |
18 | #include "llvm/ADT/SmallVector.h" |
19 | #include "llvm/ADT/StringExtras.h" |
20 | #include "llvm/ADT/StringRef.h" |
21 | #include "llvm/IR/Function.h" |
22 | #include "llvm/IR/GlobalValue.h" |
23 | #include "llvm/ProfileData/FunctionId.h" |
24 | #include "llvm/Support/Allocator.h" |
25 | #include "llvm/Support/Debug.h" |
26 | #include "llvm/Support/ErrorOr.h" |
27 | #include "llvm/Support/MathExtras.h" |
28 | #include "llvm/ProfileData/HashKeyMap.h" |
29 | #include <algorithm> |
30 | #include <cstdint> |
31 | #include <list> |
32 | #include <map> |
33 | #include <set> |
34 | #include <sstream> |
35 | #include <string> |
36 | #include <system_error> |
37 | #include <unordered_map> |
38 | #include <utility> |
39 | |
40 | namespace llvm { |
41 | |
42 | class DILocation; |
43 | class raw_ostream; |
44 | |
45 | const std::error_category &sampleprof_category(); |
46 | |
47 | enum class sampleprof_error { |
48 | success = 0, |
49 | bad_magic, |
50 | unsupported_version, |
51 | too_large, |
52 | truncated, |
53 | malformed, |
54 | unrecognized_format, |
55 | unsupported_writing_format, |
56 | truncated_name_table, |
57 | not_implemented, |
58 | counter_overflow, |
59 | ostream_seek_unsupported, |
60 | uncompress_failed, |
61 | zlib_unavailable, |
62 | hash_mismatch |
63 | }; |
64 | |
65 | inline std::error_code make_error_code(sampleprof_error E) { |
66 | return std::error_code(static_cast<int>(E), sampleprof_category()); |
67 | } |
68 | |
69 | inline sampleprof_error MergeResult(sampleprof_error &Accumulator, |
70 | sampleprof_error Result) { |
71 | // Prefer first error encountered as later errors may be secondary effects of |
72 | // the initial problem. |
73 | if (Accumulator == sampleprof_error::success && |
74 | Result != sampleprof_error::success) |
75 | Accumulator = Result; |
76 | return Accumulator; |
77 | } |
78 | |
79 | } // end namespace llvm |
80 | |
81 | namespace std { |
82 | |
83 | template <> |
84 | struct is_error_code_enum<llvm::sampleprof_error> : std::true_type {}; |
85 | |
86 | } // end namespace std |
87 | |
88 | namespace llvm { |
89 | namespace sampleprof { |
90 | |
91 | enum SampleProfileFormat { |
92 | SPF_None = 0, |
93 | SPF_Text = 0x1, |
94 | SPF_Compact_Binary = 0x2, // Deprecated |
95 | SPF_GCC = 0x3, |
96 | SPF_Ext_Binary = 0x4, |
97 | SPF_Binary = 0xff |
98 | }; |
99 | |
100 | enum SampleProfileLayout { |
101 | SPL_None = 0, |
102 | SPL_Nest = 0x1, |
103 | SPL_Flat = 0x2, |
104 | }; |
105 | |
106 | static inline uint64_t SPMagic(SampleProfileFormat Format = SPF_Binary) { |
107 | return uint64_t('S') << (64 - 8) | uint64_t('P') << (64 - 16) | |
108 | uint64_t('R') << (64 - 24) | uint64_t('O') << (64 - 32) | |
109 | uint64_t('F') << (64 - 40) | uint64_t('4') << (64 - 48) | |
110 | uint64_t('2') << (64 - 56) | uint64_t(Format); |
111 | } |
112 | |
113 | static inline uint64_t SPVersion() { return 103; } |
114 | |
115 | // Section Type used by SampleProfileExtBinaryBaseReader and |
116 | // SampleProfileExtBinaryBaseWriter. Never change the existing |
117 | // value of enum. Only append new ones. |
118 | enum SecType { |
119 | SecInValid = 0, |
120 | SecProfSummary = 1, |
121 | SecNameTable = 2, |
122 | SecProfileSymbolList = 3, |
123 | SecFuncOffsetTable = 4, |
124 | SecFuncMetadata = 5, |
125 | SecCSNameTable = 6, |
126 | // marker for the first type of profile. |
127 | SecFuncProfileFirst = 32, |
128 | SecLBRProfile = SecFuncProfileFirst |
129 | }; |
130 | |
131 | static inline std::string getSecName(SecType Type) { |
132 | switch ((int)Type) { // Avoid -Wcovered-switch-default |
133 | case SecInValid: |
134 | return "InvalidSection" ; |
135 | case SecProfSummary: |
136 | return "ProfileSummarySection" ; |
137 | case SecNameTable: |
138 | return "NameTableSection" ; |
139 | case SecProfileSymbolList: |
140 | return "ProfileSymbolListSection" ; |
141 | case SecFuncOffsetTable: |
142 | return "FuncOffsetTableSection" ; |
143 | case SecFuncMetadata: |
144 | return "FunctionMetadata" ; |
145 | case SecCSNameTable: |
146 | return "CSNameTableSection" ; |
147 | case SecLBRProfile: |
148 | return "LBRProfileSection" ; |
149 | default: |
150 | return "UnknownSection" ; |
151 | } |
152 | } |
153 | |
154 | // Entry type of section header table used by SampleProfileExtBinaryBaseReader |
155 | // and SampleProfileExtBinaryBaseWriter. |
156 | struct SecHdrTableEntry { |
157 | SecType Type; |
158 | uint64_t Flags; |
159 | uint64_t Offset; |
160 | uint64_t Size; |
161 | // The index indicating the location of the current entry in |
162 | // SectionHdrLayout table. |
163 | uint64_t LayoutIndex; |
164 | }; |
165 | |
166 | // Flags common for all sections are defined here. In SecHdrTableEntry::Flags, |
167 | // common flags will be saved in the lower 32bits and section specific flags |
168 | // will be saved in the higher 32 bits. |
169 | enum class SecCommonFlags : uint32_t { |
170 | SecFlagInValid = 0, |
171 | SecFlagCompress = (1 << 0), |
172 | // Indicate the section contains only profile without context. |
173 | SecFlagFlat = (1 << 1) |
174 | }; |
175 | |
176 | // Section specific flags are defined here. |
177 | // !!!Note: Everytime a new enum class is created here, please add |
178 | // a new check in verifySecFlag. |
179 | enum class SecNameTableFlags : uint32_t { |
180 | SecFlagInValid = 0, |
181 | SecFlagMD5Name = (1 << 0), |
182 | // Store MD5 in fixed length instead of ULEB128 so NameTable can be |
183 | // accessed like an array. |
184 | SecFlagFixedLengthMD5 = (1 << 1), |
185 | // Profile contains ".__uniq." suffix name. Compiler shouldn't strip |
186 | // the suffix when doing profile matching when seeing the flag. |
187 | SecFlagUniqSuffix = (1 << 2) |
188 | }; |
189 | enum class SecProfSummaryFlags : uint32_t { |
190 | SecFlagInValid = 0, |
191 | /// SecFlagPartial means the profile is for common/shared code. |
192 | /// The common profile is usually merged from profiles collected |
193 | /// from running other targets. |
194 | SecFlagPartial = (1 << 0), |
195 | /// SecFlagContext means this is context-sensitive flat profile for |
196 | /// CSSPGO |
197 | SecFlagFullContext = (1 << 1), |
198 | /// SecFlagFSDiscriminator means this profile uses flow-sensitive |
199 | /// discriminators. |
200 | SecFlagFSDiscriminator = (1 << 2), |
201 | /// SecFlagIsPreInlined means this profile contains ShouldBeInlined |
202 | /// contexts thus this is CS preinliner computed. |
203 | SecFlagIsPreInlined = (1 << 4), |
204 | }; |
205 | |
206 | enum class SecFuncMetadataFlags : uint32_t { |
207 | SecFlagInvalid = 0, |
208 | SecFlagIsProbeBased = (1 << 0), |
209 | SecFlagHasAttribute = (1 << 1), |
210 | }; |
211 | |
212 | enum class SecFuncOffsetFlags : uint32_t { |
213 | SecFlagInvalid = 0, |
214 | // Store function offsets in an order of contexts. The order ensures that |
215 | // callee contexts of a given context laid out next to it. |
216 | SecFlagOrdered = (1 << 0), |
217 | }; |
218 | |
219 | // Verify section specific flag is used for the correct section. |
220 | template <class SecFlagType> |
221 | static inline void verifySecFlag(SecType Type, SecFlagType Flag) { |
222 | // No verification is needed for common flags. |
223 | if (std::is_same<SecCommonFlags, SecFlagType>()) |
224 | return; |
225 | |
226 | // Verification starts here for section specific flag. |
227 | bool IsFlagLegal = false; |
228 | switch (Type) { |
229 | case SecNameTable: |
230 | IsFlagLegal = std::is_same<SecNameTableFlags, SecFlagType>(); |
231 | break; |
232 | case SecProfSummary: |
233 | IsFlagLegal = std::is_same<SecProfSummaryFlags, SecFlagType>(); |
234 | break; |
235 | case SecFuncMetadata: |
236 | IsFlagLegal = std::is_same<SecFuncMetadataFlags, SecFlagType>(); |
237 | break; |
238 | default: |
239 | case SecFuncOffsetTable: |
240 | IsFlagLegal = std::is_same<SecFuncOffsetFlags, SecFlagType>(); |
241 | break; |
242 | } |
243 | if (!IsFlagLegal) |
244 | llvm_unreachable("Misuse of a flag in an incompatible section" ); |
245 | } |
246 | |
247 | template <class SecFlagType> |
248 | static inline void addSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) { |
249 | verifySecFlag(Entry.Type, Flag); |
250 | auto FVal = static_cast<uint64_t>(Flag); |
251 | bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>(); |
252 | Entry.Flags |= IsCommon ? FVal : (FVal << 32); |
253 | } |
254 | |
255 | template <class SecFlagType> |
256 | static inline void removeSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) { |
257 | verifySecFlag(Entry.Type, Flag); |
258 | auto FVal = static_cast<uint64_t>(Flag); |
259 | bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>(); |
260 | Entry.Flags &= ~(IsCommon ? FVal : (FVal << 32)); |
261 | } |
262 | |
263 | template <class SecFlagType> |
264 | static inline bool hasSecFlag(const SecHdrTableEntry &Entry, SecFlagType Flag) { |
265 | verifySecFlag(Entry.Type, Flag); |
266 | auto FVal = static_cast<uint64_t>(Flag); |
267 | bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>(); |
268 | return Entry.Flags & (IsCommon ? FVal : (FVal << 32)); |
269 | } |
270 | |
271 | /// Represents the relative location of an instruction. |
272 | /// |
273 | /// Instruction locations are specified by the line offset from the |
274 | /// beginning of the function (marked by the line where the function |
275 | /// header is) and the discriminator value within that line. |
276 | /// |
277 | /// The discriminator value is useful to distinguish instructions |
278 | /// that are on the same line but belong to different basic blocks |
279 | /// (e.g., the two post-increment instructions in "if (p) x++; else y++;"). |
280 | struct LineLocation { |
281 | LineLocation(uint32_t L, uint32_t D) : LineOffset(L), Discriminator(D) {} |
282 | |
283 | void print(raw_ostream &OS) const; |
284 | void dump() const; |
285 | |
286 | bool operator<(const LineLocation &O) const { |
287 | return LineOffset < O.LineOffset || |
288 | (LineOffset == O.LineOffset && Discriminator < O.Discriminator); |
289 | } |
290 | |
291 | bool operator==(const LineLocation &O) const { |
292 | return LineOffset == O.LineOffset && Discriminator == O.Discriminator; |
293 | } |
294 | |
295 | bool operator!=(const LineLocation &O) const { |
296 | return LineOffset != O.LineOffset || Discriminator != O.Discriminator; |
297 | } |
298 | |
299 | uint64_t getHashCode() const { |
300 | return ((uint64_t) Discriminator << 32) | LineOffset; |
301 | } |
302 | |
303 | uint32_t LineOffset; |
304 | uint32_t Discriminator; |
305 | }; |
306 | |
307 | struct LineLocationHash { |
308 | uint64_t operator()(const LineLocation &Loc) const { |
309 | return Loc.getHashCode(); |
310 | } |
311 | }; |
312 | |
313 | raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc); |
314 | |
315 | /// Representation of a single sample record. |
316 | /// |
317 | /// A sample record is represented by a positive integer value, which |
318 | /// indicates how frequently was the associated line location executed. |
319 | /// |
320 | /// Additionally, if the associated location contains a function call, |
321 | /// the record will hold a list of all the possible called targets. For |
322 | /// direct calls, this will be the exact function being invoked. For |
323 | /// indirect calls (function pointers, virtual table dispatch), this |
324 | /// will be a list of one or more functions. |
325 | class SampleRecord { |
326 | public: |
327 | using CallTarget = std::pair<FunctionId, uint64_t>; |
328 | struct CallTargetComparator { |
329 | bool operator()(const CallTarget &LHS, const CallTarget &RHS) const { |
330 | if (LHS.second != RHS.second) |
331 | return LHS.second > RHS.second; |
332 | |
333 | return LHS.first < RHS.first; |
334 | } |
335 | }; |
336 | |
337 | using SortedCallTargetSet = std::set<CallTarget, CallTargetComparator>; |
338 | using CallTargetMap = std::unordered_map<FunctionId, uint64_t>; |
339 | SampleRecord() = default; |
340 | |
341 | /// Increment the number of samples for this record by \p S. |
342 | /// Optionally scale sample count \p S by \p Weight. |
343 | /// |
344 | /// Sample counts accumulate using saturating arithmetic, to avoid wrapping |
345 | /// around unsigned integers. |
346 | sampleprof_error addSamples(uint64_t S, uint64_t Weight = 1) { |
347 | bool Overflowed; |
348 | NumSamples = SaturatingMultiplyAdd(X: S, Y: Weight, A: NumSamples, ResultOverflowed: &Overflowed); |
349 | return Overflowed ? sampleprof_error::counter_overflow |
350 | : sampleprof_error::success; |
351 | } |
352 | |
353 | /// Decrease the number of samples for this record by \p S. Return the amout |
354 | /// of samples actually decreased. |
355 | uint64_t removeSamples(uint64_t S) { |
356 | if (S > NumSamples) |
357 | S = NumSamples; |
358 | NumSamples -= S; |
359 | return S; |
360 | } |
361 | |
362 | /// Add called function \p F with samples \p S. |
363 | /// Optionally scale sample count \p S by \p Weight. |
364 | /// |
365 | /// Sample counts accumulate using saturating arithmetic, to avoid wrapping |
366 | /// around unsigned integers. |
367 | sampleprof_error addCalledTarget(FunctionId F, uint64_t S, |
368 | uint64_t Weight = 1) { |
369 | uint64_t &TargetSamples = CallTargets[F]; |
370 | bool Overflowed; |
371 | TargetSamples = |
372 | SaturatingMultiplyAdd(X: S, Y: Weight, A: TargetSamples, ResultOverflowed: &Overflowed); |
373 | return Overflowed ? sampleprof_error::counter_overflow |
374 | : sampleprof_error::success; |
375 | } |
376 | |
377 | /// Remove called function from the call target map. Return the target sample |
378 | /// count of the called function. |
379 | uint64_t removeCalledTarget(FunctionId F) { |
380 | uint64_t Count = 0; |
381 | auto I = CallTargets.find(x: F); |
382 | if (I != CallTargets.end()) { |
383 | Count = I->second; |
384 | CallTargets.erase(position: I); |
385 | } |
386 | return Count; |
387 | } |
388 | |
389 | /// Return true if this sample record contains function calls. |
390 | bool hasCalls() const { return !CallTargets.empty(); } |
391 | |
392 | uint64_t getSamples() const { return NumSamples; } |
393 | const CallTargetMap &getCallTargets() const { return CallTargets; } |
394 | const SortedCallTargetSet getSortedCallTargets() const { |
395 | return SortCallTargets(Targets: CallTargets); |
396 | } |
397 | |
398 | uint64_t getCallTargetSum() const { |
399 | uint64_t Sum = 0; |
400 | for (const auto &I : CallTargets) |
401 | Sum += I.second; |
402 | return Sum; |
403 | } |
404 | |
405 | /// Sort call targets in descending order of call frequency. |
406 | static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets) { |
407 | SortedCallTargetSet SortedTargets; |
408 | for (const auto &[Target, Frequency] : Targets) { |
409 | SortedTargets.emplace(args: Target, args: Frequency); |
410 | } |
411 | return SortedTargets; |
412 | } |
413 | |
414 | /// Prorate call targets by a distribution factor. |
415 | static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, |
416 | float DistributionFactor) { |
417 | CallTargetMap AdjustedTargets; |
418 | for (const auto &[Target, Frequency] : Targets) { |
419 | AdjustedTargets[Target] = Frequency * DistributionFactor; |
420 | } |
421 | return AdjustedTargets; |
422 | } |
423 | |
424 | /// Merge the samples in \p Other into this record. |
425 | /// Optionally scale sample counts by \p Weight. |
426 | sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1); |
427 | void print(raw_ostream &OS, unsigned Indent) const; |
428 | void dump() const; |
429 | |
430 | bool operator==(const SampleRecord &Other) const { |
431 | return NumSamples == Other.NumSamples && CallTargets == Other.CallTargets; |
432 | } |
433 | |
434 | bool operator!=(const SampleRecord &Other) const { |
435 | return !(*this == Other); |
436 | } |
437 | |
438 | private: |
439 | uint64_t NumSamples = 0; |
440 | CallTargetMap CallTargets; |
441 | }; |
442 | |
443 | raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample); |
444 | |
445 | // State of context associated with FunctionSamples |
446 | enum ContextStateMask { |
447 | UnknownContext = 0x0, // Profile without context |
448 | RawContext = 0x1, // Full context profile from input profile |
449 | SyntheticContext = 0x2, // Synthetic context created for context promotion |
450 | InlinedContext = 0x4, // Profile for context that is inlined into caller |
451 | MergedContext = 0x8 // Profile for context merged into base profile |
452 | }; |
453 | |
454 | // Attribute of context associated with FunctionSamples |
455 | enum ContextAttributeMask { |
456 | ContextNone = 0x0, |
457 | ContextWasInlined = 0x1, // Leaf of context was inlined in previous build |
458 | ContextShouldBeInlined = 0x2, // Leaf of context should be inlined |
459 | ContextDuplicatedIntoBase = |
460 | 0x4, // Leaf of context is duplicated into the base profile |
461 | }; |
462 | |
463 | // Represents a context frame with profile function and line location |
464 | struct SampleContextFrame { |
465 | FunctionId Func; |
466 | LineLocation Location; |
467 | |
468 | SampleContextFrame() : Location(0, 0) {} |
469 | |
470 | SampleContextFrame(FunctionId Func, LineLocation Location) |
471 | : Func(Func), Location(Location) {} |
472 | |
473 | bool operator==(const SampleContextFrame &That) const { |
474 | return Location == That.Location && Func == That.Func; |
475 | } |
476 | |
477 | bool operator!=(const SampleContextFrame &That) const { |
478 | return !(*this == That); |
479 | } |
480 | |
481 | std::string toString(bool OutputLineLocation) const { |
482 | std::ostringstream OContextStr; |
483 | OContextStr << Func.str(); |
484 | if (OutputLineLocation) { |
485 | OContextStr << ":" << Location.LineOffset; |
486 | if (Location.Discriminator) |
487 | OContextStr << "." << Location.Discriminator; |
488 | } |
489 | return OContextStr.str(); |
490 | } |
491 | |
492 | uint64_t getHashCode() const { |
493 | uint64_t NameHash = Func.getHashCode(); |
494 | uint64_t LocId = Location.getHashCode(); |
495 | return NameHash + (LocId << 5) + LocId; |
496 | } |
497 | }; |
498 | |
499 | static inline hash_code hash_value(const SampleContextFrame &arg) { |
500 | return arg.getHashCode(); |
501 | } |
502 | |
503 | using SampleContextFrameVector = SmallVector<SampleContextFrame, 1>; |
504 | using SampleContextFrames = ArrayRef<SampleContextFrame>; |
505 | |
506 | struct SampleContextFrameHash { |
507 | uint64_t operator()(const SampleContextFrameVector &S) const { |
508 | return hash_combine_range(first: S.begin(), last: S.end()); |
509 | } |
510 | }; |
511 | |
512 | // Sample context for FunctionSamples. It consists of the calling context, |
513 | // the function name and context state. Internally sample context is represented |
514 | // using ArrayRef, which is also the input for constructing a `SampleContext`. |
515 | // It can accept and represent both full context string as well as context-less |
516 | // function name. |
517 | // For a CS profile, a full context vector can look like: |
518 | // `main:3 _Z5funcAi:1 _Z8funcLeafi` |
519 | // For a base CS profile without calling context, the context vector should only |
520 | // contain the leaf frame name. |
521 | // For a non-CS profile, the context vector should be empty. |
522 | class SampleContext { |
523 | public: |
524 | SampleContext() : State(UnknownContext), Attributes(ContextNone) {} |
525 | |
526 | SampleContext(StringRef Name) |
527 | : Func(Name), State(UnknownContext), Attributes(ContextNone) { |
528 | assert(!Name.empty() && "Name is empty" ); |
529 | } |
530 | |
531 | SampleContext(FunctionId Func) |
532 | : Func(Func), State(UnknownContext), Attributes(ContextNone) {} |
533 | |
534 | SampleContext(SampleContextFrames Context, |
535 | ContextStateMask CState = RawContext) |
536 | : Attributes(ContextNone) { |
537 | assert(!Context.empty() && "Context is empty" ); |
538 | setContext(Context, CState); |
539 | } |
540 | |
541 | // Give a context string, decode and populate internal states like |
542 | // Function name, Calling context and context state. Example of input |
543 | // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]` |
544 | SampleContext(StringRef ContextStr, |
545 | std::list<SampleContextFrameVector> &CSNameTable, |
546 | ContextStateMask CState = RawContext) |
547 | : Attributes(ContextNone) { |
548 | assert(!ContextStr.empty()); |
549 | // Note that `[]` wrapped input indicates a full context string, otherwise |
550 | // it's treated as context-less function name only. |
551 | bool HasContext = ContextStr.starts_with(Prefix: "[" ); |
552 | if (!HasContext) { |
553 | State = UnknownContext; |
554 | Func = FunctionId(ContextStr); |
555 | } else { |
556 | CSNameTable.emplace_back(); |
557 | SampleContextFrameVector &Context = CSNameTable.back(); |
558 | createCtxVectorFromStr(ContextStr, Context); |
559 | setContext(Context, CState); |
560 | } |
561 | } |
562 | |
563 | /// Create a context vector from a given context string and save it in |
564 | /// `Context`. |
565 | static void createCtxVectorFromStr(StringRef ContextStr, |
566 | SampleContextFrameVector &Context) { |
567 | // Remove encapsulating '[' and ']' if any |
568 | ContextStr = ContextStr.substr(Start: 1, N: ContextStr.size() - 2); |
569 | StringRef ContextRemain = ContextStr; |
570 | StringRef ChildContext; |
571 | FunctionId Callee; |
572 | while (!ContextRemain.empty()) { |
573 | auto ContextSplit = ContextRemain.split(Separator: " @ " ); |
574 | ChildContext = ContextSplit.first; |
575 | ContextRemain = ContextSplit.second; |
576 | LineLocation CallSiteLoc(0, 0); |
577 | decodeContextString(ContextStr: ChildContext, Func&: Callee, LineLoc&: CallSiteLoc); |
578 | Context.emplace_back(Args&: Callee, Args&: CallSiteLoc); |
579 | } |
580 | } |
581 | |
582 | // Decode context string for a frame to get function name and location. |
583 | // `ContextStr` is in the form of `FuncName:StartLine.Discriminator`. |
584 | static void decodeContextString(StringRef ContextStr, |
585 | FunctionId &Func, |
586 | LineLocation &LineLoc) { |
587 | // Get function name |
588 | auto EntrySplit = ContextStr.split(Separator: ':'); |
589 | Func = FunctionId(EntrySplit.first); |
590 | |
591 | LineLoc = {0, 0}; |
592 | if (!EntrySplit.second.empty()) { |
593 | // Get line offset, use signed int for getAsInteger so string will |
594 | // be parsed as signed. |
595 | int LineOffset = 0; |
596 | auto LocSplit = EntrySplit.second.split(Separator: '.'); |
597 | LocSplit.first.getAsInteger(Radix: 10, Result&: LineOffset); |
598 | LineLoc.LineOffset = LineOffset; |
599 | |
600 | // Get discriminator |
601 | if (!LocSplit.second.empty()) |
602 | LocSplit.second.getAsInteger(Radix: 10, Result&: LineLoc.Discriminator); |
603 | } |
604 | } |
605 | |
606 | operator SampleContextFrames() const { return FullContext; } |
607 | bool hasAttribute(ContextAttributeMask A) { return Attributes & (uint32_t)A; } |
608 | void setAttribute(ContextAttributeMask A) { Attributes |= (uint32_t)A; } |
609 | uint32_t getAllAttributes() { return Attributes; } |
610 | void setAllAttributes(uint32_t A) { Attributes = A; } |
611 | bool hasState(ContextStateMask S) { return State & (uint32_t)S; } |
612 | void setState(ContextStateMask S) { State |= (uint32_t)S; } |
613 | void clearState(ContextStateMask S) { State &= (uint32_t)~S; } |
614 | bool hasContext() const { return State != UnknownContext; } |
615 | bool isBaseContext() const { return FullContext.size() == 1; } |
616 | FunctionId getFunction() const { return Func; } |
617 | SampleContextFrames getContextFrames() const { return FullContext; } |
618 | |
619 | static std::string getContextString(SampleContextFrames Context, |
620 | bool IncludeLeafLineLocation = false) { |
621 | std::ostringstream OContextStr; |
622 | for (uint32_t I = 0; I < Context.size(); I++) { |
623 | if (OContextStr.str().size()) { |
624 | OContextStr << " @ " ; |
625 | } |
626 | OContextStr << Context[I].toString(OutputLineLocation: I != Context.size() - 1 || |
627 | IncludeLeafLineLocation); |
628 | } |
629 | return OContextStr.str(); |
630 | } |
631 | |
632 | std::string toString() const { |
633 | if (!hasContext()) |
634 | return Func.str(); |
635 | return getContextString(Context: FullContext, IncludeLeafLineLocation: false); |
636 | } |
637 | |
638 | uint64_t getHashCode() const { |
639 | if (hasContext()) |
640 | return hash_value(S: getContextFrames()); |
641 | return getFunction().getHashCode(); |
642 | } |
643 | |
644 | /// Set the name of the function and clear the current context. |
645 | void setFunction(FunctionId newFunction) { |
646 | Func = newFunction; |
647 | FullContext = SampleContextFrames(); |
648 | State = UnknownContext; |
649 | } |
650 | |
651 | void setContext(SampleContextFrames Context, |
652 | ContextStateMask CState = RawContext) { |
653 | assert(CState != UnknownContext); |
654 | FullContext = Context; |
655 | Func = Context.back().Func; |
656 | State = CState; |
657 | } |
658 | |
659 | bool operator==(const SampleContext &That) const { |
660 | return State == That.State && Func == That.Func && |
661 | FullContext == That.FullContext; |
662 | } |
663 | |
664 | bool operator!=(const SampleContext &That) const { return !(*this == That); } |
665 | |
666 | bool operator<(const SampleContext &That) const { |
667 | if (State != That.State) |
668 | return State < That.State; |
669 | |
670 | if (!hasContext()) { |
671 | return Func < That.Func; |
672 | } |
673 | |
674 | uint64_t I = 0; |
675 | while (I < std::min(a: FullContext.size(), b: That.FullContext.size())) { |
676 | auto &Context1 = FullContext[I]; |
677 | auto &Context2 = That.FullContext[I]; |
678 | auto V = Context1.Func.compare(Other: Context2.Func); |
679 | if (V) |
680 | return V < 0; |
681 | if (Context1.Location != Context2.Location) |
682 | return Context1.Location < Context2.Location; |
683 | I++; |
684 | } |
685 | |
686 | return FullContext.size() < That.FullContext.size(); |
687 | } |
688 | |
689 | struct Hash { |
690 | uint64_t operator()(const SampleContext &Context) const { |
691 | return Context.getHashCode(); |
692 | } |
693 | }; |
694 | |
695 | bool IsPrefixOf(const SampleContext &That) const { |
696 | auto ThisContext = FullContext; |
697 | auto ThatContext = That.FullContext; |
698 | if (ThatContext.size() < ThisContext.size()) |
699 | return false; |
700 | ThatContext = ThatContext.take_front(N: ThisContext.size()); |
701 | // Compare Leaf frame first |
702 | if (ThisContext.back().Func != ThatContext.back().Func) |
703 | return false; |
704 | // Compare leading context |
705 | return ThisContext.drop_back() == ThatContext.drop_back(); |
706 | } |
707 | |
708 | private: |
709 | // The function associated with this context. If CS profile, this is the leaf |
710 | // function. |
711 | FunctionId Func; |
712 | // Full context including calling context and leaf function name |
713 | SampleContextFrames FullContext; |
714 | // State of the associated sample profile |
715 | uint32_t State; |
716 | // Attribute of the associated sample profile |
717 | uint32_t Attributes; |
718 | }; |
719 | |
720 | static inline hash_code hash_value(const SampleContext &Context) { |
721 | return Context.getHashCode(); |
722 | } |
723 | |
724 | inline raw_ostream &operator<<(raw_ostream &OS, const SampleContext &Context) { |
725 | return OS << Context.toString(); |
726 | } |
727 | |
728 | class FunctionSamples; |
729 | class SampleProfileReaderItaniumRemapper; |
730 | |
731 | using BodySampleMap = std::map<LineLocation, SampleRecord>; |
732 | // NOTE: Using a StringMap here makes parsed profiles consume around 17% more |
733 | // memory, which is *very* significant for large profiles. |
734 | using FunctionSamplesMap = std::map<FunctionId, FunctionSamples>; |
735 | using CallsiteSampleMap = std::map<LineLocation, FunctionSamplesMap>; |
736 | using LocToLocMap = |
737 | std::unordered_map<LineLocation, LineLocation, LineLocationHash>; |
738 | |
739 | /// Representation of the samples collected for a function. |
740 | /// |
741 | /// This data structure contains all the collected samples for the body |
742 | /// of a function. Each sample corresponds to a LineLocation instance |
743 | /// within the body of the function. |
744 | class FunctionSamples { |
745 | public: |
746 | FunctionSamples() = default; |
747 | |
748 | void print(raw_ostream &OS = dbgs(), unsigned Indent = 0) const; |
749 | void dump() const; |
750 | |
751 | sampleprof_error addTotalSamples(uint64_t Num, uint64_t Weight = 1) { |
752 | bool Overflowed; |
753 | TotalSamples = |
754 | SaturatingMultiplyAdd(X: Num, Y: Weight, A: TotalSamples, ResultOverflowed: &Overflowed); |
755 | return Overflowed ? sampleprof_error::counter_overflow |
756 | : sampleprof_error::success; |
757 | } |
758 | |
759 | void removeTotalSamples(uint64_t Num) { |
760 | if (TotalSamples < Num) |
761 | TotalSamples = 0; |
762 | else |
763 | TotalSamples -= Num; |
764 | } |
765 | |
766 | void setTotalSamples(uint64_t Num) { TotalSamples = Num; } |
767 | |
768 | void setHeadSamples(uint64_t Num) { TotalHeadSamples = Num; } |
769 | |
770 | sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) { |
771 | bool Overflowed; |
772 | TotalHeadSamples = |
773 | SaturatingMultiplyAdd(X: Num, Y: Weight, A: TotalHeadSamples, ResultOverflowed: &Overflowed); |
774 | return Overflowed ? sampleprof_error::counter_overflow |
775 | : sampleprof_error::success; |
776 | } |
777 | |
778 | sampleprof_error addBodySamples(uint32_t LineOffset, uint32_t Discriminator, |
779 | uint64_t Num, uint64_t Weight = 1) { |
780 | return BodySamples[LineLocation(LineOffset, Discriminator)].addSamples( |
781 | S: Num, Weight); |
782 | } |
783 | |
784 | sampleprof_error addCalledTargetSamples(uint32_t LineOffset, |
785 | uint32_t Discriminator, |
786 | FunctionId Func, |
787 | uint64_t Num, |
788 | uint64_t Weight = 1) { |
789 | return BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget( |
790 | F: Func, S: Num, Weight); |
791 | } |
792 | |
793 | sampleprof_error addSampleRecord(LineLocation Location, |
794 | const SampleRecord &SampleRecord, |
795 | uint64_t Weight = 1) { |
796 | return BodySamples[Location].merge(Other: SampleRecord, Weight); |
797 | } |
798 | |
799 | // Remove a call target and decrease the body sample correspondingly. Return |
800 | // the number of body samples actually decreased. |
801 | uint64_t removeCalledTargetAndBodySample(uint32_t LineOffset, |
802 | uint32_t Discriminator, |
803 | FunctionId Func) { |
804 | uint64_t Count = 0; |
805 | auto I = BodySamples.find(x: LineLocation(LineOffset, Discriminator)); |
806 | if (I != BodySamples.end()) { |
807 | Count = I->second.removeCalledTarget(F: Func); |
808 | Count = I->second.removeSamples(S: Count); |
809 | if (!I->second.getSamples()) |
810 | BodySamples.erase(position: I); |
811 | } |
812 | return Count; |
813 | } |
814 | |
815 | // Remove all call site samples for inlinees. This is needed when flattening |
816 | // a nested profile. |
817 | void removeAllCallsiteSamples() { |
818 | CallsiteSamples.clear(); |
819 | } |
820 | |
821 | // Accumulate all call target samples to update the body samples. |
822 | void updateCallsiteSamples() { |
823 | for (auto &I : BodySamples) { |
824 | uint64_t TargetSamples = I.second.getCallTargetSum(); |
825 | // It's possible that the body sample count can be greater than the call |
826 | // target sum. E.g, if some call targets are external targets, they won't |
827 | // be considered valid call targets, but the body sample count which is |
828 | // from lbr ranges can actually include them. |
829 | if (TargetSamples > I.second.getSamples()) |
830 | I.second.addSamples(S: TargetSamples - I.second.getSamples()); |
831 | } |
832 | } |
833 | |
834 | // Accumulate all body samples to set total samples. |
835 | void updateTotalSamples() { |
836 | setTotalSamples(0); |
837 | for (const auto &I : BodySamples) |
838 | addTotalSamples(Num: I.second.getSamples()); |
839 | |
840 | for (auto &I : CallsiteSamples) { |
841 | for (auto &CS : I.second) { |
842 | CS.second.updateTotalSamples(); |
843 | addTotalSamples(Num: CS.second.getTotalSamples()); |
844 | } |
845 | } |
846 | } |
847 | |
848 | // Set current context and all callee contexts to be synthetic. |
849 | void SetContextSynthetic() { |
850 | Context.setState(SyntheticContext); |
851 | for (auto &I : CallsiteSamples) { |
852 | for (auto &CS : I.second) { |
853 | CS.second.SetContextSynthetic(); |
854 | } |
855 | } |
856 | } |
857 | |
858 | // Query the stale profile matching results and remap the location. |
859 | const LineLocation &mapIRLocToProfileLoc(const LineLocation &IRLoc) const { |
860 | // There is no remapping if the profile is not stale or the matching gives |
861 | // the same location. |
862 | if (!IRToProfileLocationMap) |
863 | return IRLoc; |
864 | const auto &ProfileLoc = IRToProfileLocationMap->find(x: IRLoc); |
865 | if (ProfileLoc != IRToProfileLocationMap->end()) |
866 | return ProfileLoc->second; |
867 | else |
868 | return IRLoc; |
869 | } |
870 | |
871 | /// Return the number of samples collected at the given location. |
872 | /// Each location is specified by \p LineOffset and \p Discriminator. |
873 | /// If the location is not found in profile, return error. |
874 | ErrorOr<uint64_t> findSamplesAt(uint32_t LineOffset, |
875 | uint32_t Discriminator) const { |
876 | const auto &ret = BodySamples.find( |
877 | x: mapIRLocToProfileLoc(IRLoc: LineLocation(LineOffset, Discriminator))); |
878 | if (ret == BodySamples.end()) |
879 | return std::error_code(); |
880 | return ret->second.getSamples(); |
881 | } |
882 | |
883 | /// Returns the call target map collected at a given location. |
884 | /// Each location is specified by \p LineOffset and \p Discriminator. |
885 | /// If the location is not found in profile, return error. |
886 | ErrorOr<const SampleRecord::CallTargetMap &> |
887 | findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const { |
888 | const auto &ret = BodySamples.find( |
889 | x: mapIRLocToProfileLoc(IRLoc: LineLocation(LineOffset, Discriminator))); |
890 | if (ret == BodySamples.end()) |
891 | return std::error_code(); |
892 | return ret->second.getCallTargets(); |
893 | } |
894 | |
895 | /// Returns the call target map collected at a given location specified by \p |
896 | /// CallSite. If the location is not found in profile, return error. |
897 | ErrorOr<const SampleRecord::CallTargetMap &> |
898 | findCallTargetMapAt(const LineLocation &CallSite) const { |
899 | const auto &Ret = BodySamples.find(x: mapIRLocToProfileLoc(IRLoc: CallSite)); |
900 | if (Ret == BodySamples.end()) |
901 | return std::error_code(); |
902 | return Ret->second.getCallTargets(); |
903 | } |
904 | |
905 | /// Return the function samples at the given callsite location. |
906 | FunctionSamplesMap &functionSamplesAt(const LineLocation &Loc) { |
907 | return CallsiteSamples[mapIRLocToProfileLoc(IRLoc: Loc)]; |
908 | } |
909 | |
910 | /// Returns the FunctionSamplesMap at the given \p Loc. |
911 | const FunctionSamplesMap * |
912 | findFunctionSamplesMapAt(const LineLocation &Loc) const { |
913 | auto iter = CallsiteSamples.find(x: mapIRLocToProfileLoc(IRLoc: Loc)); |
914 | if (iter == CallsiteSamples.end()) |
915 | return nullptr; |
916 | return &iter->second; |
917 | } |
918 | |
919 | /// Returns a pointer to FunctionSamples at the given callsite location |
920 | /// \p Loc with callee \p CalleeName. If no callsite can be found, relax |
921 | /// the restriction to return the FunctionSamples at callsite location |
922 | /// \p Loc with the maximum total sample count. If \p Remapper is not |
923 | /// nullptr, use \p Remapper to find FunctionSamples with equivalent name |
924 | /// as \p CalleeName. |
925 | const FunctionSamples * |
926 | findFunctionSamplesAt(const LineLocation &Loc, StringRef CalleeName, |
927 | SampleProfileReaderItaniumRemapper *Remapper) const; |
928 | |
929 | bool empty() const { return TotalSamples == 0; } |
930 | |
931 | /// Return the total number of samples collected inside the function. |
932 | uint64_t getTotalSamples() const { return TotalSamples; } |
933 | |
934 | /// For top-level functions, return the total number of branch samples that |
935 | /// have the function as the branch target (or 0 otherwise). This is the raw |
936 | /// data fetched from the profile. This should be equivalent to the sample of |
937 | /// the first instruction of the symbol. But as we directly get this info for |
938 | /// raw profile without referring to potentially inaccurate debug info, this |
939 | /// gives more accurate profile data and is preferred for standalone symbols. |
940 | uint64_t getHeadSamples() const { return TotalHeadSamples; } |
941 | |
942 | /// Return an estimate of the sample count of the function entry basic block. |
943 | /// The function can be either a standalone symbol or an inlined function. |
944 | /// For Context-Sensitive profiles, this will prefer returning the head |
945 | /// samples (i.e. getHeadSamples()), if non-zero. Otherwise it estimates from |
946 | /// the function body's samples or callsite samples. |
947 | uint64_t getHeadSamplesEstimate() const { |
948 | if (FunctionSamples::ProfileIsCS && getHeadSamples()) { |
949 | // For CS profile, if we already have more accurate head samples |
950 | // counted by branch sample from caller, use them as entry samples. |
951 | return getHeadSamples(); |
952 | } |
953 | uint64_t Count = 0; |
954 | // Use either BodySamples or CallsiteSamples which ever has the smaller |
955 | // lineno. |
956 | if (!BodySamples.empty() && |
957 | (CallsiteSamples.empty() || |
958 | BodySamples.begin()->first < CallsiteSamples.begin()->first)) |
959 | Count = BodySamples.begin()->second.getSamples(); |
960 | else if (!CallsiteSamples.empty()) { |
961 | // An indirect callsite may be promoted to several inlined direct calls. |
962 | // We need to get the sum of them. |
963 | for (const auto &N_FS : CallsiteSamples.begin()->second) |
964 | Count += N_FS.second.getHeadSamplesEstimate(); |
965 | } |
966 | // Return at least 1 if total sample is not 0. |
967 | return Count ? Count : TotalSamples > 0; |
968 | } |
969 | |
970 | /// Return all the samples collected in the body of the function. |
971 | const BodySampleMap &getBodySamples() const { return BodySamples; } |
972 | |
973 | /// Return all the callsite samples collected in the body of the function. |
974 | const CallsiteSampleMap &getCallsiteSamples() const { |
975 | return CallsiteSamples; |
976 | } |
977 | |
978 | /// Return the maximum of sample counts in a function body. When SkipCallSite |
979 | /// is false, which is the default, the return count includes samples in the |
980 | /// inlined functions. When SkipCallSite is true, the return count only |
981 | /// considers the body samples. |
982 | uint64_t getMaxCountInside(bool SkipCallSite = false) const { |
983 | uint64_t MaxCount = 0; |
984 | for (const auto &L : getBodySamples()) |
985 | MaxCount = std::max(a: MaxCount, b: L.second.getSamples()); |
986 | if (SkipCallSite) |
987 | return MaxCount; |
988 | for (const auto &C : getCallsiteSamples()) |
989 | for (const FunctionSamplesMap::value_type &F : C.second) |
990 | MaxCount = std::max(a: MaxCount, b: F.second.getMaxCountInside()); |
991 | return MaxCount; |
992 | } |
993 | |
994 | /// Merge the samples in \p Other into this one. |
995 | /// Optionally scale samples by \p Weight. |
996 | sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight = 1) { |
997 | sampleprof_error Result = sampleprof_error::success; |
998 | if (!GUIDToFuncNameMap) |
999 | GUIDToFuncNameMap = Other.GUIDToFuncNameMap; |
1000 | if (Context.getFunction().empty()) |
1001 | Context = Other.getContext(); |
1002 | if (FunctionHash == 0) { |
1003 | // Set the function hash code for the target profile. |
1004 | FunctionHash = Other.getFunctionHash(); |
1005 | } else if (FunctionHash != Other.getFunctionHash()) { |
1006 | // The two profiles coming with different valid hash codes indicates |
1007 | // either: |
1008 | // 1. They are same-named static functions from different compilation |
1009 | // units (without using -unique-internal-linkage-names), or |
1010 | // 2. They are really the same function but from different compilations. |
1011 | // Let's bail out in either case for now, which means one profile is |
1012 | // dropped. |
1013 | return sampleprof_error::hash_mismatch; |
1014 | } |
1015 | |
1016 | MergeResult(Accumulator&: Result, Result: addTotalSamples(Num: Other.getTotalSamples(), Weight)); |
1017 | MergeResult(Accumulator&: Result, Result: addHeadSamples(Num: Other.getHeadSamples(), Weight)); |
1018 | for (const auto &I : Other.getBodySamples()) { |
1019 | const LineLocation &Loc = I.first; |
1020 | const SampleRecord &Rec = I.second; |
1021 | MergeResult(Accumulator&: Result, Result: BodySamples[Loc].merge(Other: Rec, Weight)); |
1022 | } |
1023 | for (const auto &I : Other.getCallsiteSamples()) { |
1024 | const LineLocation &Loc = I.first; |
1025 | FunctionSamplesMap &FSMap = functionSamplesAt(Loc); |
1026 | for (const auto &Rec : I.second) |
1027 | MergeResult(Accumulator&: Result, Result: FSMap[Rec.first].merge(Other: Rec.second, Weight)); |
1028 | } |
1029 | return Result; |
1030 | } |
1031 | |
1032 | /// Recursively traverses all children, if the total sample count of the |
1033 | /// corresponding function is no less than \p Threshold, add its corresponding |
1034 | /// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID |
1035 | /// to \p S. |
1036 | void findInlinedFunctions(DenseSet<GlobalValue::GUID> &S, |
1037 | const HashKeyMap<std::unordered_map, FunctionId, |
1038 | Function *> &SymbolMap, |
1039 | uint64_t Threshold) const { |
1040 | if (TotalSamples <= Threshold) |
1041 | return; |
1042 | auto isDeclaration = [](const Function *F) { |
1043 | return !F || F->isDeclaration(); |
1044 | }; |
1045 | if (isDeclaration(SymbolMap.lookup(Key: getFunction()))) { |
1046 | // Add to the import list only when it's defined out of module. |
1047 | S.insert(V: getGUID()); |
1048 | } |
1049 | // Import hot CallTargets, which may not be available in IR because full |
1050 | // profile annotation cannot be done until backend compilation in ThinLTO. |
1051 | for (const auto &BS : BodySamples) |
1052 | for (const auto &TS : BS.second.getCallTargets()) |
1053 | if (TS.second > Threshold) { |
1054 | const Function *Callee = SymbolMap.lookup(Key: TS.first); |
1055 | if (isDeclaration(Callee)) |
1056 | S.insert(V: TS.first.getHashCode()); |
1057 | } |
1058 | for (const auto &CS : CallsiteSamples) |
1059 | for (const auto &NameFS : CS.second) |
1060 | NameFS.second.findInlinedFunctions(S, SymbolMap, Threshold); |
1061 | } |
1062 | |
1063 | /// Set the name of the function. |
1064 | void setFunction(FunctionId newFunction) { |
1065 | Context.setFunction(newFunction); |
1066 | } |
1067 | |
1068 | /// Return the function name. |
1069 | FunctionId getFunction() const { return Context.getFunction(); } |
1070 | |
1071 | /// Return the original function name. |
1072 | StringRef getFuncName() const { return getFuncName(Func: getFunction()); } |
1073 | |
1074 | void setFunctionHash(uint64_t Hash) { FunctionHash = Hash; } |
1075 | |
1076 | uint64_t getFunctionHash() const { return FunctionHash; } |
1077 | |
1078 | void setIRToProfileLocationMap(const LocToLocMap *LTLM) { |
1079 | assert(IRToProfileLocationMap == nullptr && "this should be set only once" ); |
1080 | IRToProfileLocationMap = LTLM; |
1081 | } |
1082 | |
1083 | /// Return the canonical name for a function, taking into account |
1084 | /// suffix elision policy attributes. |
1085 | static StringRef getCanonicalFnName(const Function &F) { |
1086 | auto AttrName = "sample-profile-suffix-elision-policy" ; |
1087 | auto Attr = F.getFnAttribute(Kind: AttrName).getValueAsString(); |
1088 | return getCanonicalFnName(FnName: F.getName(), Attr); |
1089 | } |
1090 | |
1091 | /// Name suffixes which canonicalization should handle to avoid |
1092 | /// profile mismatch. |
1093 | static constexpr const char *LLVMSuffix = ".llvm." ; |
1094 | static constexpr const char *PartSuffix = ".part." ; |
1095 | static constexpr const char *UniqSuffix = ".__uniq." ; |
1096 | |
1097 | static StringRef getCanonicalFnName(StringRef FnName, |
1098 | StringRef Attr = "selected" ) { |
1099 | // Note the sequence of the suffixes in the knownSuffixes array matters. |
1100 | // If suffix "A" is appended after the suffix "B", "A" should be in front |
1101 | // of "B" in knownSuffixes. |
1102 | const char *knownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix}; |
1103 | if (Attr == "" || Attr == "all" ) { |
1104 | return FnName.split(Separator: '.').first; |
1105 | } else if (Attr == "selected" ) { |
1106 | StringRef Cand(FnName); |
1107 | for (const auto &Suf : knownSuffixes) { |
1108 | StringRef Suffix(Suf); |
1109 | // If the profile contains ".__uniq." suffix, don't strip the |
1110 | // suffix for names in the IR. |
1111 | if (Suffix == UniqSuffix && FunctionSamples::HasUniqSuffix) |
1112 | continue; |
1113 | auto It = Cand.rfind(Str: Suffix); |
1114 | if (It == StringRef::npos) |
1115 | continue; |
1116 | auto Dit = Cand.rfind(C: '.'); |
1117 | if (Dit == It + Suffix.size() - 1) |
1118 | Cand = Cand.substr(Start: 0, N: It); |
1119 | } |
1120 | return Cand; |
1121 | } else if (Attr == "none" ) { |
1122 | return FnName; |
1123 | } else { |
1124 | assert(false && "internal error: unknown suffix elision policy" ); |
1125 | } |
1126 | return FnName; |
1127 | } |
1128 | |
1129 | /// Translate \p Func into its original name. |
1130 | /// When profile doesn't use MD5, \p Func needs no translation. |
1131 | /// When profile uses MD5, \p Func in current FunctionSamples |
1132 | /// is actually GUID of the original function name. getFuncName will |
1133 | /// translate \p Func in current FunctionSamples into its original name |
1134 | /// by looking up in the function map GUIDToFuncNameMap. |
1135 | /// If the original name doesn't exist in the map, return empty StringRef. |
1136 | StringRef getFuncName(FunctionId Func) const { |
1137 | if (!UseMD5) |
1138 | return Func.stringRef(); |
1139 | |
1140 | assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be populated first" ); |
1141 | return GUIDToFuncNameMap->lookup(Val: Func.getHashCode()); |
1142 | } |
1143 | |
1144 | /// Returns the line offset to the start line of the subprogram. |
1145 | /// We assume that a single function will not exceed 65535 LOC. |
1146 | static unsigned getOffset(const DILocation *DIL); |
1147 | |
1148 | /// Returns a unique call site identifier for a given debug location of a call |
1149 | /// instruction. This is wrapper of two scenarios, the probe-based profile and |
1150 | /// regular profile, to hide implementation details from the sample loader and |
1151 | /// the context tracker. |
1152 | static LineLocation getCallSiteIdentifier(const DILocation *DIL, |
1153 | bool ProfileIsFS = false); |
1154 | |
1155 | /// Returns a unique hash code for a combination of a callsite location and |
1156 | /// the callee function name. |
1157 | /// Guarantee MD5 and non-MD5 representation of the same function results in |
1158 | /// the same hash. |
1159 | static uint64_t getCallSiteHash(FunctionId Callee, |
1160 | const LineLocation &Callsite) { |
1161 | return SampleContextFrame(Callee, Callsite).getHashCode(); |
1162 | } |
1163 | |
1164 | /// Get the FunctionSamples of the inline instance where DIL originates |
1165 | /// from. |
1166 | /// |
1167 | /// The FunctionSamples of the instruction (Machine or IR) associated to |
1168 | /// \p DIL is the inlined instance in which that instruction is coming from. |
1169 | /// We traverse the inline stack of that instruction, and match it with the |
1170 | /// tree nodes in the profile. |
1171 | /// |
1172 | /// \returns the FunctionSamples pointer to the inlined instance. |
1173 | /// If \p Remapper is not nullptr, it will be used to find matching |
1174 | /// FunctionSamples with not exactly the same but equivalent name. |
1175 | const FunctionSamples *findFunctionSamples( |
1176 | const DILocation *DIL, |
1177 | SampleProfileReaderItaniumRemapper *Remapper = nullptr) const; |
1178 | |
1179 | static bool ProfileIsProbeBased; |
1180 | |
1181 | static bool ProfileIsCS; |
1182 | |
1183 | static bool ProfileIsPreInlined; |
1184 | |
1185 | SampleContext &getContext() const { return Context; } |
1186 | |
1187 | void setContext(const SampleContext &FContext) { Context = FContext; } |
1188 | |
1189 | /// Whether the profile uses MD5 to represent string. |
1190 | static bool UseMD5; |
1191 | |
1192 | /// Whether the profile contains any ".__uniq." suffix in a name. |
1193 | static bool HasUniqSuffix; |
1194 | |
1195 | /// If this profile uses flow sensitive discriminators. |
1196 | static bool ProfileIsFS; |
1197 | |
1198 | /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for |
1199 | /// all the function symbols defined or declared in current module. |
1200 | DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap = nullptr; |
1201 | |
1202 | /// Return the GUID of the context's name. If the context is already using |
1203 | /// MD5, don't hash it again. |
1204 | uint64_t getGUID() const { |
1205 | return getFunction().getHashCode(); |
1206 | } |
1207 | |
1208 | // Find all the names in the current FunctionSamples including names in |
1209 | // all the inline instances and names of call targets. |
1210 | void findAllNames(DenseSet<FunctionId> &NameSet) const; |
1211 | |
1212 | bool operator==(const FunctionSamples &Other) const { |
1213 | return (GUIDToFuncNameMap == Other.GUIDToFuncNameMap || |
1214 | (GUIDToFuncNameMap && Other.GUIDToFuncNameMap && |
1215 | *GUIDToFuncNameMap == *Other.GUIDToFuncNameMap)) && |
1216 | FunctionHash == Other.FunctionHash && Context == Other.Context && |
1217 | TotalSamples == Other.TotalSamples && |
1218 | TotalHeadSamples == Other.TotalHeadSamples && |
1219 | BodySamples == Other.BodySamples && |
1220 | CallsiteSamples == Other.CallsiteSamples; |
1221 | } |
1222 | |
1223 | bool operator!=(const FunctionSamples &Other) const { |
1224 | return !(*this == Other); |
1225 | } |
1226 | |
1227 | private: |
1228 | /// CFG hash value for the function. |
1229 | uint64_t FunctionHash = 0; |
1230 | |
1231 | /// Calling context for function profile |
1232 | mutable SampleContext Context; |
1233 | |
1234 | /// Total number of samples collected inside this function. |
1235 | /// |
1236 | /// Samples are cumulative, they include all the samples collected |
1237 | /// inside this function and all its inlined callees. |
1238 | uint64_t TotalSamples = 0; |
1239 | |
1240 | /// Total number of samples collected at the head of the function. |
1241 | /// This is an approximation of the number of calls made to this function |
1242 | /// at runtime. |
1243 | uint64_t TotalHeadSamples = 0; |
1244 | |
1245 | /// Map instruction locations to collected samples. |
1246 | /// |
1247 | /// Each entry in this map contains the number of samples |
1248 | /// collected at the corresponding line offset. All line locations |
1249 | /// are an offset from the start of the function. |
1250 | BodySampleMap BodySamples; |
1251 | |
1252 | /// Map call sites to collected samples for the called function. |
1253 | /// |
1254 | /// Each entry in this map corresponds to all the samples |
1255 | /// collected for the inlined function call at the given |
1256 | /// location. For example, given: |
1257 | /// |
1258 | /// void foo() { |
1259 | /// 1 bar(); |
1260 | /// ... |
1261 | /// 8 baz(); |
1262 | /// } |
1263 | /// |
1264 | /// If the bar() and baz() calls were inlined inside foo(), this |
1265 | /// map will contain two entries. One for all the samples collected |
1266 | /// in the call to bar() at line offset 1, the other for all the samples |
1267 | /// collected in the call to baz() at line offset 8. |
1268 | CallsiteSampleMap CallsiteSamples; |
1269 | |
1270 | /// IR to profile location map generated by stale profile matching. |
1271 | /// |
1272 | /// Each entry is a mapping from the location on current build to the matched |
1273 | /// location in the "stale" profile. For example: |
1274 | /// Profiled source code: |
1275 | /// void foo() { |
1276 | /// 1 bar(); |
1277 | /// } |
1278 | /// |
1279 | /// Current source code: |
1280 | /// void foo() { |
1281 | /// 1 // Code change |
1282 | /// 2 bar(); |
1283 | /// } |
1284 | /// Supposing the stale profile matching algorithm generated the mapping [2 -> |
1285 | /// 1], the profile query using the location of bar on the IR which is 2 will |
1286 | /// be remapped to 1 and find the location of bar in the profile. |
1287 | const LocToLocMap *IRToProfileLocationMap = nullptr; |
1288 | }; |
1289 | |
1290 | /// Get the proper representation of a string according to whether the |
1291 | /// current Format uses MD5 to represent the string. |
1292 | static inline FunctionId getRepInFormat(StringRef Name) { |
1293 | if (Name.empty() || !FunctionSamples::UseMD5) |
1294 | return FunctionId(Name); |
1295 | return FunctionId(Function::getGUID(GlobalName: Name)); |
1296 | } |
1297 | |
1298 | raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS); |
1299 | |
1300 | /// This class provides operator overloads to the map container using MD5 as the |
1301 | /// key type, so that existing code can still work in most cases using |
1302 | /// SampleContext as key. |
1303 | /// Note: when populating container, make sure to assign the SampleContext to |
1304 | /// the mapped value immediately because the key no longer holds it. |
1305 | class SampleProfileMap |
1306 | : public HashKeyMap<std::unordered_map, SampleContext, FunctionSamples> { |
1307 | public: |
1308 | // Convenience method because this is being used in many places. Set the |
1309 | // FunctionSamples' context if its newly inserted. |
1310 | mapped_type &Create(const SampleContext &Ctx) { |
1311 | auto Ret = try_emplace(Key: Ctx, Args: FunctionSamples()); |
1312 | if (Ret.second) |
1313 | Ret.first->second.setContext(Ctx); |
1314 | return Ret.first->second; |
1315 | } |
1316 | |
1317 | iterator find(const SampleContext &Ctx) { |
1318 | return HashKeyMap<std::unordered_map, SampleContext, FunctionSamples>::find( |
1319 | Key: Ctx); |
1320 | } |
1321 | |
1322 | const_iterator find(const SampleContext &Ctx) const { |
1323 | return HashKeyMap<std::unordered_map, SampleContext, FunctionSamples>::find( |
1324 | Key: Ctx); |
1325 | } |
1326 | |
1327 | size_t erase(const SampleContext &Ctx) { |
1328 | return HashKeyMap<std::unordered_map, SampleContext, FunctionSamples>:: |
1329 | erase(Ctx); |
1330 | } |
1331 | |
1332 | size_t erase(const key_type &Key) { return base_type::erase(x: Key); } |
1333 | |
1334 | iterator erase(iterator It) { return base_type::erase(position: It); } |
1335 | }; |
1336 | |
1337 | using NameFunctionSamples = std::pair<hash_code, const FunctionSamples *>; |
1338 | |
1339 | void sortFuncProfiles(const SampleProfileMap &ProfileMap, |
1340 | std::vector<NameFunctionSamples> &SortedProfiles); |
1341 | |
1342 | /// Sort a LocationT->SampleT map by LocationT. |
1343 | /// |
1344 | /// It produces a sorted list of <LocationT, SampleT> records by ascending |
1345 | /// order of LocationT. |
1346 | template <class LocationT, class SampleT> class SampleSorter { |
1347 | public: |
1348 | using SamplesWithLoc = std::pair<const LocationT, SampleT>; |
1349 | using SamplesWithLocList = SmallVector<const SamplesWithLoc *, 20>; |
1350 | |
1351 | SampleSorter(const std::map<LocationT, SampleT> &Samples) { |
1352 | for (const auto &I : Samples) |
1353 | V.push_back(&I); |
1354 | llvm::stable_sort(V, [](const SamplesWithLoc *A, const SamplesWithLoc *B) { |
1355 | return A->first < B->first; |
1356 | }); |
1357 | } |
1358 | |
1359 | const SamplesWithLocList &get() const { return V; } |
1360 | |
1361 | private: |
1362 | SamplesWithLocList V; |
1363 | }; |
1364 | |
1365 | /// SampleContextTrimmer impelements helper functions to trim, merge cold |
1366 | /// context profiles. It also supports context profile canonicalization to make |
1367 | /// sure ProfileMap's key is consistent with FunctionSample's name/context. |
1368 | class SampleContextTrimmer { |
1369 | public: |
1370 | SampleContextTrimmer(SampleProfileMap &Profiles) : ProfileMap(Profiles){}; |
1371 | // Trim and merge cold context profile when requested. TrimBaseProfileOnly |
1372 | // should only be effective when TrimColdContext is true. On top of |
1373 | // TrimColdContext, TrimBaseProfileOnly can be used to specify to trim all |
1374 | // cold profiles or only cold base profiles. Trimming base profiles only is |
1375 | // mainly to honor the preinliner decsion. Note that when MergeColdContext is |
1376 | // true, preinliner decsion is not honored anyway so TrimBaseProfileOnly will |
1377 | // be ignored. |
1378 | void trimAndMergeColdContextProfiles(uint64_t ColdCountThreshold, |
1379 | bool TrimColdContext, |
1380 | bool MergeColdContext, |
1381 | uint32_t ColdContextFrameLength, |
1382 | bool TrimBaseProfileOnly); |
1383 | |
1384 | private: |
1385 | SampleProfileMap &ProfileMap; |
1386 | }; |
1387 | |
1388 | /// Helper class for profile conversion. |
1389 | /// |
1390 | /// It supports full context-sensitive profile to nested profile conversion, |
1391 | /// nested profile to flatten profile conversion, etc. |
1392 | class ProfileConverter { |
1393 | public: |
1394 | ProfileConverter(SampleProfileMap &Profiles); |
1395 | // Convert a full context-sensitive flat sample profile into a nested sample |
1396 | // profile. |
1397 | void convertCSProfiles(); |
1398 | struct FrameNode { |
1399 | FrameNode(FunctionId FName = FunctionId(), |
1400 | FunctionSamples *FSamples = nullptr, |
1401 | LineLocation CallLoc = {0, 0}) |
1402 | : FuncName(FName), FuncSamples(FSamples), CallSiteLoc(CallLoc){}; |
1403 | |
1404 | // Map line+discriminator location to child frame |
1405 | std::map<uint64_t, FrameNode> AllChildFrames; |
1406 | // Function name for current frame |
1407 | FunctionId FuncName; |
1408 | // Function Samples for current frame |
1409 | FunctionSamples *FuncSamples; |
1410 | // Callsite location in parent context |
1411 | LineLocation CallSiteLoc; |
1412 | |
1413 | FrameNode *getOrCreateChildFrame(const LineLocation &CallSite, |
1414 | FunctionId CalleeName); |
1415 | }; |
1416 | |
1417 | static void flattenProfile(SampleProfileMap &ProfileMap, |
1418 | bool ProfileIsCS = false) { |
1419 | SampleProfileMap TmpProfiles; |
1420 | flattenProfile(InputProfiles: ProfileMap, OutputProfiles&: TmpProfiles, ProfileIsCS); |
1421 | ProfileMap = std::move(TmpProfiles); |
1422 | } |
1423 | |
1424 | static void flattenProfile(const SampleProfileMap &InputProfiles, |
1425 | SampleProfileMap &OutputProfiles, |
1426 | bool ProfileIsCS = false) { |
1427 | if (ProfileIsCS) { |
1428 | for (const auto &I : InputProfiles) { |
1429 | // Retain the profile name and clear the full context for each function |
1430 | // profile. |
1431 | FunctionSamples &FS = OutputProfiles.Create(Ctx: I.second.getFunction()); |
1432 | FS.merge(Other: I.second); |
1433 | } |
1434 | } else { |
1435 | for (const auto &I : InputProfiles) |
1436 | flattenNestedProfile(OutputProfiles, FS: I.second); |
1437 | } |
1438 | } |
1439 | |
1440 | private: |
1441 | static void flattenNestedProfile(SampleProfileMap &OutputProfiles, |
1442 | const FunctionSamples &FS) { |
1443 | // To retain the context, checksum, attributes of the original profile, make |
1444 | // a copy of it if no profile is found. |
1445 | SampleContext &Context = FS.getContext(); |
1446 | auto Ret = OutputProfiles.try_emplace(Key: Context, Args: FS); |
1447 | FunctionSamples &Profile = Ret.first->second; |
1448 | if (Ret.second) { |
1449 | // Clear nested inlinees' samples for the flattened copy. These inlinees |
1450 | // will have their own top-level entries after flattening. |
1451 | Profile.removeAllCallsiteSamples(); |
1452 | // We recompute TotalSamples later, so here set to zero. |
1453 | Profile.setTotalSamples(0); |
1454 | } else { |
1455 | for (const auto &[LineLocation, SampleRecord] : FS.getBodySamples()) { |
1456 | Profile.addSampleRecord(Location: LineLocation, SampleRecord); |
1457 | } |
1458 | } |
1459 | |
1460 | assert(Profile.getCallsiteSamples().empty() && |
1461 | "There should be no inlinees' profiles after flattening." ); |
1462 | |
1463 | // TotalSamples might not be equal to the sum of all samples from |
1464 | // BodySamples and CallsiteSamples. So here we use "TotalSamples = |
1465 | // Original_TotalSamples - All_of_Callsite_TotalSamples + |
1466 | // All_of_Callsite_HeadSamples" to compute the new TotalSamples. |
1467 | uint64_t TotalSamples = FS.getTotalSamples(); |
1468 | |
1469 | for (const auto &I : FS.getCallsiteSamples()) { |
1470 | for (const auto &Callee : I.second) { |
1471 | const auto &CalleeProfile = Callee.second; |
1472 | // Add body sample. |
1473 | Profile.addBodySamples(LineOffset: I.first.LineOffset, Discriminator: I.first.Discriminator, |
1474 | Num: CalleeProfile.getHeadSamplesEstimate()); |
1475 | // Add callsite sample. |
1476 | Profile.addCalledTargetSamples( |
1477 | LineOffset: I.first.LineOffset, Discriminator: I.first.Discriminator, |
1478 | Func: CalleeProfile.getFunction(), |
1479 | Num: CalleeProfile.getHeadSamplesEstimate()); |
1480 | // Update total samples. |
1481 | TotalSamples = TotalSamples >= CalleeProfile.getTotalSamples() |
1482 | ? TotalSamples - CalleeProfile.getTotalSamples() |
1483 | : 0; |
1484 | TotalSamples += CalleeProfile.getHeadSamplesEstimate(); |
1485 | // Recursively convert callee profile. |
1486 | flattenNestedProfile(OutputProfiles, FS: CalleeProfile); |
1487 | } |
1488 | } |
1489 | Profile.addTotalSamples(Num: TotalSamples); |
1490 | |
1491 | Profile.setHeadSamples(Profile.getHeadSamplesEstimate()); |
1492 | } |
1493 | |
1494 | // Nest all children profiles into the profile of Node. |
1495 | void convertCSProfiles(FrameNode &Node); |
1496 | FrameNode *getOrCreateContextPath(const SampleContext &Context); |
1497 | |
1498 | SampleProfileMap &ProfileMap; |
1499 | FrameNode RootFrame; |
1500 | }; |
1501 | |
1502 | /// ProfileSymbolList records the list of function symbols shown up |
1503 | /// in the binary used to generate the profile. It is useful to |
1504 | /// to discriminate a function being so cold as not to shown up |
1505 | /// in the profile and a function newly added. |
1506 | class ProfileSymbolList { |
1507 | public: |
1508 | /// copy indicates whether we need to copy the underlying memory |
1509 | /// for the input Name. |
1510 | void add(StringRef Name, bool copy = false) { |
1511 | if (!copy) { |
1512 | Syms.insert(V: Name); |
1513 | return; |
1514 | } |
1515 | Syms.insert(V: Name.copy(A&: Allocator)); |
1516 | } |
1517 | |
1518 | bool contains(StringRef Name) { return Syms.count(V: Name); } |
1519 | |
1520 | void merge(const ProfileSymbolList &List) { |
1521 | for (auto Sym : List.Syms) |
1522 | add(Name: Sym, copy: true); |
1523 | } |
1524 | |
1525 | unsigned size() { return Syms.size(); } |
1526 | |
1527 | void setToCompress(bool TC) { ToCompress = TC; } |
1528 | bool toCompress() { return ToCompress; } |
1529 | |
1530 | std::error_code read(const uint8_t *Data, uint64_t ListSize); |
1531 | std::error_code write(raw_ostream &OS); |
1532 | void dump(raw_ostream &OS = dbgs()) const; |
1533 | |
1534 | private: |
1535 | // Determine whether or not to compress the symbol list when |
1536 | // writing it into profile. The variable is unused when the symbol |
1537 | // list is read from an existing profile. |
1538 | bool ToCompress = false; |
1539 | DenseSet<StringRef> Syms; |
1540 | BumpPtrAllocator Allocator; |
1541 | }; |
1542 | |
1543 | } // end namespace sampleprof |
1544 | |
1545 | using namespace sampleprof; |
1546 | // Provide DenseMapInfo for SampleContext. |
1547 | template <> struct DenseMapInfo<SampleContext> { |
1548 | static inline SampleContext getEmptyKey() { return SampleContext(); } |
1549 | |
1550 | static inline SampleContext getTombstoneKey() { |
1551 | return SampleContext(FunctionId(~1ULL)); |
1552 | } |
1553 | |
1554 | static unsigned getHashValue(const SampleContext &Val) { |
1555 | return Val.getHashCode(); |
1556 | } |
1557 | |
1558 | static bool isEqual(const SampleContext &LHS, const SampleContext &RHS) { |
1559 | return LHS == RHS; |
1560 | } |
1561 | }; |
1562 | |
1563 | // Prepend "__uniq" before the hash for tools like profilers to understand |
1564 | // that this symbol is of internal linkage type. The "__uniq" is the |
1565 | // pre-determined prefix that is used to tell tools that this symbol was |
1566 | // created with -funique-internal-linkage-symbols and the tools can strip or |
1567 | // keep the prefix as needed. |
1568 | inline std::string getUniqueInternalLinkagePostfix(const StringRef &FName) { |
1569 | llvm::MD5 Md5; |
1570 | Md5.update(Str: FName); |
1571 | llvm::MD5::MD5Result R; |
1572 | Md5.final(Result&: R); |
1573 | SmallString<32> Str; |
1574 | llvm::MD5::stringifyResult(Result&: R, Str); |
1575 | // Convert MD5hash to Decimal. Demangler suffixes can either contain |
1576 | // numbers or characters but not both. |
1577 | llvm::APInt IntHash(128, Str.str(), 16); |
1578 | return toString(I: IntHash, /* Radix = */ Radix: 10, /* Signed = */ Signed: false) |
1579 | .insert(pos: 0, s: FunctionSamples::UniqSuffix); |
1580 | } |
1581 | |
1582 | } // end namespace llvm |
1583 | |
1584 | #endif // LLVM_PROFILEDATA_SAMPLEPROF_H |
1585 | |