1//===- SampleProfReader.h - Read LLVM sample profile data -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains definitions needed for reading sample profiles.
10//
11// NOTE: If you are making changes to this file format, please remember
12// to document them in the Clang documentation at
13// tools/clang/docs/UsersManual.rst.
14//
15// Text format
16// -----------
17//
18// Sample profiles are written as ASCII text. The file is divided into
19// sections, which correspond to each of the functions executed at runtime.
20// Each section has the following format
21//
22// function1:total_samples:total_head_samples
23// offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ]
24// offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ]
25// ...
26// offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ]
27// offsetA[.discriminator]: fnA:num_of_total_samples
28// offsetA1[.discriminator]: number_of_samples [fn7:num fn8:num ... ]
29// ...
30// !CFGChecksum: num
31// !Attribute: flags
32//
33// This is a nested tree in which the indentation represents the nesting level
34// of the inline stack. There are no blank lines in the file. And the spacing
35// within a single line is fixed. Additional spaces will result in an error
36// while reading the file.
37//
38// Any line starting with the '#' character is completely ignored.
39//
40// Inlined calls are represented with indentation. The Inline stack is a
41// stack of source locations in which the top of the stack represents the
42// leaf function, and the bottom of the stack represents the actual
43// symbol to which the instruction belongs.
44//
45// Function names must be mangled in order for the profile loader to
46// match them in the current translation unit. The two numbers in the
47// function header specify how many total samples were accumulated in the
48// function (first number), and the total number of samples accumulated
49// in the prologue of the function (second number). This head sample
50// count provides an indicator of how frequently the function is invoked.
51//
52// There are three types of lines in the function body.
53//
54// * Sampled line represents the profile information of a source location.
55// * Callsite line represents the profile information of a callsite.
56// * Metadata line represents extra metadata of the function.
57//
58// Each sampled line may contain several items. Some are optional (marked
59// below):
60//
61// a. Source line offset. This number represents the line number
62// in the function where the sample was collected. The line number is
63// always relative to the line where symbol of the function is
64// defined. So, if the function has its header at line 280, the offset
65// 13 is at line 293 in the file.
66//
67// Note that this offset should never be a negative number. This could
68// happen in cases like macros. The debug machinery will register the
69// line number at the point of macro expansion. So, if the macro was
70// expanded in a line before the start of the function, the profile
71// converter should emit a 0 as the offset (this means that the optimizers
72// will not be able to associate a meaningful weight to the instructions
73// in the macro).
74//
75// b. [OPTIONAL] Discriminator. This is used if the sampled program
76// was compiled with DWARF discriminator support
77// (http://wiki.dwarfstd.org/index.php?title=Path_Discriminators).
78// DWARF discriminators are unsigned integer values that allow the
79// compiler to distinguish between multiple execution paths on the
80// same source line location.
81//
82// For example, consider the line of code ``if (cond) foo(); else bar();``.
83// If the predicate ``cond`` is true 80% of the time, then the edge
84// into function ``foo`` should be considered to be taken most of the
85// time. But both calls to ``foo`` and ``bar`` are at the same source
86// line, so a sample count at that line is not sufficient. The
87// compiler needs to know which part of that line is taken more
88// frequently.
89//
90// This is what discriminators provide. In this case, the calls to
91// ``foo`` and ``bar`` will be at the same line, but will have
92// different discriminator values. This allows the compiler to correctly
93// set edge weights into ``foo`` and ``bar``.
94//
95// c. Number of samples. This is an integer quantity representing the
96// number of samples collected by the profiler at this source
97// location.
98//
// d. [OPTIONAL] Potential call targets and samples. If present, this
// line contains a call instruction. This models both direct and
// indirect calls. Each called target is listed together with the
// number of samples. For example,
102//
103// 130: 7 foo:3 bar:2 baz:7
104//
105// The above means that at relative line offset 130 there is a call
106// instruction that calls one of ``foo()``, ``bar()`` and ``baz()``,
107// with ``baz()`` being the relatively more frequently called target.
108//
109// Each callsite line may contain several items. Some are optional.
110//
111// a. Source line offset. This number represents the line number of the
112// callsite that is inlined in the profiled binary.
113//
114// b. [OPTIONAL] Discriminator. Same as the discriminator for sampled line.
115//
116// c. Number of samples. This is an integer quantity representing the
117// total number of samples collected for the inlined instance at this
118// callsite
119//
120// Metadata line can occur in lines with one indent only, containing extra
121// information for the top-level function. Furthermore, metadata can only
122// occur after all the body samples and callsite samples.
123// Each metadata line may contain a particular type of metadata, marked by
124// the starting characters annotated with !. We process each metadata line
125// independently, hence each metadata line has to form an independent piece
126// of information that does not require cross-line reference.
127// We support the following types of metadata:
128//
129// a. CFG Checksum (a.k.a. function hash):
130// !CFGChecksum: 12345
// b. Attribute (see ContextAttributeMask):
// !Attribute: 1
133//
134//
135// Binary format
136// -------------
137//
138// This is a more compact encoding. Numbers are encoded as ULEB128 values
139// and all strings are encoded in a name table. The file is organized in
140// the following sections:
141//
142// MAGIC (uint64_t)
143// File identifier computed by function SPMagic() (0x5350524f463432ff)
144//
145// VERSION (uint32_t)
146// File format version number computed by SPVersion()
147//
148// SUMMARY
149// TOTAL_COUNT (uint64_t)
150// Total number of samples in the profile.
151// MAX_COUNT (uint64_t)
152// Maximum value of samples on a line.
153// MAX_FUNCTION_COUNT (uint64_t)
154// Maximum number of samples at function entry (head samples).
155// NUM_COUNTS (uint64_t)
156// Number of lines with samples.
157// NUM_FUNCTIONS (uint64_t)
158// Number of functions with samples.
159// NUM_DETAILED_SUMMARY_ENTRIES (size_t)
160// Number of entries in detailed summary
161// DETAILED_SUMMARY
162// A list of detailed summary entry. Each entry consists of
163// CUTOFF (uint32_t)
164// Required percentile of total sample count expressed as a fraction
165// multiplied by 1000000.
166// MIN_COUNT (uint64_t)
167// The minimum number of samples required to reach the target
168// CUTOFF.
169// NUM_COUNTS (uint64_t)
// Number of samples to get to the desired percentile.
171//
172// NAME TABLE
173// SIZE (uint64_t)
174// Number of entries in the name table.
175// NAMES
176// A NUL-separated list of SIZE strings.
177//
178// FUNCTION BODY (one for each uninlined function body present in the profile)
179// HEAD_SAMPLES (uint64_t) [only for top-level functions]
180// Total number of samples collected at the head (prologue) of the
181// function.
182// NOTE: This field should only be present for top-level functions
183// (i.e., not inlined into any caller). Inlined function calls
184// have no prologue, so they don't need this.
185// NAME_IDX (uint64_t)
186// Index into the name table indicating the function name.
187// SAMPLES (uint64_t)
188// Total number of samples collected in this function.
189// NRECS (uint32_t)
// Total number of sampling records in this function's profile.
191// BODY RECORDS
192// A list of NRECS entries. Each entry contains:
193// OFFSET (uint32_t)
194// Line offset from the start of the function.
195// DISCRIMINATOR (uint32_t)
196// Discriminator value (see description of discriminators
197// in the text format documentation above).
198// SAMPLES (uint64_t)
199// Number of samples collected at this location.
200// NUM_CALLS (uint32_t)
201// Number of non-inlined function calls made at this location. In the
202// case of direct calls, this number will always be 1. For indirect
203// calls (virtual functions and function pointers) this will
204// represent all the actual functions called at runtime.
205// CALL_TARGETS
206// A list of NUM_CALLS entries for each called function:
207// NAME_IDX (uint64_t)
208// Index into the name table with the callee name.
209// SAMPLES (uint64_t)
210// Number of samples collected at the call site.
211// NUM_INLINED_FUNCTIONS (uint32_t)
212// Number of callees inlined into this function.
213// INLINED FUNCTION RECORDS
214// A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined
215// callees.
216// OFFSET (uint32_t)
217// Line offset from the start of the function.
218// DISCRIMINATOR (uint32_t)
219// Discriminator value (see description of discriminators
220// in the text format documentation above).
221// FUNCTION BODY
222// A FUNCTION BODY entry describing the inlined function.
223//===----------------------------------------------------------------------===//
224
225#ifndef LLVM_PROFILEDATA_SAMPLEPROFREADER_H
226#define LLVM_PROFILEDATA_SAMPLEPROFREADER_H
227
228#include "llvm/ADT/SmallVector.h"
229#include "llvm/ADT/StringRef.h"
230#include "llvm/IR/DiagnosticInfo.h"
231#include "llvm/IR/LLVMContext.h"
232#include "llvm/IR/ProfileSummary.h"
233#include "llvm/ProfileData/GCOV.h"
234#include "llvm/ProfileData/SampleProf.h"
235#include "llvm/ProfileData/SymbolRemappingReader.h"
236#include "llvm/Support/Debug.h"
237#include "llvm/Support/Discriminator.h"
238#include "llvm/Support/ErrorOr.h"
239#include "llvm/Support/MemoryBuffer.h"
240#include <cstdint>
241#include <list>
242#include <memory>
243#include <optional>
244#include <string>
245#include <system_error>
246#include <unordered_set>
247#include <vector>
248
249namespace llvm {
250
251class raw_ostream;
252class Twine;
253
254namespace vfs {
255class FileSystem;
256} // namespace vfs
257
258namespace sampleprof {
259
260class SampleProfileReader;
261
262/// SampleProfileReaderItaniumRemapper remaps the profile data from a
263/// sample profile data reader, by applying a provided set of equivalences
264/// between components of the symbol names in the profile.
265class SampleProfileReaderItaniumRemapper {
266public:
267 SampleProfileReaderItaniumRemapper(std::unique_ptr<MemoryBuffer> B,
268 std::unique_ptr<SymbolRemappingReader> SRR,
269 SampleProfileReader &R)
270 : Buffer(std::move(B)), Remappings(std::move(SRR)), Reader(R) {
271 assert(Remappings && "Remappings cannot be nullptr");
272 }
273
274 /// Create a remapper from the given remapping file. The remapper will
275 /// be used for profile read in by Reader.
276 static ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
277 create(const std::string Filename, vfs::FileSystem &FS,
278 SampleProfileReader &Reader, LLVMContext &C);
279
280 /// Create a remapper from the given Buffer. The remapper will
281 /// be used for profile read in by Reader.
282 static ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
283 create(std::unique_ptr<MemoryBuffer> &B, SampleProfileReader &Reader,
284 LLVMContext &C);
285
286 /// Apply remappings to the profile read by Reader.
287 void applyRemapping(LLVMContext &Ctx);
288
289 bool hasApplied() { return RemappingApplied; }
290
291 /// Insert function name into remapper.
292 void insert(StringRef FunctionName) { Remappings->insert(FunctionName); }
293
294 /// Query whether there is equivalent in the remapper which has been
295 /// inserted.
296 bool exist(StringRef FunctionName) {
297 return Remappings->lookup(FunctionName);
298 }
299
300 /// Return the equivalent name in the profile for \p FunctionName if
301 /// it exists.
302 std::optional<StringRef> lookUpNameInProfile(StringRef FunctionName);
303
304private:
305 // The buffer holding the content read from remapping file.
306 std::unique_ptr<MemoryBuffer> Buffer;
307 std::unique_ptr<SymbolRemappingReader> Remappings;
308 // Map remapping key to the name in the profile. By looking up the
309 // key in the remapper, a given new name can be mapped to the
310 // cannonical name using the NameMap.
311 DenseMap<SymbolRemappingReader::Key, StringRef> NameMap;
312 // The Reader the remapper is servicing.
313 SampleProfileReader &Reader;
314 // Indicate whether remapping has been applied to the profile read
315 // by Reader -- by calling applyRemapping.
316 bool RemappingApplied = false;
317};
318
319/// Sample-based profile reader.
320///
321/// Each profile contains sample counts for all the functions
322/// executed. Inside each function, statements are annotated with the
323/// collected samples on all the instructions associated with that
324/// statement.
325///
326/// For this to produce meaningful data, the program needs to be
327/// compiled with some debug information (at minimum, line numbers:
328/// -gline-tables-only). Otherwise, it will be impossible to match IR
329/// instructions to the line numbers collected by the profiler.
330///
331/// From the profile file, we are interested in collecting the
332/// following information:
333///
334/// * A list of functions included in the profile (mangled names).
335///
336/// * For each function F:
337/// 1. The total number of samples collected in F.
338///
339/// 2. The samples collected at each line in F. To provide some
340/// protection against source code shuffling, line numbers should
341/// be relative to the start of the function.
342///
343/// The reader supports two file formats: text and binary. The text format
344/// is useful for debugging and testing, while the binary format is more
345/// compact and I/O efficient. They can both be used interchangeably.
346class SampleProfileReader {
347public:
348 SampleProfileReader(std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
349 SampleProfileFormat Format = SPF_None)
350 : Profiles(), Ctx(C), Buffer(std::move(B)), Format(Format) {}
351
352 virtual ~SampleProfileReader() = default;
353
354 /// Read and validate the file header.
355 virtual std::error_code readHeader() = 0;
356
357 /// Set the bits for FS discriminators. Parameter Pass specify the sequence
358 /// number, Pass == i is for the i-th round of adding FS discriminators.
359 /// Pass == 0 is for using base discriminators.
360 void setDiscriminatorMaskedBitFrom(FSDiscriminatorPass P) {
361 MaskedBitFrom = getFSPassBitEnd(P);
362 }
363
364 /// Get the bitmask the discriminators: For FS profiles, return the bit
365 /// mask for this pass. For non FS profiles, return (unsigned) -1.
366 uint32_t getDiscriminatorMask() const {
367 if (!ProfileIsFS)
368 return 0xFFFFFFFF;
369 assert((MaskedBitFrom != 0) && "MaskedBitFrom is not set properly");
370 return getN1Bits(N: MaskedBitFrom);
371 }
372
373 /// The interface to read sample profiles from the associated file.
374 std::error_code read() {
375 if (std::error_code EC = readImpl())
376 return EC;
377 if (Remapper)
378 Remapper->applyRemapping(Ctx);
379 FunctionSamples::UseMD5 = useMD5();
380 return sampleprof_error::success;
381 }
382
383 /// The implementaion to read sample profiles from the associated file.
384 virtual std::error_code readImpl() = 0;
385
386 /// Print the profile for \p FunctionSamples on stream \p OS.
387 void dumpFunctionProfile(const FunctionSamples &FS, raw_ostream &OS = dbgs());
388
389 /// Collect functions with definitions in Module M. For reader which
390 /// support loading function profiles on demand, return true when the
391 /// reader has been given a module. Always return false for reader
392 /// which doesn't support loading function profiles on demand.
393 virtual bool collectFuncsFromModule() { return false; }
394
395 /// Print all the profiles on stream \p OS.
396 void dump(raw_ostream &OS = dbgs());
397
398 /// Print all the profiles on stream \p OS in the JSON format.
399 void dumpJson(raw_ostream &OS = dbgs());
400
401 /// Return the samples collected for function \p F.
402 FunctionSamples *getSamplesFor(const Function &F) {
403 // The function name may have been updated by adding suffix. Call
404 // a helper to (optionally) strip off suffixes so that we can
405 // match against the original function name in the profile.
406 StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
407 return getSamplesFor(Fname: CanonName);
408 }
409
410 /// Return the samples collected for function \p F.
411 FunctionSamples *getSamplesFor(StringRef Fname) {
412 auto It = Profiles.find(Ctx: FunctionId(Fname));
413 if (It != Profiles.end())
414 return &It->second;
415
416 if (Remapper) {
417 if (auto NameInProfile = Remapper->lookUpNameInProfile(FunctionName: Fname)) {
418 auto It = Profiles.find(Ctx: FunctionId(*NameInProfile));
419 if (It != Profiles.end())
420 return &It->second;
421 }
422 }
423 return nullptr;
424 }
425
426 /// Return all the profiles.
427 SampleProfileMap &getProfiles() { return Profiles; }
428
429 /// Report a parse error message.
430 void reportError(int64_t LineNumber, const Twine &Msg) const {
431 Ctx.diagnose(DI: DiagnosticInfoSampleProfile(Buffer->getBufferIdentifier(),
432 LineNumber, Msg));
433 }
434
435 /// Create a sample profile reader appropriate to the file format.
436 /// Create a remapper underlying if RemapFilename is not empty.
437 /// Parameter P specifies the FSDiscriminatorPass.
438 static ErrorOr<std::unique_ptr<SampleProfileReader>>
439 create(const std::string Filename, LLVMContext &C, vfs::FileSystem &FS,
440 FSDiscriminatorPass P = FSDiscriminatorPass::Base,
441 const std::string RemapFilename = "");
442
443 /// Create a sample profile reader from the supplied memory buffer.
444 /// Create a remapper underlying if RemapFilename is not empty.
445 /// Parameter P specifies the FSDiscriminatorPass.
446 static ErrorOr<std::unique_ptr<SampleProfileReader>>
447 create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C, vfs::FileSystem &FS,
448 FSDiscriminatorPass P = FSDiscriminatorPass::Base,
449 const std::string RemapFilename = "");
450
451 /// Return the profile summary.
452 ProfileSummary &getSummary() const { return *(Summary.get()); }
453
454 MemoryBuffer *getBuffer() const { return Buffer.get(); }
455
456 /// \brief Return the profile format.
457 SampleProfileFormat getFormat() const { return Format; }
458
459 /// Whether input profile is based on pseudo probes.
460 bool profileIsProbeBased() const { return ProfileIsProbeBased; }
461
462 /// Whether input profile is fully context-sensitive.
463 bool profileIsCS() const { return ProfileIsCS; }
464
465 /// Whether input profile contains ShouldBeInlined contexts.
466 bool profileIsPreInlined() const { return ProfileIsPreInlined; }
467
468 /// Whether input profile is flow-sensitive.
469 bool profileIsFS() const { return ProfileIsFS; }
470
471 virtual std::unique_ptr<ProfileSymbolList> getProfileSymbolList() {
472 return nullptr;
473 };
474
475 /// It includes all the names that have samples either in outline instance
476 /// or inline instance.
477 virtual std::vector<FunctionId> *getNameTable() { return nullptr; }
478 virtual bool dumpSectionInfo(raw_ostream &OS = dbgs()) { return false; };
479
480 /// Return whether names in the profile are all MD5 numbers.
481 bool useMD5() const { return ProfileIsMD5; }
482
483 /// Force the profile to use MD5 in Sample contexts, even if function names
484 /// are present.
485 virtual void setProfileUseMD5() { ProfileIsMD5 = true; }
486
487 /// Don't read profile without context if the flag is set. This is only meaningful
488 /// for ExtBinary format.
489 virtual void setSkipFlatProf(bool Skip) {}
490 /// Return whether any name in the profile contains ".__uniq." suffix.
491 virtual bool hasUniqSuffix() { return false; }
492
493 SampleProfileReaderItaniumRemapper *getRemapper() { return Remapper.get(); }
494
495 void setModule(const Module *Mod) { M = Mod; }
496
497protected:
498 /// Map every function to its associated profile.
499 ///
500 /// The profile of every function executed at runtime is collected
501 /// in the structure FunctionSamples. This maps function objects
502 /// to their corresponding profiles.
503 SampleProfileMap Profiles;
504
505 /// LLVM context used to emit diagnostics.
506 LLVMContext &Ctx;
507
508 /// Memory buffer holding the profile file.
509 std::unique_ptr<MemoryBuffer> Buffer;
510
511 /// Profile summary information.
512 std::unique_ptr<ProfileSummary> Summary;
513
514 /// Take ownership of the summary of this reader.
515 static std::unique_ptr<ProfileSummary>
516 takeSummary(SampleProfileReader &Reader) {
517 return std::move(Reader.Summary);
518 }
519
520 /// Compute summary for this profile.
521 void computeSummary();
522
523 std::unique_ptr<SampleProfileReaderItaniumRemapper> Remapper;
524
525 /// \brief Whether samples are collected based on pseudo probes.
526 bool ProfileIsProbeBased = false;
527
528 /// Whether function profiles are context-sensitive flat profiles.
529 bool ProfileIsCS = false;
530
531 /// Whether function profile contains ShouldBeInlined contexts.
532 bool ProfileIsPreInlined = false;
533
534 /// Number of context-sensitive profiles.
535 uint32_t CSProfileCount = 0;
536
537 /// Whether the function profiles use FS discriminators.
538 bool ProfileIsFS = false;
539
540 /// \brief The format of sample.
541 SampleProfileFormat Format = SPF_None;
542
543 /// \brief The current module being compiled if SampleProfileReader
544 /// is used by compiler. If SampleProfileReader is used by other
545 /// tools which are not compiler, M is usually nullptr.
546 const Module *M = nullptr;
547
548 /// Zero out the discriminator bits higher than bit MaskedBitFrom (0 based).
549 /// The default is to keep all the bits.
550 uint32_t MaskedBitFrom = 31;
551
552 /// Whether the profile uses MD5 for Sample Contexts and function names. This
553 /// can be one-way overriden by the user to force use MD5.
554 bool ProfileIsMD5 = false;
555};
556
557class SampleProfileReaderText : public SampleProfileReader {
558public:
559 SampleProfileReaderText(std::unique_ptr<MemoryBuffer> B, LLVMContext &C)
560 : SampleProfileReader(std::move(B), C, SPF_Text) {}
561
562 /// Read and validate the file header.
563 std::error_code readHeader() override { return sampleprof_error::success; }
564
565 /// Read sample profiles from the associated file.
566 std::error_code readImpl() override;
567
568 /// Return true if \p Buffer is in the format supported by this class.
569 static bool hasFormat(const MemoryBuffer &Buffer);
570
571 /// Text format sample profile does not support MD5 for now.
572 void setProfileUseMD5() override {}
573
574private:
575 /// CSNameTable is used to save full context vectors. This serves as an
576 /// underlying immutable buffer for all clients.
577 std::list<SampleContextFrameVector> CSNameTable;
578};
579
580class SampleProfileReaderBinary : public SampleProfileReader {
581public:
582 SampleProfileReaderBinary(std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
583 SampleProfileFormat Format = SPF_None)
584 : SampleProfileReader(std::move(B), C, Format) {}
585
586 /// Read and validate the file header.
587 std::error_code readHeader() override;
588
589 /// Read sample profiles from the associated file.
590 std::error_code readImpl() override;
591
592 /// It includes all the names that have samples either in outline instance
593 /// or inline instance.
594 std::vector<FunctionId> *getNameTable() override {
595 return &NameTable;
596 }
597
598protected:
599 /// Read a numeric value of type T from the profile.
600 ///
601 /// If an error occurs during decoding, a diagnostic message is emitted and
602 /// EC is set.
603 ///
604 /// \returns the read value.
605 template <typename T> ErrorOr<T> readNumber();
606
607 /// Read a numeric value of type T from the profile. The value is saved
608 /// without encoded.
609 template <typename T> ErrorOr<T> readUnencodedNumber();
610
611 /// Read a string from the profile.
612 ///
613 /// If an error occurs during decoding, a diagnostic message is emitted and
614 /// EC is set.
615 ///
616 /// \returns the read value.
617 ErrorOr<StringRef> readString();
618
619 /// Read the string index and check whether it overflows the table.
620 template <typename T> inline ErrorOr<size_t> readStringIndex(T &Table);
621
622 /// Read the next function profile instance.
623 std::error_code readFuncProfile(const uint8_t *Start);
624
625 /// Read the contents of the given profile instance.
626 std::error_code readProfile(FunctionSamples &FProfile);
627
628 /// Read the contents of Magic number and Version number.
629 std::error_code readMagicIdent();
630
631 /// Read profile summary.
632 std::error_code readSummary();
633
634 /// Read the whole name table.
635 std::error_code readNameTable();
636
637 /// Read a string indirectly via the name table. Optionally return the index.
638 ErrorOr<FunctionId> readStringFromTable(size_t *RetIdx = nullptr);
639
640 /// Read a context indirectly via the CSNameTable. Optionally return the
641 /// index.
642 ErrorOr<SampleContextFrames> readContextFromTable(size_t *RetIdx = nullptr);
643
644 /// Read a context indirectly via the CSNameTable if the profile has context,
645 /// otherwise same as readStringFromTable, also return its hash value.
646 ErrorOr<std::pair<SampleContext, uint64_t>> readSampleContextFromTable();
647
648 /// Points to the current location in the buffer.
649 const uint8_t *Data = nullptr;
650
651 /// Points to the end of the buffer.
652 const uint8_t *End = nullptr;
653
654 /// Function name table.
655 std::vector<FunctionId> NameTable;
656
657 /// CSNameTable is used to save full context vectors. It is the backing buffer
658 /// for SampleContextFrames.
659 std::vector<SampleContextFrameVector> CSNameTable;
660
661 /// Table to cache MD5 values of sample contexts corresponding to
662 /// readSampleContextFromTable(), used to index into Profiles or
663 /// FuncOffsetTable.
664 std::vector<uint64_t> MD5SampleContextTable;
665
666 /// The starting address of the table of MD5 values of sample contexts. For
667 /// fixed length MD5 non-CS profile it is same as MD5NameMemStart because
668 /// hashes of non-CS contexts are already in the profile. Otherwise it points
669 /// to the start of MD5SampleContextTable.
670 const uint64_t *MD5SampleContextStart = nullptr;
671
672private:
673 std::error_code readSummaryEntry(std::vector<ProfileSummaryEntry> &Entries);
674 virtual std::error_code verifySPMagic(uint64_t Magic) = 0;
675};
676
677class SampleProfileReaderRawBinary : public SampleProfileReaderBinary {
678private:
679 std::error_code verifySPMagic(uint64_t Magic) override;
680
681public:
682 SampleProfileReaderRawBinary(std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
683 SampleProfileFormat Format = SPF_Binary)
684 : SampleProfileReaderBinary(std::move(B), C, Format) {}
685
686 /// \brief Return true if \p Buffer is in the format supported by this class.
687 static bool hasFormat(const MemoryBuffer &Buffer);
688};
689
690/// SampleProfileReaderExtBinaryBase/SampleProfileWriterExtBinaryBase defines
691/// the basic structure of the extensible binary format.
692/// The format is organized in sections except the magic and version number
693/// at the beginning. There is a section table before all the sections, and
694/// each entry in the table describes the entry type, start, size and
695/// attributes. The format in each section is defined by the section itself.
696///
697/// It is easy to add a new section while maintaining the backward
698/// compatibility of the profile. Nothing extra needs to be done. If we want
699/// to extend an existing section, like add cache misses information in
700/// addition to the sample count in the profile body, we can add a new section
701/// with the extension and retire the existing section, and we could choose
702/// to keep the parser of the old section if we want the reader to be able
703/// to read both new and old format profile.
704///
705/// SampleProfileReaderExtBinary/SampleProfileWriterExtBinary define the
706/// commonly used sections of a profile in extensible binary format. It is
707/// possible to define other types of profile inherited from
708/// SampleProfileReaderExtBinaryBase/SampleProfileWriterExtBinaryBase.
709class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
710private:
711 std::error_code decompressSection(const uint8_t *SecStart,
712 const uint64_t SecSize,
713 const uint8_t *&DecompressBuf,
714 uint64_t &DecompressBufSize);
715
716 BumpPtrAllocator Allocator;
717
718protected:
719 std::vector<SecHdrTableEntry> SecHdrTable;
720 std::error_code readSecHdrTableEntry(uint64_t Idx);
721 std::error_code readSecHdrTable();
722
723 std::error_code readFuncMetadata(bool ProfileHasAttribute);
724 std::error_code readFuncMetadata(bool ProfileHasAttribute,
725 FunctionSamples *FProfile);
726 std::error_code readFuncOffsetTable();
727 std::error_code readFuncProfiles();
728 std::error_code readNameTableSec(bool IsMD5, bool FixedLengthMD5);
729 std::error_code readCSNameTableSec();
730 std::error_code readProfileSymbolList();
731
732 std::error_code readHeader() override;
733 std::error_code verifySPMagic(uint64_t Magic) override = 0;
734 virtual std::error_code readOneSection(const uint8_t *Start, uint64_t Size,
735 const SecHdrTableEntry &Entry);
736 // placeholder for subclasses to dispatch their own section readers.
737 virtual std::error_code readCustomSection(const SecHdrTableEntry &Entry) = 0;
738
739 /// Determine which container readFuncOffsetTable() should populate, the list
740 /// FuncOffsetList or the map FuncOffsetTable.
741 bool useFuncOffsetList() const;
742
743 std::unique_ptr<ProfileSymbolList> ProfSymList;
744
745 /// The table mapping from a function context's MD5 to the offset of its
746 /// FunctionSample towards file start.
747 /// At most one of FuncOffsetTable and FuncOffsetList is populated.
748 DenseMap<hash_code, uint64_t> FuncOffsetTable;
749
750 /// The list version of FuncOffsetTable. This is used if every entry is
751 /// being accessed.
752 std::vector<std::pair<SampleContext, uint64_t>> FuncOffsetList;
753
754 /// The set containing the functions to use when compiling a module.
755 DenseSet<StringRef> FuncsToUse;
756
757 /// If SkipFlatProf is true, skip the sections with
758 /// SecFlagFlat flag.
759 bool SkipFlatProf = false;
760
761public:
762 SampleProfileReaderExtBinaryBase(std::unique_ptr<MemoryBuffer> B,
763 LLVMContext &C, SampleProfileFormat Format)
764 : SampleProfileReaderBinary(std::move(B), C, Format) {}
765
766 /// Read sample profiles in extensible format from the associated file.
767 std::error_code readImpl() override;
768
769 /// Get the total size of all \p Type sections.
770 uint64_t getSectionSize(SecType Type);
771 /// Get the total size of header and all sections.
772 uint64_t getFileSize();
773 bool dumpSectionInfo(raw_ostream &OS = dbgs()) override;
774
775 /// Collect functions with definitions in Module M. Return true if
776 /// the reader has been given a module.
777 bool collectFuncsFromModule() override;
778
779 std::unique_ptr<ProfileSymbolList> getProfileSymbolList() override {
780 return std::move(ProfSymList);
781 };
782
783 void setSkipFlatProf(bool Skip) override { SkipFlatProf = Skip; }
784};
785
786class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase {
787private:
788 std::error_code verifySPMagic(uint64_t Magic) override;
789 std::error_code readCustomSection(const SecHdrTableEntry &Entry) override {
790 // Update the data reader pointer to the end of the section.
791 Data = End;
792 return sampleprof_error::success;
793 };
794
795public:
796 SampleProfileReaderExtBinary(std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
797 SampleProfileFormat Format = SPF_Ext_Binary)
798 : SampleProfileReaderExtBinaryBase(std::move(B), C, Format) {}
799
800 /// \brief Return true if \p Buffer is in the format supported by this class.
801 static bool hasFormat(const MemoryBuffer &Buffer);
802};
803
804using InlineCallStack = SmallVector<FunctionSamples *, 10>;
805
// Histogram types supported in GCC. Currently, only call target histograms
// need to be supported. The values are spelled out explicitly; they are the
// same ones implicit numbering would assign.
enum HistType {
  HIST_TYPE_INTERVAL = 0,
  HIST_TYPE_POW2 = 1,
  HIST_TYPE_SINGLE_VALUE = 2,
  HIST_TYPE_CONST_DELTA = 3,
  HIST_TYPE_INDIR_CALL = 4,
  HIST_TYPE_AVERAGE = 5,
  HIST_TYPE_IOR = 6,
  HIST_TYPE_INDIR_CALL_TOPN = 7
};
818
819class SampleProfileReaderGCC : public SampleProfileReader {
820public:
821 SampleProfileReaderGCC(std::unique_ptr<MemoryBuffer> B, LLVMContext &C)
822 : SampleProfileReader(std::move(B), C, SPF_GCC),
823 GcovBuffer(Buffer.get()) {}
824
825 /// Read and validate the file header.
826 std::error_code readHeader() override;
827
828 /// Read sample profiles from the associated file.
829 std::error_code readImpl() override;
830
831 /// Return true if \p Buffer is in the format supported by this class.
832 static bool hasFormat(const MemoryBuffer &Buffer);
833
834protected:
835 std::error_code readNameTable();
836 std::error_code readOneFunctionProfile(const InlineCallStack &InlineStack,
837 bool Update, uint32_t Offset);
838 std::error_code readFunctionProfiles();
839 std::error_code skipNextWord();
840 template <typename T> ErrorOr<T> readNumber();
841 ErrorOr<StringRef> readString();
842
843 /// Read the section tag and check that it's the same as \p Expected.
844 std::error_code readSectionTag(uint32_t Expected);
845
846 /// GCOV buffer containing the profile.
847 GCOVBuffer GcovBuffer;
848
849 /// Function names in this profile.
850 std::vector<std::string> Names;
851
852 /// GCOV tags used to separate sections in the profile file.
853 static const uint32_t GCOVTagAFDOFileNames = 0xaa000000;
854 static const uint32_t GCOVTagAFDOFunction = 0xac000000;
855};
856
857} // end namespace sampleprof
858
859} // end namespace llvm
860
861#endif // LLVM_PROFILEDATA_SAMPLEPROFREADER_H
862

// source code of llvm/include/llvm/ProfileData/SampleProfReader.h