1//===- InstrProf.h - Instrumented profiling format support ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Instrumentation-based profiling data is generated by instrumented
10// binaries through library functions in compiler-rt, and read by the clang
11// frontend to feed PGO.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_PROFILEDATA_INSTRPROF_H
16#define LLVM_PROFILEDATA_INSTRPROF_H
17
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/StringRef.h"
21#include "llvm/ADT/StringSet.h"
22#include "llvm/ADT/Triple.h"
23#include "llvm/IR/GlobalValue.h"
24#include "llvm/IR/ProfileSummary.h"
25#include "llvm/ProfileData/InstrProfData.inc"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Support/Compiler.h"
28#include "llvm/Support/Endian.h"
29#include "llvm/Support/Error.h"
30#include "llvm/Support/ErrorHandling.h"
31#include "llvm/Support/Host.h"
32#include "llvm/Support/MD5.h"
33#include "llvm/Support/MathExtras.h"
34#include "llvm/Support/raw_ostream.h"
35#include <algorithm>
36#include <cassert>
37#include <cstddef>
38#include <cstdint>
39#include <cstring>
40#include <list>
41#include <memory>
42#include <string>
43#include <system_error>
44#include <utility>
45#include <vector>
46
47namespace llvm {
48
49class Function;
50class GlobalVariable;
51struct InstrProfRecord;
52class InstrProfSymtab;
53class Instruction;
54class MDNode;
55class Module;
56
/// Kinds of instrumentation profile sections. One enumerator is generated for
/// each INSTR_PROF_SECT_ENTRY record in InstrProfData.inc, using the record's
/// first field (\c Kind) as the enumerator name; the remaining fields are
/// ignored here.
enum InstrProfSectKind {
#define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Kind,
#include "llvm/ProfileData/InstrProfData.inc"
};
61
/// Return the name of the profile section corresponding to \p IPSK.
///
/// The name of the section depends on the object format type \p OF. If
/// \p AddSegmentInfo is true (the default), a segment prefix and additional
/// linker hints may be added to the section name.
std::string getInstrProfSectionName(InstrProfSectKind IPSK,
                                    Triple::ObjectFormatType OF,
                                    bool AddSegmentInfo = true);
70
/// Return the name of the profile runtime entry point that does value
/// profiling for a given site (the value of INSTR_PROF_VALUE_PROF_FUNC_STR).
inline StringRef getInstrProfValueProfFuncName() {
  return INSTR_PROF_VALUE_PROF_FUNC_STR;
}
76
/// Return the name of the profile runtime entry point that does memop size
/// value profiling (the value of INSTR_PROF_VALUE_PROF_MEMOP_FUNC_STR).
inline StringRef getInstrProfValueProfMemOpFuncName() {
  return INSTR_PROF_VALUE_PROF_MEMOP_FUNC_STR;
}
82
/// Return the name prefix of variables containing instrumented function names.
inline StringRef getInstrProfNameVarPrefix() { return "__profn_"; }

/// Return the name prefix of variables containing per-function control data.
inline StringRef getInstrProfDataVarPrefix() { return "__profd_"; }

/// Return the name prefix of profile counter variables.
inline StringRef getInstrProfCountersVarPrefix() { return "__profc_"; }

/// Return the name prefix of value profile variables.
inline StringRef getInstrProfValuesVarPrefix() { return "__profvp_"; }

/// Return the name of the value profile node array variable.
inline StringRef getInstrProfVNodesVarName() { return "__llvm_prf_vnodes"; }
97
/// Return the name of the variable holding the strings (possibly compressed)
/// of all functions' PGO names.
inline StringRef getInstrProfNamesVarName() {
  return "__llvm_prf_nm";
}

/// Return the name of a coverage mapping variable (internal linkage)
/// for each instrumented source module. Such variables are allocated
/// in the __llvm_covmap section.
inline StringRef getCoverageMappingVarName() {
  return "__llvm_coverage_mapping";
}

/// Return the name of the internal variable recording the array
/// of PGO name vars referenced by the coverage mapping. The owning
/// functions of those names are not emitted by the FE (e.g., unused inline
/// functions).
inline StringRef getCoverageUnusedNamesVarName() {
  return "__llvm_coverage_names";
}
118
/// Return the name of the function that registers all the per-function control
/// data at program startup time by calling __llvm_register_function. This
/// function has internal linkage and is called by the __llvm_profile_init
/// runtime method. This function is not generated for these platforms:
/// Darwin, Linux, and FreeBSD.
inline StringRef getInstrProfRegFuncsName() {
  return "__llvm_profile_register_functions";
}

/// Return the name of the runtime interface that registers per-function control
/// data for one instrumented function.
inline StringRef getInstrProfRegFuncName() {
  return "__llvm_profile_register_function";
}

/// Return the name of the runtime interface that registers the PGO name strings.
inline StringRef getInstrProfNamesRegFuncName() {
  return "__llvm_profile_register_names_function";
}

/// Return the name of the runtime initialization method that is generated by
/// the compiler. The function calls __llvm_profile_register_functions and
/// __llvm_profile_override_default_filename functions if needed. This function
/// has internal linkage and is invoked at startup time via init_array.
inline StringRef getInstrProfInitFuncName() { return "__llvm_profile_init"; }
144
/// Return the name of the hook variable defined in the profile runtime library.
/// A reference to the variable causes the linker to link in the runtime
/// initialization module (which defines the hook variable).
inline StringRef getInstrProfRuntimeHookVarName() {
  return INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_RUNTIME_VAR);
}

/// Return the name of the compiler generated function that references the
/// runtime hook variable. The function is a weak global.
inline StringRef getInstrProfRuntimeHookVarUseFuncName() {
  return "__llvm_profile_runtime_user";
}

/// Return the variable name \c __llvm_profile_counter_bias.
/// NOTE(review): presumably this names the runtime's counter bias variable;
/// confirm against the profile runtime before relying on that.
inline StringRef getInstrProfCounterBiasVarName() {
  return "__llvm_profile_counter_bias";
}

/// Return the marker used to separate PGO names during serialization.
inline StringRef getInstrProfNameSeparator() { return "\01"; }
164
/// Return the modified name for function \c F suitable to be
/// used as the key for profile lookup. Variable \c InLTO indicates if this
/// is called in LTO optimization passes. \p Version defaults to
/// INSTR_PROF_INDEX_VERSION.
std::string getPGOFuncName(const Function &F, bool InLTO = false,
                           uint64_t Version = INSTR_PROF_INDEX_VERSION);

/// Return the modified name for a function suitable to be
/// used as the key for profile lookup. The function's original
/// name is \c RawFuncName and has linkage of type \c Linkage.
/// The function is defined in module \c FileName. \p Version defaults to
/// INSTR_PROF_INDEX_VERSION.
std::string getPGOFuncName(StringRef RawFuncName,
                           GlobalValue::LinkageTypes Linkage,
                           StringRef FileName,
                           uint64_t Version = INSTR_PROF_INDEX_VERSION);

/// Return the name of the global variable used to store a function
/// name in PGO instrumentation. \c FuncName is the name of the function
/// returned by the \c getPGOFuncName call.
std::string getPGOFuncNameVarName(StringRef FuncName,
                                  GlobalValue::LinkageTypes Linkage);

/// Create and return the global variable for function name used in PGO
/// instrumentation. \c PGOFuncName is the name of the function returned
/// by the \c getPGOFuncName call.
GlobalVariable *createPGOFuncNameVar(Function &F, StringRef PGOFuncName);

/// Create and return the global variable for function name used in PGO
/// instrumentation. \c PGOFuncName is the name of the function
/// returned by the \c getPGOFuncName call, \c M is the owning module,
/// and \c Linkage is the linkage of the instrumented function.
GlobalVariable *createPGOFuncNameVar(Module &M,
                                     GlobalValue::LinkageTypes Linkage,
                                     StringRef PGOFuncName);

/// Return the initializer in string of the PGO name var \c NameVar.
StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar);

/// Given a PGO function name, remove the filename prefix and return
/// the original (static) function name. \p FileName defaults to "<unknown>".
StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName,
                                   StringRef FileName = "<unknown>");
206
/// Given a vector of strings (function PGO names) \c NameStrs, the
/// method generates a combined string \c Result that is ready to be
/// serialized. The \c Result string is comprised of three fields:
/// The first field is the length of the uncompressed strings, and
/// the second field is the length of the zlib-compressed string.
/// Both fields are encoded in ULEB128. If \c doCompression is false, the
/// third field is the uncompressed strings; otherwise it is the
/// compressed string. When the string compression is off, the
/// second field will have value zero.
Error collectPGOFuncNameStrings(ArrayRef<std::string> NameStrs,
                                bool doCompression, std::string &Result);

/// Produce \c Result string with the same format described above. The input
/// is a vector of PGO function name variables that are referenced.
/// Note that the parameter order differs from the overload above, and that
/// \c doCompression defaults to true here.
Error collectPGOFuncNameStrings(ArrayRef<GlobalVariable *> NameVars,
                                std::string &Result, bool doCompression = true);

/// \c NameStrings is a string composed of one or more sub-strings encoded in
/// the format described above. The substrings are separated by 0 or more zero
/// bytes. This method decodes the string and populates the \c Symtab.
Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab);
228
/// Check if INSTR_PROF_RAW_VERSION_VAR is defined in module \p M. This global
/// is only set in IR PGO compilation.
bool isIRPGOFlagSet(const Module *M);

/// Check if we can safely rename this Comdat function. Instances of the same
/// comdat function may have different control flows and thus cannot share the
/// same counter variable. \p CheckAddressTaken defaults to false.
bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken = false);
237
/// Kinds of value profiling. One enumerator is generated for each
/// VALUE_PROF_KIND record in InstrProfData.inc, named by the record's
/// \c Enumerator field and given the record's \c Value; the \c Descr field is
/// ignored here.
enum InstrProfValueKind : uint32_t {
#define VALUE_PROF_KIND(Enumerator, Value, Descr) Enumerator = Value,
#include "llvm/ProfileData/InstrProfData.inc"
};
242
/// Get the value profile data for value site \p SiteIndx from \p InstrProfR
/// and annotate the instruction \p Inst with the value profile meta data.
/// Annotate up to \p MaxMDCount (default 3) number of records per value site.
void annotateValueSite(Module &M, Instruction &Inst,
                       const InstrProfRecord &InstrProfR,
                       InstrProfValueKind ValueKind, uint32_t SiteIndx,
                       uint32_t MaxMDCount = 3);

/// Same as the above interface but using an ArrayRef, as well as \p Sum.
/// Unlike the overload above, \p MaxMDCount has no default here.
void annotateValueSite(Module &M, Instruction &Inst,
                       ArrayRef<InstrProfValueData> VDs, uint64_t Sum,
                       InstrProfValueKind ValueKind, uint32_t MaxMDCount);

/// Extract the value profile data from \p Inst which is annotated with
/// value profile meta data. Return false if there is no value data annotated,
/// otherwise return true. Results are delivered through \p ValueData,
/// \p ActualNumValueData and \p TotalC.
bool getValueProfDataFromInst(const Instruction &Inst,
                              InstrProfValueKind ValueKind,
                              uint32_t MaxNumValueData,
                              InstrProfValueData ValueData[],
                              uint32_t &ActualNumValueData, uint64_t &TotalC,
                              bool GetNoICPValue = false);
265
/// Return the metadata name "PGOFuncName".
inline StringRef getPGOFuncNameMetadataName() { return "PGOFuncName"; }

/// Return the PGOFuncName meta data associated with a function.
MDNode *getPGOFuncNameMetadata(const Function &F);

/// Create the PGOFuncName meta data if PGOFuncName is different from
/// function's raw name. This should only apply to internal linkage functions
/// declared by users only.
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName);

/// Check if we can use Comdat for profile variables. This will eliminate
/// the duplicated profile variables for Comdat functions.
bool needsComdatForCounter(const Function &F, const Module &M);

/// Return the std::error_category that make_error_code() pairs with
/// instrprof_error values.
const std::error_category &instrprof_category();
281
/// Error codes for instrumentation profile handling. Values are converted to
/// std::error_code by make_error_code() and carried in llvm::Error by
/// InstrProfError. \c success (0) denotes the absence of an error and must not
/// be wrapped in an InstrProfError.
enum class instrprof_error {
  success = 0,
  eof,
  unrecognized_format,
  bad_magic,
  bad_header,
  unsupported_version,
  unsupported_hash_type,
  too_large,
  truncated,
  malformed,
  unknown_function,
  invalid_prof,
  hash_mismatch,
  count_mismatch,
  counter_overflow,
  value_site_count_mismatch,
  compress_failed,
  uncompress_failed,
  empty_raw_profile,
  zlib_unavailable
};
304
305inline std::error_code make_error_code(instrprof_error E) {
306 return std::error_code(static_cast<int>(E), instrprof_category());
307}
308
309class InstrProfError : public ErrorInfo<InstrProfError> {
310public:
311 InstrProfError(instrprof_error Err) : Err(Err) {
312 assert(Err != instrprof_error::success && "Not an error");
313 }
314
315 std::string message() const override;
316
317 void log(raw_ostream &OS) const override { OS << message(); }
318
319 std::error_code convertToErrorCode() const override {
320 return make_error_code(Err);
321 }
322
323 instrprof_error get() const { return Err; }
324
325 /// Consume an Error and return the raw enum value contained within it. The
326 /// Error must either be a success value, or contain a single InstrProfError.
327 static instrprof_error take(Error E) {
328 auto Err = instrprof_error::success;
329 handleAllErrors(std::move(E), [&Err](const InstrProfError &IPE) {
330 assert(Err == instrprof_error::success && "Multiple errors encountered");
331 Err = IPE.get();
332 });
333 return Err;
334 }
335
336 static char ID;
337
338private:
339 instrprof_error Err;
340};
341
342class SoftInstrProfErrors {
343 /// Count the number of soft instrprof_errors encountered and keep track of
344 /// the first such error for reporting purposes.
345
346 /// The first soft error encountered.
347 instrprof_error FirstError = instrprof_error::success;
348
349 /// The number of hash mismatches.
350 unsigned NumHashMismatches = 0;
351
352 /// The number of count mismatches.
353 unsigned NumCountMismatches = 0;
354
355 /// The number of counter overflows.
356 unsigned NumCounterOverflows = 0;
357
358 /// The number of value site count mismatches.
359 unsigned NumValueSiteCountMismatches = 0;
360
361public:
362 SoftInstrProfErrors() = default;
363
364 ~SoftInstrProfErrors() {
365 assert(FirstError == instrprof_error::success &&
366 "Unchecked soft error encountered");
367 }
368
369 /// Track a soft error (\p IE) and increment its associated counter.
370 void addError(instrprof_error IE);
371
372 /// Get the number of hash mismatches.
373 unsigned getNumHashMismatches() const { return NumHashMismatches; }
374
375 /// Get the number of count mismatches.
376 unsigned getNumCountMismatches() const { return NumCountMismatches; }
377
378 /// Get the number of counter overflows.
379 unsigned getNumCounterOverflows() const { return NumCounterOverflows; }
380
381 /// Get the number of value site count mismatches.
382 unsigned getNumValueSiteCountMismatches() const {
383 return NumValueSiteCountMismatches;
384 }
385
386 /// Return the first encountered error and reset FirstError to a success
387 /// value.
388 Error takeError() {
389 if (FirstError == instrprof_error::success)
390 return Error::success();
391 auto E = make_error<InstrProfError>(FirstError);
392 FirstError = instrprof_error::success;
393 return E;
394 }
395};
396
namespace object {

// Forward declaration; used by InstrProfSymtab::create(object::SectionRef &).
class SectionRef;

} // end namespace object

namespace IndexedInstrProf {

/// Compute the hash of key \p K. InstrProfSymtab::addFuncName() uses this to
/// derive the key under which a function name is stored.
uint64_t ComputeHash(StringRef K);

} // end namespace IndexedInstrProf
408
409/// A symbol table used for function PGO name look-up with keys
410/// (such as pointers, md5hash values) to the function. A function's
411/// PGO name or name's md5hash are used in retrieving the profile
412/// data of the function. See \c getPGOFuncName() method for details
413/// on how PGO name is formed.
414class InstrProfSymtab {
415public:
416 using AddrHashMap = std::vector<std::pair<uint64_t, uint64_t>>;
417
418private:
419 StringRef Data;
420 uint64_t Address = 0;
421 // Unique name strings.
422 StringSet<> NameTab;
423 // A map from MD5 keys to function name strings.
424 std::vector<std::pair<uint64_t, StringRef>> MD5NameMap;
425 // A map from MD5 keys to function define. We only populate this map
426 // when build the Symtab from a Module.
427 std::vector<std::pair<uint64_t, Function *>> MD5FuncMap;
428 // A map from function runtime address to function name MD5 hash.
429 // This map is only populated and used by raw instr profile reader.
430 AddrHashMap AddrToMD5Map;
431 bool Sorted = false;
432
433 static StringRef getExternalSymbol() {
434 return "** External Symbol **";
435 }
436
437 // If the symtab is created by a series of calls to \c addFuncName, \c
438 // finalizeSymtab needs to be called before looking up function names.
439 // This is required because the underlying map is a vector (for space
440 // efficiency) which needs to be sorted.
441 inline void finalizeSymtab();
442
443public:
444 InstrProfSymtab() = default;
445
446 /// Create InstrProfSymtab from an object file section which
447 /// contains function PGO names. When section may contain raw
448 /// string data or string data in compressed form. This method
449 /// only initialize the symtab with reference to the data and
450 /// the section base address. The decompression will be delayed
451 /// until before it is used. See also \c create(StringRef) method.
452 Error create(object::SectionRef &Section);
453
454 /// This interface is used by reader of CoverageMapping test
455 /// format.
456 inline Error create(StringRef D, uint64_t BaseAddr);
457
458 /// \c NameStrings is a string composed of one of more sub-strings
459 /// encoded in the format described in \c collectPGOFuncNameStrings.
460 /// This method is a wrapper to \c readPGOFuncNameStrings method.
461 inline Error create(StringRef NameStrings);
462
463 /// A wrapper interface to populate the PGO symtab with functions
464 /// decls from module \c M. This interface is used by transformation
465 /// passes such as indirect function call promotion. Variable \c InLTO
466 /// indicates if this is called from LTO optimization passes.
467 Error create(Module &M, bool InLTO = false);
468
469 /// Create InstrProfSymtab from a set of names iteratable from
470 /// \p IterRange. This interface is used by IndexedProfReader.
471 template <typename NameIterRange> Error create(const NameIterRange &IterRange);
472
473 /// Update the symtab by adding \p FuncName to the table. This interface
474 /// is used by the raw and text profile readers.
475 Error addFuncName(StringRef FuncName) {
476 if (FuncName.empty())
477 return make_error<InstrProfError>(instrprof_error::malformed);
478 auto Ins = NameTab.insert(FuncName);
479 if (Ins.second) {
480 MD5NameMap.push_back(std::make_pair(
481 IndexedInstrProf::ComputeHash(FuncName), Ins.first->getKey()));
482 Sorted = false;
483 }
484 return Error::success();
485 }
486
487 /// Map a function address to its name's MD5 hash. This interface
488 /// is only used by the raw profiler reader.
489 void mapAddress(uint64_t Addr, uint64_t MD5Val) {
490 AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val));
491 }
492
493 /// Return a function's hash, or 0, if the function isn't in this SymTab.
494 uint64_t getFunctionHashFromAddress(uint64_t Address);
495
496 /// Return function's PGO name from the function name's symbol
497 /// address in the object file. If an error occurs, return
498 /// an empty string.
499 StringRef getFuncName(uint64_t FuncNameAddress, size_t NameSize);
500
501 /// Return function's PGO name from the name's md5 hash value.
502 /// If not found, return an empty string.
503 inline StringRef getFuncName(uint64_t FuncMD5Hash);
504
505 /// Just like getFuncName, except that it will return a non-empty StringRef
506 /// if the function is external to this symbol table. All such cases
507 /// will be represented using the same StringRef value.
508 inline StringRef getFuncNameOrExternalSymbol(uint64_t FuncMD5Hash);
509
510 /// True if Symbol is the value used to represent external symbols.
511 static bool isExternalSymbol(const StringRef &Symbol) {
512 return Symbol == InstrProfSymtab::getExternalSymbol();
513 }
514
515 /// Return function from the name's md5 hash. Return nullptr if not found.
516 inline Function *getFunction(uint64_t FuncMD5Hash);
517
518 /// Return the function's original assembly name by stripping off
519 /// the prefix attached (to symbols with priviate linkage). For
520 /// global functions, it returns the same string as getFuncName.
521 inline StringRef getOrigFuncName(uint64_t FuncMD5Hash);
522
523 /// Return the name section data.
524 inline StringRef getNameData() const { return Data; }
525};
526
527Error InstrProfSymtab::create(StringRef D, uint64_t BaseAddr) {
528 Data = D;
529 Address = BaseAddr;
530 return Error::success();
531}
532
533Error InstrProfSymtab::create(StringRef NameStrings) {
534 return readPGOFuncNameStrings(NameStrings, *this);
535}
536
537template <typename NameIterRange>
538Error InstrProfSymtab::create(const NameIterRange &IterRange) {
539 for (auto Name : IterRange)
540 if (Error E = addFuncName(Name))
541 return E;
542
543 finalizeSymtab();
544 return Error::success();
545}
546
547void InstrProfSymtab::finalizeSymtab() {
548 if (Sorted)
549 return;
550 llvm::sort(MD5NameMap, less_first());
551 llvm::sort(MD5FuncMap, less_first());
552 llvm::sort(AddrToMD5Map, less_first());
553 AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()),
554 AddrToMD5Map.end());
555 Sorted = true;
556}
557
558StringRef InstrProfSymtab::getFuncNameOrExternalSymbol(uint64_t FuncMD5Hash) {
559 StringRef ret = getFuncName(FuncMD5Hash);
560 if (ret.empty())
561 return InstrProfSymtab::getExternalSymbol();
562 return ret;
563}
564
565StringRef InstrProfSymtab::getFuncName(uint64_t FuncMD5Hash) {
566 finalizeSymtab();
567 auto Result = llvm::lower_bound(MD5NameMap, FuncMD5Hash,
568 [](const std::pair<uint64_t, StringRef> &LHS,
569 uint64_t RHS) { return LHS.first < RHS; });
570 if (Result != MD5NameMap.end() && Result->first == FuncMD5Hash)
571 return Result->second;
572 return StringRef();
573}
574
575Function* InstrProfSymtab::getFunction(uint64_t FuncMD5Hash) {
576 finalizeSymtab();
577 auto Result = llvm::lower_bound(MD5FuncMap, FuncMD5Hash,
578 [](const std::pair<uint64_t, Function *> &LHS,
579 uint64_t RHS) { return LHS.first < RHS; });
580 if (Result != MD5FuncMap.end() && Result->first == FuncMD5Hash)
581 return Result->second;
582 return nullptr;
583}
584
585// See also getPGOFuncName implementation. These two need to be
586// matched.
587StringRef InstrProfSymtab::getOrigFuncName(uint64_t FuncMD5Hash) {
588 StringRef PGOName = getFuncName(FuncMD5Hash);
589 size_t S = PGOName.find_first_of(':');
590 if (S == StringRef::npos)
591 return PGOName;
592 return PGOName.drop_front(S + 1);
593}
594
595// To store the sums of profile count values, or the percentage of
596// the sums of the total count values.
597struct CountSumOrPercent {
598 uint64_t NumEntries;
599 double CountSum;
600 double ValueCounts[IPVK_Last - IPVK_First + 1];
601 CountSumOrPercent() : NumEntries(0), CountSum(0.0f), ValueCounts() {}
602 void reset() {
603 NumEntries = 0;
604 CountSum = 0.0f;
605 for (unsigned I = 0; I < IPVK_Last - IPVK_First + 1; I++)
606 ValueCounts[I] = 0.0f;
607 }
608};
609
610// Function level or program level overlap information.
611struct OverlapStats {
612 enum OverlapStatsLevel { ProgramLevel, FunctionLevel };
613 // Sum of the total count values for the base profile.
614 CountSumOrPercent Base;
615 // Sum of the total count values for the test profile.
616 CountSumOrPercent Test;
617 // Overlap lap score. Should be in range of [0.0f to 1.0f].
618 CountSumOrPercent Overlap;
619 CountSumOrPercent Mismatch;
620 CountSumOrPercent Unique;
621 OverlapStatsLevel Level;
622 const std::string *BaseFilename;
623 const std::string *TestFilename;
624 StringRef FuncName;
625 uint64_t FuncHash;
626 bool Valid;
627
628 OverlapStats(OverlapStatsLevel L = ProgramLevel)
629 : Level(L), BaseFilename(nullptr), TestFilename(nullptr), FuncHash(0),
630 Valid(false) {}
631
632 void dump(raw_fd_ostream &OS) const;
633
634 void setFuncInfo(StringRef Name, uint64_t Hash) {
635 FuncName = Name;
636 FuncHash = Hash;
637 }
638
639 Error accumulateCounts(const std::string &BaseFilename,
640 const std::string &TestFilename, bool IsCS);
641 void addOneMismatch(const CountSumOrPercent &MismatchFunc);
642 void addOneUnique(const CountSumOrPercent &UniqueFunc);
643
644 static inline double score(uint64_t Val1, uint64_t Val2, double Sum1,
645 double Sum2) {
646 if (Sum1 < 1.0f || Sum2 < 1.0f)
647 return 0.0f;
648 return std::min(Val1 / Sum1, Val2 / Sum2);
649 }
650};
651
// This is used to filter the functions whose overlap information
// is to be output.
struct OverlapFuncFilters {
  // Cutoff applied to values. NOTE(review): the exact comparison is made by
  // the consumer of this struct and is not visible here.
  uint64_t ValueCutoff;
  // Filter applied to function names; const, so it can only be set when the
  // struct is initialized.
  const std::string NameFilter;
};
658
659struct InstrProfValueSiteRecord {
660 /// Value profiling data pairs at a given value site.
661 std::list<InstrProfValueData> ValueData;
662
663 InstrProfValueSiteRecord() { ValueData.clear(); }
664 template <class InputIterator>
665 InstrProfValueSiteRecord(InputIterator F, InputIterator L)
666 : ValueData(F, L) {}
667
668 /// Sort ValueData ascending by Value
669 void sortByTargetValues() {
670 ValueData.sort(
671 [](const InstrProfValueData &left, const InstrProfValueData &right) {
672 return left.Value < right.Value;
673 });
674 }
675 /// Sort ValueData Descending by Count
676 inline void sortByCount();
677
678 /// Merge data from another InstrProfValueSiteRecord
679 /// Optionally scale merged counts by \p Weight.
680 void merge(InstrProfValueSiteRecord &Input, uint64_t Weight,
681 function_ref<void(instrprof_error)> Warn);
682 /// Scale up value profile data counts by N (Numerator) / D (Denominator).
683 void scale(uint64_t N, uint64_t D, function_ref<void(instrprof_error)> Warn);
684
685 /// Compute the overlap b/w this record and Input record.
686 void overlap(InstrProfValueSiteRecord &Input, uint32_t ValueKind,
687 OverlapStats &Overlap, OverlapStats &FuncLevelOverlap);
688};
689
690/// Profiling information for a single function.
691struct InstrProfRecord {
692 std::vector<uint64_t> Counts;
693
694 InstrProfRecord() = default;
695 InstrProfRecord(std::vector<uint64_t> Counts) : Counts(std::move(Counts)) {}
696 InstrProfRecord(InstrProfRecord &&) = default;
697 InstrProfRecord(const InstrProfRecord &RHS)
698 : Counts(RHS.Counts),
699 ValueData(RHS.ValueData
700 ? std::make_unique<ValueProfData>(*RHS.ValueData)
701 : nullptr) {}
702 InstrProfRecord &operator=(InstrProfRecord &&) = default;
703 InstrProfRecord &operator=(const InstrProfRecord &RHS) {
704 Counts = RHS.Counts;
705 if (!RHS.ValueData) {
706 ValueData = nullptr;
707 return *this;
708 }
709 if (!ValueData)
710 ValueData = std::make_unique<ValueProfData>(*RHS.ValueData);
711 else
712 *ValueData = *RHS.ValueData;
713 return *this;
714 }
715
716 /// Return the number of value profile kinds with non-zero number
717 /// of profile sites.
718 inline uint32_t getNumValueKinds() const;
719 /// Return the number of instrumented sites for ValueKind.
720 inline uint32_t getNumValueSites(uint32_t ValueKind) const;
721
722 /// Return the total number of ValueData for ValueKind.
723 inline uint32_t getNumValueData(uint32_t ValueKind) const;
724
725 /// Return the number of value data collected for ValueKind at profiling
726 /// site: Site.
727 inline uint32_t getNumValueDataForSite(uint32_t ValueKind,
728 uint32_t Site) const;
729
730 /// Return the array of profiled values at \p Site. If \p TotalC
731 /// is not null, the total count of all target values at this site
732 /// will be stored in \c *TotalC.
733 inline std::unique_ptr<InstrProfValueData[]>
734 getValueForSite(uint32_t ValueKind, uint32_t Site,
735 uint64_t *TotalC = nullptr) const;
736
737 /// Get the target value/counts of kind \p ValueKind collected at site
738 /// \p Site and store the result in array \p Dest. Return the total
739 /// counts of all target values at this site.
740 inline uint64_t getValueForSite(InstrProfValueData Dest[], uint32_t ValueKind,
741 uint32_t Site) const;
742
743 /// Reserve space for NumValueSites sites.
744 inline void reserveSites(uint32_t ValueKind, uint32_t NumValueSites);
745
746 /// Add ValueData for ValueKind at value Site.
747 void addValueData(uint32_t ValueKind, uint32_t Site,
748 InstrProfValueData *VData, uint32_t N,
749 InstrProfSymtab *SymTab);
750
751 /// Merge the counts in \p Other into this one.
752 /// Optionally scale merged counts by \p Weight.
753 void merge(InstrProfRecord &Other, uint64_t Weight,
754 function_ref<void(instrprof_error)> Warn);
755
756 /// Scale up profile counts (including value profile data) by
757 /// a factor of (N / D).
758 void scale(uint64_t N, uint64_t D, function_ref<void(instrprof_error)> Warn);
759
760 /// Sort value profile data (per site) by count.
761 void sortValueData() {
762 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
763 for (auto &SR : getValueSitesForKind(Kind))
764 SR.sortByCount();
765 }
766
767 /// Clear value data entries and edge counters.
768 void Clear() {
769 Counts.clear();
770 clearValueData();
771 }
772
773 /// Clear value data entries
774 void clearValueData() { ValueData = nullptr; }
775
776 /// Compute the sums of all counts and store in Sum.
777 void accumulateCounts(CountSumOrPercent &Sum) const;
778
779 /// Compute the overlap b/w this IntrprofRecord and Other.
780 void overlap(InstrProfRecord &Other, OverlapStats &Overlap,
781 OverlapStats &FuncLevelOverlap, uint64_t ValueCutoff);
782
783 /// Compute the overlap of value profile counts.
784 void overlapValueProfData(uint32_t ValueKind, InstrProfRecord &Src,
785 OverlapStats &Overlap,
786 OverlapStats &FuncLevelOverlap);
787
788private:
789 struct ValueProfData {
790 std::vector<InstrProfValueSiteRecord> IndirectCallSites;
791 std::vector<InstrProfValueSiteRecord> MemOPSizes;
792 };
793 std::unique_ptr<ValueProfData> ValueData;
794
795 MutableArrayRef<InstrProfValueSiteRecord>
796 getValueSitesForKind(uint32_t ValueKind) {
797 // Cast to /add/ const (should be an implicit_cast, ideally, if that's ever
798 // implemented in LLVM) to call the const overload of this function, then
799 // cast away the constness from the result.
800 auto AR = const_cast<const InstrProfRecord *>(this)->getValueSitesForKind(
801 ValueKind);
802 return makeMutableArrayRef(
803 const_cast<InstrProfValueSiteRecord *>(AR.data()), AR.size());
804 }
805 ArrayRef<InstrProfValueSiteRecord>
806 getValueSitesForKind(uint32_t ValueKind) const {
807 if (!ValueData)
808 return None;
809 switch (ValueKind) {
810 case IPVK_IndirectCallTarget:
811 return ValueData->IndirectCallSites;
812 case IPVK_MemOPSize:
813 return ValueData->MemOPSizes;
814 default:
815 llvm_unreachable("Unknown value kind!");
816 }
817 }
818
819 std::vector<InstrProfValueSiteRecord> &
820 getOrCreateValueSitesForKind(uint32_t ValueKind) {
821 if (!ValueData)
822 ValueData = std::make_unique<ValueProfData>();
823 switch (ValueKind) {
824 case IPVK_IndirectCallTarget:
825 return ValueData->IndirectCallSites;
826 case IPVK_MemOPSize:
827 return ValueData->MemOPSizes;
828 default:
829 llvm_unreachable("Unknown value kind!");
830 }
831 }
832
833 // Map indirect call target name hash to name string.
834 uint64_t remapValue(uint64_t Value, uint32_t ValueKind,
835 InstrProfSymtab *SymTab);
836
837 // Merge Value Profile data from Src record to this record for ValueKind.
838 // Scale merged value counts by \p Weight.
839 void mergeValueProfData(uint32_t ValkeKind, InstrProfRecord &Src,
840 uint64_t Weight,
841 function_ref<void(instrprof_error)> Warn);
842
// Scale up value profile data count by N (Numerator) / D (Denominator).
// Applies to the data recorded for \p ValueKind. \p Warn is a callback that
// receives an instrprof_error.
// NOTE(review): the definition lives outside this header, so the conditions
// under which \p Warn fires should be confirmed there.
void scaleValueProfData(uint32_t ValueKind, uint64_t N, uint64_t D,
                        function_ref<void(instrprof_error)> Warn);
846};
847
848struct NamedInstrProfRecord : InstrProfRecord {
849 StringRef Name;
850 uint64_t Hash;
851
852 // We reserve this bit as the flag for context sensitive profile record.
853 static const int CS_FLAG_IN_FUNC_HASH = 60;
854
855 NamedInstrProfRecord() = default;
856 NamedInstrProfRecord(StringRef Name, uint64_t Hash,
857 std::vector<uint64_t> Counts)
858 : InstrProfRecord(std::move(Counts)), Name(Name), Hash(Hash) {}
859
860 static bool hasCSFlagInHash(uint64_t FuncHash) {
861 return ((FuncHash >> CS_FLAG_IN_FUNC_HASH) & 1);
862 }
863 static void setCSFlagInHash(uint64_t &FuncHash) {
864 FuncHash |= ((uint64_t)1 << CS_FLAG_IN_FUNC_HASH);
865 }
866};
867
868uint32_t InstrProfRecord::getNumValueKinds() const {
869 uint32_t NumValueKinds = 0;
870 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
871 NumValueKinds += !(getValueSitesForKind(Kind).empty());
872 return NumValueKinds;
873}
874
875uint32_t InstrProfRecord::getNumValueData(uint32_t ValueKind) const {
876 uint32_t N = 0;
877 for (auto &SR : getValueSitesForKind(ValueKind))
878 N += SR.ValueData.size();
879 return N;
880}
881
882uint32_t InstrProfRecord::getNumValueSites(uint32_t ValueKind) const {
883 return getValueSitesForKind(ValueKind).size();
884}
885
886uint32_t InstrProfRecord::getNumValueDataForSite(uint32_t ValueKind,
887 uint32_t Site) const {
888 return getValueSitesForKind(ValueKind)[Site].ValueData.size();
889}
890
891std::unique_ptr<InstrProfValueData[]>
892InstrProfRecord::getValueForSite(uint32_t ValueKind, uint32_t Site,
893 uint64_t *TotalC) const {
894 uint64_t Dummy = 0;
895 uint64_t &TotalCount = (TotalC == nullptr ? Dummy : *TotalC);
896 uint32_t N = getNumValueDataForSite(ValueKind, Site);
897 if (N == 0) {
898 TotalCount = 0;
899 return std::unique_ptr<InstrProfValueData[]>(nullptr);
900 }
901
902 auto VD = std::make_unique<InstrProfValueData[]>(N);
903 TotalCount = getValueForSite(VD.get(), ValueKind, Site);
904
905 return VD;
906}
907
908uint64_t InstrProfRecord::getValueForSite(InstrProfValueData Dest[],
909 uint32_t ValueKind,
910 uint32_t Site) const {
911 uint32_t I = 0;
912 uint64_t TotalCount = 0;
913 for (auto V : getValueSitesForKind(ValueKind)[Site].ValueData) {
914 Dest[I].Value = V.Value;
915 Dest[I].Count = V.Count;
916 TotalCount = SaturatingAdd(TotalCount, V.Count);
917 I++;
918 }
919 return TotalCount;
920}
921
922void InstrProfRecord::reserveSites(uint32_t ValueKind, uint32_t NumValueSites) {
923 if (!NumValueSites)
924 return;
925 getOrCreateValueSitesForKind(ValueKind).reserve(NumValueSites);
926}
927
928inline support::endianness getHostEndianness() {
929 return sys::IsLittleEndianHost ? support::little : support::big;
930}
931
932// Include definitions for value profile data
933#define INSTR_PROF_VALUE_PROF_DATA
934#include "llvm/ProfileData/InstrProfData.inc"
935
936void InstrProfValueSiteRecord::sortByCount() {
937 ValueData.sort(
938 [](const InstrProfValueData &left, const InstrProfValueData &right) {
939 return left.Count > right.Count;
940 });
941 // Now truncate
942 size_t max_s = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
943 if (ValueData.size() > max_s)
944 ValueData.resize(max_s);
945}
946
947namespace IndexedInstrProf {
948
/// Selects the hash function applied by ComputeHash(). MD5 is the only
/// algorithm defined; Last is an alias for the highest valid enumerator.
enum class HashT : uint32_t {
  MD5,
  Last = MD5
};
953
954inline uint64_t ComputeHash(HashT Type, StringRef K) {
955 switch (Type) {
956 case HashT::MD5:
957 return MD5Hash(K);
958 }
959 llvm_unreachable("Unhandled hash type");
960}
961
// Magic number of the indexed profile format. Read from the least
// significant byte upwards, the bytes spell the string quoted below.
const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81"
963
// Versions of the indexed profile format. CurrentVersion is defined by the
// INSTR_PROF_INDEX_VERSION macro rather than by a literal in this enum.
enum ProfVersion {
  // Version 1 is the first version. In this version, the value of
  // a key/value pair can only include profile data of a single function.
  // Due to this restriction, the number of block counters for a given
  // function is not recorded but derived from the length of the value.
  Version1 = 1,
  // The version 2 format supports recording profile data of multiple
  // functions which share the same key in one value field. To support this,
  // the number of block counters is recorded as an uint64_t field right after
  // the function structural hash.
  Version2 = 2,
  // Version 3 supports value profile data. The value profile data is expected
  // to follow the block counter profile data.
  Version3 = 3,
  // In this version, profile summary data \c IndexedInstrProf::Summary is
  // stored after the profile header.
  Version4 = 4,
  // In this version, the frontend PGO stable hash algorithm defaults to V2.
  Version5 = 5,
  // In this version, the frontend PGO stable hash algorithm got fixed and
  // may produce hashes different from Version5.
  Version6 = 6,
  // An additional counter is added around logical operators.
  Version7 = 7,
  // The current version is 7.
  CurrentVersion = INSTR_PROF_INDEX_VERSION
};
// The indexed format version as a plain integer (ProfVersion::CurrentVersion).
const uint64_t Version = ProfVersion::CurrentVersion;

// Hash algorithm used by the single-argument ComputeHash() overload.
const HashT HashType = HashT::MD5;
994
995inline uint64_t ComputeHash(StringRef K) { return ComputeHash(HashType, K); }
996
// This structure defines the file header of the LLVM profile
// data file in indexed-format.
// Every field is a uint64_t. Field order is part of the file layout, so it
// must not be rearranged.
// NOTE(review): presumably Magic/Version/HashType hold
// IndexedInstrProf::Magic, ::Version and ::HashType, and HashOffset locates
// the hash table in the file -- confirm against the reader and writer.
struct Header {
  uint64_t Magic;
  uint64_t Version;
  uint64_t Unused; // Becomes unused since version 4
  uint64_t HashType;
  uint64_t HashOffset;
};
1006
// Profile summary data recorded in the profile data file in indexed
// format. It is introduced in version 4. The summary data follows
// right after the profile file header.
//
// This is a variable-length record. The two counters declared below are
// followed in memory by NumSummaryFields uint64_t values and then by
// NumCutoffEntries Entry records; the accessors reach that trailing storage
// by address arithmetic from `this`. An instance therefore has to live in a
// buffer of getSize() bytes (see allocSummary()).
struct Summary {
  struct Entry {
    uint64_t Cutoff; ///< The required percentile of total execution count.
    uint64_t
        MinBlockCount;  ///< The minimum execution count for this percentile.
    uint64_t NumBlocks; ///< Number of blocks >= the minimum execution count.
  };
  // The field kind enumerator to assigned value mapping should remain
  // unchanged when a new kind is added or an old kind gets deleted in
  // the future.
  enum SummaryFieldKind {
    /// The total number of functions instrumented.
    TotalNumFunctions = 0,
    /// Total number of instrumented blocks/edges.
    TotalNumBlocks = 1,
    /// The maximal execution count among all functions.
    /// This field does not exist for profile data from IR based
    /// instrumentation.
    MaxFunctionCount = 2,
    /// Max block count of the program.
    MaxBlockCount = 3,
    /// Max internal block count of the program (excluding entry blocks).
    MaxInternalBlockCount = 4,
    /// The sum of all instrumented block counts.
    TotalBlockCount = 5,
    NumKinds = TotalBlockCount + 1
  };

  // The number of summary fields following the summary header.
  uint64_t NumSummaryFields;
  // The number of Cutoff Entries (Summary::Entry) following summary fields.
  uint64_t NumCutoffEntries;

  Summary() = delete;
  // Zero-fills \p Size bytes starting at this object. allocSummary() passes
  // the full allocation size, so the trailing storage is cleared as well.
  Summary(uint32_t Size) { memset(this, 0, Size); }

  // Forwards to the global, unsized operator delete.
  // NOTE(review): presumably provided so that destroying a Summary never
  // uses a sized deallocation with sizeof(Summary), which would not match
  // the larger buffer the object was placed in -- confirm.
  void operator delete(void *ptr) { ::operator delete(ptr); }

  // Number of bytes needed for a Summary with \p NumSumFields summary fields
  // and \p NumCutoffEntries cutoff entries.
  static uint32_t getSize(uint32_t NumSumFields, uint32_t NumCutoffEntries) {
    return sizeof(Summary) + NumCutoffEntries * sizeof(Entry) +
           NumSumFields * sizeof(uint64_t);
  }

  // Start of the summary field array: the first byte past this object.
  const uint64_t *getSummaryDataBase() const {
    return reinterpret_cast<const uint64_t *>(this + 1);
  }

  uint64_t *getSummaryDataBase() {
    return reinterpret_cast<uint64_t *>(this + 1);
  }

  // Start of the cutoff entry array: directly after the last summary field.
  const Entry *getCutoffEntryBase() const {
    return reinterpret_cast<const Entry *>(
        &getSummaryDataBase()[NumSummaryFields]);
  }

  Entry *getCutoffEntryBase() {
    return reinterpret_cast<Entry *>(&getSummaryDataBase()[NumSummaryFields]);
  }

  // Reads summary field \p K. The index is not checked against
  // NumSummaryFields.
  uint64_t get(SummaryFieldKind K) const {
    return getSummaryDataBase()[K];
  }

  // Writes summary field \p K. The index is not checked against
  // NumSummaryFields.
  void set(SummaryFieldKind K, uint64_t V) {
    getSummaryDataBase()[K] = V;
  }

  // Returns cutoff entry \p I. The index is not checked against
  // NumCutoffEntries.
  const Entry &getEntry(uint32_t I) const { return getCutoffEntryBase()[I]; }

  // Copies Cutoff, MinCount and NumCounts of \p E into cutoff entry \p I.
  void setEntry(uint32_t I, const ProfileSummaryEntry &E) {
    Entry &ER = getCutoffEntryBase()[I];
    ER.Cutoff = E.Cutoff;
    ER.MinBlockCount = E.MinCount;
    ER.NumBlocks = E.NumCounts;
  }
};
1087
1088inline std::unique_ptr<Summary> allocSummary(uint32_t TotalSize) {
1089 return std::unique_ptr<Summary>(new (::operator new(TotalSize))
1090 Summary(TotalSize));
1091}
1092
1093} // end namespace IndexedInstrProf
1094
1095namespace RawInstrProf {
1096
// Version history of the raw profile format:
// Version 1: First version
// Version 2: Added value profile data section. Per-function control data
// struct has more fields to describe value profile information.
// Version 3: Compressed name section support. Function PGO name reference
// from control data struct is changed from raw pointer to Name's MD5 value.
// Version 4: ValueDataBegin and ValueDataSizes fields are removed from the
// raw header.
// Version 5: Bit 60 of FuncHash is reserved for the flag for the context
// sensitive records.
// The value itself is supplied by the INSTR_PROF_RAW_VERSION macro.
const uint64_t Version = INSTR_PROF_RAW_VERSION;
1107
// Returns the raw profile magic number matching the pointer-sized integer
// type \p IntPtrT. Only the uint64_t and uint32_t specializations below are
// defined; the primary template is declared but has no definition here.
template <class IntPtrT> inline uint64_t getMagic();
template <> inline uint64_t getMagic<uint64_t>() {
  return INSTR_PROF_RAW_MAGIC_64;
}

template <> inline uint64_t getMagic<uint32_t>() {
  return INSTR_PROF_RAW_MAGIC_32;
}
1116
// Per-function profile data header/control structure.
// The definition should match the structure defined in
// compiler-rt/lib/profile/InstrProfiling.h.
// It should also match the synthesized type in
// Transforms/Instrumentation/InstrProfiling.cpp:getOrCreateRegionCounters.
//
// The members are not written out here. INSTR_PROF_DATA is defined so that
// each entry the included InstrProfData.inc passes to it becomes a
// `Type Name;` member declaration; the LLVMType and Init arguments are
// ignored by this expansion.
template <class IntPtrT> struct alignas(8) ProfileData {
  #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Type Name;
  #include "llvm/ProfileData/InstrProfData.inc"
};
1126
// File header structure of the LLVM profile data in raw format.
// The definition should match the header referenced in
// compiler-rt/lib/profile/InstrProfilingFile.c and
// InstrProfilingBuffer.c.
//
// The members are not written out here. INSTR_PROF_RAW_HEADER is defined so
// that each entry the included InstrProfData.inc passes to it becomes a
// `const Type Name;` member declaration; the Init argument is ignored by
// this expansion.
struct Header {
#define INSTR_PROF_RAW_HEADER(Type, Name, Init) const Type Name;
#include "llvm/ProfileData/InstrProfData.inc"
};
1135
1136} // end namespace RawInstrProf
1137
// Parse MemOP Size range option.
// NOTE(review): presumably the parsed bounds are returned through the
// \p RangeStart and \p RangeLast reference parameters; the accepted syntax
// of \p Str is defined by the out-of-line implementation -- confirm there.
void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart,
                                 int64_t &RangeLast);
1141
// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
// aware this is an ir_level profile so it can set the version flag.
// NOTE(review): presumably \p IsCS and \p InstrEntryBBEnabled select
// additional flag bits stored in that variable (context sensitive profiling
// and entry basic block instrumentation) -- confirm in the implementation.
void createIRLevelProfileFlagVar(Module &M, bool IsCS,
                                 bool InstrEntryBBEnabled);
1146
// Create the variable for the profile file name.
// The variable is created in module \p M.
// NOTE(review): presumably \p InstrProfileOutput is the file name to store;
// how an empty string is handled is decided by the implementation -- confirm.
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput);
1149
// Whether to compress function names in profile records, and filenames in
// code coverage mappings. Used by the Instrumentation library and unit tests.
// Only declared here; the option object is defined in another translation
// unit.
extern cl::opt<bool> DoInstrProfNameCompression;
1153
1154} // end namespace llvm
1155#endif // LLVM_PROFILEDATA_INSTRPROF_H
1156