1//===- llvm-profdata.cpp - LLVM profile data tool -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// llvm-profdata merges .profdata files.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/SmallSet.h"
14#include "llvm/ADT/SmallVector.h"
15#include "llvm/ADT/StringRef.h"
16#include "llvm/IR/LLVMContext.h"
17#include "llvm/Object/Binary.h"
18#include "llvm/ProfileData/InstrProfCorrelator.h"
19#include "llvm/ProfileData/InstrProfReader.h"
20#include "llvm/ProfileData/InstrProfWriter.h"
21#include "llvm/ProfileData/MemProf.h"
22#include "llvm/ProfileData/MemProfReader.h"
23#include "llvm/ProfileData/ProfileCommon.h"
24#include "llvm/ProfileData/SampleProfReader.h"
25#include "llvm/ProfileData/SampleProfWriter.h"
26#include "llvm/Support/BalancedPartitioning.h"
27#include "llvm/Support/CommandLine.h"
28#include "llvm/Support/Discriminator.h"
29#include "llvm/Support/Errc.h"
30#include "llvm/Support/FileSystem.h"
31#include "llvm/Support/Format.h"
32#include "llvm/Support/FormattedStream.h"
33#include "llvm/Support/LLVMDriver.h"
34#include "llvm/Support/MD5.h"
35#include "llvm/Support/MemoryBuffer.h"
36#include "llvm/Support/Path.h"
37#include "llvm/Support/Regex.h"
38#include "llvm/Support/ThreadPool.h"
39#include "llvm/Support/Threading.h"
40#include "llvm/Support/VirtualFileSystem.h"
41#include "llvm/Support/WithColor.h"
42#include "llvm/Support/raw_ostream.h"
43#include <algorithm>
44#include <cmath>
45#include <optional>
46#include <queue>
47
48using namespace llvm;
49using ProfCorrelatorKind = InstrProfCorrelator::ProfCorrelatorKind;
50
51// https://llvm.org/docs/CommandGuide/llvm-profdata.html has documentations
52// on each subcommand.
53cl::SubCommand ShowSubcommand(
54 "show",
55 "Takes a profile data file and displays the profiles. See detailed "
56 "documentation in "
57 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-show");
58cl::SubCommand OrderSubcommand(
59 "order",
60 "Reads temporal profiling traces from a profile and outputs a function "
61 "order that reduces the number of page faults for those traces. See "
62 "detailed documentation in "
63 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-order");
64cl::SubCommand OverlapSubcommand(
65 "overlap",
66 "Computes and displays the overlap between two profiles. See detailed "
67 "documentation in "
68 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-overlap");
69cl::SubCommand MergeSubcommand(
70 "merge",
71 "Takes several profiles and merge them together. See detailed "
72 "documentation in "
73 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge");
74
namespace {
/// Kind of profile an invocation works on. The enumerators are handed to
/// clEnumVal by the ProfileKind / ShowProfileKind options.
enum ProfileKinds {
  instr,
  sample,
  memory
};

/// Policy applied when an input profile turns out to be invalid; selected
/// with the "failure-mode" option ("warn", "any", "all").
enum FailureMode {
  warnOnly,
  failIfAnyAreInvalid,
  failIfAllAreInvalid
};
} // namespace
79
/// Encodings selectable for the merged output profile (see the OutputFormat
/// option). Enumerator values are implicit and follow declaration order,
/// starting from PF_None = 0.
enum ProfileFormat {
  PF_None = 0,
  PF_Text,
  PF_Compact_Binary, // Deprecated
  PF_Ext_Binary,
  PF_GCC,
  PF_Binary
};
88
/// Output syntax requested from the show subcommand via "show-format".
enum class ShowFormat {
  Text,
  Json,
  Yaml
};
90
91// Common options.
92cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
93 cl::init(Val: "-"), cl::desc("Output file"),
94 cl::sub(ShowSubcommand),
95 cl::sub(OrderSubcommand),
96 cl::sub(OverlapSubcommand),
97 cl::sub(MergeSubcommand));
98// NOTE: cl::alias must not have cl::sub(), since aliased option's cl::sub()
99// will be used. llvm::cl::alias::done() method asserts this condition.
100cl::alias OutputFilenameA("o", cl::desc("Alias for --output"),
101 cl::aliasopt(OutputFilename));
102
103// Options common to at least two commands.
104cl::opt<ProfileKinds> ProfileKind(
105 cl::desc("Profile kind:"), cl::sub(MergeSubcommand),
106 cl::sub(OverlapSubcommand), cl::init(Val: instr),
107 cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
108 clEnumVal(sample, "Sample profile")));
109cl::opt<std::string> Filename(cl::Positional, cl::desc("<profdata-file>"),
110 cl::sub(ShowSubcommand),
111 cl::sub(OrderSubcommand));
112cl::opt<unsigned> MaxDbgCorrelationWarnings(
113 "max-debug-info-correlation-warnings",
114 cl::desc("The maximum number of warnings to emit when correlating "
115 "profile from debug info (0 = no limit)"),
116 cl::sub(MergeSubcommand), cl::sub(ShowSubcommand), cl::init(Val: 5));
117cl::opt<std::string> ProfiledBinary(
118 "profiled-binary", cl::init(Val: ""),
119 cl::desc("Path to binary from which the profile was collected."),
120 cl::sub(ShowSubcommand), cl::sub(MergeSubcommand));
121cl::opt<std::string> DebugInfoFilename(
122 "debug-info", cl::init(Val: ""),
123 cl::desc(
124 "For show, read and extract profile metadata from debug info and show "
125 "the functions it found. For merge, use the provided debug info to "
126 "correlate the raw profile."),
127 cl::sub(ShowSubcommand), cl::sub(MergeSubcommand));
128cl::opt<std::string>
129 BinaryFilename("binary-file", cl::init(Val: ""),
130 cl::desc("For merge, use the provided unstripped bianry to "
131 "correlate the raw profile."),
132 cl::sub(MergeSubcommand));
133cl::opt<std::string> FuncNameFilter(
134 "function",
135 cl::desc("Only functions matching the filter are shown in the output. For "
136 "overlapping CSSPGO, this takes a function name with calling "
137 "context."),
138 cl::sub(ShowSubcommand), cl::sub(OverlapSubcommand),
139 cl::sub(MergeSubcommand));
140
141// TODO: Consider creating a template class (e.g., MergeOption, ShowOption) to
142// factor out the common cl::sub in cl::opt constructor for subcommand-specific
143// options.
144
145// Options specific to merge subcommand.
146cl::list<std::string> InputFilenames(cl::Positional, cl::sub(MergeSubcommand),
147 cl::desc("<filename...>"));
148cl::list<std::string> WeightedInputFilenames("weighted-input",
149 cl::sub(MergeSubcommand),
150 cl::desc("<weight>,<filename>"));
151cl::opt<ProfileFormat> OutputFormat(
152 cl::desc("Format of output profile"), cl::sub(MergeSubcommand),
153 cl::init(Val: PF_Ext_Binary),
154 cl::values(clEnumValN(PF_Binary, "binary", "Binary encoding"),
155 clEnumValN(PF_Ext_Binary, "extbinary",
156 "Extensible binary encoding "
157 "(default)"),
158 clEnumValN(PF_Text, "text", "Text encoding"),
159 clEnumValN(PF_GCC, "gcc",
160 "GCC encoding (only meaningful for -sample)")));
161cl::opt<std::string>
162 InputFilenamesFile("input-files", cl::init(Val: ""), cl::sub(MergeSubcommand),
163 cl::desc("Path to file containing newline-separated "
164 "[<weight>,]<filename> entries"));
165cl::alias InputFilenamesFileA("f", cl::desc("Alias for --input-files"),
166 cl::aliasopt(InputFilenamesFile));
167cl::opt<bool> DumpInputFileList(
168 "dump-input-file-list", cl::init(Val: false), cl::Hidden,
169 cl::sub(MergeSubcommand),
170 cl::desc("Dump the list of input files and their weights, then exit"));
171cl::opt<std::string> RemappingFile("remapping-file", cl::value_desc("file"),
172 cl::sub(MergeSubcommand),
173 cl::desc("Symbol remapping file"));
174cl::alias RemappingFileA("r", cl::desc("Alias for --remapping-file"),
175 cl::aliasopt(RemappingFile));
176cl::opt<bool>
177 UseMD5("use-md5", cl::init(Val: false), cl::Hidden,
178 cl::desc("Choose to use MD5 to represent string in name table (only "
179 "meaningful for -extbinary)"),
180 cl::sub(MergeSubcommand));
181cl::opt<bool> CompressAllSections(
182 "compress-all-sections", cl::init(Val: false), cl::Hidden,
183 cl::sub(MergeSubcommand),
184 cl::desc("Compress all sections when writing the profile (only "
185 "meaningful for -extbinary)"));
186cl::opt<bool> SampleMergeColdContext(
187 "sample-merge-cold-context", cl::init(Val: false), cl::Hidden,
188 cl::sub(MergeSubcommand),
189 cl::desc(
190 "Merge context sample profiles whose count is below cold threshold"));
191cl::opt<bool> SampleTrimColdContext(
192 "sample-trim-cold-context", cl::init(Val: false), cl::Hidden,
193 cl::sub(MergeSubcommand),
194 cl::desc(
195 "Trim context sample profiles whose count is below cold threshold"));
196cl::opt<uint32_t> SampleColdContextFrameDepth(
197 "sample-frame-depth-for-cold-context", cl::init(Val: 1),
198 cl::sub(MergeSubcommand),
199 cl::desc("Keep the last K frames while merging cold profile. 1 means the "
200 "context-less base profile"));
201cl::opt<size_t> OutputSizeLimit(
202 "output-size-limit", cl::init(Val: 0), cl::Hidden, cl::sub(MergeSubcommand),
203 cl::desc("Trim cold functions until profile size is below specified "
204 "limit in bytes. This uses a heursitic and functions may be "
205 "excessively trimmed"));
206cl::opt<bool> GenPartialProfile(
207 "gen-partial-profile", cl::init(Val: false), cl::Hidden,
208 cl::sub(MergeSubcommand),
209 cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
210cl::opt<std::string> SupplInstrWithSample(
211 "supplement-instr-with-sample", cl::init(Val: ""), cl::Hidden,
212 cl::sub(MergeSubcommand),
213 cl::desc("Supplement an instr profile with sample profile, to correct "
214 "the profile unrepresentativeness issue. The sample "
215 "profile is the input of the flag. Output will be in instr "
216 "format (The flag only works with -instr)"));
217cl::opt<float> ZeroCounterThreshold(
218 "zero-counter-threshold", cl::init(Val: 0.7), cl::Hidden,
219 cl::sub(MergeSubcommand),
220 cl::desc("For the function which is cold in instr profile but hot in "
221 "sample profile, if the ratio of the number of zero counters "
222 "divided by the total number of counters is above the "
223 "threshold, the profile of the function will be regarded as "
224 "being harmful for performance and will be dropped."));
225cl::opt<unsigned> SupplMinSizeThreshold(
226 "suppl-min-size-threshold", cl::init(Val: 10), cl::Hidden,
227 cl::sub(MergeSubcommand),
228 cl::desc("If the size of a function is smaller than the threshold, "
229 "assume it can be inlined by PGO early inliner and it won't "
230 "be adjusted based on sample profile."));
231cl::opt<unsigned> InstrProfColdThreshold(
232 "instr-prof-cold-threshold", cl::init(Val: 0), cl::Hidden,
233 cl::sub(MergeSubcommand),
234 cl::desc("User specified cold threshold for instr profile which will "
235 "override the cold threshold got from profile summary. "));
236// WARNING: This reservoir size value is propagated to any input indexed
237// profiles for simplicity. Changing this value between invocations could
238// result in sample bias.
239cl::opt<uint64_t> TemporalProfTraceReservoirSize(
240 "temporal-profile-trace-reservoir-size", cl::init(Val: 100),
241 cl::sub(MergeSubcommand),
242 cl::desc("The maximum number of stored temporal profile traces (default: "
243 "100)"));
244cl::opt<uint64_t> TemporalProfMaxTraceLength(
245 "temporal-profile-max-trace-length", cl::init(Val: 10000),
246 cl::sub(MergeSubcommand),
247 cl::desc("The maximum length of a single temporal profile trace "
248 "(default: 10000)"));
249cl::opt<std::string> FuncNameNegativeFilter(
250 "no-function", cl::init(Val: ""),
251 cl::sub(MergeSubcommand),
252 cl::desc("Exclude functions matching the filter from the output."));
253
254cl::opt<FailureMode>
255 FailMode("failure-mode", cl::init(Val: failIfAnyAreInvalid),
256 cl::desc("Failure mode:"), cl::sub(MergeSubcommand),
257 cl::values(clEnumValN(warnOnly, "warn",
258 "Do not fail and just print warnings."),
259 clEnumValN(failIfAnyAreInvalid, "any",
260 "Fail if any profile is invalid."),
261 clEnumValN(failIfAllAreInvalid, "all",
262 "Fail only if all profiles are invalid.")));
263
264cl::opt<bool> OutputSparse(
265 "sparse", cl::init(Val: false), cl::sub(MergeSubcommand),
266 cl::desc("Generate a sparse profile (only meaningful for -instr)"));
267cl::opt<unsigned> NumThreads(
268 "num-threads", cl::init(Val: 0), cl::sub(MergeSubcommand),
269 cl::desc("Number of merge threads to use (default: autodetect)"));
270cl::alias NumThreadsA("j", cl::desc("Alias for --num-threads"),
271 cl::aliasopt(NumThreads));
272
273cl::opt<std::string> ProfileSymbolListFile(
274 "prof-sym-list", cl::init(Val: ""), cl::sub(MergeSubcommand),
275 cl::desc("Path to file containing the list of function symbols "
276 "used to populate profile symbol list"));
277
278cl::opt<SampleProfileLayout> ProfileLayout(
279 "convert-sample-profile-layout",
280 cl::desc("Convert the generated profile to a profile with a new layout"),
281 cl::sub(MergeSubcommand), cl::init(Val: SPL_None),
282 cl::values(
283 clEnumValN(SPL_Nest, "nest",
284 "Nested profile, the input should be CS flat profile"),
285 clEnumValN(SPL_Flat, "flat",
286 "Profile with nested inlinee flatten out")));
287
288cl::opt<bool> DropProfileSymbolList(
289 "drop-profile-symbol-list", cl::init(Val: false), cl::Hidden,
290 cl::sub(MergeSubcommand),
291 cl::desc("Drop the profile symbol list when merging AutoFDO profiles "
292 "(only meaningful for -sample)"));
293
294// Temporary support for writing the previous version of the format, to enable
295// some forward compatibility.
296// TODO: Consider enabling this with future version changes as well, to ease
297// deployment of newer versions of llvm-profdata.
298cl::opt<bool> DoWritePrevVersion(
299 "write-prev-version", cl::init(Val: false), cl::Hidden,
300 cl::desc("Write the previous version of indexed format, to enable "
301 "some forward compatibility."));
302
303cl::opt<memprof::IndexedVersion> MemProfVersionRequested(
304 "memprof-version", cl::Hidden, cl::sub(MergeSubcommand),
305 cl::desc("Specify the version of the memprof format to use"),
306 cl::init(Val: memprof::Version0),
307 cl::values(clEnumValN(memprof::Version0, "0", "version 0"),
308 clEnumValN(memprof::Version1, "1", "version 1"),
309 clEnumValN(memprof::Version2, "2", "version 2")));
310
311// Options specific to overlap subcommand.
312cl::opt<std::string> BaseFilename(cl::Positional, cl::Required,
313 cl::desc("<base profile file>"),
314 cl::sub(OverlapSubcommand));
315cl::opt<std::string> TestFilename(cl::Positional, cl::Required,
316 cl::desc("<test profile file>"),
317 cl::sub(OverlapSubcommand));
318
319cl::opt<unsigned long long> SimilarityCutoff(
320 "similarity-cutoff", cl::init(Val: 0),
321 cl::desc("For sample profiles, list function names (with calling context "
322 "for csspgo) for overlapped functions "
323 "with similarities below the cutoff (percentage times 10000)."),
324 cl::sub(OverlapSubcommand));
325
326cl::opt<bool> IsCS(
327 "cs", cl::init(Val: false),
328 cl::desc("For context sensitive PGO counts. Does not work with CSSPGO."),
329 cl::sub(OverlapSubcommand));
330
331cl::opt<unsigned long long> OverlapValueCutoff(
332 "value-cutoff", cl::init(Val: -1),
333 cl::desc(
334 "Function level overlap information for every function (with calling "
335 "context for csspgo) in test "
336 "profile with max count value greater then the parameter value"),
337 cl::sub(OverlapSubcommand));
338
339// Options unique to show subcommand.
340cl::opt<bool> ShowCounts("counts", cl::init(Val: false),
341 cl::desc("Show counter values for shown functions"),
342 cl::sub(ShowSubcommand));
343cl::opt<ShowFormat>
344 SFormat("show-format", cl::init(Val: ShowFormat::Text),
345 cl::desc("Emit output in the selected format if supported"),
346 cl::sub(ShowSubcommand),
347 cl::values(clEnumValN(ShowFormat::Text, "text",
348 "emit normal text output (default)"),
349 clEnumValN(ShowFormat::Json, "json", "emit JSON"),
350 clEnumValN(ShowFormat::Yaml, "yaml", "emit YAML")));
351// TODO: Consider replacing this with `--show-format=text-encoding`.
352cl::opt<bool>
353 TextFormat("text", cl::init(Val: false),
354 cl::desc("Show instr profile data in text dump format"),
355 cl::sub(ShowSubcommand));
356cl::opt<bool>
357 JsonFormat("json",
358 cl::desc("Show sample profile data in the JSON format "
359 "(deprecated, please use --show-format=json)"),
360 cl::sub(ShowSubcommand));
361cl::opt<bool> ShowIndirectCallTargets(
362 "ic-targets", cl::init(Val: false),
363 cl::desc("Show indirect call site target values for shown functions"),
364 cl::sub(ShowSubcommand));
365cl::opt<bool> ShowVTables("show-vtables", cl::init(Val: false),
366 cl::desc("Show vtable names for shown functions"),
367 cl::sub(ShowSubcommand));
368cl::opt<bool> ShowMemOPSizes(
369 "memop-sizes", cl::init(Val: false),
370 cl::desc("Show the profiled sizes of the memory intrinsic calls "
371 "for shown functions"),
372 cl::sub(ShowSubcommand));
373cl::opt<bool> ShowDetailedSummary("detailed-summary", cl::init(Val: false),
374 cl::desc("Show detailed profile summary"),
375 cl::sub(ShowSubcommand));
376cl::list<uint32_t> DetailedSummaryCutoffs(
377 cl::CommaSeparated, "detailed-summary-cutoffs",
378 cl::desc(
379 "Cutoff percentages (times 10000) for generating detailed summary"),
380 cl::value_desc("800000,901000,999999"), cl::sub(ShowSubcommand));
381cl::opt<bool>
382 ShowHotFuncList("hot-func-list", cl::init(Val: false),
383 cl::desc("Show profile summary of a list of hot functions"),
384 cl::sub(ShowSubcommand));
385cl::opt<bool> ShowAllFunctions("all-functions", cl::init(Val: false),
386 cl::desc("Details for each and every function"),
387 cl::sub(ShowSubcommand));
388cl::opt<bool> ShowCS("showcs", cl::init(Val: false),
389 cl::desc("Show context sensitive counts"),
390 cl::sub(ShowSubcommand));
391cl::opt<ProfileKinds> ShowProfileKind(
392 cl::desc("Profile kind supported by show:"), cl::sub(ShowSubcommand),
393 cl::init(Val: instr),
394 cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
395 clEnumVal(sample, "Sample profile"),
396 clEnumVal(memory, "MemProf memory access profile")));
397cl::opt<uint32_t> TopNFunctions(
398 "topn", cl::init(Val: 0),
399 cl::desc("Show the list of functions with the largest internal counts"),
400 cl::sub(ShowSubcommand));
401cl::opt<uint32_t> ShowValueCutoff(
402 "value-cutoff", cl::init(Val: 0),
403 cl::desc("Set the count value cutoff. Functions with the maximum count "
404 "less than this value will not be printed out. (Default is 0)"),
405 cl::sub(ShowSubcommand));
406cl::opt<bool> OnlyListBelow(
407 "list-below-cutoff", cl::init(Val: false),
408 cl::desc("Only output names of functions whose max count values are "
409 "below the cutoff value"),
410 cl::sub(ShowSubcommand));
411cl::opt<bool> ShowProfileSymbolList(
412 "show-prof-sym-list", cl::init(Val: false),
413 cl::desc("Show profile symbol list if it exists in the profile. "),
414 cl::sub(ShowSubcommand));
415cl::opt<bool> ShowSectionInfoOnly(
416 "show-sec-info-only", cl::init(Val: false),
417 cl::desc("Show the information of each section in the sample profile. "
418 "The flag is only usable when the sample profile is in "
419 "extbinary format"),
420 cl::sub(ShowSubcommand));
421cl::opt<bool> ShowBinaryIds("binary-ids", cl::init(Val: false),
422 cl::desc("Show binary ids in the profile. "),
423 cl::sub(ShowSubcommand));
424cl::opt<bool> ShowTemporalProfTraces(
425 "temporal-profile-traces",
426 cl::desc("Show temporal profile traces in the profile."),
427 cl::sub(ShowSubcommand));
428
429cl::opt<bool>
430 ShowCovered("covered", cl::init(Val: false),
431 cl::desc("Show only the functions that have been executed."),
432 cl::sub(ShowSubcommand));
433
434cl::opt<bool> ShowProfileVersion("profile-version", cl::init(Val: false),
435 cl::desc("Show profile version. "),
436 cl::sub(ShowSubcommand));
437
// Marker string used to indicate that multiple static functions map to the
// same name.
const std::string DuplicateNameStr("----");
441
442static void warn(Twine Message, std::string Whence = "",
443 std::string Hint = "") {
444 WithColor::warning();
445 if (!Whence.empty())
446 errs() << Whence << ": ";
447 errs() << Message << "\n";
448 if (!Hint.empty())
449 WithColor::note() << Hint << "\n";
450}
451
452static void warn(Error E, StringRef Whence = "") {
453 if (E.isA<InstrProfError>()) {
454 handleAllErrors(E: std::move(E), Handlers: [&](const InstrProfError &IPE) {
455 warn(Message: IPE.message(), Whence: std::string(Whence), Hint: std::string(""));
456 });
457 }
458}
459
460static void exitWithError(Twine Message, std::string Whence = "",
461 std::string Hint = "") {
462 WithColor::error();
463 if (!Whence.empty())
464 errs() << Whence << ": ";
465 errs() << Message << "\n";
466 if (!Hint.empty())
467 WithColor::note() << Hint << "\n";
468 ::exit(status: 1);
469}
470
471static void exitWithError(Error E, StringRef Whence = "") {
472 if (E.isA<InstrProfError>()) {
473 handleAllErrors(E: std::move(E), Handlers: [&](const InstrProfError &IPE) {
474 instrprof_error instrError = IPE.get();
475 StringRef Hint = "";
476 if (instrError == instrprof_error::unrecognized_format) {
477 // Hint in case user missed specifying the profile type.
478 Hint = "Perhaps you forgot to use the --sample or --memory option?";
479 }
480 exitWithError(Message: IPE.message(), Whence: std::string(Whence), Hint: std::string(Hint));
481 });
482 return;
483 }
484
485 exitWithError(Message: toString(E: std::move(E)), Whence: std::string(Whence));
486}
487
488static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") {
489 exitWithError(Message: EC.message(), Whence: std::string(Whence));
490}
491
492static void warnOrExitGivenError(FailureMode FailMode, std::error_code EC,
493 StringRef Whence = "") {
494 if (FailMode == failIfAnyAreInvalid)
495 exitWithErrorCode(EC, Whence);
496 else
497 warn(Message: EC.message(), Whence: std::string(Whence));
498}
499
500static void handleMergeWriterError(Error E, StringRef WhenceFile = "",
501 StringRef WhenceFunction = "",
502 bool ShowHint = true) {
503 if (!WhenceFile.empty())
504 errs() << WhenceFile << ": ";
505 if (!WhenceFunction.empty())
506 errs() << WhenceFunction << ": ";
507
508 auto IPE = instrprof_error::success;
509 E = handleErrors(E: std::move(E),
510 Hs: [&IPE](std::unique_ptr<InstrProfError> E) -> Error {
511 IPE = E->get();
512 return Error(std::move(E));
513 });
514 errs() << toString(E: std::move(E)) << "\n";
515
516 if (ShowHint) {
517 StringRef Hint = "";
518 if (IPE != instrprof_error::success) {
519 switch (IPE) {
520 case instrprof_error::hash_mismatch:
521 case instrprof_error::count_mismatch:
522 case instrprof_error::value_site_count_mismatch:
523 Hint = "Make sure that all profile data to be merged is generated "
524 "from the same binary.";
525 break;
526 default:
527 break;
528 }
529 }
530
531 if (!Hint.empty())
532 errs() << Hint << "\n";
533 }
534}
535
536namespace {
537/// A remapper from original symbol names to new symbol names based on a file
538/// containing a list of mappings from old name to new name.
539class SymbolRemapper {
540 std::unique_ptr<MemoryBuffer> File;
541 DenseMap<StringRef, StringRef> RemappingTable;
542
543public:
544 /// Build a SymbolRemapper from a file containing a list of old/new symbols.
545 static std::unique_ptr<SymbolRemapper> create(StringRef InputFile) {
546 auto BufOrError = MemoryBuffer::getFileOrSTDIN(Filename: InputFile);
547 if (!BufOrError)
548 exitWithErrorCode(EC: BufOrError.getError(), Whence: InputFile);
549
550 auto Remapper = std::make_unique<SymbolRemapper>();
551 Remapper->File = std::move(BufOrError.get());
552
553 for (line_iterator LineIt(*Remapper->File, /*SkipBlanks=*/true, '#');
554 !LineIt.is_at_eof(); ++LineIt) {
555 std::pair<StringRef, StringRef> Parts = LineIt->split(Separator: ' ');
556 if (Parts.first.empty() || Parts.second.empty() ||
557 Parts.second.count(C: ' ')) {
558 exitWithError(Message: "unexpected line in remapping file",
559 Whence: (InputFile + ":" + Twine(LineIt.line_number())).str(),
560 Hint: "expected 'old_symbol new_symbol'");
561 }
562 Remapper->RemappingTable.insert(KV: Parts);
563 }
564 return Remapper;
565 }
566
567 /// Attempt to map the given old symbol into a new symbol.
568 ///
569 /// \return The new symbol, or \p Name if no such symbol was found.
570 StringRef operator()(StringRef Name) {
571 StringRef New = RemappingTable.lookup(Val: Name);
572 return New.empty() ? Name : New;
573 }
574
575 FunctionId operator()(FunctionId Name) {
576 // MD5 name cannot be remapped.
577 if (!Name.isStringRef())
578 return Name;
579 StringRef New = RemappingTable.lookup(Val: Name.stringRef());
580 return New.empty() ? Name : FunctionId(New);
581 }
582};
583}
584
585struct WeightedFile {
586 std::string Filename;
587 uint64_t Weight;
588};
589typedef SmallVector<WeightedFile, 5> WeightedFileVector;
590
591/// Keep track of merged data and reported errors.
592struct WriterContext {
593 std::mutex Lock;
594 InstrProfWriter Writer;
595 std::vector<std::pair<Error, std::string>> Errors;
596 std::mutex &ErrLock;
597 SmallSet<instrprof_error, 4> &WriterErrorCodes;
598
599 WriterContext(bool IsSparse, std::mutex &ErrLock,
600 SmallSet<instrprof_error, 4> &WriterErrorCodes,
601 uint64_t ReservoirSize = 0, uint64_t MaxTraceLength = 0)
602 : Writer(IsSparse, ReservoirSize, MaxTraceLength, DoWritePrevVersion,
603 MemProfVersionRequested),
604 ErrLock(ErrLock), WriterErrorCodes(WriterErrorCodes) {}
605};
606
607/// Computer the overlap b/w profile BaseFilename and TestFileName,
608/// and store the program level result to Overlap.
609static void overlapInput(const std::string &BaseFilename,
610 const std::string &TestFilename, WriterContext *WC,
611 OverlapStats &Overlap,
612 const OverlapFuncFilters &FuncFilter,
613 raw_fd_ostream &OS, bool IsCS) {
614 auto FS = vfs::getRealFileSystem();
615 auto ReaderOrErr = InstrProfReader::create(Path: TestFilename, FS&: *FS);
616 if (Error E = ReaderOrErr.takeError()) {
617 // Skip the empty profiles by returning sliently.
618 auto [ErrorCode, Msg] = InstrProfError::take(E: std::move(E));
619 if (ErrorCode != instrprof_error::empty_raw_profile)
620 WC->Errors.emplace_back(args: make_error<InstrProfError>(Args&: ErrorCode, Args&: Msg),
621 args: TestFilename);
622 return;
623 }
624
625 auto Reader = std::move(ReaderOrErr.get());
626 for (auto &I : *Reader) {
627 OverlapStats FuncOverlap(OverlapStats::FunctionLevel);
628 FuncOverlap.setFuncInfo(Name: I.Name, Hash: I.Hash);
629
630 WC->Writer.overlapRecord(Other: std::move(I), Overlap, FuncLevelOverlap&: FuncOverlap, FuncFilter);
631 FuncOverlap.dump(OS);
632 }
633}
634
635/// Load an input into a writer context.
636static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
637 const InstrProfCorrelator *Correlator,
638 const StringRef ProfiledBinary, WriterContext *WC) {
639 std::unique_lock<std::mutex> CtxGuard{WC->Lock};
640
641 // Copy the filename, because llvm::ThreadPool copied the input "const
642 // WeightedFile &" by value, making a reference to the filename within it
643 // invalid outside of this packaged task.
644 std::string Filename = Input.Filename;
645
646 using ::llvm::memprof::RawMemProfReader;
647 if (RawMemProfReader::hasFormat(Path: Input.Filename)) {
648 auto ReaderOrErr = RawMemProfReader::create(Path: Input.Filename, ProfiledBinary);
649 if (!ReaderOrErr) {
650 exitWithError(E: ReaderOrErr.takeError(), Whence: Input.Filename);
651 }
652 std::unique_ptr<RawMemProfReader> Reader = std::move(ReaderOrErr.get());
653 // Check if the profile types can be merged, e.g. clang frontend profiles
654 // should not be merged with memprof profiles.
655 if (Error E = WC->Writer.mergeProfileKind(Other: Reader->getProfileKind())) {
656 consumeError(Err: std::move(E));
657 WC->Errors.emplace_back(
658 args: make_error<StringError>(
659 Args: "Cannot merge MemProf profile with Clang generated profile.",
660 Args: std::error_code()),
661 args&: Filename);
662 return;
663 }
664
665 auto MemProfError = [&](Error E) {
666 auto [ErrorCode, Msg] = InstrProfError::take(E: std::move(E));
667 WC->Errors.emplace_back(args: make_error<InstrProfError>(Args&: ErrorCode, Args&: Msg),
668 args&: Filename);
669 };
670
671 // Add the frame mappings into the writer context.
672 const auto &IdToFrame = Reader->getFrameMapping();
673 for (const auto &I : IdToFrame) {
674 bool Succeeded = WC->Writer.addMemProfFrame(
675 /*Id=*/I.first, /*Frame=*/F: I.getSecond(), Warn: MemProfError);
676 // If we weren't able to add the frame mappings then it doesn't make sense
677 // to try to add the records from this profile.
678 if (!Succeeded)
679 return;
680 }
681
682 // Add the call stacks into the writer context.
683 const auto &CSIdToCallStacks = Reader->getCallStacks();
684 for (const auto &I : CSIdToCallStacks) {
685 bool Succeeded = WC->Writer.addMemProfCallStack(
686 /*Id=*/CSId: I.first, /*Frame=*/CallStack: I.getSecond(), Warn: MemProfError);
687 // If we weren't able to add the call stacks then it doesn't make sense
688 // to try to add the records from this profile.
689 if (!Succeeded)
690 return;
691 }
692
693 const auto &FunctionProfileData = Reader->getProfileData();
694 // Add the memprof records into the writer context.
695 for (const auto &[GUID, Record] : FunctionProfileData) {
696 WC->Writer.addMemProfRecord(Id: GUID, Record);
697 }
698 return;
699 }
700
701 auto FS = vfs::getRealFileSystem();
702 // TODO: This only saves the first non-fatal error from InstrProfReader, and
703 // then added to WriterContext::Errors. However, this is not extensible, if
704 // we have more non-fatal errors from InstrProfReader in the future. How
705 // should this interact with different -failure-mode?
706 std::optional<std::pair<Error, std::string>> ReaderWarning;
707 auto Warn = [&](Error E) {
708 if (ReaderWarning) {
709 consumeError(Err: std::move(E));
710 return;
711 }
712 // Only show the first time an error occurs in this file.
713 auto [ErrCode, Msg] = InstrProfError::take(E: std::move(E));
714 ReaderWarning = {make_error<InstrProfError>(Args&: ErrCode, Args&: Msg), Filename};
715 };
716 auto ReaderOrErr =
717 InstrProfReader::create(Path: Input.Filename, FS&: *FS, Correlator, Warn);
718 if (Error E = ReaderOrErr.takeError()) {
719 // Skip the empty profiles by returning silently.
720 auto [ErrCode, Msg] = InstrProfError::take(E: std::move(E));
721 if (ErrCode != instrprof_error::empty_raw_profile)
722 WC->Errors.emplace_back(args: make_error<InstrProfError>(Args&: ErrCode, Args&: Msg),
723 args&: Filename);
724 return;
725 }
726
727 auto Reader = std::move(ReaderOrErr.get());
728 if (Error E = WC->Writer.mergeProfileKind(Other: Reader->getProfileKind())) {
729 consumeError(Err: std::move(E));
730 WC->Errors.emplace_back(
731 args: make_error<StringError>(
732 Args: "Merge IR generated profile with Clang generated profile.",
733 Args: std::error_code()),
734 args&: Filename);
735 return;
736 }
737
738 for (auto &I : *Reader) {
739 if (Remapper)
740 I.Name = (*Remapper)(I.Name);
741 const StringRef FuncName = I.Name;
742 bool Reported = false;
743 WC->Writer.addRecord(I: std::move(I), Weight: Input.Weight, Warn: [&](Error E) {
744 if (Reported) {
745 consumeError(Err: std::move(E));
746 return;
747 }
748 Reported = true;
749 // Only show hint the first time an error occurs.
750 auto [ErrCode, Msg] = InstrProfError::take(E: std::move(E));
751 std::unique_lock<std::mutex> ErrGuard{WC->ErrLock};
752 bool firstTime = WC->WriterErrorCodes.insert(V: ErrCode).second;
753 handleMergeWriterError(E: make_error<InstrProfError>(Args&: ErrCode, Args&: Msg),
754 WhenceFile: Input.Filename, WhenceFunction: FuncName, ShowHint: firstTime);
755 });
756 }
757
758 const InstrProfSymtab &symtab = Reader->getSymtab();
759 const auto &VTableNames = symtab.getVTableNames();
760
761 for (const auto &kv : VTableNames) {
762 WC->Writer.addVTableName(VTableName: kv.getKey());
763 }
764
765 if (Reader->hasTemporalProfile()) {
766 auto &Traces = Reader->getTemporalProfTraces(Weight: Input.Weight);
767 if (!Traces.empty())
768 WC->Writer.addTemporalProfileTraces(
769 SrcTraces&: Traces, SrcStreamSize: Reader->getTemporalProfTraceStreamSize());
770 }
771 if (Reader->hasError()) {
772 if (Error E = Reader->getError()) {
773 WC->Errors.emplace_back(args: std::move(E), args&: Filename);
774 return;
775 }
776 }
777
778 std::vector<llvm::object::BuildID> BinaryIds;
779 if (Error E = Reader->readBinaryIds(BinaryIds)) {
780 WC->Errors.emplace_back(args: std::move(E), args&: Filename);
781 return;
782 }
783 WC->Writer.addBinaryIds(BIs: BinaryIds);
784
785 if (ReaderWarning) {
786 WC->Errors.emplace_back(args: std::move(ReaderWarning->first),
787 args&: ReaderWarning->second);
788 }
789}
790
791/// Merge the \p Src writer context into \p Dst.
792static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) {
793 for (auto &ErrorPair : Src->Errors)
794 Dst->Errors.push_back(x: std::move(ErrorPair));
795 Src->Errors.clear();
796
797 if (Error E = Dst->Writer.mergeProfileKind(Other: Src->Writer.getProfileKind()))
798 exitWithError(E: std::move(E));
799
800 Dst->Writer.mergeRecordsFromWriter(IPW: std::move(Src->Writer), Warn: [&](Error E) {
801 auto [ErrorCode, Msg] = InstrProfError::take(E: std::move(E));
802 std::unique_lock<std::mutex> ErrGuard{Dst->ErrLock};
803 bool firstTime = Dst->WriterErrorCodes.insert(V: ErrorCode).second;
804 if (firstTime)
805 warn(Message: toString(E: make_error<InstrProfError>(Args&: ErrorCode, Args&: Msg)));
806 });
807}
808
809static StringRef
810getFuncName(const StringMap<InstrProfWriter::ProfilingData>::value_type &Val) {
811 return Val.first();
812}
813
814static std::string
815getFuncName(const SampleProfileMap::value_type &Val) {
816 return Val.second.getContext().toString();
817}
818
819template <typename T>
820static void filterFunctions(T &ProfileMap) {
821 bool hasFilter = !FuncNameFilter.empty();
822 bool hasNegativeFilter = !FuncNameNegativeFilter.empty();
823 if (!hasFilter && !hasNegativeFilter)
824 return;
825
826 // If filter starts with '?' it is MSVC mangled name, not a regex.
827 llvm::Regex ProbablyMSVCMangledName("[?@$_0-9A-Za-z]+");
828 if (hasFilter && FuncNameFilter[0] == '?' &&
829 ProbablyMSVCMangledName.match(String: FuncNameFilter))
830 FuncNameFilter = llvm::Regex::escape(String: FuncNameFilter);
831 if (hasNegativeFilter && FuncNameNegativeFilter[0] == '?' &&
832 ProbablyMSVCMangledName.match(String: FuncNameNegativeFilter))
833 FuncNameNegativeFilter = llvm::Regex::escape(String: FuncNameNegativeFilter);
834
835 size_t Count = ProfileMap.size();
836 llvm::Regex Pattern(FuncNameFilter);
837 llvm::Regex NegativePattern(FuncNameNegativeFilter);
838 std::string Error;
839 if (hasFilter && !Pattern.isValid(Error))
840 exitWithError(Message: Error);
841 if (hasNegativeFilter && !NegativePattern.isValid(Error))
842 exitWithError(Message: Error);
843
844 // Handle MD5 profile, so it is still able to match using the original name.
845 std::string MD5Name = std::to_string(val: llvm::MD5Hash(Str: FuncNameFilter));
846 std::string NegativeMD5Name =
847 std::to_string(val: llvm::MD5Hash(Str: FuncNameNegativeFilter));
848
849 for (auto I = ProfileMap.begin(); I != ProfileMap.end();) {
850 auto Tmp = I++;
851 const auto &FuncName = getFuncName(*Tmp);
852 // Negative filter has higher precedence than positive filter.
853 if ((hasNegativeFilter &&
854 (NegativePattern.match(String: FuncName) ||
855 (FunctionSamples::UseMD5 && NegativeMD5Name == FuncName))) ||
856 (hasFilter && !(Pattern.match(String: FuncName) ||
857 (FunctionSamples::UseMD5 && MD5Name == FuncName))))
858 ProfileMap.erase(Tmp);
859 }
860
861 llvm::dbgs() << Count - ProfileMap.size() << " of " << Count << " functions "
862 << "in the original profile are filtered.\n";
863}
864
865static void writeInstrProfile(StringRef OutputFilename,
866 ProfileFormat OutputFormat,
867 InstrProfWriter &Writer) {
868 std::error_code EC;
869 raw_fd_ostream Output(OutputFilename.data(), EC,
870 OutputFormat == PF_Text ? sys::fs::OF_TextWithCRLF
871 : sys::fs::OF_None);
872 if (EC)
873 exitWithErrorCode(EC, Whence: OutputFilename);
874
875 if (OutputFormat == PF_Text) {
876 if (Error E = Writer.writeText(OS&: Output))
877 warn(E: std::move(E));
878 } else {
879 if (Output.is_displayed())
880 exitWithError(Message: "cannot write a non-text format profile to the terminal");
881 if (Error E = Writer.write(OS&: Output))
882 warn(E: std::move(E));
883 }
884}
885
886static void mergeInstrProfile(const WeightedFileVector &Inputs,
887 SymbolRemapper *Remapper,
888 int MaxDbgCorrelationWarnings,
889 const StringRef ProfiledBinary) {
890 const uint64_t TraceReservoirSize = TemporalProfTraceReservoirSize.getValue();
891 const uint64_t MaxTraceLength = TemporalProfMaxTraceLength.getValue();
892 if (OutputFormat == PF_Compact_Binary)
893 exitWithError(Message: "Compact Binary is deprecated");
894 if (OutputFormat != PF_Binary && OutputFormat != PF_Ext_Binary &&
895 OutputFormat != PF_Text)
896 exitWithError(Message: "unknown format is specified");
897
898 // TODO: Maybe we should support correlation with mixture of different
899 // correlation modes(w/wo debug-info/object correlation).
900 if (!DebugInfoFilename.empty() && !BinaryFilename.empty())
901 exitWithError(Message: "Expected only one of -debug-info, -binary-file");
902 std::string CorrelateFilename;
903 ProfCorrelatorKind CorrelateKind = ProfCorrelatorKind::NONE;
904 if (!DebugInfoFilename.empty()) {
905 CorrelateFilename = DebugInfoFilename;
906 CorrelateKind = ProfCorrelatorKind::DEBUG_INFO;
907 } else if (!BinaryFilename.empty()) {
908 CorrelateFilename = BinaryFilename;
909 CorrelateKind = ProfCorrelatorKind::BINARY;
910 }
911
912 std::unique_ptr<InstrProfCorrelator> Correlator;
913 if (CorrelateKind != InstrProfCorrelator::NONE) {
914 if (auto Err = InstrProfCorrelator::get(Filename: CorrelateFilename, FileKind: CorrelateKind)
915 .moveInto(Value&: Correlator))
916 exitWithError(E: std::move(Err), Whence: CorrelateFilename);
917 if (auto Err = Correlator->correlateProfileData(MaxWarnings: MaxDbgCorrelationWarnings))
918 exitWithError(E: std::move(Err), Whence: CorrelateFilename);
919 }
920
921 std::mutex ErrorLock;
922 SmallSet<instrprof_error, 4> WriterErrorCodes;
923
924 // If NumThreads is not specified, auto-detect a good default.
925 if (NumThreads == 0)
926 NumThreads = std::min(a: hardware_concurrency().compute_thread_count(),
927 b: unsigned((Inputs.size() + 1) / 2));
928
929 // Initialize the writer contexts.
930 SmallVector<std::unique_ptr<WriterContext>, 4> Contexts;
931 for (unsigned I = 0; I < NumThreads; ++I)
932 Contexts.emplace_back(Args: std::make_unique<WriterContext>(
933 args&: OutputSparse, args&: ErrorLock, args&: WriterErrorCodes, args: TraceReservoirSize,
934 args: MaxTraceLength));
935
936 if (NumThreads == 1) {
937 for (const auto &Input : Inputs)
938 loadInput(Input, Remapper, Correlator: Correlator.get(), ProfiledBinary,
939 WC: Contexts[0].get());
940 } else {
941 DefaultThreadPool Pool(hardware_concurrency(ThreadCount: NumThreads));
942
943 // Load the inputs in parallel (N/NumThreads serial steps).
944 unsigned Ctx = 0;
945 for (const auto &Input : Inputs) {
946 Pool.async(F&: loadInput, ArgList: Input, ArgList&: Remapper, ArgList: Correlator.get(), ArgList: ProfiledBinary,
947 ArgList: Contexts[Ctx].get());
948 Ctx = (Ctx + 1) % NumThreads;
949 }
950 Pool.wait();
951
952 // Merge the writer contexts together (~ lg(NumThreads) serial steps).
953 unsigned Mid = Contexts.size() / 2;
954 unsigned End = Contexts.size();
955 assert(Mid > 0 && "Expected more than one context");
956 do {
957 for (unsigned I = 0; I < Mid; ++I)
958 Pool.async(F&: mergeWriterContexts, ArgList: Contexts[I].get(),
959 ArgList: Contexts[I + Mid].get());
960 Pool.wait();
961 if (End & 1) {
962 Pool.async(F&: mergeWriterContexts, ArgList: Contexts[0].get(),
963 ArgList: Contexts[End - 1].get());
964 Pool.wait();
965 }
966 End = Mid;
967 Mid /= 2;
968 } while (Mid > 0);
969 }
970
971 // Handle deferred errors encountered during merging. If the number of errors
972 // is equal to the number of inputs the merge failed.
973 unsigned NumErrors = 0;
974 for (std::unique_ptr<WriterContext> &WC : Contexts) {
975 for (auto &ErrorPair : WC->Errors) {
976 ++NumErrors;
977 warn(Message: toString(E: std::move(ErrorPair.first)), Whence: ErrorPair.second);
978 }
979 }
980 if ((NumErrors == Inputs.size() && FailMode == failIfAllAreInvalid) ||
981 (NumErrors > 0 && FailMode == failIfAnyAreInvalid))
982 exitWithError(Message: "no profile can be merged");
983
984 filterFunctions(ProfileMap&: Contexts[0]->Writer.getProfileData());
985
986 writeInstrProfile(OutputFilename, OutputFormat, Writer&: Contexts[0]->Writer);
987}
988
989/// The profile entry for a function in instrumentation profile.
990struct InstrProfileEntry {
991 uint64_t MaxCount = 0;
992 uint64_t NumEdgeCounters = 0;
993 float ZeroCounterRatio = 0.0;
994 InstrProfRecord *ProfRecord;
995 InstrProfileEntry(InstrProfRecord *Record);
996 InstrProfileEntry() = default;
997};
998
999InstrProfileEntry::InstrProfileEntry(InstrProfRecord *Record) {
1000 ProfRecord = Record;
1001 uint64_t CntNum = Record->Counts.size();
1002 uint64_t ZeroCntNum = 0;
1003 for (size_t I = 0; I < CntNum; ++I) {
1004 MaxCount = std::max(a: MaxCount, b: Record->Counts[I]);
1005 ZeroCntNum += !Record->Counts[I];
1006 }
1007 ZeroCounterRatio = (float)ZeroCntNum / CntNum;
1008 NumEdgeCounters = CntNum;
1009}
1010
1011/// Either set all the counters in the instr profile entry \p IFE to
1012/// -1 / -2 /in order to drop the profile or scale up the
1013/// counters in \p IFP to be above hot / cold threshold. We use
1014/// the ratio of zero counters in the profile of a function to
1015/// decide the profile is helpful or harmful for performance,
1016/// and to choose whether to scale up or drop it.
1017static void updateInstrProfileEntry(InstrProfileEntry &IFE, bool SetToHot,
1018 uint64_t HotInstrThreshold,
1019 uint64_t ColdInstrThreshold,
1020 float ZeroCounterThreshold) {
1021 InstrProfRecord *ProfRecord = IFE.ProfRecord;
1022 if (!IFE.MaxCount || IFE.ZeroCounterRatio > ZeroCounterThreshold) {
1023 // If all or most of the counters of the function are zero, the
1024 // profile is unaccountable and should be dropped. Reset all the
1025 // counters to be -1 / -2 and PGO profile-use will drop the profile.
1026 // All counters being -1 also implies that the function is hot so
1027 // PGO profile-use will also set the entry count metadata to be
1028 // above hot threshold.
1029 // All counters being -2 implies that the function is warm so
1030 // PGO profile-use will also set the entry count metadata to be
1031 // above cold threshold.
1032 auto Kind =
1033 (SetToHot ? InstrProfRecord::PseudoHot : InstrProfRecord::PseudoWarm);
1034 ProfRecord->setPseudoCount(Kind);
1035 return;
1036 }
1037
1038 // Scale up the MaxCount to be multiple times above hot / cold threshold.
1039 const unsigned MultiplyFactor = 3;
1040 uint64_t Threshold = (SetToHot ? HotInstrThreshold : ColdInstrThreshold);
1041 uint64_t Numerator = Threshold * MultiplyFactor;
1042
1043 // Make sure Threshold for warm counters is below the HotInstrThreshold.
1044 if (!SetToHot && Threshold >= HotInstrThreshold) {
1045 Threshold = (HotInstrThreshold + ColdInstrThreshold) / 2;
1046 }
1047
1048 uint64_t Denominator = IFE.MaxCount;
1049 if (Numerator <= Denominator)
1050 return;
1051 ProfRecord->scale(N: Numerator, D: Denominator, Warn: [&](instrprof_error E) {
1052 warn(Message: toString(E: make_error<InstrProfError>(Args&: E)));
1053 });
1054}
1055
/// Positions within ProfileSummaryBuilder::DefaultCutoffs of the cutoffs used
/// to derive the cold and the hot count thresholds, respectively.
constexpr uint64_t ColdPercentileIdx = 15;
constexpr uint64_t HotPercentileIdx = 11;
1058
1059using sampleprof::FSDiscriminatorPass;
1060
1061// Internal options to set FSDiscriminatorPass. Used in merge and show
1062// commands.
1063static cl::opt<FSDiscriminatorPass> FSDiscriminatorPassOption(
1064 "fs-discriminator-pass", cl::init(Val: PassLast), cl::Hidden,
1065 cl::desc("Zero out the discriminator bits for the FS discrimiantor "
1066 "pass beyond this value. The enum values are defined in "
1067 "Support/Discriminator.h"),
1068 cl::values(clEnumVal(Base, "Use base discriminators only"),
1069 clEnumVal(Pass1, "Use base and pass 1 discriminators"),
1070 clEnumVal(Pass2, "Use base and pass 1-2 discriminators"),
1071 clEnumVal(Pass3, "Use base and pass 1-3 discriminators"),
1072 clEnumVal(PassLast, "Use all discriminator bits (default)")));
1073
1074static unsigned getDiscriminatorMask() {
1075 return getN1Bits(N: getFSPassBitEnd(P: FSDiscriminatorPassOption.getValue()));
1076}
1077
1078/// Adjust the instr profile in \p WC based on the sample profile in
1079/// \p Reader.
1080static void
1081adjustInstrProfile(std::unique_ptr<WriterContext> &WC,
1082 std::unique_ptr<sampleprof::SampleProfileReader> &Reader,
1083 unsigned SupplMinSizeThreshold, float ZeroCounterThreshold,
1084 unsigned InstrProfColdThreshold) {
1085 // Function to its entry in instr profile.
1086 StringMap<InstrProfileEntry> InstrProfileMap;
1087 StringMap<StringRef> StaticFuncMap;
1088 InstrProfSummaryBuilder IPBuilder(ProfileSummaryBuilder::DefaultCutoffs);
1089
1090 auto checkSampleProfileHasFUnique = [&Reader]() {
1091 for (const auto &PD : Reader->getProfiles()) {
1092 auto &FContext = PD.second.getContext();
1093 if (FContext.toString().find(s: FunctionSamples::UniqSuffix) !=
1094 std::string::npos) {
1095 return true;
1096 }
1097 }
1098 return false;
1099 };
1100
1101 bool SampleProfileHasFUnique = checkSampleProfileHasFUnique();
1102
1103 auto buildStaticFuncMap = [&StaticFuncMap,
1104 SampleProfileHasFUnique](const StringRef Name) {
1105 std::string FilePrefixes[] = {".cpp", "cc", ".c", ".hpp", ".h"};
1106 size_t PrefixPos = StringRef::npos;
1107 for (auto &FilePrefix : FilePrefixes) {
1108 std::string NamePrefix = FilePrefix + kGlobalIdentifierDelimiter;
1109 PrefixPos = Name.find_insensitive(Str: NamePrefix);
1110 if (PrefixPos == StringRef::npos)
1111 continue;
1112 PrefixPos += NamePrefix.size();
1113 break;
1114 }
1115
1116 if (PrefixPos == StringRef::npos) {
1117 return;
1118 }
1119
1120 StringRef NewName = Name.drop_front(N: PrefixPos);
1121 StringRef FName = Name.substr(Start: 0, N: PrefixPos - 1);
1122 if (NewName.size() == 0) {
1123 return;
1124 }
1125
1126 // This name should have a static linkage.
1127 size_t PostfixPos = NewName.find(Str: FunctionSamples::UniqSuffix);
1128 bool ProfileHasFUnique = (PostfixPos != StringRef::npos);
1129
1130 // If sample profile and instrumented profile do not agree on symbol
1131 // uniqification.
1132 if (SampleProfileHasFUnique != ProfileHasFUnique) {
1133 // If instrumented profile uses -funique-internal-linkage-symbols,
1134 // we need to trim the name.
1135 if (ProfileHasFUnique) {
1136 NewName = NewName.substr(Start: 0, N: PostfixPos);
1137 } else {
1138 // If sample profile uses -funique-internal-linkage-symbols,
1139 // we build the map.
1140 std::string NStr =
1141 NewName.str() + getUniqueInternalLinkagePostfix(FName);
1142 NewName = StringRef(NStr);
1143 StaticFuncMap[NewName] = Name;
1144 return;
1145 }
1146 }
1147
1148 if (!StaticFuncMap.contains(Key: NewName)) {
1149 StaticFuncMap[NewName] = Name;
1150 } else {
1151 StaticFuncMap[NewName] = DuplicateNameStr;
1152 }
1153 };
1154
1155 // We need to flatten the SampleFDO profile as the InstrFDO
1156 // profile does not have inlined callsite profiles.
1157 // One caveat is the pre-inlined function -- their samples
1158 // should be collapsed into the caller function.
1159 // Here we do a DFS traversal to get the flatten profile
1160 // info: the sum of entrycount and the max of maxcount.
1161 // Here is the algorithm:
1162 // recursive (FS, root_name) {
1163 // name = FS->getName();
1164 // get samples for FS;
1165 // if (InstrProf.find(name) {
1166 // root_name = name;
1167 // } else {
1168 // if (name is in static_func map) {
1169 // root_name = static_name;
1170 // }
1171 // }
1172 // update the Map entry for root_name;
1173 // for (subfs: FS) {
1174 // recursive(subfs, root_name);
1175 // }
1176 // }
1177 //
1178 // Here is an example.
1179 //
1180 // SampleProfile:
1181 // foo:12345:1000
1182 // 1: 1000
1183 // 2.1: 1000
1184 // 15: 5000
1185 // 4: bar:1000
1186 // 1: 1000
1187 // 2: goo:3000
1188 // 1: 3000
1189 // 8: bar:40000
1190 // 1: 10000
1191 // 2: goo:30000
1192 // 1: 30000
1193 //
1194 // InstrProfile has two entries:
1195 // foo
1196 // bar.cc;bar
1197 //
1198 // After BuildMaxSampleMap, we should have the following in FlattenSampleMap:
1199 // {"foo", {1000, 5000}}
1200 // {"bar.cc;bar", {11000, 30000}}
1201 //
1202 // foo's has an entry count of 1000, and max body count of 5000.
1203 // bar.cc;bar has an entry count of 11000 (sum two callsites of 1000 and
1204 // 10000), and max count of 30000 (from the callsite in line 8).
1205 //
1206 // Note that goo's count will remain in bar.cc;bar() as it does not have an
1207 // entry in InstrProfile.
1208 llvm::StringMap<std::pair<uint64_t, uint64_t>> FlattenSampleMap;
1209 auto BuildMaxSampleMap = [&FlattenSampleMap, &StaticFuncMap,
1210 &InstrProfileMap](const FunctionSamples &FS,
1211 const StringRef &RootName) {
1212 auto BuildMaxSampleMapImpl = [&](const FunctionSamples &FS,
1213 const StringRef &RootName,
1214 auto &BuildImpl) -> void {
1215 std::string NameStr = FS.getFunction().str();
1216 const StringRef Name = NameStr;
1217 const StringRef *NewRootName = &RootName;
1218 uint64_t EntrySample = FS.getHeadSamplesEstimate();
1219 uint64_t MaxBodySample = FS.getMaxCountInside(/* SkipCallSite*/ true);
1220
1221 auto It = InstrProfileMap.find(Key: Name);
1222 if (It != InstrProfileMap.end()) {
1223 NewRootName = &Name;
1224 } else {
1225 auto NewName = StaticFuncMap.find(Key: Name);
1226 if (NewName != StaticFuncMap.end()) {
1227 It = InstrProfileMap.find(Key: NewName->second.str());
1228 if (NewName->second != DuplicateNameStr) {
1229 NewRootName = &NewName->second;
1230 }
1231 } else {
1232 // Here the EntrySample is of an inlined function, so we should not
1233 // update the EntrySample in the map.
1234 EntrySample = 0;
1235 }
1236 }
1237 EntrySample += FlattenSampleMap[*NewRootName].first;
1238 MaxBodySample =
1239 std::max(a: FlattenSampleMap[*NewRootName].second, b: MaxBodySample);
1240 FlattenSampleMap[*NewRootName] =
1241 std::make_pair(x&: EntrySample, y&: MaxBodySample);
1242
1243 for (const auto &C : FS.getCallsiteSamples())
1244 for (const auto &F : C.second)
1245 BuildImpl(F.second, *NewRootName, BuildImpl);
1246 };
1247 BuildMaxSampleMapImpl(FS, RootName, BuildMaxSampleMapImpl);
1248 };
1249
1250 for (auto &PD : WC->Writer.getProfileData()) {
1251 // Populate IPBuilder.
1252 for (const auto &PDV : PD.getValue()) {
1253 InstrProfRecord Record = PDV.second;
1254 IPBuilder.addRecord(Record);
1255 }
1256
1257 // If a function has multiple entries in instr profile, skip it.
1258 if (PD.getValue().size() != 1)
1259 continue;
1260
1261 // Initialize InstrProfileMap.
1262 InstrProfRecord *R = &PD.getValue().begin()->second;
1263 StringRef FullName = PD.getKey();
1264 InstrProfileMap[FullName] = InstrProfileEntry(R);
1265 buildStaticFuncMap(FullName);
1266 }
1267
1268 for (auto &PD : Reader->getProfiles()) {
1269 sampleprof::FunctionSamples &FS = PD.second;
1270 std::string Name = FS.getFunction().str();
1271 BuildMaxSampleMap(FS, Name);
1272 }
1273
1274 ProfileSummary InstrPS = *IPBuilder.getSummary();
1275 ProfileSummary SamplePS = Reader->getSummary();
1276
1277 // Compute cold thresholds for instr profile and sample profile.
1278 uint64_t HotSampleThreshold =
1279 ProfileSummaryBuilder::getEntryForPercentile(
1280 DS: SamplePS.getDetailedSummary(),
1281 Percentile: ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx])
1282 .MinCount;
1283 uint64_t ColdSampleThreshold =
1284 ProfileSummaryBuilder::getEntryForPercentile(
1285 DS: SamplePS.getDetailedSummary(),
1286 Percentile: ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
1287 .MinCount;
1288 uint64_t HotInstrThreshold =
1289 ProfileSummaryBuilder::getEntryForPercentile(
1290 DS: InstrPS.getDetailedSummary(),
1291 Percentile: ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx])
1292 .MinCount;
1293 uint64_t ColdInstrThreshold =
1294 InstrProfColdThreshold
1295 ? InstrProfColdThreshold
1296 : ProfileSummaryBuilder::getEntryForPercentile(
1297 DS: InstrPS.getDetailedSummary(),
1298 Percentile: ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
1299 .MinCount;
1300
1301 // Find hot/warm functions in sample profile which is cold in instr profile
1302 // and adjust the profiles of those functions in the instr profile.
1303 for (const auto &E : FlattenSampleMap) {
1304 uint64_t SampleMaxCount = std::max(a: E.second.first, b: E.second.second);
1305 if (SampleMaxCount < ColdSampleThreshold)
1306 continue;
1307 StringRef Name = E.first();
1308 auto It = InstrProfileMap.find(Key: Name);
1309 if (It == InstrProfileMap.end()) {
1310 auto NewName = StaticFuncMap.find(Key: Name);
1311 if (NewName != StaticFuncMap.end()) {
1312 It = InstrProfileMap.find(Key: NewName->second.str());
1313 if (NewName->second == DuplicateNameStr) {
1314 WithColor::warning()
1315 << "Static function " << Name
1316 << " has multiple promoted names, cannot adjust profile.\n";
1317 }
1318 }
1319 }
1320 if (It == InstrProfileMap.end() ||
1321 It->second.MaxCount > ColdInstrThreshold ||
1322 It->second.NumEdgeCounters < SupplMinSizeThreshold)
1323 continue;
1324 bool SetToHot = SampleMaxCount >= HotSampleThreshold;
1325 updateInstrProfileEntry(IFE&: It->second, SetToHot, HotInstrThreshold,
1326 ColdInstrThreshold, ZeroCounterThreshold);
1327 }
1328}
1329
1330/// The main function to supplement instr profile with sample profile.
1331/// \Inputs contains the instr profile. \p SampleFilename specifies the
1332/// sample profile. \p OutputFilename specifies the output profile name.
1333/// \p OutputFormat specifies the output profile format. \p OutputSparse
1334/// specifies whether to generate sparse profile. \p SupplMinSizeThreshold
1335/// specifies the minimal size for the functions whose profile will be
1336/// adjusted. \p ZeroCounterThreshold is the threshold to check whether
1337/// a function contains too many zero counters and whether its profile
1338/// should be dropped. \p InstrProfColdThreshold is the user specified
1339/// cold threshold which will override the cold threshold got from the
1340/// instr profile summary.
1341static void supplementInstrProfile(const WeightedFileVector &Inputs,
1342 StringRef SampleFilename, bool OutputSparse,
1343 unsigned SupplMinSizeThreshold,
1344 float ZeroCounterThreshold,
1345 unsigned InstrProfColdThreshold) {
1346 if (OutputFilename == "-")
1347 exitWithError(Message: "cannot write indexed profdata format to stdout");
1348 if (Inputs.size() != 1)
1349 exitWithError(Message: "expect one input to be an instr profile");
1350 if (Inputs[0].Weight != 1)
1351 exitWithError(Message: "expect instr profile doesn't have weight");
1352
1353 StringRef InstrFilename = Inputs[0].Filename;
1354
1355 // Read sample profile.
1356 LLVMContext Context;
1357 auto FS = vfs::getRealFileSystem();
1358 auto ReaderOrErr = sampleprof::SampleProfileReader::create(
1359 Filename: SampleFilename.str(), C&: Context, FS&: *FS, P: FSDiscriminatorPassOption);
1360 if (std::error_code EC = ReaderOrErr.getError())
1361 exitWithErrorCode(EC, Whence: SampleFilename);
1362 auto Reader = std::move(ReaderOrErr.get());
1363 if (std::error_code EC = Reader->read())
1364 exitWithErrorCode(EC, Whence: SampleFilename);
1365
1366 // Read instr profile.
1367 std::mutex ErrorLock;
1368 SmallSet<instrprof_error, 4> WriterErrorCodes;
1369 auto WC = std::make_unique<WriterContext>(args&: OutputSparse, args&: ErrorLock,
1370 args&: WriterErrorCodes);
1371 loadInput(Input: Inputs[0], Remapper: nullptr, Correlator: nullptr, /*ProfiledBinary=*/"", WC: WC.get());
1372 if (WC->Errors.size() > 0)
1373 exitWithError(E: std::move(WC->Errors[0].first), Whence: InstrFilename);
1374
1375 adjustInstrProfile(WC, Reader, SupplMinSizeThreshold, ZeroCounterThreshold,
1376 InstrProfColdThreshold);
1377 writeInstrProfile(OutputFilename, OutputFormat, Writer&: WC->Writer);
1378}
1379
1380/// Make a copy of the given function samples with all symbol names remapped
1381/// by the provided symbol remapper.
1382static sampleprof::FunctionSamples
1383remapSamples(const sampleprof::FunctionSamples &Samples,
1384 SymbolRemapper &Remapper, sampleprof_error &Error) {
1385 sampleprof::FunctionSamples Result;
1386 Result.setFunction(Remapper(Samples.getFunction()));
1387 Result.addTotalSamples(Num: Samples.getTotalSamples());
1388 Result.addHeadSamples(Num: Samples.getHeadSamples());
1389 for (const auto &BodySample : Samples.getBodySamples()) {
1390 uint32_t MaskedDiscriminator =
1391 BodySample.first.Discriminator & getDiscriminatorMask();
1392 Result.addBodySamples(LineOffset: BodySample.first.LineOffset, Discriminator: MaskedDiscriminator,
1393 Num: BodySample.second.getSamples());
1394 for (const auto &Target : BodySample.second.getCallTargets()) {
1395 Result.addCalledTargetSamples(LineOffset: BodySample.first.LineOffset,
1396 Discriminator: MaskedDiscriminator,
1397 Func: Remapper(Target.first), Num: Target.second);
1398 }
1399 }
1400 for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
1401 sampleprof::FunctionSamplesMap &Target =
1402 Result.functionSamplesAt(Loc: CallsiteSamples.first);
1403 for (const auto &Callsite : CallsiteSamples.second) {
1404 sampleprof::FunctionSamples Remapped =
1405 remapSamples(Samples: Callsite.second, Remapper, Error);
1406 MergeResult(Accumulator&: Error, Result: Target[Remapped.getFunction()].merge(Other: Remapped));
1407 }
1408 }
1409 return Result;
1410}
1411
1412static sampleprof::SampleProfileFormat FormatMap[] = {
1413 sampleprof::SPF_None,
1414 sampleprof::SPF_Text,
1415 sampleprof::SPF_None,
1416 sampleprof::SPF_Ext_Binary,
1417 sampleprof::SPF_GCC,
1418 sampleprof::SPF_Binary};
1419
1420static std::unique_ptr<MemoryBuffer>
1421getInputFileBuf(const StringRef &InputFile) {
1422 if (InputFile == "")
1423 return {};
1424
1425 auto BufOrError = MemoryBuffer::getFileOrSTDIN(Filename: InputFile);
1426 if (!BufOrError)
1427 exitWithErrorCode(EC: BufOrError.getError(), Whence: InputFile);
1428
1429 return std::move(*BufOrError);
1430}
1431
1432static void populateProfileSymbolList(MemoryBuffer *Buffer,
1433 sampleprof::ProfileSymbolList &PSL) {
1434 if (!Buffer)
1435 return;
1436
1437 SmallVector<StringRef, 32> SymbolVec;
1438 StringRef Data = Buffer->getBuffer();
1439 Data.split(A&: SymbolVec, Separator: '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1440
1441 for (StringRef SymbolStr : SymbolVec)
1442 PSL.add(Name: SymbolStr.trim());
1443}
1444
1445static void handleExtBinaryWriter(sampleprof::SampleProfileWriter &Writer,
1446 ProfileFormat OutputFormat,
1447 MemoryBuffer *Buffer,
1448 sampleprof::ProfileSymbolList &WriterList,
1449 bool CompressAllSections, bool UseMD5,
1450 bool GenPartialProfile) {
1451 populateProfileSymbolList(Buffer, PSL&: WriterList);
1452 if (WriterList.size() > 0 && OutputFormat != PF_Ext_Binary)
1453 warn(Message: "Profile Symbol list is not empty but the output format is not "
1454 "ExtBinary format. The list will be lost in the output. ");
1455
1456 Writer.setProfileSymbolList(&WriterList);
1457
1458 if (CompressAllSections) {
1459 if (OutputFormat != PF_Ext_Binary)
1460 warn(Message: "-compress-all-section is ignored. Specify -extbinary to enable it");
1461 else
1462 Writer.setToCompressAllSections();
1463 }
1464 if (UseMD5) {
1465 if (OutputFormat != PF_Ext_Binary)
1466 warn(Message: "-use-md5 is ignored. Specify -extbinary to enable it");
1467 else
1468 Writer.setUseMD5();
1469 }
1470 if (GenPartialProfile) {
1471 if (OutputFormat != PF_Ext_Binary)
1472 warn(Message: "-gen-partial-profile is ignored. Specify -extbinary to enable it");
1473 else
1474 Writer.setPartialProfile();
1475 }
1476}
1477
1478static void mergeSampleProfile(const WeightedFileVector &Inputs,
1479 SymbolRemapper *Remapper,
1480 StringRef ProfileSymbolListFile,
1481 size_t OutputSizeLimit) {
1482 using namespace sampleprof;
1483 SampleProfileMap ProfileMap;
1484 SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
1485 LLVMContext Context;
1486 sampleprof::ProfileSymbolList WriterList;
1487 std::optional<bool> ProfileIsProbeBased;
1488 std::optional<bool> ProfileIsCS;
1489 for (const auto &Input : Inputs) {
1490 auto FS = vfs::getRealFileSystem();
1491 auto ReaderOrErr = SampleProfileReader::create(Filename: Input.Filename, C&: Context, FS&: *FS,
1492 P: FSDiscriminatorPassOption);
1493 if (std::error_code EC = ReaderOrErr.getError()) {
1494 warnOrExitGivenError(FailMode, EC, Whence: Input.Filename);
1495 continue;
1496 }
1497
1498 // We need to keep the readers around until after all the files are
1499 // read so that we do not lose the function names stored in each
1500 // reader's memory. The function names are needed to write out the
1501 // merged profile map.
1502 Readers.push_back(Elt: std::move(ReaderOrErr.get()));
1503 const auto Reader = Readers.back().get();
1504 if (std::error_code EC = Reader->read()) {
1505 warnOrExitGivenError(FailMode, EC, Whence: Input.Filename);
1506 Readers.pop_back();
1507 continue;
1508 }
1509
1510 SampleProfileMap &Profiles = Reader->getProfiles();
1511 if (ProfileIsProbeBased &&
1512 ProfileIsProbeBased != FunctionSamples::ProfileIsProbeBased)
1513 exitWithError(
1514 Message: "cannot merge probe-based profile with non-probe-based profile");
1515 ProfileIsProbeBased = FunctionSamples::ProfileIsProbeBased;
1516 if (ProfileIsCS && ProfileIsCS != FunctionSamples::ProfileIsCS)
1517 exitWithError(Message: "cannot merge CS profile with non-CS profile");
1518 ProfileIsCS = FunctionSamples::ProfileIsCS;
1519 for (SampleProfileMap::iterator I = Profiles.begin(), E = Profiles.end();
1520 I != E; ++I) {
1521 sampleprof_error Result = sampleprof_error::success;
1522 FunctionSamples Remapped =
1523 Remapper ? remapSamples(Samples: I->second, Remapper&: *Remapper, Error&: Result)
1524 : FunctionSamples();
1525 FunctionSamples &Samples = Remapper ? Remapped : I->second;
1526 SampleContext FContext = Samples.getContext();
1527 MergeResult(Accumulator&: Result, Result: ProfileMap[FContext].merge(Other: Samples, Weight: Input.Weight));
1528 if (Result != sampleprof_error::success) {
1529 std::error_code EC = make_error_code(E: Result);
1530 handleMergeWriterError(E: errorCodeToError(EC), WhenceFile: Input.Filename,
1531 WhenceFunction: FContext.toString());
1532 }
1533 }
1534
1535 if (!DropProfileSymbolList) {
1536 std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
1537 Reader->getProfileSymbolList();
1538 if (ReaderList)
1539 WriterList.merge(List: *ReaderList);
1540 }
1541 }
1542
1543 if (ProfileIsCS && (SampleMergeColdContext || SampleTrimColdContext)) {
1544 // Use threshold calculated from profile summary unless specified.
1545 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1546 auto Summary = Builder.computeSummaryForProfiles(Profiles: ProfileMap);
1547 uint64_t SampleProfColdThreshold =
1548 ProfileSummaryBuilder::getColdCountThreshold(
1549 DS: (Summary->getDetailedSummary()));
1550
1551 // Trim and merge cold context profile using cold threshold above;
1552 SampleContextTrimmer(ProfileMap)
1553 .trimAndMergeColdContextProfiles(
1554 ColdCountThreshold: SampleProfColdThreshold, TrimColdContext: SampleTrimColdContext,
1555 MergeColdContext: SampleMergeColdContext, ColdContextFrameLength: SampleColdContextFrameDepth, TrimBaseProfileOnly: false);
1556 }
1557
1558 if (ProfileLayout == llvm::sampleprof::SPL_Flat) {
1559 ProfileConverter::flattenProfile(ProfileMap, ProfileIsCS: FunctionSamples::ProfileIsCS);
1560 ProfileIsCS = FunctionSamples::ProfileIsCS = false;
1561 } else if (ProfileIsCS && ProfileLayout == llvm::sampleprof::SPL_Nest) {
1562 ProfileConverter CSConverter(ProfileMap);
1563 CSConverter.convertCSProfiles();
1564 ProfileIsCS = FunctionSamples::ProfileIsCS = false;
1565 }
1566
1567 filterFunctions(ProfileMap);
1568
1569 auto WriterOrErr =
1570 SampleProfileWriter::create(Filename: OutputFilename, Format: FormatMap[OutputFormat]);
1571 if (std::error_code EC = WriterOrErr.getError())
1572 exitWithErrorCode(EC, Whence: OutputFilename);
1573
1574 auto Writer = std::move(WriterOrErr.get());
1575 // WriterList will have StringRef refering to string in Buffer.
1576 // Make sure Buffer lives as long as WriterList.
1577 auto Buffer = getInputFileBuf(InputFile: ProfileSymbolListFile);
1578 handleExtBinaryWriter(Writer&: *Writer, OutputFormat, Buffer: Buffer.get(), WriterList,
1579 CompressAllSections, UseMD5, GenPartialProfile);
1580
1581 // If OutputSizeLimit is 0 (default), it is the same as write().
1582 if (std::error_code EC =
1583 Writer->writeWithSizeLimit(ProfileMap, OutputSizeLimit))
1584 exitWithErrorCode(EC: std::move(EC));
1585}
1586
1587static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) {
1588 StringRef WeightStr, FileName;
1589 std::tie(args&: WeightStr, args&: FileName) = WeightedFilename.split(Separator: ',');
1590
1591 uint64_t Weight;
1592 if (WeightStr.getAsInteger(Radix: 10, Result&: Weight) || Weight < 1)
1593 exitWithError(Message: "input weight must be a positive integer");
1594
1595 return {.Filename: std::string(FileName), .Weight: Weight};
1596}
1597
1598static void addWeightedInput(WeightedFileVector &WNI, const WeightedFile &WF) {
1599 StringRef Filename = WF.Filename;
1600 uint64_t Weight = WF.Weight;
1601
1602 // If it's STDIN just pass it on.
1603 if (Filename == "-") {
1604 WNI.push_back(Elt: {.Filename: std::string(Filename), .Weight: Weight});
1605 return;
1606 }
1607
1608 llvm::sys::fs::file_status Status;
1609 llvm::sys::fs::status(path: Filename, result&: Status);
1610 if (!llvm::sys::fs::exists(status: Status))
1611 exitWithErrorCode(EC: make_error_code(E: errc::no_such_file_or_directory),
1612 Whence: Filename);
1613 // If it's a source file, collect it.
1614 if (llvm::sys::fs::is_regular_file(status: Status)) {
1615 WNI.push_back(Elt: {.Filename: std::string(Filename), .Weight: Weight});
1616 return;
1617 }
1618
1619 if (llvm::sys::fs::is_directory(status: Status)) {
1620 std::error_code EC;
1621 for (llvm::sys::fs::recursive_directory_iterator F(Filename, EC), E;
1622 F != E && !EC; F.increment(ec&: EC)) {
1623 if (llvm::sys::fs::is_regular_file(Path: F->path())) {
1624 addWeightedInput(WNI, WF: {.Filename: F->path(), .Weight: Weight});
1625 }
1626 }
1627 if (EC)
1628 exitWithErrorCode(EC, Whence: Filename);
1629 }
1630}
1631
1632static void parseInputFilenamesFile(MemoryBuffer *Buffer,
1633 WeightedFileVector &WFV) {
1634 if (!Buffer)
1635 return;
1636
1637 SmallVector<StringRef, 8> Entries;
1638 StringRef Data = Buffer->getBuffer();
1639 Data.split(A&: Entries, Separator: '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1640 for (const StringRef &FileWeightEntry : Entries) {
1641 StringRef SanitizedEntry = FileWeightEntry.trim(Chars: " \t\v\f\r");
1642 // Skip comments.
1643 if (SanitizedEntry.starts_with(Prefix: "#"))
1644 continue;
1645 // If there's no comma, it's an unweighted profile.
1646 else if (!SanitizedEntry.contains(C: ','))
1647 addWeightedInput(WNI&: WFV, WF: {.Filename: std::string(SanitizedEntry), .Weight: 1});
1648 else
1649 addWeightedInput(WNI&: WFV, WF: parseWeightedFile(WeightedFilename: SanitizedEntry));
1650 }
1651}
1652
1653static int merge_main(int argc, const char *argv[]) {
1654 WeightedFileVector WeightedInputs;
1655 for (StringRef Filename : InputFilenames)
1656 addWeightedInput(WNI&: WeightedInputs, WF: {.Filename: std::string(Filename), .Weight: 1});
1657 for (StringRef WeightedFilename : WeightedInputFilenames)
1658 addWeightedInput(WNI&: WeightedInputs, WF: parseWeightedFile(WeightedFilename));
1659
1660 // Make sure that the file buffer stays alive for the duration of the
1661 // weighted input vector's lifetime.
1662 auto Buffer = getInputFileBuf(InputFile: InputFilenamesFile);
1663 parseInputFilenamesFile(Buffer: Buffer.get(), WFV&: WeightedInputs);
1664
1665 if (WeightedInputs.empty())
1666 exitWithError(Message: "no input files specified. See " +
1667 sys::path::filename(path: argv[0]) + " " + argv[1] + " -help");
1668
1669 if (DumpInputFileList) {
1670 for (auto &WF : WeightedInputs)
1671 outs() << WF.Weight << "," << WF.Filename << "\n";
1672 return 0;
1673 }
1674
1675 std::unique_ptr<SymbolRemapper> Remapper;
1676 if (!RemappingFile.empty())
1677 Remapper = SymbolRemapper::create(InputFile: RemappingFile);
1678
1679 if (!SupplInstrWithSample.empty()) {
1680 if (ProfileKind != instr)
1681 exitWithError(
1682 Message: "-supplement-instr-with-sample can only work with -instr. ");
1683
1684 supplementInstrProfile(Inputs: WeightedInputs, SampleFilename: SupplInstrWithSample, OutputSparse,
1685 SupplMinSizeThreshold, ZeroCounterThreshold,
1686 InstrProfColdThreshold);
1687 return 0;
1688 }
1689
1690 if (ProfileKind == instr)
1691 mergeInstrProfile(Inputs: WeightedInputs, Remapper: Remapper.get(), MaxDbgCorrelationWarnings,
1692 ProfiledBinary);
1693 else
1694 mergeSampleProfile(Inputs: WeightedInputs, Remapper: Remapper.get(), ProfileSymbolListFile,
1695 OutputSizeLimit);
1696 return 0;
1697}
1698
1699/// Computer the overlap b/w profile BaseFilename and profile TestFilename.
1700static void overlapInstrProfile(const std::string &BaseFilename,
1701 const std::string &TestFilename,
1702 const OverlapFuncFilters &FuncFilter,
1703 raw_fd_ostream &OS, bool IsCS) {
1704 std::mutex ErrorLock;
1705 SmallSet<instrprof_error, 4> WriterErrorCodes;
1706 WriterContext Context(false, ErrorLock, WriterErrorCodes);
1707 WeightedFile WeightedInput{.Filename: BaseFilename, .Weight: 1};
1708 OverlapStats Overlap;
1709 Error E = Overlap.accumulateCounts(BaseFilename, TestFilename, IsCS);
1710 if (E)
1711 exitWithError(E: std::move(E), Whence: "error in getting profile count sums");
1712 if (Overlap.Base.CountSum < 1.0f) {
1713 OS << "Sum of edge counts for profile " << BaseFilename << " is 0.\n";
1714 exit(status: 0);
1715 }
1716 if (Overlap.Test.CountSum < 1.0f) {
1717 OS << "Sum of edge counts for profile " << TestFilename << " is 0.\n";
1718 exit(status: 0);
1719 }
1720 loadInput(Input: WeightedInput, Remapper: nullptr, Correlator: nullptr, /*ProfiledBinary=*/"", WC: &Context);
1721 overlapInput(BaseFilename, TestFilename, WC: &Context, Overlap, FuncFilter, OS,
1722 IsCS);
1723 Overlap.dump(OS);
1724}
1725
namespace {
/// Plain record of overlap statistics between a base and a test sample
/// profile. All counters start at zero.
struct SampleOverlapStats {
  // Contexts identifying the compared entity in the base and test profiles.
  SampleContext BaseName;
  SampleContext TestName;
  // Number of overlap units
  uint64_t OverlapCount = 0;
  // Total samples of overlap units
  uint64_t OverlapSample = 0;
  // Number of and total samples of units that are present only in the base or
  // only in the test profile
  uint64_t BaseUniqueCount = 0;
  uint64_t BaseUniqueSample = 0;
  uint64_t TestUniqueCount = 0;
  uint64_t TestUniqueSample = 0;
  // Number of units and total samples in base or test profile
  uint64_t BaseCount = 0;
  uint64_t BaseSample = 0;
  uint64_t TestCount = 0;
  uint64_t TestSample = 0;
  // Number of and total samples of units that are present in at least one
  // profile
  uint64_t UnionCount = 0;
  uint64_t UnionSample = 0;
  // Weighted similarity
  double Similarity = 0.0;
  // For SampleOverlapStats instances representing functions, weights of the
  // function in base and test profiles
  double BaseWeight = 0.0;
  double TestWeight = 0.0;

  SampleOverlapStats() = default;
};
} // end anonymous namespace
1758
namespace {
/// Per-function sample statistics: the sum of samples, the largest single
/// sample, and the number of hot blocks.
struct FuncSampleStats {
  uint64_t SampleSum = 0;
  uint64_t MaxSample = 0;
  uint64_t HotBlockCount = 0;

  FuncSampleStats() = default;

  /// Build a record from already-computed values.
  FuncSampleStats(uint64_t Sum, uint64_t Max, uint64_t HotBlocks)
      : SampleSum(Sum), MaxSample(Max), HotBlockCount(HotBlocks) {}
};
} // end anonymous namespace
1771
namespace {
/// Result of comparing the current elements of two sorted sequences.
enum MatchStatus { MS_Match, MS_FirstUnique, MS_SecondUnique, MS_None };

/// Cursor over two sorted maps that are being merged step by step. T is the
/// map iterator type; keys are compared through `->first`.
template <class T> class MatchStep {
public:
  MatchStep() = delete;

  MatchStep(T FirstBegin, T FirstLimit, T SecondBegin, T SecondLimit)
      : FirstIter(FirstBegin), FirstEnd(FirstLimit), SecondIter(SecondBegin),
        SecondEnd(SecondLimit) {}

  bool isFirstFinished() const { return FirstIter == FirstEnd; }

  bool isSecondFinished() const { return SecondIter == SecondEnd; }

  bool areBothFinished() const {
    return isFirstFinished() && isSecondFinished();
  }

  /// Advance one step based on the previous match status unless the previous
  /// status is MS_None. Then update Status based on the comparison between two
  /// container iterators at the current step. If the previous status is
  /// MS_None, it means two iterators are at the beginning and no comparison has
  /// been made, so we simply update Status without advancing the iterators.
  void updateOneStep();

  T getFirstIter() const { return FirstIter; }

  T getSecondIter() const { return SecondIter; }

  MatchStatus getMatchStatus() const { return Status; }

private:
  // Cursor into, and end of, the first container.
  T FirstIter;
  T FirstEnd;
  // Cursor into, and end of, the second container.
  T SecondIter;
  T SecondEnd;
  // Match status of the current step; MS_None until the first update.
  MatchStatus Status = MS_None;
};
} // end anonymous namespace
1817
1818template <class T> void MatchStep<T>::updateOneStep() {
1819 switch (Status) {
1820 case MS_Match:
1821 ++FirstIter;
1822 ++SecondIter;
1823 break;
1824 case MS_FirstUnique:
1825 ++FirstIter;
1826 break;
1827 case MS_SecondUnique:
1828 ++SecondIter;
1829 break;
1830 case MS_None:
1831 break;
1832 }
1833
1834 // Update Status according to iterators at the current step.
1835 if (areBothFinished())
1836 return;
1837 if (FirstIter != FirstEnd &&
1838 (SecondIter == SecondEnd || FirstIter->first < SecondIter->first))
1839 Status = MS_FirstUnique;
1840 else if (SecondIter != SecondEnd &&
1841 (FirstIter == FirstEnd || SecondIter->first < FirstIter->first))
1842 Status = MS_SecondUnique;
1843 else
1844 Status = MS_Match;
1845}
1846
1847// Return the sum of line/block samples, the max line/block sample, and the
1848// number of line/block samples above the given threshold in a function
1849// including its inlinees.
1850static void getFuncSampleStats(const sampleprof::FunctionSamples &Func,
1851 FuncSampleStats &FuncStats,
1852 uint64_t HotThreshold) {
1853 for (const auto &L : Func.getBodySamples()) {
1854 uint64_t Sample = L.second.getSamples();
1855 FuncStats.SampleSum += Sample;
1856 FuncStats.MaxSample = std::max(a: FuncStats.MaxSample, b: Sample);
1857 if (Sample >= HotThreshold)
1858 ++FuncStats.HotBlockCount;
1859 }
1860
1861 for (const auto &C : Func.getCallsiteSamples()) {
1862 for (const auto &F : C.second)
1863 getFuncSampleStats(Func: F.second, FuncStats, HotThreshold);
1864 }
1865}
1866
1867/// Predicate that determines if a function is hot with a given threshold. We
1868/// keep it separate from its callsites for possible extension in the future.
1869static bool isFunctionHot(const FuncSampleStats &FuncStats,
1870 uint64_t HotThreshold) {
1871 // We intentionally compare the maximum sample count in a function with the
1872 // HotThreshold to get an approximate determination on hot functions.
1873 return (FuncStats.MaxSample >= HotThreshold);
1874}
1875
1876namespace {
1877class SampleOverlapAggregator {
1878public:
1879 SampleOverlapAggregator(const std::string &BaseFilename,
1880 const std::string &TestFilename,
1881 double LowSimilarityThreshold, double Epsilon,
1882 const OverlapFuncFilters &FuncFilter)
1883 : BaseFilename(BaseFilename), TestFilename(TestFilename),
1884 LowSimilarityThreshold(LowSimilarityThreshold), Epsilon(Epsilon),
1885 FuncFilter(FuncFilter) {}
1886
1887 /// Detect 0-sample input profile and report to output stream. This interface
1888 /// should be called after loadProfiles().
1889 bool detectZeroSampleProfile(raw_fd_ostream &OS) const;
1890
1891 /// Write out function-level similarity statistics for functions specified by
1892 /// options --function, --value-cutoff, and --similarity-cutoff.
1893 void dumpFuncSimilarity(raw_fd_ostream &OS) const;
1894
1895 /// Write out program-level similarity and overlap statistics.
1896 void dumpProgramSummary(raw_fd_ostream &OS) const;
1897
1898 /// Write out hot-function and hot-block statistics for base_profile,
1899 /// test_profile, and their overlap. For both cases, the overlap HO is
1900 /// calculated as follows:
1901 /// Given the number of functions (or blocks) that are hot in both profiles
1902 /// HCommon and the number of functions (or blocks) that are hot in at
1903 /// least one profile HUnion, HO = HCommon / HUnion.
1904 void dumpHotFuncAndBlockOverlap(raw_fd_ostream &OS) const;
1905
1906 /// This function tries matching functions in base and test profiles. For each
1907 /// pair of matched functions, it aggregates the function-level
1908 /// similarity into a profile-level similarity. It also dump function-level
1909 /// similarity information of functions specified by --function,
1910 /// --value-cutoff, and --similarity-cutoff options. The program-level
1911 /// similarity PS is computed as follows:
1912 /// Given function-level similarity FS(A) for all function A, the
1913 /// weight of function A in base profile WB(A), and the weight of function
1914 /// A in test profile WT(A), compute PS(base_profile, test_profile) =
1915 /// sum_A(FS(A) * avg(WB(A), WT(A))) ranging in [0.0f to 1.0f] with 0.0
1916 /// meaning no-overlap.
1917 void computeSampleProfileOverlap(raw_fd_ostream &OS);
1918
1919 /// Initialize ProfOverlap with the sum of samples in base and test
1920 /// profiles. This function also computes and keeps the sum of samples and
1921 /// max sample counts of each function in BaseStats and TestStats for later
1922 /// use to avoid re-computations.
1923 void initializeSampleProfileOverlap();
1924
1925 /// Load profiles specified by BaseFilename and TestFilename.
1926 std::error_code loadProfiles();
1927
1928 using FuncSampleStatsMap =
1929 std::unordered_map<SampleContext, FuncSampleStats, SampleContext::Hash>;
1930
1931private:
1932 SampleOverlapStats ProfOverlap;
1933 SampleOverlapStats HotFuncOverlap;
1934 SampleOverlapStats HotBlockOverlap;
1935 std::string BaseFilename;
1936 std::string TestFilename;
1937 std::unique_ptr<sampleprof::SampleProfileReader> BaseReader;
1938 std::unique_ptr<sampleprof::SampleProfileReader> TestReader;
1939 // BaseStats and TestStats hold FuncSampleStats for each function, with
1940 // function name as the key.
1941 FuncSampleStatsMap BaseStats;
1942 FuncSampleStatsMap TestStats;
1943 // Low similarity threshold in floating point number
1944 double LowSimilarityThreshold;
1945 // Block samples above BaseHotThreshold or TestHotThreshold are considered hot
1946 // for tracking hot blocks.
1947 uint64_t BaseHotThreshold;
1948 uint64_t TestHotThreshold;
1949 // A small threshold used to round the results of floating point accumulations
1950 // to resolve imprecision.
1951 const double Epsilon;
1952 std::multimap<double, SampleOverlapStats, std::greater<double>>
1953 FuncSimilarityDump;
1954 // FuncFilter carries specifications in options --value-cutoff and
1955 // --function.
1956 OverlapFuncFilters FuncFilter;
1957 // Column offsets for printing the function-level details table.
1958 static const unsigned int TestWeightCol = 15;
1959 static const unsigned int SimilarityCol = 30;
1960 static const unsigned int OverlapCol = 43;
1961 static const unsigned int BaseUniqueCol = 53;
1962 static const unsigned int TestUniqueCol = 67;
1963 static const unsigned int BaseSampleCol = 81;
1964 static const unsigned int TestSampleCol = 96;
1965 static const unsigned int FuncNameCol = 111;
1966
1967 /// Return a similarity of two line/block sample counters in the same
1968 /// function in base and test profiles. The line/block-similarity BS(i) is
1969 /// computed as follows:
1970 /// For an offsets i, given the sample count at i in base profile BB(i),
1971 /// the sample count at i in test profile BT(i), the sum of sample counts
1972 /// in this function in base profile SB, and the sum of sample counts in
1973 /// this function in test profile ST, compute BS(i) = 1.0 - fabs(BB(i)/SB -
1974 /// BT(i)/ST), ranging in [0.0f to 1.0f] with 0.0 meaning no-overlap.
1975 double computeBlockSimilarity(uint64_t BaseSample, uint64_t TestSample,
1976 const SampleOverlapStats &FuncOverlap) const;
1977
1978 void updateHotBlockOverlap(uint64_t BaseSample, uint64_t TestSample,
1979 uint64_t HotBlockCount);
1980
1981 void getHotFunctions(const FuncSampleStatsMap &ProfStats,
1982 FuncSampleStatsMap &HotFunc,
1983 uint64_t HotThreshold) const;
1984
1985 void computeHotFuncOverlap();
1986
1987 /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
1988 /// Difference for two sample units in a matched function according to the
1989 /// given match status.
1990 void updateOverlapStatsForFunction(uint64_t BaseSample, uint64_t TestSample,
1991 uint64_t HotBlockCount,
1992 SampleOverlapStats &FuncOverlap,
1993 double &Difference, MatchStatus Status);
1994
1995 /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
1996 /// Difference for unmatched callees that only present in one profile in a
1997 /// matched caller function.
1998 void updateForUnmatchedCallee(const sampleprof::FunctionSamples &Func,
1999 SampleOverlapStats &FuncOverlap,
2000 double &Difference, MatchStatus Status);
2001
2002 /// This function updates sample overlap statistics of an overlap function in
2003 /// base and test profile. It also calculates a function-internal similarity
2004 /// FIS as follows:
2005 /// For offsets i that have samples in at least one profile in this
2006 /// function A, given BS(i) returned by computeBlockSimilarity(), compute
2007 /// FIS(A) = (2.0 - sum_i(1.0 - BS(i))) / 2, ranging in [0.0f to 1.0f] with
2008 /// 0.0 meaning no overlap.
2009 double computeSampleFunctionInternalOverlap(
2010 const sampleprof::FunctionSamples &BaseFunc,
2011 const sampleprof::FunctionSamples &TestFunc,
2012 SampleOverlapStats &FuncOverlap);
2013
2014 /// Function-level similarity (FS) is a weighted value over function internal
2015 /// similarity (FIS). This function computes a function's FS from its FIS by
2016 /// applying the weight.
2017 double weightForFuncSimilarity(double FuncSimilarity, uint64_t BaseFuncSample,
2018 uint64_t TestFuncSample) const;
2019
2020 /// The function-level similarity FS(A) for a function A is computed as
2021 /// follows:
2022 /// Compute a function-internal similarity FIS(A) by
2023 /// computeSampleFunctionInternalOverlap(). Then, with the weight of
2024 /// function A in base profile WB(A), and the weight of function A in test
2025 /// profile WT(A), compute FS(A) = FIS(A) * (1.0 - fabs(WB(A) - WT(A)))
2026 /// ranging in [0.0f to 1.0f] with 0.0 meaning no overlap.
2027 double
2028 computeSampleFunctionOverlap(const sampleprof::FunctionSamples *BaseFunc,
2029 const sampleprof::FunctionSamples *TestFunc,
2030 SampleOverlapStats *FuncOverlap,
2031 uint64_t BaseFuncSample,
2032 uint64_t TestFuncSample);
2033
2034 /// Profile-level similarity (PS) is a weighted aggregate over function-level
2035 /// similarities (FS). This method weights the FS value by the function
2036 /// weights in the base and test profiles for the aggregation.
2037 double weightByImportance(double FuncSimilarity, uint64_t BaseFuncSample,
2038 uint64_t TestFuncSample) const;
2039};
2040} // end anonymous namespace
2041
2042bool SampleOverlapAggregator::detectZeroSampleProfile(
2043 raw_fd_ostream &OS) const {
2044 bool HaveZeroSample = false;
2045 if (ProfOverlap.BaseSample == 0) {
2046 OS << "Sum of sample counts for profile " << BaseFilename << " is 0.\n";
2047 HaveZeroSample = true;
2048 }
2049 if (ProfOverlap.TestSample == 0) {
2050 OS << "Sum of sample counts for profile " << TestFilename << " is 0.\n";
2051 HaveZeroSample = true;
2052 }
2053 return HaveZeroSample;
2054}
2055
2056double SampleOverlapAggregator::computeBlockSimilarity(
2057 uint64_t BaseSample, uint64_t TestSample,
2058 const SampleOverlapStats &FuncOverlap) const {
2059 double BaseFrac = 0.0;
2060 double TestFrac = 0.0;
2061 if (FuncOverlap.BaseSample > 0)
2062 BaseFrac = static_cast<double>(BaseSample) / FuncOverlap.BaseSample;
2063 if (FuncOverlap.TestSample > 0)
2064 TestFrac = static_cast<double>(TestSample) / FuncOverlap.TestSample;
2065 return 1.0 - std::fabs(x: BaseFrac - TestFrac);
2066}
2067
2068void SampleOverlapAggregator::updateHotBlockOverlap(uint64_t BaseSample,
2069 uint64_t TestSample,
2070 uint64_t HotBlockCount) {
2071 bool IsBaseHot = (BaseSample >= BaseHotThreshold);
2072 bool IsTestHot = (TestSample >= TestHotThreshold);
2073 if (!IsBaseHot && !IsTestHot)
2074 return;
2075
2076 HotBlockOverlap.UnionCount += HotBlockCount;
2077 if (IsBaseHot)
2078 HotBlockOverlap.BaseCount += HotBlockCount;
2079 if (IsTestHot)
2080 HotBlockOverlap.TestCount += HotBlockCount;
2081 if (IsBaseHot && IsTestHot)
2082 HotBlockOverlap.OverlapCount += HotBlockCount;
2083}
2084
2085void SampleOverlapAggregator::getHotFunctions(
2086 const FuncSampleStatsMap &ProfStats, FuncSampleStatsMap &HotFunc,
2087 uint64_t HotThreshold) const {
2088 for (const auto &F : ProfStats) {
2089 if (isFunctionHot(FuncStats: F.second, HotThreshold))
2090 HotFunc.emplace(args: F.first, args: F.second);
2091 }
2092}
2093
2094void SampleOverlapAggregator::computeHotFuncOverlap() {
2095 FuncSampleStatsMap BaseHotFunc;
2096 getHotFunctions(ProfStats: BaseStats, HotFunc&: BaseHotFunc, HotThreshold: BaseHotThreshold);
2097 HotFuncOverlap.BaseCount = BaseHotFunc.size();
2098
2099 FuncSampleStatsMap TestHotFunc;
2100 getHotFunctions(ProfStats: TestStats, HotFunc&: TestHotFunc, HotThreshold: TestHotThreshold);
2101 HotFuncOverlap.TestCount = TestHotFunc.size();
2102 HotFuncOverlap.UnionCount = HotFuncOverlap.TestCount;
2103
2104 for (const auto &F : BaseHotFunc) {
2105 if (TestHotFunc.count(x: F.first))
2106 ++HotFuncOverlap.OverlapCount;
2107 else
2108 ++HotFuncOverlap.UnionCount;
2109 }
2110}
2111
2112void SampleOverlapAggregator::updateOverlapStatsForFunction(
2113 uint64_t BaseSample, uint64_t TestSample, uint64_t HotBlockCount,
2114 SampleOverlapStats &FuncOverlap, double &Difference, MatchStatus Status) {
2115 assert(Status != MS_None &&
2116 "Match status should be updated before updating overlap statistics");
2117 if (Status == MS_FirstUnique) {
2118 TestSample = 0;
2119 FuncOverlap.BaseUniqueSample += BaseSample;
2120 } else if (Status == MS_SecondUnique) {
2121 BaseSample = 0;
2122 FuncOverlap.TestUniqueSample += TestSample;
2123 } else {
2124 ++FuncOverlap.OverlapCount;
2125 }
2126
2127 FuncOverlap.UnionSample += std::max(a: BaseSample, b: TestSample);
2128 FuncOverlap.OverlapSample += std::min(a: BaseSample, b: TestSample);
2129 Difference +=
2130 1.0 - computeBlockSimilarity(BaseSample, TestSample, FuncOverlap);
2131 updateHotBlockOverlap(BaseSample, TestSample, HotBlockCount);
2132}
2133
2134void SampleOverlapAggregator::updateForUnmatchedCallee(
2135 const sampleprof::FunctionSamples &Func, SampleOverlapStats &FuncOverlap,
2136 double &Difference, MatchStatus Status) {
2137 assert((Status == MS_FirstUnique || Status == MS_SecondUnique) &&
2138 "Status must be either of the two unmatched cases");
2139 FuncSampleStats FuncStats;
2140 if (Status == MS_FirstUnique) {
2141 getFuncSampleStats(Func, FuncStats, HotThreshold: BaseHotThreshold);
2142 updateOverlapStatsForFunction(BaseSample: FuncStats.SampleSum, TestSample: 0,
2143 HotBlockCount: FuncStats.HotBlockCount, FuncOverlap,
2144 Difference, Status);
2145 } else {
2146 getFuncSampleStats(Func, FuncStats, HotThreshold: TestHotThreshold);
2147 updateOverlapStatsForFunction(BaseSample: 0, TestSample: FuncStats.SampleSum,
2148 HotBlockCount: FuncStats.HotBlockCount, FuncOverlap,
2149 Difference, Status);
2150 }
2151}
2152
2153double SampleOverlapAggregator::computeSampleFunctionInternalOverlap(
2154 const sampleprof::FunctionSamples &BaseFunc,
2155 const sampleprof::FunctionSamples &TestFunc,
2156 SampleOverlapStats &FuncOverlap) {
2157
2158 using namespace sampleprof;
2159
2160 double Difference = 0;
2161
2162 // Accumulate Difference for regular line/block samples in the function.
2163 // We match them through sort-merge join algorithm because
2164 // FunctionSamples::getBodySamples() returns a map of sample counters ordered
2165 // by their offsets.
2166 MatchStep<BodySampleMap::const_iterator> BlockIterStep(
2167 BaseFunc.getBodySamples().cbegin(), BaseFunc.getBodySamples().cend(),
2168 TestFunc.getBodySamples().cbegin(), TestFunc.getBodySamples().cend());
2169 BlockIterStep.updateOneStep();
2170 while (!BlockIterStep.areBothFinished()) {
2171 uint64_t BaseSample =
2172 BlockIterStep.isFirstFinished()
2173 ? 0
2174 : BlockIterStep.getFirstIter()->second.getSamples();
2175 uint64_t TestSample =
2176 BlockIterStep.isSecondFinished()
2177 ? 0
2178 : BlockIterStep.getSecondIter()->second.getSamples();
2179 updateOverlapStatsForFunction(BaseSample, TestSample, HotBlockCount: 1, FuncOverlap,
2180 Difference, Status: BlockIterStep.getMatchStatus());
2181
2182 BlockIterStep.updateOneStep();
2183 }
2184
2185 // Accumulate Difference for callsite lines in the function. We match
2186 // them through sort-merge algorithm because
2187 // FunctionSamples::getCallsiteSamples() returns a map of callsite records
2188 // ordered by their offsets.
2189 MatchStep<CallsiteSampleMap::const_iterator> CallsiteIterStep(
2190 BaseFunc.getCallsiteSamples().cbegin(),
2191 BaseFunc.getCallsiteSamples().cend(),
2192 TestFunc.getCallsiteSamples().cbegin(),
2193 TestFunc.getCallsiteSamples().cend());
2194 CallsiteIterStep.updateOneStep();
2195 while (!CallsiteIterStep.areBothFinished()) {
2196 MatchStatus CallsiteStepStatus = CallsiteIterStep.getMatchStatus();
2197 assert(CallsiteStepStatus != MS_None &&
2198 "Match status should be updated before entering loop body");
2199
2200 if (CallsiteStepStatus != MS_Match) {
2201 auto Callsite = (CallsiteStepStatus == MS_FirstUnique)
2202 ? CallsiteIterStep.getFirstIter()
2203 : CallsiteIterStep.getSecondIter();
2204 for (const auto &F : Callsite->second)
2205 updateForUnmatchedCallee(Func: F.second, FuncOverlap, Difference,
2206 Status: CallsiteStepStatus);
2207 } else {
2208 // There may be multiple inlinees at the same offset, so we need to try
2209 // matching all of them. This match is implemented through sort-merge
2210 // algorithm because callsite records at the same offset are ordered by
2211 // function names.
2212 MatchStep<FunctionSamplesMap::const_iterator> CalleeIterStep(
2213 CallsiteIterStep.getFirstIter()->second.cbegin(),
2214 CallsiteIterStep.getFirstIter()->second.cend(),
2215 CallsiteIterStep.getSecondIter()->second.cbegin(),
2216 CallsiteIterStep.getSecondIter()->second.cend());
2217 CalleeIterStep.updateOneStep();
2218 while (!CalleeIterStep.areBothFinished()) {
2219 MatchStatus CalleeStepStatus = CalleeIterStep.getMatchStatus();
2220 if (CalleeStepStatus != MS_Match) {
2221 auto Callee = (CalleeStepStatus == MS_FirstUnique)
2222 ? CalleeIterStep.getFirstIter()
2223 : CalleeIterStep.getSecondIter();
2224 updateForUnmatchedCallee(Func: Callee->second, FuncOverlap, Difference,
2225 Status: CalleeStepStatus);
2226 } else {
2227 // An inlined function can contain other inlinees inside, so compute
2228 // the Difference recursively.
2229 Difference += 2.0 - 2 * computeSampleFunctionInternalOverlap(
2230 BaseFunc: CalleeIterStep.getFirstIter()->second,
2231 TestFunc: CalleeIterStep.getSecondIter()->second,
2232 FuncOverlap);
2233 }
2234 CalleeIterStep.updateOneStep();
2235 }
2236 }
2237 CallsiteIterStep.updateOneStep();
2238 }
2239
2240 // Difference reflects the total differences of line/block samples in this
2241 // function and ranges in [0.0f to 2.0f]. Take (2.0 - Difference) / 2 to
2242 // reflect the similarity between function profiles in [0.0f to 1.0f].
2243 return (2.0 - Difference) / 2;
2244}
2245
2246double SampleOverlapAggregator::weightForFuncSimilarity(
2247 double FuncInternalSimilarity, uint64_t BaseFuncSample,
2248 uint64_t TestFuncSample) const {
2249 // Compute the weight as the distance between the function weights in two
2250 // profiles.
2251 double BaseFrac = 0.0;
2252 double TestFrac = 0.0;
2253 assert(ProfOverlap.BaseSample > 0 &&
2254 "Total samples in base profile should be greater than 0");
2255 BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample;
2256 assert(ProfOverlap.TestSample > 0 &&
2257 "Total samples in test profile should be greater than 0");
2258 TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample;
2259 double WeightDistance = std::fabs(x: BaseFrac - TestFrac);
2260
2261 // Take WeightDistance into the similarity.
2262 return FuncInternalSimilarity * (1 - WeightDistance);
2263}
2264
2265double
2266SampleOverlapAggregator::weightByImportance(double FuncSimilarity,
2267 uint64_t BaseFuncSample,
2268 uint64_t TestFuncSample) const {
2269
2270 double BaseFrac = 0.0;
2271 double TestFrac = 0.0;
2272 assert(ProfOverlap.BaseSample > 0 &&
2273 "Total samples in base profile should be greater than 0");
2274 BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample / 2.0;
2275 assert(ProfOverlap.TestSample > 0 &&
2276 "Total samples in test profile should be greater than 0");
2277 TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample / 2.0;
2278 return FuncSimilarity * (BaseFrac + TestFrac);
2279}
2280
2281double SampleOverlapAggregator::computeSampleFunctionOverlap(
2282 const sampleprof::FunctionSamples *BaseFunc,
2283 const sampleprof::FunctionSamples *TestFunc,
2284 SampleOverlapStats *FuncOverlap, uint64_t BaseFuncSample,
2285 uint64_t TestFuncSample) {
2286 // Default function internal similarity before weighted, meaning two functions
2287 // has no overlap.
2288 const double DefaultFuncInternalSimilarity = 0;
2289 double FuncSimilarity;
2290 double FuncInternalSimilarity;
2291
2292 // If BaseFunc or TestFunc is nullptr, it means the functions do not overlap.
2293 // In this case, we use DefaultFuncInternalSimilarity as the function internal
2294 // similarity.
2295 if (!BaseFunc || !TestFunc) {
2296 FuncInternalSimilarity = DefaultFuncInternalSimilarity;
2297 } else {
2298 assert(FuncOverlap != nullptr &&
2299 "FuncOverlap should be provided in this case");
2300 FuncInternalSimilarity = computeSampleFunctionInternalOverlap(
2301 BaseFunc: *BaseFunc, TestFunc: *TestFunc, FuncOverlap&: *FuncOverlap);
2302 // Now, FuncInternalSimilarity may be a little less than 0 due to
2303 // imprecision of floating point accumulations. Make it zero if the
2304 // difference is below Epsilon.
2305 FuncInternalSimilarity = (std::fabs(x: FuncInternalSimilarity - 0) < Epsilon)
2306 ? 0
2307 : FuncInternalSimilarity;
2308 }
2309 FuncSimilarity = weightForFuncSimilarity(FuncInternalSimilarity,
2310 BaseFuncSample, TestFuncSample);
2311 return FuncSimilarity;
2312}
2313
2314void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) {
2315 using namespace sampleprof;
2316
2317 std::unordered_map<SampleContext, const FunctionSamples *,
2318 SampleContext::Hash>
2319 BaseFuncProf;
2320 const auto &BaseProfiles = BaseReader->getProfiles();
2321 for (const auto &BaseFunc : BaseProfiles) {
2322 BaseFuncProf.emplace(args&: BaseFunc.second.getContext(), args: &(BaseFunc.second));
2323 }
2324 ProfOverlap.UnionCount = BaseFuncProf.size();
2325
2326 const auto &TestProfiles = TestReader->getProfiles();
2327 for (const auto &TestFunc : TestProfiles) {
2328 SampleOverlapStats FuncOverlap;
2329 FuncOverlap.TestName = TestFunc.second.getContext();
2330 assert(TestStats.count(FuncOverlap.TestName) &&
2331 "TestStats should have records for all functions in test profile "
2332 "except inlinees");
2333 FuncOverlap.TestSample = TestStats[FuncOverlap.TestName].SampleSum;
2334
2335 bool Matched = false;
2336 const auto Match = BaseFuncProf.find(x: FuncOverlap.TestName);
2337 if (Match == BaseFuncProf.end()) {
2338 const FuncSampleStats &FuncStats = TestStats[FuncOverlap.TestName];
2339 ++ProfOverlap.TestUniqueCount;
2340 ProfOverlap.TestUniqueSample += FuncStats.SampleSum;
2341 FuncOverlap.TestUniqueSample = FuncStats.SampleSum;
2342
2343 updateHotBlockOverlap(BaseSample: 0, TestSample: FuncStats.SampleSum, HotBlockCount: FuncStats.HotBlockCount);
2344
2345 double FuncSimilarity = computeSampleFunctionOverlap(
2346 BaseFunc: nullptr, TestFunc: nullptr, FuncOverlap: nullptr, BaseFuncSample: 0, TestFuncSample: FuncStats.SampleSum);
2347 ProfOverlap.Similarity +=
2348 weightByImportance(FuncSimilarity, BaseFuncSample: 0, TestFuncSample: FuncStats.SampleSum);
2349
2350 ++ProfOverlap.UnionCount;
2351 ProfOverlap.UnionSample += FuncStats.SampleSum;
2352 } else {
2353 ++ProfOverlap.OverlapCount;
2354
2355 // Two functions match with each other. Compute function-level overlap and
2356 // aggregate them into profile-level overlap.
2357 FuncOverlap.BaseName = Match->second->getContext();
2358 assert(BaseStats.count(FuncOverlap.BaseName) &&
2359 "BaseStats should have records for all functions in base profile "
2360 "except inlinees");
2361 FuncOverlap.BaseSample = BaseStats[FuncOverlap.BaseName].SampleSum;
2362
2363 FuncOverlap.Similarity = computeSampleFunctionOverlap(
2364 BaseFunc: Match->second, TestFunc: &TestFunc.second, FuncOverlap: &FuncOverlap, BaseFuncSample: FuncOverlap.BaseSample,
2365 TestFuncSample: FuncOverlap.TestSample);
2366 ProfOverlap.Similarity +=
2367 weightByImportance(FuncSimilarity: FuncOverlap.Similarity, BaseFuncSample: FuncOverlap.BaseSample,
2368 TestFuncSample: FuncOverlap.TestSample);
2369 ProfOverlap.OverlapSample += FuncOverlap.OverlapSample;
2370 ProfOverlap.UnionSample += FuncOverlap.UnionSample;
2371
2372 // Accumulate the percentage of base unique and test unique samples into
2373 // ProfOverlap.
2374 ProfOverlap.BaseUniqueSample += FuncOverlap.BaseUniqueSample;
2375 ProfOverlap.TestUniqueSample += FuncOverlap.TestUniqueSample;
2376
2377 // Remove matched base functions for later reporting functions not found
2378 // in test profile.
2379 BaseFuncProf.erase(position: Match);
2380 Matched = true;
2381 }
2382
2383 // Print function-level similarity information if specified by options.
2384 assert(TestStats.count(FuncOverlap.TestName) &&
2385 "TestStats should have records for all functions in test profile "
2386 "except inlinees");
2387 if (TestStats[FuncOverlap.TestName].MaxSample >= FuncFilter.ValueCutoff ||
2388 (Matched && FuncOverlap.Similarity < LowSimilarityThreshold) ||
2389 (Matched && !FuncFilter.NameFilter.empty() &&
2390 FuncOverlap.BaseName.toString().find(str: FuncFilter.NameFilter) !=
2391 std::string::npos)) {
2392 assert(ProfOverlap.BaseSample > 0 &&
2393 "Total samples in base profile should be greater than 0");
2394 FuncOverlap.BaseWeight =
2395 static_cast<double>(FuncOverlap.BaseSample) / ProfOverlap.BaseSample;
2396 assert(ProfOverlap.TestSample > 0 &&
2397 "Total samples in test profile should be greater than 0");
2398 FuncOverlap.TestWeight =
2399 static_cast<double>(FuncOverlap.TestSample) / ProfOverlap.TestSample;
2400 FuncSimilarityDump.emplace(args&: FuncOverlap.BaseWeight, args&: FuncOverlap);
2401 }
2402 }
2403
2404 // Traverse through functions in base profile but not in test profile.
2405 for (const auto &F : BaseFuncProf) {
2406 assert(BaseStats.count(F.second->getContext()) &&
2407 "BaseStats should have records for all functions in base profile "
2408 "except inlinees");
2409 const FuncSampleStats &FuncStats = BaseStats[F.second->getContext()];
2410 ++ProfOverlap.BaseUniqueCount;
2411 ProfOverlap.BaseUniqueSample += FuncStats.SampleSum;
2412
2413 updateHotBlockOverlap(BaseSample: FuncStats.SampleSum, TestSample: 0, HotBlockCount: FuncStats.HotBlockCount);
2414
2415 double FuncSimilarity = computeSampleFunctionOverlap(
2416 BaseFunc: nullptr, TestFunc: nullptr, FuncOverlap: nullptr, BaseFuncSample: FuncStats.SampleSum, TestFuncSample: 0);
2417 ProfOverlap.Similarity +=
2418 weightByImportance(FuncSimilarity, BaseFuncSample: FuncStats.SampleSum, TestFuncSample: 0);
2419
2420 ProfOverlap.UnionSample += FuncStats.SampleSum;
2421 }
2422
2423 // Now, ProfSimilarity may be a little greater than 1 due to imprecision
2424 // of floating point accumulations. Make it 1.0 if the difference is below
2425 // Epsilon.
2426 ProfOverlap.Similarity = (std::fabs(x: ProfOverlap.Similarity - 1) < Epsilon)
2427 ? 1
2428 : ProfOverlap.Similarity;
2429
2430 computeHotFuncOverlap();
2431}
2432
2433void SampleOverlapAggregator::initializeSampleProfileOverlap() {
2434 const auto &BaseProf = BaseReader->getProfiles();
2435 for (const auto &I : BaseProf) {
2436 ++ProfOverlap.BaseCount;
2437 FuncSampleStats FuncStats;
2438 getFuncSampleStats(Func: I.second, FuncStats, HotThreshold: BaseHotThreshold);
2439 ProfOverlap.BaseSample += FuncStats.SampleSum;
2440 BaseStats.emplace(args&: I.second.getContext(), args&: FuncStats);
2441 }
2442
2443 const auto &TestProf = TestReader->getProfiles();
2444 for (const auto &I : TestProf) {
2445 ++ProfOverlap.TestCount;
2446 FuncSampleStats FuncStats;
2447 getFuncSampleStats(Func: I.second, FuncStats, HotThreshold: TestHotThreshold);
2448 ProfOverlap.TestSample += FuncStats.SampleSum;
2449 TestStats.emplace(args&: I.second.getContext(), args&: FuncStats);
2450 }
2451
2452 ProfOverlap.BaseName = StringRef(BaseFilename);
2453 ProfOverlap.TestName = StringRef(TestFilename);
2454}
2455
2456void SampleOverlapAggregator::dumpFuncSimilarity(raw_fd_ostream &OS) const {
2457 using namespace sampleprof;
2458
2459 if (FuncSimilarityDump.empty())
2460 return;
2461
2462 formatted_raw_ostream FOS(OS);
2463 FOS << "Function-level details:\n";
2464 FOS << "Base weight";
2465 FOS.PadToColumn(NewCol: TestWeightCol);
2466 FOS << "Test weight";
2467 FOS.PadToColumn(NewCol: SimilarityCol);
2468 FOS << "Similarity";
2469 FOS.PadToColumn(NewCol: OverlapCol);
2470 FOS << "Overlap";
2471 FOS.PadToColumn(NewCol: BaseUniqueCol);
2472 FOS << "Base unique";
2473 FOS.PadToColumn(NewCol: TestUniqueCol);
2474 FOS << "Test unique";
2475 FOS.PadToColumn(NewCol: BaseSampleCol);
2476 FOS << "Base samples";
2477 FOS.PadToColumn(NewCol: TestSampleCol);
2478 FOS << "Test samples";
2479 FOS.PadToColumn(NewCol: FuncNameCol);
2480 FOS << "Function name\n";
2481 for (const auto &F : FuncSimilarityDump) {
2482 double OverlapPercent =
2483 F.second.UnionSample > 0
2484 ? static_cast<double>(F.second.OverlapSample) / F.second.UnionSample
2485 : 0;
2486 double BaseUniquePercent =
2487 F.second.BaseSample > 0
2488 ? static_cast<double>(F.second.BaseUniqueSample) /
2489 F.second.BaseSample
2490 : 0;
2491 double TestUniquePercent =
2492 F.second.TestSample > 0
2493 ? static_cast<double>(F.second.TestUniqueSample) /
2494 F.second.TestSample
2495 : 0;
2496
2497 FOS << format(Fmt: "%.2f%%", Vals: F.second.BaseWeight * 100);
2498 FOS.PadToColumn(NewCol: TestWeightCol);
2499 FOS << format(Fmt: "%.2f%%", Vals: F.second.TestWeight * 100);
2500 FOS.PadToColumn(NewCol: SimilarityCol);
2501 FOS << format(Fmt: "%.2f%%", Vals: F.second.Similarity * 100);
2502 FOS.PadToColumn(NewCol: OverlapCol);
2503 FOS << format(Fmt: "%.2f%%", Vals: OverlapPercent * 100);
2504 FOS.PadToColumn(NewCol: BaseUniqueCol);
2505 FOS << format(Fmt: "%.2f%%", Vals: BaseUniquePercent * 100);
2506 FOS.PadToColumn(NewCol: TestUniqueCol);
2507 FOS << format(Fmt: "%.2f%%", Vals: TestUniquePercent * 100);
2508 FOS.PadToColumn(NewCol: BaseSampleCol);
2509 FOS << F.second.BaseSample;
2510 FOS.PadToColumn(NewCol: TestSampleCol);
2511 FOS << F.second.TestSample;
2512 FOS.PadToColumn(NewCol: FuncNameCol);
2513 FOS << F.second.TestName.toString() << "\n";
2514 }
2515}
2516
2517void SampleOverlapAggregator::dumpProgramSummary(raw_fd_ostream &OS) const {
2518 OS << "Profile overlap infomation for base_profile: "
2519 << ProfOverlap.BaseName.toString()
2520 << " and test_profile: " << ProfOverlap.TestName.toString()
2521 << "\nProgram level:\n";
2522
2523 OS << " Whole program profile similarity: "
2524 << format(Fmt: "%.3f%%", Vals: ProfOverlap.Similarity * 100) << "\n";
2525
2526 assert(ProfOverlap.UnionSample > 0 &&
2527 "Total samples in two profile should be greater than 0");
2528 double OverlapPercent =
2529 static_cast<double>(ProfOverlap.OverlapSample) / ProfOverlap.UnionSample;
2530 assert(ProfOverlap.BaseSample > 0 &&
2531 "Total samples in base profile should be greater than 0");
2532 double BaseUniquePercent = static_cast<double>(ProfOverlap.BaseUniqueSample) /
2533 ProfOverlap.BaseSample;
2534 assert(ProfOverlap.TestSample > 0 &&
2535 "Total samples in test profile should be greater than 0");
2536 double TestUniquePercent = static_cast<double>(ProfOverlap.TestUniqueSample) /
2537 ProfOverlap.TestSample;
2538
2539 OS << " Whole program sample overlap: "
2540 << format(Fmt: "%.3f%%", Vals: OverlapPercent * 100) << "\n";
2541 OS << " percentage of samples unique in base profile: "
2542 << format(Fmt: "%.3f%%", Vals: BaseUniquePercent * 100) << "\n";
2543 OS << " percentage of samples unique in test profile: "
2544 << format(Fmt: "%.3f%%", Vals: TestUniquePercent * 100) << "\n";
2545 OS << " total samples in base profile: " << ProfOverlap.BaseSample << "\n"
2546 << " total samples in test profile: " << ProfOverlap.TestSample << "\n";
2547
2548 assert(ProfOverlap.UnionCount > 0 &&
2549 "There should be at least one function in two input profiles");
2550 double FuncOverlapPercent =
2551 static_cast<double>(ProfOverlap.OverlapCount) / ProfOverlap.UnionCount;
2552 OS << " Function overlap: " << format(Fmt: "%.3f%%", Vals: FuncOverlapPercent * 100)
2553 << "\n";
2554 OS << " overlap functions: " << ProfOverlap.OverlapCount << "\n";
2555 OS << " functions unique in base profile: " << ProfOverlap.BaseUniqueCount
2556 << "\n";
2557 OS << " functions unique in test profile: " << ProfOverlap.TestUniqueCount
2558 << "\n";
2559}
2560
2561void SampleOverlapAggregator::dumpHotFuncAndBlockOverlap(
2562 raw_fd_ostream &OS) const {
2563 assert(HotFuncOverlap.UnionCount > 0 &&
2564 "There should be at least one hot function in two input profiles");
2565 OS << " Hot-function overlap: "
2566 << format(Fmt: "%.3f%%", Vals: static_cast<double>(HotFuncOverlap.OverlapCount) /
2567 HotFuncOverlap.UnionCount * 100)
2568 << "\n";
2569 OS << " overlap hot functions: " << HotFuncOverlap.OverlapCount << "\n";
2570 OS << " hot functions unique in base profile: "
2571 << HotFuncOverlap.BaseCount - HotFuncOverlap.OverlapCount << "\n";
2572 OS << " hot functions unique in test profile: "
2573 << HotFuncOverlap.TestCount - HotFuncOverlap.OverlapCount << "\n";
2574
2575 assert(HotBlockOverlap.UnionCount > 0 &&
2576 "There should be at least one hot block in two input profiles");
2577 OS << " Hot-block overlap: "
2578 << format(Fmt: "%.3f%%", Vals: static_cast<double>(HotBlockOverlap.OverlapCount) /
2579 HotBlockOverlap.UnionCount * 100)
2580 << "\n";
2581 OS << " overlap hot blocks: " << HotBlockOverlap.OverlapCount << "\n";
2582 OS << " hot blocks unique in base profile: "
2583 << HotBlockOverlap.BaseCount - HotBlockOverlap.OverlapCount << "\n";
2584 OS << " hot blocks unique in test profile: "
2585 << HotBlockOverlap.TestCount - HotBlockOverlap.OverlapCount << "\n";
2586}
2587
2588std::error_code SampleOverlapAggregator::loadProfiles() {
2589 using namespace sampleprof;
2590
2591 LLVMContext Context;
2592 auto FS = vfs::getRealFileSystem();
2593 auto BaseReaderOrErr = SampleProfileReader::create(Filename: BaseFilename, C&: Context, FS&: *FS,
2594 P: FSDiscriminatorPassOption);
2595 if (std::error_code EC = BaseReaderOrErr.getError())
2596 exitWithErrorCode(EC, Whence: BaseFilename);
2597
2598 auto TestReaderOrErr = SampleProfileReader::create(Filename: TestFilename, C&: Context, FS&: *FS,
2599 P: FSDiscriminatorPassOption);
2600 if (std::error_code EC = TestReaderOrErr.getError())
2601 exitWithErrorCode(EC, Whence: TestFilename);
2602
2603 BaseReader = std::move(BaseReaderOrErr.get());
2604 TestReader = std::move(TestReaderOrErr.get());
2605
2606 if (std::error_code EC = BaseReader->read())
2607 exitWithErrorCode(EC, Whence: BaseFilename);
2608 if (std::error_code EC = TestReader->read())
2609 exitWithErrorCode(EC, Whence: TestFilename);
2610 if (BaseReader->profileIsProbeBased() != TestReader->profileIsProbeBased())
2611 exitWithError(
2612 Message: "cannot compare probe-based profile with non-probe-based profile");
2613 if (BaseReader->profileIsCS() != TestReader->profileIsCS())
2614 exitWithError(Message: "cannot compare CS profile with non-CS profile");
2615
2616 // Load BaseHotThreshold and TestHotThreshold as 99-percentile threshold in
2617 // profile summary.
2618 ProfileSummary &BasePS = BaseReader->getSummary();
2619 ProfileSummary &TestPS = TestReader->getSummary();
2620 BaseHotThreshold =
2621 ProfileSummaryBuilder::getHotCountThreshold(DS: BasePS.getDetailedSummary());
2622 TestHotThreshold =
2623 ProfileSummaryBuilder::getHotCountThreshold(DS: TestPS.getDetailedSummary());
2624
2625 return std::error_code();
2626}
2627
2628void overlapSampleProfile(const std::string &BaseFilename,
2629 const std::string &TestFilename,
2630 const OverlapFuncFilters &FuncFilter,
2631 uint64_t SimilarityCutoff, raw_fd_ostream &OS) {
2632 using namespace sampleprof;
2633
2634 // We use 0.000005 to initialize OverlapAggr.Epsilon because the final metrics
2635 // report 2--3 places after decimal point in percentage numbers.
2636 SampleOverlapAggregator OverlapAggr(
2637 BaseFilename, TestFilename,
2638 static_cast<double>(SimilarityCutoff) / 1000000, 0.000005, FuncFilter);
2639 if (std::error_code EC = OverlapAggr.loadProfiles())
2640 exitWithErrorCode(EC);
2641
2642 OverlapAggr.initializeSampleProfileOverlap();
2643 if (OverlapAggr.detectZeroSampleProfile(OS))
2644 return;
2645
2646 OverlapAggr.computeSampleProfileOverlap(OS);
2647
2648 OverlapAggr.dumpProgramSummary(OS);
2649 OverlapAggr.dumpHotFuncAndBlockOverlap(OS);
2650 OverlapAggr.dumpFuncSimilarity(OS);
2651}
2652
2653static int overlap_main(int argc, const char *argv[]) {
2654 std::error_code EC;
2655 raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
2656 if (EC)
2657 exitWithErrorCode(EC, Whence: OutputFilename);
2658
2659 if (ProfileKind == instr)
2660 overlapInstrProfile(BaseFilename, TestFilename,
2661 FuncFilter: OverlapFuncFilters{.ValueCutoff: OverlapValueCutoff, .NameFilter: FuncNameFilter},
2662 OS, IsCS);
2663 else
2664 overlapSampleProfile(BaseFilename, TestFilename,
2665 FuncFilter: OverlapFuncFilters{.ValueCutoff: OverlapValueCutoff, .NameFilter: FuncNameFilter},
2666 SimilarityCutoff, OS);
2667
2668 return 0;
2669}
2670
namespace {
/// Running totals describing the value sites of one value-profile kind.
struct ValueSitesStats {
  ValueSitesStats() = default;

  /// Number of value sites seen.
  uint64_t TotalNumValueSites{0};
  /// Number of value sites that carry at least one profiled value.
  uint64_t TotalNumValueSitesWithValueProfile{0};
  /// Number of profiled values over all sites.
  uint64_t TotalNumValues{0};
  /// Histogram over sites: entry N - 1 counts the sites that have exactly N
  /// profiled values.
  std::vector<unsigned> ValueSitesHistogram;
};
} // namespace
2680
2681static void traverseAllValueSites(const InstrProfRecord &Func, uint32_t VK,
2682 ValueSitesStats &Stats, raw_fd_ostream &OS,
2683 InstrProfSymtab *Symtab) {
2684 uint32_t NS = Func.getNumValueSites(ValueKind: VK);
2685 Stats.TotalNumValueSites += NS;
2686 for (size_t I = 0; I < NS; ++I) {
2687 uint32_t NV = Func.getNumValueDataForSite(ValueKind: VK, Site: I);
2688 std::unique_ptr<InstrProfValueData[]> VD = Func.getValueForSite(ValueKind: VK, Site: I);
2689 Stats.TotalNumValues += NV;
2690 if (NV) {
2691 Stats.TotalNumValueSitesWithValueProfile++;
2692 if (NV > Stats.ValueSitesHistogram.size())
2693 Stats.ValueSitesHistogram.resize(new_size: NV, x: 0);
2694 Stats.ValueSitesHistogram[NV - 1]++;
2695 }
2696
2697 uint64_t SiteSum = 0;
2698 for (uint32_t V = 0; V < NV; V++)
2699 SiteSum += VD[V].Count;
2700 if (SiteSum == 0)
2701 SiteSum = 1;
2702
2703 for (uint32_t V = 0; V < NV; V++) {
2704 OS << "\t[ " << format(Fmt: "%2u", Vals: I) << ", ";
2705 if (Symtab == nullptr)
2706 OS << format(Fmt: "%4" PRIu64, Vals: VD[V].Value);
2707 else
2708 OS << Symtab->getFuncOrVarName(MD5Hash: VD[V].Value);
2709 OS << ", " << format(Fmt: "%10" PRId64, Vals: VD[V].Count) << " ] ("
2710 << format(Fmt: "%.2f%%", Vals: (VD[V].Count * 100.0 / SiteSum)) << ")\n";
2711 }
2712 }
2713}
2714
2715static void showValueSitesStats(raw_fd_ostream &OS, uint32_t VK,
2716 ValueSitesStats &Stats) {
2717 OS << " Total number of sites: " << Stats.TotalNumValueSites << "\n";
2718 OS << " Total number of sites with values: "
2719 << Stats.TotalNumValueSitesWithValueProfile << "\n";
2720 OS << " Total number of profiled values: " << Stats.TotalNumValues << "\n";
2721
2722 OS << " Value sites histogram:\n\tNumTargets, SiteCount\n";
2723 for (unsigned I = 0; I < Stats.ValueSitesHistogram.size(); I++) {
2724 if (Stats.ValueSitesHistogram[I] > 0)
2725 OS << "\t" << I + 1 << ", " << Stats.ValueSitesHistogram[I] << "\n";
2726 }
2727}
2728
2729static int showInstrProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
2730 if (SFormat == ShowFormat::Json)
2731 exitWithError(Message: "JSON output is not supported for instr profiles");
2732 if (SFormat == ShowFormat::Yaml)
2733 exitWithError(Message: "YAML output is not supported for instr profiles");
2734 auto FS = vfs::getRealFileSystem();
2735 auto ReaderOrErr = InstrProfReader::create(Path: Filename, FS&: *FS);
2736 std::vector<uint32_t> Cutoffs = std::move(DetailedSummaryCutoffs);
2737 if (ShowDetailedSummary && Cutoffs.empty()) {
2738 Cutoffs = ProfileSummaryBuilder::DefaultCutoffs;
2739 }
2740 InstrProfSummaryBuilder Builder(std::move(Cutoffs));
2741 if (Error E = ReaderOrErr.takeError())
2742 exitWithError(E: std::move(E), Whence: Filename);
2743
2744 auto Reader = std::move(ReaderOrErr.get());
2745 bool IsIRInstr = Reader->isIRLevelProfile();
2746 size_t ShownFunctions = 0;
2747 size_t BelowCutoffFunctions = 0;
2748 int NumVPKind = IPVK_Last - IPVK_First + 1;
2749 std::vector<ValueSitesStats> VPStats(NumVPKind);
2750
2751 auto MinCmp = [](const std::pair<std::string, uint64_t> &v1,
2752 const std::pair<std::string, uint64_t> &v2) {
2753 return v1.second > v2.second;
2754 };
2755
2756 std::priority_queue<std::pair<std::string, uint64_t>,
2757 std::vector<std::pair<std::string, uint64_t>>,
2758 decltype(MinCmp)>
2759 HottestFuncs(MinCmp);
2760
2761 if (!TextFormat && OnlyListBelow) {
2762 OS << "The list of functions with the maximum counter less than "
2763 << ShowValueCutoff << ":\n";
2764 }
2765
2766 // Add marker so that IR-level instrumentation round-trips properly.
2767 if (TextFormat && IsIRInstr)
2768 OS << ":ir\n";
2769
2770 for (const auto &Func : *Reader) {
2771 if (Reader->isIRLevelProfile()) {
2772 bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(FuncHash: Func.Hash);
2773 if (FuncIsCS != ShowCS)
2774 continue;
2775 }
2776 bool Show = ShowAllFunctions ||
2777 (!FuncNameFilter.empty() && Func.Name.contains(Other: FuncNameFilter));
2778
2779 bool doTextFormatDump = (Show && TextFormat);
2780
2781 if (doTextFormatDump) {
2782 InstrProfSymtab &Symtab = Reader->getSymtab();
2783 InstrProfWriter::writeRecordInText(Name: Func.Name, Hash: Func.Hash, Counters: Func, Symtab,
2784 OS);
2785 continue;
2786 }
2787
2788 assert(Func.Counts.size() > 0 && "function missing entry counter");
2789 Builder.addRecord(Func);
2790
2791 if (ShowCovered) {
2792 if (llvm::any_of(Range: Func.Counts, P: [](uint64_t C) { return C; }))
2793 OS << Func.Name << "\n";
2794 continue;
2795 }
2796
2797 uint64_t FuncMax = 0;
2798 uint64_t FuncSum = 0;
2799
2800 auto PseudoKind = Func.getCountPseudoKind();
2801 if (PseudoKind != InstrProfRecord::NotPseudo) {
2802 if (Show) {
2803 if (!ShownFunctions)
2804 OS << "Counters:\n";
2805 ++ShownFunctions;
2806 OS << " " << Func.Name << ":\n"
2807 << " Hash: " << format(Fmt: "0x%016" PRIx64, Vals: Func.Hash) << "\n"
2808 << " Counters: " << Func.Counts.size();
2809 if (PseudoKind == InstrProfRecord::PseudoHot)
2810 OS << " <PseudoHot>\n";
2811 else if (PseudoKind == InstrProfRecord::PseudoWarm)
2812 OS << " <PseudoWarm>\n";
2813 else
2814 llvm_unreachable("Unknown PseudoKind");
2815 }
2816 continue;
2817 }
2818
2819 for (size_t I = 0, E = Func.Counts.size(); I < E; ++I) {
2820 FuncMax = std::max(a: FuncMax, b: Func.Counts[I]);
2821 FuncSum += Func.Counts[I];
2822 }
2823
2824 if (FuncMax < ShowValueCutoff) {
2825 ++BelowCutoffFunctions;
2826 if (OnlyListBelow) {
2827 OS << " " << Func.Name << ": (Max = " << FuncMax
2828 << " Sum = " << FuncSum << ")\n";
2829 }
2830 continue;
2831 } else if (OnlyListBelow)
2832 continue;
2833
2834 if (TopNFunctions) {
2835 if (HottestFuncs.size() == TopNFunctions) {
2836 if (HottestFuncs.top().second < FuncMax) {
2837 HottestFuncs.pop();
2838 HottestFuncs.emplace(args: std::make_pair(x: std::string(Func.Name), y&: FuncMax));
2839 }
2840 } else
2841 HottestFuncs.emplace(args: std::make_pair(x: std::string(Func.Name), y&: FuncMax));
2842 }
2843
2844 if (Show) {
2845 if (!ShownFunctions)
2846 OS << "Counters:\n";
2847
2848 ++ShownFunctions;
2849
2850 OS << " " << Func.Name << ":\n"
2851 << " Hash: " << format(Fmt: "0x%016" PRIx64, Vals: Func.Hash) << "\n"
2852 << " Counters: " << Func.Counts.size() << "\n";
2853 if (!IsIRInstr)
2854 OS << " Function count: " << Func.Counts[0] << "\n";
2855
2856 if (ShowIndirectCallTargets)
2857 OS << " Indirect Call Site Count: "
2858 << Func.getNumValueSites(ValueKind: IPVK_IndirectCallTarget) << "\n";
2859
2860 if (ShowVTables)
2861 OS << " Number of instrumented vtables: "
2862 << Func.getNumValueSites(ValueKind: IPVK_VTableTarget) << "\n";
2863
2864 uint32_t NumMemOPCalls = Func.getNumValueSites(ValueKind: IPVK_MemOPSize);
2865 if (ShowMemOPSizes && NumMemOPCalls > 0)
2866 OS << " Number of Memory Intrinsics Calls: " << NumMemOPCalls
2867 << "\n";
2868
2869 if (ShowCounts) {
2870 OS << " Block counts: [";
2871 size_t Start = (IsIRInstr ? 0 : 1);
2872 for (size_t I = Start, E = Func.Counts.size(); I < E; ++I) {
2873 OS << (I == Start ? "" : ", ") << Func.Counts[I];
2874 }
2875 OS << "]\n";
2876 }
2877
2878 if (ShowIndirectCallTargets) {
2879 OS << " Indirect Target Results:\n";
2880 traverseAllValueSites(Func, VK: IPVK_IndirectCallTarget,
2881 Stats&: VPStats[IPVK_IndirectCallTarget], OS,
2882 Symtab: &(Reader->getSymtab()));
2883 }
2884
2885 if (ShowVTables) {
2886 OS << " VTable Results:\n";
2887 traverseAllValueSites(Func, VK: IPVK_VTableTarget,
2888 Stats&: VPStats[IPVK_VTableTarget], OS,
2889 Symtab: &(Reader->getSymtab()));
2890 }
2891
2892 if (ShowMemOPSizes && NumMemOPCalls > 0) {
2893 OS << " Memory Intrinsic Size Results:\n";
2894 traverseAllValueSites(Func, VK: IPVK_MemOPSize, Stats&: VPStats[IPVK_MemOPSize], OS,
2895 Symtab: nullptr);
2896 }
2897 }
2898 }
2899 if (Reader->hasError())
2900 exitWithError(E: Reader->getError(), Whence: Filename);
2901
2902 if (TextFormat || ShowCovered)
2903 return 0;
2904 std::unique_ptr<ProfileSummary> PS(Builder.getSummary());
2905 bool IsIR = Reader->isIRLevelProfile();
2906 OS << "Instrumentation level: " << (IsIR ? "IR" : "Front-end");
2907 if (IsIR)
2908 OS << " entry_first = " << Reader->instrEntryBBEnabled();
2909 OS << "\n";
2910 if (ShowAllFunctions || !FuncNameFilter.empty())
2911 OS << "Functions shown: " << ShownFunctions << "\n";
2912 OS << "Total functions: " << PS->getNumFunctions() << "\n";
2913 if (ShowValueCutoff > 0) {
2914 OS << "Number of functions with maximum count (< " << ShowValueCutoff
2915 << "): " << BelowCutoffFunctions << "\n";
2916 OS << "Number of functions with maximum count (>= " << ShowValueCutoff
2917 << "): " << PS->getNumFunctions() - BelowCutoffFunctions << "\n";
2918 }
2919 OS << "Maximum function count: " << PS->getMaxFunctionCount() << "\n";
2920 OS << "Maximum internal block count: " << PS->getMaxInternalCount() << "\n";
2921
2922 if (TopNFunctions) {
2923 std::vector<std::pair<std::string, uint64_t>> SortedHottestFuncs;
2924 while (!HottestFuncs.empty()) {
2925 SortedHottestFuncs.emplace_back(args: HottestFuncs.top());
2926 HottestFuncs.pop();
2927 }
2928 OS << "Top " << TopNFunctions
2929 << " functions with the largest internal block counts: \n";
2930 for (auto &hotfunc : llvm::reverse(C&: SortedHottestFuncs))
2931 OS << " " << hotfunc.first << ", max count = " << hotfunc.second << "\n";
2932 }
2933
2934 if (ShownFunctions && ShowIndirectCallTargets) {
2935 OS << "Statistics for indirect call sites profile:\n";
2936 showValueSitesStats(OS, VK: IPVK_IndirectCallTarget,
2937 Stats&: VPStats[IPVK_IndirectCallTarget]);
2938 }
2939
2940 if (ShownFunctions && ShowVTables) {
2941 OS << "Statistics for vtable profile:\n";
2942 showValueSitesStats(OS, VK: IPVK_VTableTarget, Stats&: VPStats[IPVK_VTableTarget]);
2943 }
2944
2945 if (ShownFunctions && ShowMemOPSizes) {
2946 OS << "Statistics for memory intrinsic calls sizes profile:\n";
2947 showValueSitesStats(OS, VK: IPVK_MemOPSize, Stats&: VPStats[IPVK_MemOPSize]);
2948 }
2949
2950 if (ShowDetailedSummary) {
2951 OS << "Total number of blocks: " << PS->getNumCounts() << "\n";
2952 OS << "Total count: " << PS->getTotalCount() << "\n";
2953 PS->printDetailedSummary(OS);
2954 }
2955
2956 if (ShowBinaryIds)
2957 if (Error E = Reader->printBinaryIds(OS))
2958 exitWithError(E: std::move(E), Whence: Filename);
2959
2960 if (ShowProfileVersion)
2961 OS << "Profile version: " << Reader->getVersion() << "\n";
2962
2963 if (ShowTemporalProfTraces) {
2964 auto &Traces = Reader->getTemporalProfTraces();
2965 OS << "Temporal Profile Traces (samples=" << Traces.size()
2966 << " seen=" << Reader->getTemporalProfTraceStreamSize() << "):\n";
2967 for (unsigned i = 0; i < Traces.size(); i++) {
2968 OS << " Temporal Profile Trace " << i << " (weight=" << Traces[i].Weight
2969 << " count=" << Traces[i].FunctionNameRefs.size() << "):\n";
2970 for (auto &NameRef : Traces[i].FunctionNameRefs)
2971 OS << " " << Reader->getSymtab().getFuncOrVarName(MD5Hash: NameRef) << "\n";
2972 }
2973 }
2974
2975 return 0;
2976}
2977
2978static void showSectionInfo(sampleprof::SampleProfileReader *Reader,
2979 raw_fd_ostream &OS) {
2980 if (!Reader->dumpSectionInfo(OS)) {
2981 WithColor::warning() << "-show-sec-info-only is only supported for "
2982 << "sample profile in extbinary format and is "
2983 << "ignored for other formats.\n";
2984 return;
2985 }
2986}
2987
2988namespace {
2989struct HotFuncInfo {
2990 std::string FuncName;
2991 uint64_t TotalCount = 0;
2992 double TotalCountPercent = 0.0f;
2993 uint64_t MaxCount = 0;
2994 uint64_t EntryCount = 0;
2995
2996 HotFuncInfo() = default;
2997
2998 HotFuncInfo(StringRef FN, uint64_t TS, double TSP, uint64_t MS, uint64_t ES)
2999 : FuncName(FN.begin(), FN.end()), TotalCount(TS), TotalCountPercent(TSP),
3000 MaxCount(MS), EntryCount(ES) {}
3001};
3002} // namespace
3003
3004// Print out detailed information about hot functions in PrintValues vector.
3005// Users specify titles and offset of every columns through ColumnTitle and
3006// ColumnOffset. The size of ColumnTitle and ColumnOffset need to be the same
3007// and at least 4. Besides, users can optionally give a HotFuncMetric string to
3008// print out or let it be an empty string.
3009static void dumpHotFunctionList(const std::vector<std::string> &ColumnTitle,
3010 const std::vector<int> &ColumnOffset,
3011 const std::vector<HotFuncInfo> &PrintValues,
3012 uint64_t HotFuncCount, uint64_t TotalFuncCount,
3013 uint64_t HotProfCount, uint64_t TotalProfCount,
3014 const std::string &HotFuncMetric,
3015 uint32_t TopNFunctions, raw_fd_ostream &OS) {
3016 assert(ColumnOffset.size() == ColumnTitle.size() &&
3017 "ColumnOffset and ColumnTitle should have the same size");
3018 assert(ColumnTitle.size() >= 4 &&
3019 "ColumnTitle should have at least 4 elements");
3020 assert(TotalFuncCount > 0 &&
3021 "There should be at least one function in the profile");
3022 double TotalProfPercent = 0;
3023 if (TotalProfCount > 0)
3024 TotalProfPercent = static_cast<double>(HotProfCount) / TotalProfCount * 100;
3025
3026 formatted_raw_ostream FOS(OS);
3027 FOS << HotFuncCount << " out of " << TotalFuncCount
3028 << " functions with profile ("
3029 << format(Fmt: "%.2f%%",
3030 Vals: (static_cast<double>(HotFuncCount) / TotalFuncCount * 100))
3031 << ") are considered hot functions";
3032 if (!HotFuncMetric.empty())
3033 FOS << " (" << HotFuncMetric << ")";
3034 FOS << ".\n";
3035 FOS << HotProfCount << " out of " << TotalProfCount << " profile counts ("
3036 << format(Fmt: "%.2f%%", Vals: TotalProfPercent) << ") are from hot functions.\n";
3037
3038 for (size_t I = 0; I < ColumnTitle.size(); ++I) {
3039 FOS.PadToColumn(NewCol: ColumnOffset[I]);
3040 FOS << ColumnTitle[I];
3041 }
3042 FOS << "\n";
3043
3044 uint32_t Count = 0;
3045 for (const auto &R : PrintValues) {
3046 if (TopNFunctions && (Count++ == TopNFunctions))
3047 break;
3048 FOS.PadToColumn(NewCol: ColumnOffset[0]);
3049 FOS << R.TotalCount << " (" << format(Fmt: "%.2f%%", Vals: R.TotalCountPercent) << ")";
3050 FOS.PadToColumn(NewCol: ColumnOffset[1]);
3051 FOS << R.MaxCount;
3052 FOS.PadToColumn(NewCol: ColumnOffset[2]);
3053 FOS << R.EntryCount;
3054 FOS.PadToColumn(NewCol: ColumnOffset[3]);
3055 FOS << R.FuncName << "\n";
3056 }
3057}
3058
3059static int showHotFunctionList(const sampleprof::SampleProfileMap &Profiles,
3060 ProfileSummary &PS, uint32_t TopN,
3061 raw_fd_ostream &OS) {
3062 using namespace sampleprof;
3063
3064 const uint32_t HotFuncCutoff = 990000;
3065 auto &SummaryVector = PS.getDetailedSummary();
3066 uint64_t MinCountThreshold = 0;
3067 for (const ProfileSummaryEntry &SummaryEntry : SummaryVector) {
3068 if (SummaryEntry.Cutoff == HotFuncCutoff) {
3069 MinCountThreshold = SummaryEntry.MinCount;
3070 break;
3071 }
3072 }
3073
3074 // Traverse all functions in the profile and keep only hot functions.
3075 // The following loop also calculates the sum of total samples of all
3076 // functions.
3077 std::multimap<uint64_t, std::pair<const FunctionSamples *, const uint64_t>,
3078 std::greater<uint64_t>>
3079 HotFunc;
3080 uint64_t ProfileTotalSample = 0;
3081 uint64_t HotFuncSample = 0;
3082 uint64_t HotFuncCount = 0;
3083
3084 for (const auto &I : Profiles) {
3085 FuncSampleStats FuncStats;
3086 const FunctionSamples &FuncProf = I.second;
3087 ProfileTotalSample += FuncProf.getTotalSamples();
3088 getFuncSampleStats(Func: FuncProf, FuncStats, HotThreshold: MinCountThreshold);
3089
3090 if (isFunctionHot(FuncStats, HotThreshold: MinCountThreshold)) {
3091 HotFunc.emplace(args: FuncProf.getTotalSamples(),
3092 args: std::make_pair(x: &(I.second), y&: FuncStats.MaxSample));
3093 HotFuncSample += FuncProf.getTotalSamples();
3094 ++HotFuncCount;
3095 }
3096 }
3097
3098 std::vector<std::string> ColumnTitle{"Total sample (%)", "Max sample",
3099 "Entry sample", "Function name"};
3100 std::vector<int> ColumnOffset{0, 24, 42, 58};
3101 std::string Metric =
3102 std::string("max sample >= ") + std::to_string(val: MinCountThreshold);
3103 std::vector<HotFuncInfo> PrintValues;
3104 for (const auto &FuncPair : HotFunc) {
3105 const FunctionSamples &Func = *FuncPair.second.first;
3106 double TotalSamplePercent =
3107 (ProfileTotalSample > 0)
3108 ? (Func.getTotalSamples() * 100.0) / ProfileTotalSample
3109 : 0;
3110 PrintValues.emplace_back(
3111 args: HotFuncInfo(Func.getContext().toString(), Func.getTotalSamples(),
3112 TotalSamplePercent, FuncPair.second.second,
3113 Func.getHeadSamplesEstimate()));
3114 }
3115 dumpHotFunctionList(ColumnTitle, ColumnOffset, PrintValues, HotFuncCount,
3116 TotalFuncCount: Profiles.size(), HotProfCount: HotFuncSample, TotalProfCount: ProfileTotalSample,
3117 HotFuncMetric: Metric, TopNFunctions: TopN, OS);
3118
3119 return 0;
3120}
3121
3122static int showSampleProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
3123 if (SFormat == ShowFormat::Yaml)
3124 exitWithError(Message: "YAML output is not supported for sample profiles");
3125 using namespace sampleprof;
3126 LLVMContext Context;
3127 auto FS = vfs::getRealFileSystem();
3128 auto ReaderOrErr = SampleProfileReader::create(Filename, C&: Context, FS&: *FS,
3129 P: FSDiscriminatorPassOption);
3130 if (std::error_code EC = ReaderOrErr.getError())
3131 exitWithErrorCode(EC, Whence: Filename);
3132
3133 auto Reader = std::move(ReaderOrErr.get());
3134 if (ShowSectionInfoOnly) {
3135 showSectionInfo(Reader: Reader.get(), OS);
3136 return 0;
3137 }
3138
3139 if (std::error_code EC = Reader->read())
3140 exitWithErrorCode(EC, Whence: Filename);
3141
3142 if (ShowAllFunctions || FuncNameFilter.empty()) {
3143 if (SFormat == ShowFormat::Json)
3144 Reader->dumpJson(OS);
3145 else
3146 Reader->dump(OS);
3147 } else {
3148 if (SFormat == ShowFormat::Json)
3149 exitWithError(
3150 Message: "the JSON format is supported only when all functions are to "
3151 "be printed");
3152
3153 // TODO: parse context string to support filtering by contexts.
3154 FunctionSamples *FS = Reader->getSamplesFor(Fname: StringRef(FuncNameFilter));
3155 Reader->dumpFunctionProfile(FS: FS ? *FS : FunctionSamples(), OS);
3156 }
3157
3158 if (ShowProfileSymbolList) {
3159 std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
3160 Reader->getProfileSymbolList();
3161 ReaderList->dump(OS);
3162 }
3163
3164 if (ShowDetailedSummary) {
3165 auto &PS = Reader->getSummary();
3166 PS.printSummary(OS);
3167 PS.printDetailedSummary(OS);
3168 }
3169
3170 if (ShowHotFuncList || TopNFunctions)
3171 showHotFunctionList(Profiles: Reader->getProfiles(), PS&: Reader->getSummary(),
3172 TopN: TopNFunctions, OS);
3173
3174 return 0;
3175}
3176
3177static int showMemProfProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
3178 if (SFormat == ShowFormat::Json)
3179 exitWithError(Message: "JSON output is not supported for MemProf");
3180 auto ReaderOr = llvm::memprof::RawMemProfReader::create(
3181 Path: Filename, ProfiledBinary, /*KeepNames=*/KeepName: true);
3182 if (Error E = ReaderOr.takeError())
3183 // Since the error can be related to the profile or the binary we do not
3184 // pass whence. Instead additional context is provided where necessary in
3185 // the error message.
3186 exitWithError(E: std::move(E), /*Whence*/ "");
3187
3188 std::unique_ptr<llvm::memprof::RawMemProfReader> Reader(
3189 ReaderOr.get().release());
3190
3191 Reader->printYAML(OS);
3192 return 0;
3193}
3194
3195static int showDebugInfoCorrelation(const std::string &Filename,
3196 ShowFormat SFormat, raw_fd_ostream &OS) {
3197 if (SFormat == ShowFormat::Json)
3198 exitWithError(Message: "JSON output is not supported for debug info correlation");
3199 std::unique_ptr<InstrProfCorrelator> Correlator;
3200 if (auto Err =
3201 InstrProfCorrelator::get(Filename, FileKind: InstrProfCorrelator::DEBUG_INFO)
3202 .moveInto(Value&: Correlator))
3203 exitWithError(E: std::move(Err), Whence: Filename);
3204 if (SFormat == ShowFormat::Yaml) {
3205 if (auto Err = Correlator->dumpYaml(MaxWarnings: MaxDbgCorrelationWarnings, OS))
3206 exitWithError(E: std::move(Err), Whence: Filename);
3207 return 0;
3208 }
3209
3210 if (auto Err = Correlator->correlateProfileData(MaxWarnings: MaxDbgCorrelationWarnings))
3211 exitWithError(E: std::move(Err), Whence: Filename);
3212
3213 InstrProfSymtab Symtab;
3214 if (auto Err = Symtab.create(
3215 NameStrings: StringRef(Correlator->getNamesPointer(), Correlator->getNamesSize())))
3216 exitWithError(E: std::move(Err), Whence: Filename);
3217
3218 if (ShowProfileSymbolList)
3219 Symtab.dumpNames(OS);
3220 // TODO: Read "Profile Data Type" from debug info to compute and show how many
3221 // counters the section holds.
3222 if (ShowDetailedSummary)
3223 OS << "Counters section size: 0x"
3224 << Twine::utohexstr(Val: Correlator->getCountersSectionSize()) << " bytes\n";
3225 OS << "Found " << Correlator->getDataSize() << " functions\n";
3226
3227 return 0;
3228}
3229
3230static int show_main(int argc, const char *argv[]) {
3231 if (Filename.empty() && DebugInfoFilename.empty())
3232 exitWithError(
3233 Message: "the positional argument '<profdata-file>' is required unless '--" +
3234 DebugInfoFilename.ArgStr + "' is provided");
3235
3236 if (Filename == OutputFilename) {
3237 errs() << sys::path::filename(path: argv[0]) << " " << argv[1]
3238 << ": Input file name cannot be the same as the output file name!\n";
3239 return 1;
3240 }
3241 if (JsonFormat)
3242 SFormat = ShowFormat::Json;
3243
3244 std::error_code EC;
3245 raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
3246 if (EC)
3247 exitWithErrorCode(EC, Whence: OutputFilename);
3248
3249 if (ShowAllFunctions && !FuncNameFilter.empty())
3250 WithColor::warning() << "-function argument ignored: showing all functions\n";
3251
3252 if (!DebugInfoFilename.empty())
3253 return showDebugInfoCorrelation(Filename: DebugInfoFilename, SFormat, OS);
3254
3255 if (ShowProfileKind == instr)
3256 return showInstrProfile(SFormat, OS);
3257 if (ShowProfileKind == sample)
3258 return showSampleProfile(SFormat, OS);
3259 return showMemProfProfile(SFormat, OS);
3260}
3261
3262static int order_main(int argc, const char *argv[]) {
3263 std::error_code EC;
3264 raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
3265 if (EC)
3266 exitWithErrorCode(EC, Whence: OutputFilename);
3267 auto FS = vfs::getRealFileSystem();
3268 auto ReaderOrErr = InstrProfReader::create(Path: Filename, FS&: *FS);
3269 if (Error E = ReaderOrErr.takeError())
3270 exitWithError(E: std::move(E), Whence: Filename);
3271
3272 auto Reader = std::move(ReaderOrErr.get());
3273 for (auto &I : *Reader) {
3274 // Read all entries
3275 (void)I;
3276 }
3277 auto &Traces = Reader->getTemporalProfTraces();
3278 auto Nodes = TemporalProfTraceTy::createBPFunctionNodes(Traces);
3279 BalancedPartitioningConfig Config;
3280 BalancedPartitioning BP(Config);
3281 BP.run(Nodes);
3282
3283 OS << "# Ordered " << Nodes.size() << " functions\n";
3284 OS << "# Warning: Mach-O may prefix symbols with \"_\" depending on the "
3285 "linkage and this output does not take that into account. Some "
3286 "post-processing may be required before passing to the linker via "
3287 "-order_file.\n";
3288 for (auto &N : Nodes) {
3289 auto [Filename, ParsedFuncName] =
3290 getParsedIRPGOName(IRPGOName: Reader->getSymtab().getFuncOrVarName(MD5Hash: N.Id));
3291 if (!Filename.empty())
3292 OS << "# " << Filename << "\n";
3293 OS << ParsedFuncName << "\n";
3294 }
3295 return 0;
3296}
3297
3298int llvm_profdata_main(int argc, char **argvNonConst,
3299 const llvm::ToolContext &) {
3300 const char **argv = const_cast<const char **>(argvNonConst);
3301
3302 StringRef ProgName(sys::path::filename(path: argv[0]));
3303
3304 if (argc < 2) {
3305 errs() << ProgName
3306 << ": No subcommand specified! Run llvm-profata --help for usage.\n";
3307 return 1;
3308 }
3309
3310 cl::ParseCommandLineOptions(argc, argv, Overview: "LLVM profile data\n");
3311
3312 if (ShowSubcommand)
3313 return show_main(argc, argv);
3314
3315 if (OrderSubcommand)
3316 return order_main(argc, argv);
3317
3318 if (OverlapSubcommand)
3319 return overlap_main(argc, argv);
3320
3321 if (MergeSubcommand)
3322 return merge_main(argc, argv);
3323
3324 errs() << ProgName
3325 << ": Unknown command. Run llvm-profdata --help for usage.\n";
3326 return 1;
3327}
3328

source code of llvm/tools/llvm-profdata/llvm-profdata.cpp