1//===- InstrProfReader.cpp - Instrumented profiling reader ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains support for reading profiling data for clang's
10// instrumentation based PGO and coverage.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ProfileData/InstrProfReader.h"
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/DenseMap.h"
17#include "llvm/ADT/StringExtras.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/IR/ProfileSummary.h"
20#include "llvm/ProfileData/InstrProf.h"
21#include "llvm/ProfileData/MemProf.h"
22#include "llvm/ProfileData/ProfileCommon.h"
23#include "llvm/ProfileData/SymbolRemappingReader.h"
24#include "llvm/Support/Endian.h"
25#include "llvm/Support/Error.h"
26#include "llvm/Support/ErrorOr.h"
27#include "llvm/Support/MemoryBuffer.h"
28#include "llvm/Support/SwapByteOrder.h"
29#include "llvm/Support/VirtualFileSystem.h"
30#include <algorithm>
31#include <cstddef>
32#include <cstdint>
33#include <limits>
34#include <memory>
35#include <system_error>
36#include <utility>
37#include <vector>
38
39using namespace llvm;
40
41// Extracts the variant information from the top 32 bits in the version and
42// returns an enum specifying the variants present.
43static InstrProfKind getProfileKindFromVersion(uint64_t Version) {
44 InstrProfKind ProfileKind = InstrProfKind::Unknown;
45 if (Version & VARIANT_MASK_IR_PROF) {
46 ProfileKind |= InstrProfKind::IRInstrumentation;
47 }
48 if (Version & VARIANT_MASK_CSIR_PROF) {
49 ProfileKind |= InstrProfKind::ContextSensitive;
50 }
51 if (Version & VARIANT_MASK_INSTR_ENTRY) {
52 ProfileKind |= InstrProfKind::FunctionEntryInstrumentation;
53 }
54 if (Version & VARIANT_MASK_BYTE_COVERAGE) {
55 ProfileKind |= InstrProfKind::SingleByteCoverage;
56 }
57 if (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) {
58 ProfileKind |= InstrProfKind::FunctionEntryOnly;
59 }
60 if (Version & VARIANT_MASK_MEMPROF) {
61 ProfileKind |= InstrProfKind::MemProf;
62 }
63 if (Version & VARIANT_MASK_TEMPORAL_PROF) {
64 ProfileKind |= InstrProfKind::TemporalProfile;
65 }
66 return ProfileKind;
67}
68
69static Expected<std::unique_ptr<MemoryBuffer>>
70setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
71 auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN()
72 : FS.getBufferForFile(Name: Filename);
73 if (std::error_code EC = BufferOrErr.getError())
74 return errorCodeToError(EC);
75 return std::move(BufferOrErr.get());
76}
77
78static Error initializeReader(InstrProfReader &Reader) {
79 return Reader.readHeader();
80}
81
82/// Read a list of binary ids from a profile that consist of
83/// a. uint64_t binary id length
84/// b. uint8_t binary id data
85/// c. uint8_t padding (if necessary)
86/// This function is shared between raw and indexed profiles.
87/// Raw profiles are in host-endian format, and indexed profiles are in
88/// little-endian format. So, this function takes an argument indicating the
89/// associated endian format to read the binary ids correctly.
90static Error
91readBinaryIdsInternal(const MemoryBuffer &DataBuffer,
92 const uint64_t BinaryIdsSize,
93 const uint8_t *BinaryIdsStart,
94 std::vector<llvm::object::BuildID> &BinaryIds,
95 const llvm::endianness Endian) {
96 using namespace support;
97
98 if (BinaryIdsSize == 0)
99 return Error::success();
100
101 const uint8_t *BI = BinaryIdsStart;
102 const uint8_t *BIEnd = BinaryIdsStart + BinaryIdsSize;
103 const uint8_t *End =
104 reinterpret_cast<const uint8_t *>(DataBuffer.getBufferEnd());
105
106 while (BI < BIEnd) {
107 size_t Remaining = BIEnd - BI;
108 // There should be enough left to read the binary id length.
109 if (Remaining < sizeof(uint64_t))
110 return make_error<InstrProfError>(
111 Args: instrprof_error::malformed,
112 Args: "not enough data to read binary id length");
113
114 uint64_t BILen = 0;
115 if (Endian == llvm::endianness::little)
116 BILen =
117 endian::readNext<uint64_t, llvm::endianness::little, unaligned>(memory&: BI);
118 else
119 BILen = endian::readNext<uint64_t, llvm::endianness::big, unaligned>(memory&: BI);
120
121 if (BILen == 0)
122 return make_error<InstrProfError>(Args: instrprof_error::malformed,
123 Args: "binary id length is 0");
124
125 Remaining = BIEnd - BI;
126 // There should be enough left to read the binary id data.
127 if (Remaining < alignToPowerOf2(Value: BILen, Align: sizeof(uint64_t)))
128 return make_error<InstrProfError>(
129 Args: instrprof_error::malformed, Args: "not enough data to read binary id data");
130
131 // Add binary id to the binary ids list.
132 BinaryIds.push_back(x: object::BuildID(BI, BI + BILen));
133
134 // Increment by binary id data length, which aligned to the size of uint64.
135 BI += alignToPowerOf2(Value: BILen, Align: sizeof(uint64_t));
136 if (BI > End)
137 return make_error<InstrProfError>(
138 Args: instrprof_error::malformed,
139 Args: "binary id section is greater than buffer size");
140 }
141
142 return Error::success();
143}
144
145static void
146printBinaryIdsInternal(raw_ostream &OS,
147 std::vector<llvm::object::BuildID> &BinaryIds) {
148 OS << "Binary IDs: \n";
149 for (auto BI : BinaryIds) {
150 for (uint64_t I = 0; I < BI.size(); I++)
151 OS << format(Fmt: "%02x", Vals: BI[I]);
152 OS << "\n";
153 }
154}
155
156Expected<std::unique_ptr<InstrProfReader>>
157InstrProfReader::create(const Twine &Path, vfs::FileSystem &FS,
158 const InstrProfCorrelator *Correlator,
159 std::function<void(Error)> Warn) {
160 // Set up the buffer to read.
161 auto BufferOrError = setupMemoryBuffer(Filename: Path, FS);
162 if (Error E = BufferOrError.takeError())
163 return std::move(E);
164 return InstrProfReader::create(Buffer: std::move(BufferOrError.get()), Correlator,
165 Warn);
166}
167
168Expected<std::unique_ptr<InstrProfReader>>
169InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
170 const InstrProfCorrelator *Correlator,
171 std::function<void(Error)> Warn) {
172 if (Buffer->getBufferSize() == 0)
173 return make_error<InstrProfError>(Args: instrprof_error::empty_raw_profile);
174
175 std::unique_ptr<InstrProfReader> Result;
176 // Create the reader.
177 if (IndexedInstrProfReader::hasFormat(DataBuffer: *Buffer))
178 Result.reset(p: new IndexedInstrProfReader(std::move(Buffer)));
179 else if (RawInstrProfReader64::hasFormat(DataBuffer: *Buffer))
180 Result.reset(p: new RawInstrProfReader64(std::move(Buffer), Correlator, Warn));
181 else if (RawInstrProfReader32::hasFormat(DataBuffer: *Buffer))
182 Result.reset(p: new RawInstrProfReader32(std::move(Buffer), Correlator, Warn));
183 else if (TextInstrProfReader::hasFormat(Buffer: *Buffer))
184 Result.reset(p: new TextInstrProfReader(std::move(Buffer)));
185 else
186 return make_error<InstrProfError>(Args: instrprof_error::unrecognized_format);
187
188 // Initialize the reader and return the result.
189 if (Error E = initializeReader(Reader&: *Result))
190 return std::move(E);
191
192 return std::move(Result);
193}
194
195Expected<std::unique_ptr<IndexedInstrProfReader>>
196IndexedInstrProfReader::create(const Twine &Path, vfs::FileSystem &FS,
197 const Twine &RemappingPath) {
198 // Set up the buffer to read.
199 auto BufferOrError = setupMemoryBuffer(Filename: Path, FS);
200 if (Error E = BufferOrError.takeError())
201 return std::move(E);
202
203 // Set up the remapping buffer if requested.
204 std::unique_ptr<MemoryBuffer> RemappingBuffer;
205 std::string RemappingPathStr = RemappingPath.str();
206 if (!RemappingPathStr.empty()) {
207 auto RemappingBufferOrError = setupMemoryBuffer(Filename: RemappingPathStr, FS);
208 if (Error E = RemappingBufferOrError.takeError())
209 return std::move(E);
210 RemappingBuffer = std::move(RemappingBufferOrError.get());
211 }
212
213 return IndexedInstrProfReader::create(Buffer: std::move(BufferOrError.get()),
214 RemappingBuffer: std::move(RemappingBuffer));
215}
216
217Expected<std::unique_ptr<IndexedInstrProfReader>>
218IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
219 std::unique_ptr<MemoryBuffer> RemappingBuffer) {
220 // Create the reader.
221 if (!IndexedInstrProfReader::hasFormat(DataBuffer: *Buffer))
222 return make_error<InstrProfError>(Args: instrprof_error::bad_magic);
223 auto Result = std::make_unique<IndexedInstrProfReader>(
224 args: std::move(Buffer), args: std::move(RemappingBuffer));
225
226 // Initialize the reader and return the result.
227 if (Error E = initializeReader(Reader&: *Result))
228 return std::move(E);
229
230 return std::move(Result);
231}
232
233bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) {
234 // Verify that this really looks like plain ASCII text by checking a
235 // 'reasonable' number of characters (up to profile magic size).
236 size_t count = std::min(a: Buffer.getBufferSize(), b: sizeof(uint64_t));
237 StringRef buffer = Buffer.getBufferStart();
238 return count == 0 ||
239 std::all_of(first: buffer.begin(), last: buffer.begin() + count,
240 pred: [](char c) { return isPrint(C: c) || isSpace(C: c); });
241}
242
243// Read the profile variant flag from the header: ":FE" means this is a FE
244// generated profile. ":IR" means this is an IR level profile. Other strings
245// with a leading ':' will be reported an error format.
246Error TextInstrProfReader::readHeader() {
247 Symtab.reset(p: new InstrProfSymtab());
248
249 while (Line->starts_with(Prefix: ":")) {
250 StringRef Str = Line->substr(Start: 1);
251 if (Str.equals_insensitive(RHS: "ir"))
252 ProfileKind |= InstrProfKind::IRInstrumentation;
253 else if (Str.equals_insensitive(RHS: "fe"))
254 ProfileKind |= InstrProfKind::FrontendInstrumentation;
255 else if (Str.equals_insensitive(RHS: "csir")) {
256 ProfileKind |= InstrProfKind::IRInstrumentation;
257 ProfileKind |= InstrProfKind::ContextSensitive;
258 } else if (Str.equals_insensitive(RHS: "entry_first"))
259 ProfileKind |= InstrProfKind::FunctionEntryInstrumentation;
260 else if (Str.equals_insensitive(RHS: "not_entry_first"))
261 ProfileKind &= ~InstrProfKind::FunctionEntryInstrumentation;
262 else if (Str.equals_insensitive(RHS: "single_byte_coverage"))
263 ProfileKind |= InstrProfKind::SingleByteCoverage;
264 else if (Str.equals_insensitive(RHS: "temporal_prof_traces")) {
265 ProfileKind |= InstrProfKind::TemporalProfile;
266 if (auto Err = readTemporalProfTraceData())
267 return error(E: std::move(Err));
268 } else
269 return error(Err: instrprof_error::bad_header);
270 ++Line;
271 }
272 return success();
273}
274
275/// Temporal profile trace data is stored in the header immediately after
276/// ":temporal_prof_traces". The first integer is the number of traces, the
277/// second integer is the stream size, then the following lines are the actual
278/// traces which consist of a weight and a comma separated list of function
279/// names.
280Error TextInstrProfReader::readTemporalProfTraceData() {
281 if ((++Line).is_at_end())
282 return error(Err: instrprof_error::eof);
283
284 uint32_t NumTraces;
285 if (Line->getAsInteger(Radix: 0, Result&: NumTraces))
286 return error(Err: instrprof_error::malformed);
287
288 if ((++Line).is_at_end())
289 return error(Err: instrprof_error::eof);
290
291 if (Line->getAsInteger(Radix: 0, Result&: TemporalProfTraceStreamSize))
292 return error(Err: instrprof_error::malformed);
293
294 for (uint32_t i = 0; i < NumTraces; i++) {
295 if ((++Line).is_at_end())
296 return error(Err: instrprof_error::eof);
297
298 TemporalProfTraceTy Trace;
299 if (Line->getAsInteger(Radix: 0, Result&: Trace.Weight))
300 return error(Err: instrprof_error::malformed);
301
302 if ((++Line).is_at_end())
303 return error(Err: instrprof_error::eof);
304
305 SmallVector<StringRef> FuncNames;
306 Line->split(A&: FuncNames, Separator: ",", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
307 for (auto &FuncName : FuncNames)
308 Trace.FunctionNameRefs.push_back(
309 x: IndexedInstrProf::ComputeHash(K: FuncName.trim()));
310 TemporalProfTraces.push_back(Elt: std::move(Trace));
311 }
312 return success();
313}
314
315Error
316TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) {
317
318#define CHECK_LINE_END(Line) \
319 if (Line.is_at_end()) \
320 return error(instrprof_error::truncated);
321#define READ_NUM(Str, Dst) \
322 if ((Str).getAsInteger(10, (Dst))) \
323 return error(instrprof_error::malformed);
324#define VP_READ_ADVANCE(Val) \
325 CHECK_LINE_END(Line); \
326 uint32_t Val; \
327 READ_NUM((*Line), (Val)); \
328 Line++;
329
330 if (Line.is_at_end())
331 return success();
332
333 uint32_t NumValueKinds;
334 if (Line->getAsInteger(Radix: 10, Result&: NumValueKinds)) {
335 // No value profile data
336 return success();
337 }
338 if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1)
339 return error(Err: instrprof_error::malformed,
340 ErrMsg: "number of value kinds is invalid");
341 Line++;
342
343 for (uint32_t VK = 0; VK < NumValueKinds; VK++) {
344 VP_READ_ADVANCE(ValueKind);
345 if (ValueKind > IPVK_Last)
346 return error(Err: instrprof_error::malformed, ErrMsg: "value kind is invalid");
347 ;
348 VP_READ_ADVANCE(NumValueSites);
349 if (!NumValueSites)
350 continue;
351
352 Record.reserveSites(ValueKind: VK, NumValueSites);
353 for (uint32_t S = 0; S < NumValueSites; S++) {
354 VP_READ_ADVANCE(NumValueData);
355
356 std::vector<InstrProfValueData> CurrentValues;
357 for (uint32_t V = 0; V < NumValueData; V++) {
358 CHECK_LINE_END(Line);
359 std::pair<StringRef, StringRef> VD = Line->rsplit(Separator: ':');
360 uint64_t TakenCount, Value;
361 if (ValueKind == IPVK_IndirectCallTarget) {
362 if (InstrProfSymtab::isExternalSymbol(Symbol: VD.first)) {
363 Value = 0;
364 } else {
365 if (Error E = Symtab->addFuncName(FuncName: VD.first))
366 return E;
367 Value = IndexedInstrProf::ComputeHash(K: VD.first);
368 }
369 } else {
370 READ_NUM(VD.first, Value);
371 }
372 READ_NUM(VD.second, TakenCount);
373 CurrentValues.push_back(x: {.Value: Value, .Count: TakenCount});
374 Line++;
375 }
376 Record.addValueData(ValueKind, Site: S, VData: CurrentValues.data(), N: NumValueData,
377 SymTab: nullptr);
378 }
379 }
380 return success();
381
382#undef CHECK_LINE_END
383#undef READ_NUM
384#undef VP_READ_ADVANCE
385}
386
387Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
388 // Skip empty lines and comments.
389 while (!Line.is_at_end() && (Line->empty() || Line->starts_with(Prefix: "#")))
390 ++Line;
391 // If we hit EOF while looking for a name, we're done.
392 if (Line.is_at_end()) {
393 return error(Err: instrprof_error::eof);
394 }
395
396 // Read the function name.
397 Record.Name = *Line++;
398 if (Error E = Symtab->addFuncName(FuncName: Record.Name))
399 return error(E: std::move(E));
400
401 // Read the function hash.
402 if (Line.is_at_end())
403 return error(Err: instrprof_error::truncated);
404 if ((Line++)->getAsInteger(Radix: 0, Result&: Record.Hash))
405 return error(Err: instrprof_error::malformed,
406 ErrMsg: "function hash is not a valid integer");
407
408 // Read the number of counters.
409 uint64_t NumCounters;
410 if (Line.is_at_end())
411 return error(Err: instrprof_error::truncated);
412 if ((Line++)->getAsInteger(Radix: 10, Result&: NumCounters))
413 return error(Err: instrprof_error::malformed,
414 ErrMsg: "number of counters is not a valid integer");
415 if (NumCounters == 0)
416 return error(Err: instrprof_error::malformed, ErrMsg: "number of counters is zero");
417
418 // Read each counter and fill our internal storage with the values.
419 Record.Clear();
420 Record.Counts.reserve(n: NumCounters);
421 for (uint64_t I = 0; I < NumCounters; ++I) {
422 if (Line.is_at_end())
423 return error(Err: instrprof_error::truncated);
424 uint64_t Count;
425 if ((Line++)->getAsInteger(Radix: 10, Result&: Count))
426 return error(Err: instrprof_error::malformed, ErrMsg: "count is invalid");
427 Record.Counts.push_back(x: Count);
428 }
429
430 // Bitmap byte information is indicated with special character.
431 if (Line->starts_with(Prefix: "$")) {
432 Record.BitmapBytes.clear();
433 // Read the number of bitmap bytes.
434 uint64_t NumBitmapBytes;
435 if ((Line++)->drop_front(N: 1).trim().getAsInteger(Radix: 0, Result&: NumBitmapBytes))
436 return error(Err: instrprof_error::malformed,
437 ErrMsg: "number of bitmap bytes is not a valid integer");
438 if (NumBitmapBytes != 0) {
439 // Read each bitmap and fill our internal storage with the values.
440 Record.BitmapBytes.reserve(n: NumBitmapBytes);
441 for (uint8_t I = 0; I < NumBitmapBytes; ++I) {
442 if (Line.is_at_end())
443 return error(Err: instrprof_error::truncated);
444 uint8_t BitmapByte;
445 if ((Line++)->getAsInteger(Radix: 0, Result&: BitmapByte))
446 return error(Err: instrprof_error::malformed,
447 ErrMsg: "bitmap byte is not a valid integer");
448 Record.BitmapBytes.push_back(x: BitmapByte);
449 }
450 }
451 }
452
453 // Check if value profile data exists and read it if so.
454 if (Error E = readValueProfileData(Record))
455 return error(E: std::move(E));
456
457 return success();
458}
459
460template <class IntPtrT>
461InstrProfKind RawInstrProfReader<IntPtrT>::getProfileKind() const {
462 return getProfileKindFromVersion(Version);
463}
464
465template <class IntPtrT>
466SmallVector<TemporalProfTraceTy> &
467RawInstrProfReader<IntPtrT>::getTemporalProfTraces(
468 std::optional<uint64_t> Weight) {
469 if (TemporalProfTimestamps.empty()) {
470 assert(TemporalProfTraces.empty());
471 return TemporalProfTraces;
472 }
473 // Sort functions by their timestamps to build the trace.
474 std::sort(first: TemporalProfTimestamps.begin(), last: TemporalProfTimestamps.end());
475 TemporalProfTraceTy Trace;
476 if (Weight)
477 Trace.Weight = *Weight;
478 for (auto &[TimestampValue, NameRef] : TemporalProfTimestamps)
479 Trace.FunctionNameRefs.push_back(x: NameRef);
480 TemporalProfTraces = {std::move(Trace)};
481 return TemporalProfTraces;
482}
483
484template <class IntPtrT>
485bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) {
486 if (DataBuffer.getBufferSize() < sizeof(uint64_t))
487 return false;
488 uint64_t Magic =
489 *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart());
490 return RawInstrProf::getMagic<IntPtrT>() == Magic ||
491 llvm::byteswap(RawInstrProf::getMagic<IntPtrT>()) == Magic;
492}
493
494template <class IntPtrT>
495Error RawInstrProfReader<IntPtrT>::readHeader() {
496 if (!hasFormat(DataBuffer: *DataBuffer))
497 return error(instrprof_error::bad_magic);
498 if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header))
499 return error(instrprof_error::bad_header);
500 auto *Header = reinterpret_cast<const RawInstrProf::Header *>(
501 DataBuffer->getBufferStart());
502 ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>();
503 return readHeader(*Header);
504}
505
506template <class IntPtrT>
507Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) {
508 const char *End = DataBuffer->getBufferEnd();
509 // Skip zero padding between profiles.
510 while (CurrentPos != End && *CurrentPos == 0)
511 ++CurrentPos;
512 // If there's nothing left, we're done.
513 if (CurrentPos == End)
514 return make_error<InstrProfError>(Args: instrprof_error::eof);
515 // If there isn't enough space for another header, this is probably just
516 // garbage at the end of the file.
517 if (CurrentPos + sizeof(RawInstrProf::Header) > End)
518 return make_error<InstrProfError>(Args: instrprof_error::malformed,
519 Args: "not enough space for another header");
520 // The writer ensures each profile is padded to start at an aligned address.
521 if (reinterpret_cast<size_t>(CurrentPos) % alignof(uint64_t))
522 return make_error<InstrProfError>(Args: instrprof_error::malformed,
523 Args: "insufficient padding");
524 // The magic should have the same byte order as in the previous header.
525 uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos);
526 if (Magic != swap(RawInstrProf::getMagic<IntPtrT>()))
527 return make_error<InstrProfError>(Args: instrprof_error::bad_magic);
528
529 // There's another profile to read, so we need to process the header.
530 auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos);
531 return readHeader(*Header);
532}
533
534template <class IntPtrT>
535Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) {
536 if (Error E = Symtab.create(NameStrings: StringRef(NamesStart, NamesEnd - NamesStart)))
537 return error(std::move(E));
538 for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) {
539 const IntPtrT FPtr = swap(I->FunctionPointer);
540 if (!FPtr)
541 continue;
542 Symtab.mapAddress(Addr: FPtr, MD5Val: swap(I->NameRef));
543 }
544 return success();
545}
546
547template <class IntPtrT>
548Error RawInstrProfReader<IntPtrT>::readHeader(
549 const RawInstrProf::Header &Header) {
550 Version = swap(Header.Version);
551 if (GET_VERSION(Version) != RawInstrProf::Version)
552 return error(instrprof_error::raw_profile_version_mismatch,
553 ("Profile uses raw profile format version = " +
554 Twine(GET_VERSION(Version)) +
555 "; expected version = " + Twine(RawInstrProf::Version) +
556 "\nPLEASE update this tool to version in the raw profile, or "
557 "regenerate raw profile with expected version.")
558 .str());
559
560 uint64_t BinaryIdSize = swap(Header.BinaryIdsSize);
561 // Binary id start just after the header if exists.
562 const uint8_t *BinaryIdStart =
563 reinterpret_cast<const uint8_t *>(&Header) + sizeof(RawInstrProf::Header);
564 const uint8_t *BinaryIdEnd = BinaryIdStart + BinaryIdSize;
565 const uint8_t *BufferEnd = (const uint8_t *)DataBuffer->getBufferEnd();
566 if (BinaryIdSize % sizeof(uint64_t) || BinaryIdEnd > BufferEnd)
567 return error(instrprof_error::bad_header);
568 if (BinaryIdSize != 0) {
569 if (Error Err =
570 readBinaryIdsInternal(*DataBuffer, BinaryIdSize, BinaryIdStart,
571 BinaryIds, getDataEndianness()))
572 return Err;
573 }
574
575 CountersDelta = swap(Header.CountersDelta);
576 BitmapDelta = swap(Header.BitmapDelta);
577 NamesDelta = swap(Header.NamesDelta);
578 auto NumData = swap(Header.NumData);
579 auto PaddingBytesBeforeCounters = swap(Header.PaddingBytesBeforeCounters);
580 auto CountersSize = swap(Header.NumCounters) * getCounterTypeSize();
581 auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters);
582 auto NumBitmapBytes = swap(Header.NumBitmapBytes);
583 auto PaddingBytesAfterBitmapBytes = swap(Header.PaddingBytesAfterBitmapBytes);
584 auto NamesSize = swap(Header.NamesSize);
585 ValueKindLast = swap(Header.ValueKindLast);
586
587 auto DataSize = NumData * sizeof(RawInstrProf::ProfileData<IntPtrT>);
588 auto PaddingSize = getNumPaddingBytes(SizeInBytes: NamesSize);
589
590 // Profile data starts after profile header and binary ids if exist.
591 ptrdiff_t DataOffset = sizeof(RawInstrProf::Header) + BinaryIdSize;
592 ptrdiff_t CountersOffset = DataOffset + DataSize + PaddingBytesBeforeCounters;
593 ptrdiff_t BitmapOffset =
594 CountersOffset + CountersSize + PaddingBytesAfterCounters;
595 ptrdiff_t NamesOffset =
596 BitmapOffset + NumBitmapBytes + PaddingBytesAfterBitmapBytes;
597 ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize;
598
599 auto *Start = reinterpret_cast<const char *>(&Header);
600 if (Start + ValueDataOffset > DataBuffer->getBufferEnd())
601 return error(instrprof_error::bad_header);
602
603 if (Correlator) {
604 // These sizes in the raw file are zero because we constructed them in the
605 // Correlator.
606 if (!(DataSize == 0 && NamesSize == 0 && CountersDelta == 0 &&
607 NamesDelta == 0))
608 return error(instrprof_error::unexpected_correlation_info);
609 Data = Correlator->getDataPointer();
610 DataEnd = Data + Correlator->getDataSize();
611 NamesStart = Correlator->getNamesPointer();
612 NamesEnd = NamesStart + Correlator->getNamesSize();
613 } else {
614 Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>(
615 Start + DataOffset);
616 DataEnd = Data + NumData;
617 NamesStart = Start + NamesOffset;
618 NamesEnd = NamesStart + NamesSize;
619 }
620
621 CountersStart = Start + CountersOffset;
622 CountersEnd = CountersStart + CountersSize;
623 BitmapStart = Start + BitmapOffset;
624 BitmapEnd = BitmapStart + NumBitmapBytes;
625 ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset);
626
627 std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
628 if (Error E = createSymtab(Symtab&: *NewSymtab))
629 return E;
630
631 Symtab = std::move(NewSymtab);
632 return success();
633}
634
635template <class IntPtrT>
636Error RawInstrProfReader<IntPtrT>::readName(NamedInstrProfRecord &Record) {
637 Record.Name = getName(NameRef: Data->NameRef);
638 return success();
639}
640
641template <class IntPtrT>
642Error RawInstrProfReader<IntPtrT>::readFuncHash(NamedInstrProfRecord &Record) {
643 Record.Hash = swap(Data->FuncHash);
644 return success();
645}
646
647template <class IntPtrT>
648Error RawInstrProfReader<IntPtrT>::readRawCounts(
649 InstrProfRecord &Record) {
650 uint32_t NumCounters = swap(Data->NumCounters);
651 if (NumCounters == 0)
652 return error(instrprof_error::malformed, "number of counters is zero");
653
654 ptrdiff_t CounterBaseOffset = swap(Data->CounterPtr) - CountersDelta;
655 if (CounterBaseOffset < 0)
656 return error(
657 instrprof_error::malformed,
658 ("counter offset " + Twine(CounterBaseOffset) + " is negative").str());
659
660 if (CounterBaseOffset >= CountersEnd - CountersStart)
661 return error(instrprof_error::malformed,
662 ("counter offset " + Twine(CounterBaseOffset) +
663 " is greater than the maximum counter offset " +
664 Twine(CountersEnd - CountersStart - 1))
665 .str());
666
667 uint64_t MaxNumCounters =
668 (CountersEnd - (CountersStart + CounterBaseOffset)) /
669 getCounterTypeSize();
670 if (NumCounters > MaxNumCounters)
671 return error(instrprof_error::malformed,
672 ("number of counters " + Twine(NumCounters) +
673 " is greater than the maximum number of counters " +
674 Twine(MaxNumCounters))
675 .str());
676
677 Record.Counts.clear();
678 Record.Counts.reserve(n: NumCounters);
679 for (uint32_t I = 0; I < NumCounters; I++) {
680 const char *Ptr =
681 CountersStart + CounterBaseOffset + I * getCounterTypeSize();
682 if (I == 0 && hasTemporalProfile()) {
683 uint64_t TimestampValue = swap(*reinterpret_cast<const uint64_t *>(Ptr));
684 if (TimestampValue != 0 &&
685 TimestampValue != std::numeric_limits<uint64_t>::max()) {
686 TemporalProfTimestamps.emplace_back(TimestampValue,
687 swap(Data->NameRef));
688 TemporalProfTraceStreamSize = 1;
689 }
690 if (hasSingleByteCoverage()) {
691 // In coverage mode, getCounterTypeSize() returns 1 byte but our
692 // timestamp field has size uint64_t. Increment I so that the next
693 // iteration of this for loop points to the byte after the timestamp
694 // field, i.e., I += 8.
695 I += 7;
696 }
697 continue;
698 }
699 if (hasSingleByteCoverage()) {
700 // A value of zero signifies the block is covered.
701 Record.Counts.push_back(x: *Ptr == 0 ? 1 : 0);
702 } else {
703 uint64_t CounterValue = swap(*reinterpret_cast<const uint64_t *>(Ptr));
704 if (CounterValue > MaxCounterValue && Warn)
705 Warn(make_error<InstrProfError>(
706 Args: instrprof_error::counter_value_too_large, Args: Twine(CounterValue)));
707
708 Record.Counts.push_back(x: CounterValue);
709 }
710 }
711
712 return success();
713}
714
715template <class IntPtrT>
716Error RawInstrProfReader<IntPtrT>::readRawBitmapBytes(InstrProfRecord &Record) {
717 uint32_t NumBitmapBytes = swap(Data->NumBitmapBytes);
718
719 Record.BitmapBytes.clear();
720 Record.BitmapBytes.reserve(n: NumBitmapBytes);
721
722 // It's possible MCDC is either not enabled or only used for some functions
723 // and not others. So if we record 0 bytes, just move on.
724 if (NumBitmapBytes == 0)
725 return success();
726
727 // BitmapDelta decreases as we advance to the next data record.
728 ptrdiff_t BitmapOffset = swap(Data->BitmapPtr) - BitmapDelta;
729 if (BitmapOffset < 0)
730 return error(
731 instrprof_error::malformed,
732 ("bitmap offset " + Twine(BitmapOffset) + " is negative").str());
733
734 if (BitmapOffset >= BitmapEnd - BitmapStart)
735 return error(instrprof_error::malformed,
736 ("bitmap offset " + Twine(BitmapOffset) +
737 " is greater than the maximum bitmap offset " +
738 Twine(BitmapEnd - BitmapStart - 1))
739 .str());
740
741 uint64_t MaxNumBitmapBytes =
742 (BitmapEnd - (BitmapStart + BitmapOffset)) / sizeof(uint8_t);
743 if (NumBitmapBytes > MaxNumBitmapBytes)
744 return error(instrprof_error::malformed,
745 ("number of bitmap bytes " + Twine(NumBitmapBytes) +
746 " is greater than the maximum number of bitmap bytes " +
747 Twine(MaxNumBitmapBytes))
748 .str());
749
750 for (uint32_t I = 0; I < NumBitmapBytes; I++) {
751 const char *Ptr = BitmapStart + BitmapOffset + I;
752 Record.BitmapBytes.push_back(swap(*Ptr));
753 }
754
755 return success();
756}
757
758template <class IntPtrT>
759Error RawInstrProfReader<IntPtrT>::readValueProfilingData(
760 InstrProfRecord &Record) {
761 Record.clearValueData();
762 CurValueDataSize = 0;
763 // Need to match the logic in value profile dumper code in compiler-rt:
764 uint32_t NumValueKinds = 0;
765 for (uint32_t I = 0; I < IPVK_Last + 1; I++)
766 NumValueKinds += (Data->NumValueSites[I] != 0);
767
768 if (!NumValueKinds)
769 return success();
770
771 Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr =
772 ValueProfData::getValueProfData(
773 SrcBuffer: ValueDataStart, SrcBufferEnd: (const unsigned char *)DataBuffer->getBufferEnd(),
774 SrcDataEndianness: getDataEndianness());
775
776 if (Error E = VDataPtrOrErr.takeError())
777 return E;
778
779 // Note that besides deserialization, this also performs the conversion for
780 // indirect call targets. The function pointers from the raw profile are
781 // remapped into function name hashes.
782 VDataPtrOrErr.get()->deserializeTo(Record, SymTab: Symtab.get());
783 CurValueDataSize = VDataPtrOrErr.get()->getSize();
784 return success();
785}
786
787template <class IntPtrT>
788Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record) {
789 // Keep reading profiles that consist of only headers and no profile data and
790 // counters.
791 while (atEnd())
792 // At this point, ValueDataStart field points to the next header.
793 if (Error E = readNextHeader(CurrentPos: getNextHeaderPos()))
794 return error(std::move(E));
795
796 // Read name and set it in Record.
797 if (Error E = readName(Record))
798 return error(std::move(E));
799
800 // Read FuncHash and set it in Record.
801 if (Error E = readFuncHash(Record))
802 return error(std::move(E));
803
804 // Read raw counts and set Record.
805 if (Error E = readRawCounts(Record))
806 return error(std::move(E));
807
808 // Read raw bitmap bytes and set Record.
809 if (Error E = readRawBitmapBytes(Record))
810 return error(std::move(E));
811
812 // Read value data and set Record.
813 if (Error E = readValueProfilingData(Record))
814 return error(std::move(E));
815
816 // Iterate.
817 advanceData();
818 return success();
819}
820
821template <class IntPtrT>
822Error RawInstrProfReader<IntPtrT>::readBinaryIds(
823 std::vector<llvm::object::BuildID> &BinaryIds) {
824 BinaryIds.insert(BinaryIds.begin(), this->BinaryIds.begin(),
825 this->BinaryIds.end());
826 return Error::success();
827}
828
829template <class IntPtrT>
830Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) {
831 if (!BinaryIds.empty())
832 printBinaryIdsInternal(OS, BinaryIds);
833 return Error::success();
834}
835
836namespace llvm {
837
838template class RawInstrProfReader<uint32_t>;
839template class RawInstrProfReader<uint64_t>;
840
841} // end namespace llvm
842
843InstrProfLookupTrait::hash_value_type
844InstrProfLookupTrait::ComputeHash(StringRef K) {
845 return IndexedInstrProf::ComputeHash(Type: HashType, K);
846}
847
848using data_type = InstrProfLookupTrait::data_type;
849using offset_type = InstrProfLookupTrait::offset_type;
850
851bool InstrProfLookupTrait::readValueProfilingData(
852 const unsigned char *&D, const unsigned char *const End) {
853 Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr =
854 ValueProfData::getValueProfData(SrcBuffer: D, SrcBufferEnd: End, SrcDataEndianness: ValueProfDataEndianness);
855
856 if (VDataPtrOrErr.takeError())
857 return false;
858
859 VDataPtrOrErr.get()->deserializeTo(Record&: DataBuffer.back(), SymTab: nullptr);
860 D += VDataPtrOrErr.get()->TotalSize;
861
862 return true;
863}
864
865data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
866 offset_type N) {
867 using namespace support;
868
869 // Check if the data is corrupt. If so, don't try to read it.
870 if (N % sizeof(uint64_t))
871 return data_type();
872
873 DataBuffer.clear();
874 std::vector<uint64_t> CounterBuffer;
875 std::vector<uint8_t> BitmapByteBuffer;
876
877 const unsigned char *End = D + N;
878 while (D < End) {
879 // Read hash.
880 if (D + sizeof(uint64_t) >= End)
881 return data_type();
882 uint64_t Hash =
883 endian::readNext<uint64_t, llvm::endianness::little, unaligned>(memory&: D);
884
885 // Initialize number of counters for GET_VERSION(FormatVersion) == 1.
886 uint64_t CountsSize = N / sizeof(uint64_t) - 1;
887 // If format version is different then read the number of counters.
888 if (GET_VERSION(FormatVersion) != IndexedInstrProf::ProfVersion::Version1) {
889 if (D + sizeof(uint64_t) > End)
890 return data_type();
891 CountsSize =
892 endian::readNext<uint64_t, llvm::endianness::little, unaligned>(memory&: D);
893 }
894 // Read counter values.
895 if (D + CountsSize * sizeof(uint64_t) > End)
896 return data_type();
897
898 CounterBuffer.clear();
899 CounterBuffer.reserve(n: CountsSize);
900 for (uint64_t J = 0; J < CountsSize; ++J)
901 CounterBuffer.push_back(
902 x: endian::readNext<uint64_t, llvm::endianness::little, unaligned>(memory&: D));
903
904 // Read bitmap bytes for GET_VERSION(FormatVersion) > 10.
905 if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version10) {
906 uint64_t BitmapBytes = 0;
907 if (D + sizeof(uint64_t) > End)
908 return data_type();
909 BitmapBytes =
910 endian::readNext<uint64_t, llvm::endianness::little, unaligned>(memory&: D);
911 // Read bitmap byte values.
912 if (D + BitmapBytes * sizeof(uint8_t) > End)
913 return data_type();
914 BitmapByteBuffer.clear();
915 BitmapByteBuffer.reserve(n: BitmapBytes);
916 for (uint64_t J = 0; J < BitmapBytes; ++J)
917 BitmapByteBuffer.push_back(x: static_cast<uint8_t>(
918 endian::readNext<uint64_t, llvm::endianness::little, unaligned>(
919 memory&: D)));
920 }
921
922 DataBuffer.emplace_back(args&: K, args&: Hash, args: std::move(CounterBuffer),
923 args: std::move(BitmapByteBuffer));
924
925 // Read value profiling data.
926 if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version2 &&
927 !readValueProfilingData(D, End)) {
928 DataBuffer.clear();
929 return data_type();
930 }
931 }
932 return DataBuffer;
933}
934
935template <typename HashTableImpl>
936Error InstrProfReaderIndex<HashTableImpl>::getRecords(
937 StringRef FuncName, ArrayRef<NamedInstrProfRecord> &Data) {
938 auto Iter = HashTable->find(FuncName);
939 if (Iter == HashTable->end())
940 return make_error<InstrProfError>(Args: instrprof_error::unknown_function);
941
942 Data = (*Iter);
943 if (Data.empty())
944 return make_error<InstrProfError>(Args: instrprof_error::malformed,
945 Args: "profile data is empty");
946
947 return Error::success();
948}
949
950template <typename HashTableImpl>
951Error InstrProfReaderIndex<HashTableImpl>::getRecords(
952 ArrayRef<NamedInstrProfRecord> &Data) {
953 if (atEnd())
954 return make_error<InstrProfError>(Args: instrprof_error::eof);
955
956 Data = *RecordIterator;
957
958 if (Data.empty())
959 return make_error<InstrProfError>(Args: instrprof_error::malformed,
960 Args: "profile data is empty");
961
962 return Error::success();
963}
964
965template <typename HashTableImpl>
966InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex(
967 const unsigned char *Buckets, const unsigned char *const Payload,
968 const unsigned char *const Base, IndexedInstrProf::HashT HashType,
969 uint64_t Version) {
970 FormatVersion = Version;
971 HashTable.reset(HashTableImpl::Create(
972 Buckets, Payload, Base,
973 typename HashTableImpl::InfoType(HashType, Version)));
974 RecordIterator = HashTable->data_begin();
975}
976
977template <typename HashTableImpl>
978InstrProfKind InstrProfReaderIndex<HashTableImpl>::getProfileKind() const {
979 return getProfileKindFromVersion(Version: FormatVersion);
980}
981
982namespace {
983/// A remapper that does not apply any remappings.
984class InstrProfReaderNullRemapper : public InstrProfReaderRemapper {
985 InstrProfReaderIndexBase &Underlying;
986
987public:
988 InstrProfReaderNullRemapper(InstrProfReaderIndexBase &Underlying)
989 : Underlying(Underlying) {}
990
991 Error getRecords(StringRef FuncName,
992 ArrayRef<NamedInstrProfRecord> &Data) override {
993 return Underlying.getRecords(FuncName, Data);
994 }
995};
996} // namespace
997
998/// A remapper that applies remappings based on a symbol remapping file.
999template <typename HashTableImpl>
1000class llvm::InstrProfReaderItaniumRemapper
1001 : public InstrProfReaderRemapper {
1002public:
1003 InstrProfReaderItaniumRemapper(
1004 std::unique_ptr<MemoryBuffer> RemapBuffer,
1005 InstrProfReaderIndex<HashTableImpl> &Underlying)
1006 : RemapBuffer(std::move(RemapBuffer)), Underlying(Underlying) {
1007 }
1008
1009 /// Extract the original function name from a PGO function name.
1010 static StringRef extractName(StringRef Name) {
1011 // We can have multiple pieces separated by kGlobalIdentifierDelimiter (
1012 // semicolon now and colon in older profiles); there can be pieces both
1013 // before and after the mangled name. Find the first part that starts with
1014 // '_Z'; we'll assume that's the mangled name we want.
1015 std::pair<StringRef, StringRef> Parts = {StringRef(), Name};
1016 while (true) {
1017 Parts = Parts.second.split(Separator: kGlobalIdentifierDelimiter);
1018 if (Parts.first.starts_with(Prefix: "_Z"))
1019 return Parts.first;
1020 if (Parts.second.empty())
1021 return Name;
1022 }
1023 }
1024
1025 /// Given a mangled name extracted from a PGO function name, and a new
1026 /// form for that mangled name, reconstitute the name.
1027 static void reconstituteName(StringRef OrigName, StringRef ExtractedName,
1028 StringRef Replacement,
1029 SmallVectorImpl<char> &Out) {
1030 Out.reserve(N: OrigName.size() + Replacement.size() - ExtractedName.size());
1031 Out.insert(I: Out.end(), From: OrigName.begin(), To: ExtractedName.begin());
1032 Out.insert(I: Out.end(), From: Replacement.begin(), To: Replacement.end());
1033 Out.insert(I: Out.end(), From: ExtractedName.end(), To: OrigName.end());
1034 }
1035
1036 Error populateRemappings() override {
1037 if (Error E = Remappings.read(B&: *RemapBuffer))
1038 return E;
1039 for (StringRef Name : Underlying.HashTable->keys()) {
1040 StringRef RealName = extractName(Name);
1041 if (auto Key = Remappings.insert(FunctionName: RealName)) {
1042 // FIXME: We could theoretically map the same equivalence class to
1043 // multiple names in the profile data. If that happens, we should
1044 // return NamedInstrProfRecords from all of them.
1045 MappedNames.insert(KV: {Key, RealName});
1046 }
1047 }
1048 return Error::success();
1049 }
1050
1051 Error getRecords(StringRef FuncName,
1052 ArrayRef<NamedInstrProfRecord> &Data) override {
1053 StringRef RealName = extractName(Name: FuncName);
1054 if (auto Key = Remappings.lookup(FunctionName: RealName)) {
1055 StringRef Remapped = MappedNames.lookup(Val: Key);
1056 if (!Remapped.empty()) {
1057 if (RealName.begin() == FuncName.begin() &&
1058 RealName.end() == FuncName.end())
1059 FuncName = Remapped;
1060 else {
1061 // Try rebuilding the name from the given remapping.
1062 SmallString<256> Reconstituted;
1063 reconstituteName(OrigName: FuncName, ExtractedName: RealName, Replacement: Remapped, Out&: Reconstituted);
1064 Error E = Underlying.getRecords(Reconstituted, Data);
1065 if (!E)
1066 return E;
1067
1068 // If we failed because the name doesn't exist, fall back to asking
1069 // about the original name.
1070 if (Error Unhandled = handleErrors(
1071 std::move(E), [](std::unique_ptr<InstrProfError> Err) {
1072 return Err->get() == instrprof_error::unknown_function
1073 ? Error::success()
1074 : Error(std::move(Err));
1075 }))
1076 return Unhandled;
1077 }
1078 }
1079 }
1080 return Underlying.getRecords(FuncName, Data);
1081 }
1082
1083private:
1084 /// The memory buffer containing the remapping configuration. Remappings
1085 /// holds pointers into this buffer.
1086 std::unique_ptr<MemoryBuffer> RemapBuffer;
1087
1088 /// The mangling remapper.
1089 SymbolRemappingReader Remappings;
1090
1091 /// Mapping from mangled name keys to the name used for the key in the
1092 /// profile data.
1093 /// FIXME: Can we store a location within the on-disk hash table instead of
1094 /// redoing lookup?
1095 DenseMap<SymbolRemappingReader::Key, StringRef> MappedNames;
1096
1097 /// The real profile data reader.
1098 InstrProfReaderIndex<HashTableImpl> &Underlying;
1099};
1100
1101bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
1102 using namespace support;
1103
1104 if (DataBuffer.getBufferSize() < 8)
1105 return false;
1106 uint64_t Magic = endian::read<uint64_t, llvm::endianness::little, aligned>(
1107 memory: DataBuffer.getBufferStart());
1108 // Verify that it's magical.
1109 return Magic == IndexedInstrProf::Magic;
1110}
1111
1112const unsigned char *
1113IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
1114 const unsigned char *Cur, bool UseCS) {
1115 using namespace IndexedInstrProf;
1116 using namespace support;
1117
1118 if (Version >= IndexedInstrProf::Version4) {
1119 const IndexedInstrProf::Summary *SummaryInLE =
1120 reinterpret_cast<const IndexedInstrProf::Summary *>(Cur);
1121 uint64_t NFields = endian::byte_swap<uint64_t, llvm::endianness::little>(
1122 value: SummaryInLE->NumSummaryFields);
1123 uint64_t NEntries = endian::byte_swap<uint64_t, llvm::endianness::little>(
1124 value: SummaryInLE->NumCutoffEntries);
1125 uint32_t SummarySize =
1126 IndexedInstrProf::Summary::getSize(NumSumFields: NFields, NumCutoffEntries: NEntries);
1127 std::unique_ptr<IndexedInstrProf::Summary> SummaryData =
1128 IndexedInstrProf::allocSummary(TotalSize: SummarySize);
1129
1130 const uint64_t *Src = reinterpret_cast<const uint64_t *>(SummaryInLE);
1131 uint64_t *Dst = reinterpret_cast<uint64_t *>(SummaryData.get());
1132 for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++)
1133 Dst[I] = endian::byte_swap<uint64_t, llvm::endianness::little>(value: Src[I]);
1134
1135 SummaryEntryVector DetailedSummary;
1136 for (unsigned I = 0; I < SummaryData->NumCutoffEntries; I++) {
1137 const IndexedInstrProf::Summary::Entry &Ent = SummaryData->getEntry(I);
1138 DetailedSummary.emplace_back(args: (uint32_t)Ent.Cutoff, args: Ent.MinBlockCount,
1139 args: Ent.NumBlocks);
1140 }
1141 std::unique_ptr<llvm::ProfileSummary> &Summary =
1142 UseCS ? this->CS_Summary : this->Summary;
1143
1144 // initialize InstrProfSummary using the SummaryData from disk.
1145 Summary = std::make_unique<ProfileSummary>(
1146 args: UseCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr,
1147 args&: DetailedSummary, args: SummaryData->get(K: Summary::TotalBlockCount),
1148 args: SummaryData->get(K: Summary::MaxBlockCount),
1149 args: SummaryData->get(K: Summary::MaxInternalBlockCount),
1150 args: SummaryData->get(K: Summary::MaxFunctionCount),
1151 args: SummaryData->get(K: Summary::TotalNumBlocks),
1152 args: SummaryData->get(K: Summary::TotalNumFunctions));
1153 return Cur + SummarySize;
1154 } else {
1155 // The older versions do not support a profile summary. This just computes
1156 // an empty summary, which will not result in accurate hot/cold detection.
1157 // We would need to call addRecord for all NamedInstrProfRecords to get the
1158 // correct summary. However, this version is old (prior to early 2016) and
1159 // has not been supporting an accurate summary for several years.
1160 InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1161 Summary = Builder.getSummary();
1162 return Cur;
1163 }
1164}
1165
1166Error IndexedInstrProfReader::readHeader() {
1167 using namespace support;
1168
1169 const unsigned char *Start =
1170 (const unsigned char *)DataBuffer->getBufferStart();
1171 const unsigned char *Cur = Start;
1172 if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24)
1173 return error(Err: instrprof_error::truncated);
1174
1175 auto HeaderOr = IndexedInstrProf::Header::readFromBuffer(Buffer: Start);
1176 if (!HeaderOr)
1177 return HeaderOr.takeError();
1178
1179 const IndexedInstrProf::Header *Header = &HeaderOr.get();
1180 Cur += Header->size();
1181
1182 Cur = readSummary(Version: (IndexedInstrProf::ProfVersion)Header->formatVersion(), Cur,
1183 /* UseCS */ false);
1184 if (Header->formatVersion() & VARIANT_MASK_CSIR_PROF)
1185 Cur =
1186 readSummary(Version: (IndexedInstrProf::ProfVersion)Header->formatVersion(), Cur,
1187 /* UseCS */ true);
1188 // Read the hash type and start offset.
1189 IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>(
1190 endian::byte_swap<uint64_t, llvm::endianness::little>(value: Header->HashType));
1191 if (HashType > IndexedInstrProf::HashT::Last)
1192 return error(Err: instrprof_error::unsupported_hash_type);
1193
1194 uint64_t HashOffset =
1195 endian::byte_swap<uint64_t, llvm::endianness::little>(value: Header->HashOffset);
1196
1197 // The hash table with profile counts comes next.
1198 auto IndexPtr = std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>(
1199 args: Start + HashOffset, args&: Cur, args&: Start, args&: HashType, args: Header->formatVersion());
1200
1201 // The MemProfOffset field in the header is only valid when the format
1202 // version is higher than 8 (when it was introduced).
1203 if (GET_VERSION(Header->formatVersion()) >= 8 &&
1204 Header->formatVersion() & VARIANT_MASK_MEMPROF) {
1205 uint64_t MemProfOffset =
1206 endian::byte_swap<uint64_t, llvm::endianness::little>(
1207 value: Header->MemProfOffset);
1208
1209 const unsigned char *Ptr = Start + MemProfOffset;
1210 // The value returned from RecordTableGenerator.Emit.
1211 const uint64_t RecordTableOffset =
1212 support::endian::readNext<uint64_t, llvm::endianness::little,
1213 unaligned>(memory&: Ptr);
1214 // The offset in the stream right before invoking
1215 // FrameTableGenerator.Emit.
1216 const uint64_t FramePayloadOffset =
1217 support::endian::readNext<uint64_t, llvm::endianness::little,
1218 unaligned>(memory&: Ptr);
1219 // The value returned from FrameTableGenerator.Emit.
1220 const uint64_t FrameTableOffset =
1221 support::endian::readNext<uint64_t, llvm::endianness::little,
1222 unaligned>(memory&: Ptr);
1223
1224 // Read the schema.
1225 auto SchemaOr = memprof::readMemProfSchema(Buffer&: Ptr);
1226 if (!SchemaOr)
1227 return SchemaOr.takeError();
1228 Schema = SchemaOr.get();
1229
1230 // Now initialize the table reader with a pointer into data buffer.
1231 MemProfRecordTable.reset(p: MemProfRecordHashTable::Create(
1232 /*Buckets=*/Start + RecordTableOffset,
1233 /*Payload=*/Ptr,
1234 /*Base=*/Start, InfoObj: memprof::RecordLookupTrait(Schema)));
1235
1236 // Initialize the frame table reader with the payload and bucket offsets.
1237 MemProfFrameTable.reset(p: MemProfFrameHashTable::Create(
1238 /*Buckets=*/Start + FrameTableOffset,
1239 /*Payload=*/Start + FramePayloadOffset,
1240 /*Base=*/Start, InfoObj: memprof::FrameLookupTrait()));
1241 }
1242
1243 // BinaryIdOffset field in the header is only valid when the format version
1244 // is higher than 9 (when it was introduced).
1245 if (GET_VERSION(Header->formatVersion()) >= 9) {
1246 uint64_t BinaryIdOffset =
1247 endian::byte_swap<uint64_t, llvm::endianness::little>(
1248 value: Header->BinaryIdOffset);
1249 const unsigned char *Ptr = Start + BinaryIdOffset;
1250 // Read binary ids size.
1251 BinaryIdsSize =
1252 support::endian::readNext<uint64_t, llvm::endianness::little,
1253 unaligned>(memory&: Ptr);
1254 if (BinaryIdsSize % sizeof(uint64_t))
1255 return error(Err: instrprof_error::bad_header);
1256 // Set the binary ids start.
1257 BinaryIdsStart = Ptr;
1258 if (BinaryIdsStart > (const unsigned char *)DataBuffer->getBufferEnd())
1259 return make_error<InstrProfError>(Args: instrprof_error::malformed,
1260 Args: "corrupted binary ids");
1261 }
1262
1263 if (GET_VERSION(Header->formatVersion()) >= 10 &&
1264 Header->formatVersion() & VARIANT_MASK_TEMPORAL_PROF) {
1265 uint64_t TemporalProfTracesOffset =
1266 endian::byte_swap<uint64_t, llvm::endianness::little>(
1267 value: Header->TemporalProfTracesOffset);
1268 const unsigned char *Ptr = Start + TemporalProfTracesOffset;
1269 const auto *PtrEnd = (const unsigned char *)DataBuffer->getBufferEnd();
1270 // Expect at least two 64 bit fields: NumTraces, and TraceStreamSize
1271 if (Ptr + 2 * sizeof(uint64_t) > PtrEnd)
1272 return error(Err: instrprof_error::truncated);
1273 const uint64_t NumTraces =
1274 support::endian::readNext<uint64_t, llvm::endianness::little,
1275 unaligned>(memory&: Ptr);
1276 TemporalProfTraceStreamSize =
1277 support::endian::readNext<uint64_t, llvm::endianness::little,
1278 unaligned>(memory&: Ptr);
1279 for (unsigned i = 0; i < NumTraces; i++) {
1280 // Expect at least two 64 bit fields: Weight and NumFunctions
1281 if (Ptr + 2 * sizeof(uint64_t) > PtrEnd)
1282 return error(Err: instrprof_error::truncated);
1283 TemporalProfTraceTy Trace;
1284 Trace.Weight =
1285 support::endian::readNext<uint64_t, llvm::endianness::little,
1286 unaligned>(memory&: Ptr);
1287 const uint64_t NumFunctions =
1288 support::endian::readNext<uint64_t, llvm::endianness::little,
1289 unaligned>(memory&: Ptr);
1290 // Expect at least NumFunctions 64 bit fields
1291 if (Ptr + NumFunctions * sizeof(uint64_t) > PtrEnd)
1292 return error(Err: instrprof_error::truncated);
1293 for (unsigned j = 0; j < NumFunctions; j++) {
1294 const uint64_t NameRef =
1295 support::endian::readNext<uint64_t, llvm::endianness::little,
1296 unaligned>(memory&: Ptr);
1297 Trace.FunctionNameRefs.push_back(x: NameRef);
1298 }
1299 TemporalProfTraces.push_back(Elt: std::move(Trace));
1300 }
1301 }
1302
1303 // Load the remapping table now if requested.
1304 if (RemappingBuffer) {
1305 Remapper =
1306 std::make_unique<InstrProfReaderItaniumRemapper<OnDiskHashTableImplV3>>(
1307 args: std::move(RemappingBuffer), args&: *IndexPtr);
1308 if (Error E = Remapper->populateRemappings())
1309 return E;
1310 } else {
1311 Remapper = std::make_unique<InstrProfReaderNullRemapper>(args&: *IndexPtr);
1312 }
1313 Index = std::move(IndexPtr);
1314
1315 return success();
1316}
1317
1318InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
1319 if (Symtab)
1320 return *Symtab;
1321
1322 std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
1323 if (Error E = Index->populateSymtab(*NewSymtab)) {
1324 auto [ErrCode, Msg] = InstrProfError::take(E: std::move(E));
1325 consumeError(Err: error(Err: ErrCode, ErrMsg: Msg));
1326 }
1327
1328 Symtab = std::move(NewSymtab);
1329 return *Symtab;
1330}
1331
1332Expected<InstrProfRecord> IndexedInstrProfReader::getInstrProfRecord(
1333 StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName,
1334 uint64_t *MismatchedFuncSum) {
1335 ArrayRef<NamedInstrProfRecord> Data;
1336 uint64_t FuncSum = 0;
1337 auto Err = Remapper->getRecords(FuncName, Data);
1338 if (Err) {
1339 // If we don't find FuncName, try DeprecatedFuncName to handle profiles
1340 // built by older compilers.
1341 auto Err2 =
1342 handleErrors(E: std::move(Err), Hs: [&](const InstrProfError &IE) -> Error {
1343 if (IE.get() != instrprof_error::unknown_function)
1344 return make_error<InstrProfError>(Args: IE);
1345 if (auto Err = Remapper->getRecords(FuncName: DeprecatedFuncName, Data))
1346 return Err;
1347 return Error::success();
1348 });
1349 if (Err2)
1350 return std::move(Err2);
1351 }
1352 // Found it. Look for counters with the right hash.
1353
1354 // A flag to indicate if the records are from the same type
1355 // of profile (i.e cs vs nocs).
1356 bool CSBitMatch = false;
1357 auto getFuncSum = [](const std::vector<uint64_t> &Counts) {
1358 uint64_t ValueSum = 0;
1359 for (uint64_t CountValue : Counts) {
1360 if (CountValue == (uint64_t)-1)
1361 continue;
1362 // Handle overflow -- if that happens, return max.
1363 if (std::numeric_limits<uint64_t>::max() - CountValue <= ValueSum)
1364 return std::numeric_limits<uint64_t>::max();
1365 ValueSum += CountValue;
1366 }
1367 return ValueSum;
1368 };
1369
1370 for (const NamedInstrProfRecord &I : Data) {
1371 // Check for a match and fill the vector if there is one.
1372 if (I.Hash == FuncHash)
1373 return std::move(I);
1374 if (NamedInstrProfRecord::hasCSFlagInHash(FuncHash: I.Hash) ==
1375 NamedInstrProfRecord::hasCSFlagInHash(FuncHash)) {
1376 CSBitMatch = true;
1377 if (MismatchedFuncSum == nullptr)
1378 continue;
1379 FuncSum = std::max(a: FuncSum, b: getFuncSum(I.Counts));
1380 }
1381 }
1382 if (CSBitMatch) {
1383 if (MismatchedFuncSum != nullptr)
1384 *MismatchedFuncSum = FuncSum;
1385 return error(Err: instrprof_error::hash_mismatch);
1386 }
1387 return error(Err: instrprof_error::unknown_function);
1388}
1389
1390Expected<memprof::MemProfRecord>
1391IndexedInstrProfReader::getMemProfRecord(const uint64_t FuncNameHash) {
1392 // TODO: Add memprof specific errors.
1393 if (MemProfRecordTable == nullptr)
1394 return make_error<InstrProfError>(Args: instrprof_error::invalid_prof,
1395 Args: "no memprof data available in profile");
1396 auto Iter = MemProfRecordTable->find(EKey: FuncNameHash);
1397 if (Iter == MemProfRecordTable->end())
1398 return make_error<InstrProfError>(
1399 Args: instrprof_error::unknown_function,
1400 Args: "memprof record not found for function hash " + Twine(FuncNameHash));
1401
1402 // Setup a callback to convert from frame ids to frame using the on-disk
1403 // FrameData hash table.
1404 memprof::FrameId LastUnmappedFrameId = 0;
1405 bool HasFrameMappingError = false;
1406 auto IdToFrameCallback = [&](const memprof::FrameId Id) {
1407 auto FrIter = MemProfFrameTable->find(EKey: Id);
1408 if (FrIter == MemProfFrameTable->end()) {
1409 LastUnmappedFrameId = Id;
1410 HasFrameMappingError = true;
1411 return memprof::Frame(0, 0, 0, false);
1412 }
1413 return *FrIter;
1414 };
1415
1416 memprof::MemProfRecord Record(*Iter, IdToFrameCallback);
1417
1418 // Check that all frame ids were successfully converted to frames.
1419 if (HasFrameMappingError) {
1420 return make_error<InstrProfError>(Args: instrprof_error::hash_mismatch,
1421 Args: "memprof frame not found for frame id " +
1422 Twine(LastUnmappedFrameId));
1423 }
1424 return Record;
1425}
1426
1427Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName,
1428 uint64_t FuncHash,
1429 std::vector<uint64_t> &Counts) {
1430 Expected<InstrProfRecord> Record = getInstrProfRecord(FuncName, FuncHash);
1431 if (Error E = Record.takeError())
1432 return error(E: std::move(E));
1433
1434 Counts = Record.get().Counts;
1435 return success();
1436}
1437
1438Error IndexedInstrProfReader::getFunctionBitmap(StringRef FuncName,
1439 uint64_t FuncHash,
1440 BitVector &Bitmap) {
1441 Expected<InstrProfRecord> Record = getInstrProfRecord(FuncName, FuncHash);
1442 if (Error E = Record.takeError())
1443 return error(E: std::move(E));
1444
1445 const auto &BitmapBytes = Record.get().BitmapBytes;
1446 size_t I = 0, E = BitmapBytes.size();
1447 Bitmap.resize(N: E * CHAR_BIT);
1448 BitVector::apply(
1449 f: [&](auto X) {
1450 using XTy = decltype(X);
1451 alignas(XTy) uint8_t W[sizeof(X)];
1452 size_t N = std::min(a: E - I, b: sizeof(W));
1453 std::memset(s: W, c: 0, n: sizeof(W));
1454 std::memcpy(dest: W, src: &BitmapBytes[I], n: N);
1455 I += N;
1456 return support::endian::read<XTy, llvm::endianness::little,
1457 support::aligned>(W);
1458 },
1459 Out&: Bitmap, Arg: Bitmap);
1460 assert(I == E);
1461
1462 return success();
1463}
1464
1465Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
1466 ArrayRef<NamedInstrProfRecord> Data;
1467
1468 Error E = Index->getRecords(Data);
1469 if (E)
1470 return error(E: std::move(E));
1471
1472 Record = Data[RecordIndex++];
1473 if (RecordIndex >= Data.size()) {
1474 Index->advanceToNextKey();
1475 RecordIndex = 0;
1476 }
1477 return success();
1478}
1479
1480Error IndexedInstrProfReader::readBinaryIds(
1481 std::vector<llvm::object::BuildID> &BinaryIds) {
1482 return readBinaryIdsInternal(DataBuffer: *DataBuffer, BinaryIdsSize, BinaryIdsStart,
1483 BinaryIds, Endian: llvm::endianness::little);
1484}
1485
1486Error IndexedInstrProfReader::printBinaryIds(raw_ostream &OS) {
1487 std::vector<llvm::object::BuildID> BinaryIds;
1488 if (Error E = readBinaryIds(BinaryIds))
1489 return E;
1490 printBinaryIdsInternal(OS, BinaryIds);
1491 return Error::success();
1492}
1493
1494void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) {
1495 uint64_t NumFuncs = 0;
1496 for (const auto &Func : *this) {
1497 if (isIRLevelProfile()) {
1498 bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(FuncHash: Func.Hash);
1499 if (FuncIsCS != IsCS)
1500 continue;
1501 }
1502 Func.accumulateCounts(Sum);
1503 ++NumFuncs;
1504 }
1505 Sum.NumEntries = NumFuncs;
1506}
1507

source code of llvm/lib/ProfileData/InstrProfReader.cpp