//===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for reading MemProf profiling data.
//
//===----------------------------------------------------------------------===//

#include <algorithm>
#include <cstdint>
#include <memory>
#include <type_traits>

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/BuildID.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/MemProfData.inc"
#include "llvm/ProfileData/RawMemProfReader.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "memprof"

namespace llvm {
namespace memprof {
namespace {
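// Reads a value of type T from Ptr, asserting that the address is suitably
// aligned; callers rely on the profile buffer being allocated with at least
// 8-byte alignment (see hasFormat below).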
template <class T = uint64_t> inline T alignedRead(const char *Ptr) {
  static_assert(std::is_pod<T>::value, "Not a pod type.");
  assert(reinterpret_cast<size_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read");
  return *reinterpret_cast<const T *>(Ptr);
}

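// Validates the profile buffer. A raw profile file may contain several
// concatenated dumps, each starting with a Header (see MemProfData.inc) whose
// TotalSize field covers the whole dump. Conceptually the layout is:
//   [Header | Segments | MIBs | Stacks][Header | ...]...
// where the Segment/MIB/Stack offsets in each header are relative to the
// start of that header.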
Error checkBuffer(const MemoryBuffer &Buffer) {
  if (!RawMemProfReader::hasFormat(Buffer))
    return make_error<InstrProfError>(instrprof_error::bad_magic);

  if (Buffer.getBufferSize() == 0)
    return make_error<InstrProfError>(instrprof_error::empty_raw_profile);

  if (Buffer.getBufferSize() < sizeof(Header)) {
    return make_error<InstrProfError>(instrprof_error::truncated);
  }

  // The size of the buffer can be > header total size since we allow repeated
  // serialization of memprof profiles to the same file.
  uint64_t TotalSize = 0;
  const char *Next = Buffer.getBufferStart();
  while (Next < Buffer.getBufferEnd()) {
    auto *H = reinterpret_cast<const Header *>(Next);
    if (H->Version != MEMPROF_RAW_VERSION) {
      return make_error<InstrProfError>(instrprof_error::unsupported_version);
    }

    TotalSize += H->TotalSize;
    Next += H->TotalSize;
  }

  if (Buffer.getBufferSize() != TotalSize) {
    return make_error<InstrProfError>(instrprof_error::malformed);
  }
  return Error::success();
}

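// Reads a count-prefixed array of SegmentEntry structs starting at Ptr: a
// little-endian uint64_t item count followed by the raw entries.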
llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
  llvm::SmallVector<SegmentEntry> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    Items.push_back(*reinterpret_cast<const SegmentEntry *>(
        Ptr + I * sizeof(SegmentEntry)));
  }
  return Items;
}

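// Reads a count-prefixed list of (stack id, MemInfoBlock) pairs; each pair is
// a little-endian uint64_t id immediately followed by a raw MemInfoBlock.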
llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
readMemInfoBlocks(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
  llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t Id =
        endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
    const MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
    Items.push_back({Id, MIB});
    // Only increment by the size of the MIB here since readNext implicitly
    // advances Ptr past the id it reads.
    Ptr += sizeof(MemInfoBlock);
  }
  return Items;
}

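// Reads the serialized callstacks: a count-prefixed sequence of records, each
// a uint64_t stack id, a uint64_t count of program counters, and that many
// uint64_t virtual addresses, all little-endian.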
CallStackMap readStackInfo(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
  CallStackMap Items;

  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t StackId =
        endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
    const uint64_t NumPCs =
        endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);

    SmallVector<uint64_t> CallStack;
    for (uint64_t J = 0; J < NumPCs; J++) {
      CallStack.push_back(
          endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr));
    }

    Items[StackId] = CallStack;
  }
  return Items;
}

// Merges the contents of stack information in \p From to \p To. Returns true
// if any stack ids observed previously map to a different set of program
// counter addresses.
bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
  for (const auto &IdStack : From) {
    auto I = To.find(IdStack.first);
    if (I == To.end()) {
      To[IdStack.first] = IdStack.second;
    } else {
      // Check that the PCs are the same (in order).
      if (IdStack.second != I->second)
        return true;
    }
  }
  return false;
}

Error report(Error E, const StringRef Context) {
  return joinErrors(createStringError(inconvertibleErrorCode(), Context),
                    std::move(E));
}

bool isRuntimePath(const StringRef Path) {
  const StringRef Filename = llvm::sys::path::filename(Path);
  // This list should be updated in case new files with additional interceptors
  // are added to the memprof runtime.
  return Filename.equals("memprof_malloc_linux.cpp") ||
         Filename.equals("memprof_interceptors.cpp") ||
         Filename.equals("memprof_new_delete.cpp");
}

std::string getBuildIdString(const SegmentEntry &Entry) {
  // If the build id is unset, print a helpful string instead of all zeros.
  if (Entry.BuildIdSize == 0)
    return "<None>";

  std::string Str;
  raw_string_ostream OS(Str);
  for (size_t I = 0; I < Entry.BuildIdSize; I++) {
    OS << format_hex_no_prefix(Entry.BuildId[I], 2);
  }
  return OS.str();
}
} // namespace

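// Example usage (illustrative; the file names are hypothetical):
//   auto ReaderOr = RawMemProfReader::create("memprof.profraw", "a.out");
//   if (!ReaderOr)
//     return ReaderOr.takeError();
//   for (const auto &[GUID, Record] : **ReaderOr)
//     ... consume each symbolized memprof record ...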
Expected<std::unique_ptr<RawMemProfReader>>
RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary,
                         bool KeepName) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (std::error_code EC = BufferOr.getError())
    return report(errorCodeToError(EC), Path.getSingleStringRef());

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return create(std::move(Buffer), ProfiledBinary, KeepName);
}

Expected<std::unique_ptr<RawMemProfReader>>
RawMemProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
                         const StringRef ProfiledBinary, bool KeepName) {
  if (Error E = checkBuffer(*Buffer))
    return report(std::move(E), Buffer->getBufferIdentifier());

  if (ProfiledBinary.empty()) {
    // Peek the build ids to print a helpful error message.
    const std::vector<std::string> BuildIds = peekBuildIds(Buffer.get());
    std::string ErrorMessage(
        R"(Path to profiled binary is empty, expected binary with one of the following build ids:
)");
    for (const auto &Id : BuildIds) {
      ErrorMessage += "\n BuildId: ";
      ErrorMessage += Id;
    }
    return report(
        make_error<StringError>(ErrorMessage, inconvertibleErrorCode()),
        /*Context=*/"");
  }

  auto BinaryOr = llvm::object::createBinary(ProfiledBinary);
  if (!BinaryOr) {
    return report(BinaryOr.takeError(), ProfiledBinary);
  }

  // Use new here since the constructor is private.
  std::unique_ptr<RawMemProfReader> Reader(
      new RawMemProfReader(std::move(BinaryOr.get()), KeepName));
  if (Error E = Reader->initialize(std::move(Buffer))) {
    return std::move(E);
  }
  return std::move(Reader);
}

bool RawMemProfReader::hasFormat(const StringRef Path) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (!BufferOr)
    return false;

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return hasFormat(*Buffer);
}

bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
  if (Buffer.getBufferSize() < sizeof(uint64_t))
    return false;
  // Aligned read to sanity check that the buffer was allocated with at least
  // 8-byte alignment.
  const uint64_t Magic = alignedRead(Buffer.getBufferStart());
  return Magic == MEMPROF_RAW_MAGIC_64;
}

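// Prints a YAML view of the profile with the following shape:
//   MemprofProfile:
//     Summary:  Version, NumSegments, NumMibInfo, NumAllocFunctions,
//               NumStackOffsets
//     Segments: per-segment BuildId and Start/End/Offset addresses
//     Records:  per-function GUID plus the symbolized memprof record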
void RawMemProfReader::printYAML(raw_ostream &OS) {
  uint64_t NumAllocFunctions = 0, NumMibInfo = 0;
  for (const auto &KV : FunctionProfileData) {
    const size_t NumAllocSites = KV.second.AllocSites.size();
    if (NumAllocSites > 0) {
      NumAllocFunctions++;
      NumMibInfo += NumAllocSites;
    }
  }

  OS << "MemprofProfile:\n";
  OS << "  Summary:\n";
  OS << "    Version: " << MEMPROF_RAW_VERSION << "\n";
  OS << "    NumSegments: " << SegmentInfo.size() << "\n";
  OS << "    NumMibInfo: " << NumMibInfo << "\n";
  OS << "    NumAllocFunctions: " << NumAllocFunctions << "\n";
  OS << "    NumStackOffsets: " << StackMap.size() << "\n";
  // Print out the segment information.
  OS << "  Segments:\n";
  for (const auto &Entry : SegmentInfo) {
    OS << "  -\n";
    OS << "    BuildId: " << getBuildIdString(Entry) << "\n";
    OS << "    Start: 0x" << llvm::utohexstr(Entry.Start) << "\n";
    OS << "    End: 0x" << llvm::utohexstr(Entry.End) << "\n";
    OS << "    Offset: 0x" << llvm::utohexstr(Entry.Offset) << "\n";
  }
  // Print out the merged contents of the profiles.
  OS << "  Records:\n";
  for (const auto &Entry : *this) {
    OS << "  -\n";
    OS << "    FunctionGUID: " << Entry.first << "\n";
    Entry.second.print(OS);
  }
}

Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) {
  const StringRef FileName = Binary.getBinary()->getFileName();

  auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary());
  if (!ElfObject) {
    return report(make_error<StringError>(Twine("Not an ELF file: "),
                                          inconvertibleErrorCode()),
                  FileName);
  }

  // Check whether the profiled binary was built with position independent code
  // (PIC). Perform sanity checks for assumptions we rely on to simplify
  // symbolization.
  auto *Elf64LEObject = llvm::cast<llvm::object::ELF64LEObjectFile>(ElfObject);
  const llvm::object::ELF64LEFile &ElfFile = Elf64LEObject->getELFFile();
  auto PHdrsOr = ElfFile.program_headers();
  if (!PHdrsOr)
    return report(
        make_error<StringError>(Twine("Could not read program headers: "),
                                inconvertibleErrorCode()),
        FileName);

  int NumExecutableSegments = 0;
  for (const auto &Phdr : *PHdrsOr) {
    if (Phdr.p_type == ELF::PT_LOAD) {
      if (Phdr.p_flags & ELF::PF_X) {
        // We assume only one text segment in the main binary for simplicity
        // and to reduce the overhead of checking multiple ranges during
        // symbolization.
        if (++NumExecutableSegments > 1) {
          return report(
              make_error<StringError>(
                  "Expect only one executable load segment in the binary",
                  inconvertibleErrorCode()),
              FileName);
        }
        // The segment will always be loaded at a page boundary; expect it to
        // be aligned already. Assume a 4K pagesize for the machine on which
        // the profile was collected. This should be fine for now; if we want
        // to support other pagesizes, they can be recorded in the raw profile
        // during collection.
        PreferredTextSegmentAddress = Phdr.p_vaddr;
        assert(Phdr.p_vaddr == (Phdr.p_vaddr & ~(0x1000 - 1U)) &&
               "Expect p_vaddr to always be page aligned");
        assert(Phdr.p_offset == 0 && "Expect p_offset = 0 for symbolization.");
      }
    }
  }

  auto Triple = ElfObject->makeTriple();
  if (!Triple.isX86())
    return report(make_error<StringError>(Twine("Unsupported target: ") +
                                              Triple.getArchName(),
                                          inconvertibleErrorCode()),
                  FileName);

  // Process the raw profile.
  if (Error E = readRawProfile(std::move(DataBuffer)))
    return E;

  if (Error E = setupForSymbolization())
    return E;

  auto *Object = cast<object::ObjectFile>(Binary.getBinary());
  std::unique_ptr<DIContext> Context = DWARFContext::create(
      *Object, DWARFContext::ProcessDebugRelocations::Process);

  auto SOFOr = symbolize::SymbolizableObjectFile::create(
      Object, std::move(Context), /*UntagAddresses=*/false);
  if (!SOFOr)
    return report(SOFOr.takeError(), FileName);
  auto Symbolizer = std::move(SOFOr.get());

  // The symbolizer ownership is moved into symbolizeAndFilterStackFrames so
  // that it is freed automatically at the end, when it is no longer used. This
  // reduces peak memory since it won't be live while also mapping the raw
  // profile into records afterwards.
  if (Error E = symbolizeAndFilterStackFrames(std::move(Symbolizer)))
    return E;

  return mapRawProfileToRecords();
}

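// Matches the build id of the profiled binary against the build ids recorded
// in the raw profile's segment entries, and caches the profiled text segment
// range which getModuleOffset() later uses to rebase virtual addresses.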
Error RawMemProfReader::setupForSymbolization() {
  auto *Object = cast<object::ObjectFile>(Binary.getBinary());
  object::BuildIDRef BinaryId = object::getBuildID(Object);
  if (BinaryId.empty())
    return make_error<StringError>(Twine("No build id found in binary ") +
                                       Binary.getBinary()->getFileName(),
                                   inconvertibleErrorCode());

  int NumMatched = 0;
  for (const auto &Entry : SegmentInfo) {
    llvm::ArrayRef<uint8_t> SegmentId(Entry.BuildId, Entry.BuildIdSize);
    if (BinaryId == SegmentId) {
      // We assume only one text segment in the main binary for simplicity and
      // to reduce the overhead of checking multiple ranges during
      // symbolization.
      if (++NumMatched > 1) {
        return make_error<StringError>(
            "We expect only one executable segment in the profiled binary",
            inconvertibleErrorCode());
      }
      ProfiledTextSegmentStart = Entry.Start;
      ProfiledTextSegmentEnd = Entry.End;
    }
  }
  assert(NumMatched != 0 && "No matching executable segments in segment info.");
  assert((PreferredTextSegmentAddress == 0 ||
          (PreferredTextSegmentAddress == ProfiledTextSegmentStart)) &&
         "Expect text segment address to be 0 or equal to profiled text "
         "segment start.");
  return Error::success();
}

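// Converts the symbolized callstack data into IndexedMemProfRecords keyed by
// function GUID. Each allocation context is attached bottom-up to every
// inlined function at the allocation site plus the first non-inline caller,
// and callsite locations are collected per enclosing function.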
Error RawMemProfReader::mapRawProfileToRecords() {
  // Hold a mapping from function to each callsite location we encounter within
  // it that is part of some dynamic allocation context. The location is stored
  // as a pointer to a symbolized list of inline frames.
  using LocationPtr = const llvm::SmallVector<FrameId> *;
  llvm::MapVector<GlobalValue::GUID, llvm::SetVector<LocationPtr>>
      PerFunctionCallSites;

  // Convert the raw profile callstack data into memprof records. While doing
  // so, keep track of related contexts so that we can fill these in later.
  for (const auto &Entry : CallstackProfileData) {
    const uint64_t StackId = Entry.first;

    auto It = StackMap.find(StackId);
    if (It == StackMap.end())
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof callstack record does not contain id: " + Twine(StackId));

    // Construct the symbolized callstack.
    llvm::SmallVector<FrameId> Callstack;
    Callstack.reserve(It->getSecond().size());

    llvm::ArrayRef<uint64_t> Addresses = It->getSecond();
    for (size_t I = 0; I < Addresses.size(); I++) {
      const uint64_t Address = Addresses[I];
      assert(SymbolizedFrame.count(Address) > 0 &&
             "Address not found in SymbolizedFrame map");
      const SmallVector<FrameId> &Frames = SymbolizedFrame[Address];

      assert(!idToFrame(Frames.back()).IsInlineFrame &&
             "The last frame should not be inlined");

      // Record the callsites for each function. Skip the first frame of the
      // first address since it is the allocation site itself that is recorded
      // as an alloc site.
      for (size_t J = 0; J < Frames.size(); J++) {
        if (I == 0 && J == 0)
          continue;
        // We attach the entire bottom-up frame here for the callsite even
        // though we only need the frames up to and including the frame for
        // Frames[J].Function. This will enable better deduplication for
        // compression in the future.
        const GlobalValue::GUID Guid = idToFrame(Frames[J]).Function;
        PerFunctionCallSites[Guid].insert(&Frames);
      }

      // Add all the frames to the current allocation callstack.
      Callstack.append(Frames.begin(), Frames.end());
    }

    // We attach the memprof record to each function bottom-up, including the
    // first non-inline frame.
    for (size_t I = 0; /*Break out using the condition below*/; I++) {
      const Frame &F = idToFrame(Callstack[I]);
      auto Result =
          FunctionProfileData.insert({F.Function, IndexedMemProfRecord()});
      IndexedMemProfRecord &Record = Result.first->second;
      Record.AllocSites.emplace_back(Callstack, Entry.second);

      if (!F.IsInlineFrame)
        break;
    }
  }

  // Fill in the related callsites per function.
  for (const auto &[Id, Locs] : PerFunctionCallSites) {
    // Some functions may have only callsite data and no allocation data. Here
    // we insert a new entry for callsite data if we need to.
    auto Result = FunctionProfileData.insert({Id, IndexedMemProfRecord()});
    IndexedMemProfRecord &Record = Result.first->second;
    for (LocationPtr Loc : Locs) {
      Record.CallSites.push_back(*Loc);
    }
  }

  return Error::success();
}

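// Symbolizes each unique virtual address in the stack map once, caching the
// resulting inline frames in SymbolizedFrame. Addresses that fail
// symbolization or point into the memprof runtime are discarded, and
// callstacks left empty as a result are erased along with their profile data.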
Error RawMemProfReader::symbolizeAndFilterStackFrames(
    std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer) {
  // The specifier to use when symbolization is requested.
  const DILineInfoSpecifier Specifier(
      DILineInfoSpecifier::FileLineInfoKind::RawValue,
      DILineInfoSpecifier::FunctionNameKind::LinkageName);

  // For entries where all PCs in the callstack are discarded, we erase the
  // entry from the stack map.
  llvm::SmallVector<uint64_t> EntriesToErase;
  // We keep track of all prior discarded entries so that we can avoid invoking
  // the symbolizer for such entries.
  llvm::DenseSet<uint64_t> AllVAddrsToDiscard;
  for (auto &Entry : StackMap) {
    for (const uint64_t VAddr : Entry.getSecond()) {
      // Check if we have already symbolized and cached the result or if we
      // don't want to attempt symbolization since we know this address is bad.
      // In this case the address is also removed from the current callstack.
      if (SymbolizedFrame.count(VAddr) > 0 ||
          AllVAddrsToDiscard.contains(VAddr))
        continue;

      Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
          getModuleOffset(VAddr), Specifier, /*UseSymbolTable=*/false);
      if (!DIOr)
        return DIOr.takeError();
      DIInliningInfo DI = DIOr.get();

      // Drop frames which we can't symbolize or if they belong to the runtime.
      if (DI.getFrame(0).FunctionName == DILineInfo::BadString ||
          isRuntimePath(DI.getFrame(0).FileName)) {
        AllVAddrsToDiscard.insert(VAddr);
        continue;
      }

      for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames;
           I++) {
        const auto &DIFrame = DI.getFrame(I);
        const uint64_t Guid =
            IndexedMemProfRecord::getGUID(DIFrame.FunctionName);
        const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
                      // Only the last entry is not an inlined location.
                      I != NumFrames - 1);
        // Here we retain a mapping from the GUID to canonical symbol name
        // instead of adding it to the frame object directly to reduce memory
        // overhead. This is because there can be many unique frames,
        // particularly for callsite frames.
        if (KeepSymbolName) {
          StringRef CanonicalName =
              sampleprof::FunctionSamples::getCanonicalFnName(
                  DIFrame.FunctionName);
          GuidToSymbolName.insert({Guid, CanonicalName.str()});
        }

        const FrameId Hash = F.hash();
        IdToFrame.insert({Hash, F});
        SymbolizedFrame[VAddr].push_back(Hash);
      }
    }

    auto &CallStack = Entry.getSecond();
    llvm::erase_if(CallStack, [&AllVAddrsToDiscard](const uint64_t A) {
      return AllVAddrsToDiscard.contains(A);
    });
    if (CallStack.empty())
      EntriesToErase.push_back(Entry.getFirst());
  }

  // Drop the entries where the callstack is empty.
  for (const uint64_t Id : EntriesToErase) {
    StackMap.erase(Id);
    CallstackProfileData.erase(Id);
  }

  if (StackMap.empty())
    return make_error<InstrProfError>(
        instrprof_error::malformed,
        "no entries in callstack map after symbolization");

  return Error::success();
}

std::vector<std::string>
RawMemProfReader::peekBuildIds(MemoryBuffer *DataBuffer) {
  const char *Next = DataBuffer->getBufferStart();
  // Use a set + vector since a profile file may contain multiple raw profile
  // dumps, each with segment information. We want them unique and in the order
  // they were stored in the profile; the profiled binary should be the first
  // entry. The runtime uses dl_iterate_phdr and the "... first object visited
  // by callback is the main program."
  // https://man7.org/linux/man-pages/man3/dl_iterate_phdr.3.html
  std::vector<std::string> BuildIds;
  llvm::SmallSet<std::string, 10> BuildIdsSet;
  while (Next < DataBuffer->getBufferEnd()) {
    auto *Header = reinterpret_cast<const memprof::Header *>(Next);

    const llvm::SmallVector<SegmentEntry> Entries =
        readSegmentEntries(Next + Header->SegmentOffset);

    for (const auto &Entry : Entries) {
      const std::string Id = getBuildIdString(Entry);
      if (BuildIdsSet.contains(Id))
        continue;
      BuildIds.push_back(Id);
      BuildIdsSet.insert(Id);
    }

    Next += Header->TotalSize;
  }
  return BuildIds;
}

Error RawMemProfReader::readRawProfile(
    std::unique_ptr<MemoryBuffer> DataBuffer) {
  const char *Next = DataBuffer->getBufferStart();

  while (Next < DataBuffer->getBufferEnd()) {
    auto *Header = reinterpret_cast<const memprof::Header *>(Next);

    // Read in the segment information and check whether it's the same across
    // all profiles in this binary file.
    const llvm::SmallVector<SegmentEntry> Entries =
        readSegmentEntries(Next + Header->SegmentOffset);
    if (!SegmentInfo.empty() && SegmentInfo != Entries) {
      // We do not expect segment information to change when deserializing from
      // the same binary profile file. This can happen if dynamic libraries are
      // loaded/unloaded between profile dumps.
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof raw profile has different segment information");
    }
    SegmentInfo.assign(Entries.begin(), Entries.end());

    // Read in the MemInfoBlocks. Merge them based on stack id - we assume that
    // raw profiles in the same binary file are from the same process so the
    // stackdepot ids are the same.
    for (const auto &Value : readMemInfoBlocks(Next + Header->MIBOffset)) {
      if (CallstackProfileData.count(Value.first)) {
        CallstackProfileData[Value.first].Merge(Value.second);
      } else {
        CallstackProfileData[Value.first] = Value.second;
      }
    }

    // Read in the callstack for each id. For multiple raw profiles in the
    // same file, we expect that the callstack is the same for a unique id.
    const CallStackMap CSM = readStackInfo(Next + Header->StackOffset);
    if (StackMap.empty()) {
      StackMap = CSM;
    } else {
      if (mergeStackMap(CSM, StackMap))
        return make_error<InstrProfError>(
            instrprof_error::malformed,
            "memprof raw profile got different call stack for same id");
    }

    Next += Header->TotalSize;
  }

  return Error::success();
}

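// Illustration (hypothetical addresses): for a PIE binary whose executable
// segment was profiled at [0x555555554000, 0x555555564000) with a preferred
// address of 0, a profiled PC of 0x555555555123 is adjusted to 0x1123 before
// being handed to the symbolizer.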
object::SectionedAddress
RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
  if (VirtualAddress > ProfiledTextSegmentStart &&
      VirtualAddress <= ProfiledTextSegmentEnd) {
    // For PIE binaries, the preferred address is zero and we adjust the
    // virtual address by the start of the profiled segment, assuming that the
    // offset of the segment in the binary is zero. For non-PIE binaries the
    // preferred and profiled segment addresses should be equal and this is a
    // no-op.
    const uint64_t AdjustedAddress =
        VirtualAddress + PreferredTextSegmentAddress - ProfiledTextSegmentStart;
    return object::SectionedAddress{AdjustedAddress};
  }
  // Addresses which do not originate from the profiled text segment in the
  // binary are not adjusted. These will fail symbolization and be filtered out
  // during processing.
  return object::SectionedAddress{VirtualAddress};
}

Error RawMemProfReader::readNextRecord(
    GuidMemProfRecordPair &GuidRecord,
    std::function<const Frame(const FrameId)> Callback) {
  // Create a new callback for the RawMemProfRecord iterator so that we can
  // provide the symbol name if the reader was initialized with KeepSymbolName
  // = true. This is useful for debugging and testing.
  auto IdToFrameCallback = [this](const FrameId Id) {
    Frame F = this->idToFrame(Id);
    if (!this->KeepSymbolName)
      return F;
    auto Iter = this->GuidToSymbolName.find(F.Function);
    assert(Iter != this->GuidToSymbolName.end());
    F.SymbolName = Iter->getSecond();
    return F;
  };
  return MemProfReader::readNextRecord(GuidRecord, IdToFrameCallback);
}
} // namespace memprof
} // namespace llvm