1//===- MemProfReader.h - Instrumented memory profiling reader ---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains support for reading MemProf profiling data.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_PROFILEDATA_MEMPROFREADER_H_
14#define LLVM_PROFILEDATA_MEMPROFREADER_H_
15
16#include "llvm/ADT/DenseMap.h"
17#include "llvm/ADT/MapVector.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
20#include "llvm/DebugInfo/Symbolize/Symbolize.h"
21#include "llvm/IR/GlobalValue.h"
22#include "llvm/Object/Binary.h"
23#include "llvm/Object/ObjectFile.h"
24#include "llvm/ProfileData/InstrProfReader.h"
25#include "llvm/ProfileData/MemProf.h"
26#include "llvm/ProfileData/MemProfData.inc"
27#include "llvm/Support/Error.h"
28#include "llvm/Support/MemoryBuffer.h"
29
30#include <functional>
31
32namespace llvm {
33namespace memprof {
34// A class for memprof profile data populated directly from external
35// sources.
36class MemProfReader {
37public:
38 // The MemProfReader only holds memory profile information.
39 InstrProfKind getProfileKind() const { return InstrProfKind::MemProf; }
40
41 using GuidMemProfRecordPair = std::pair<GlobalValue::GUID, MemProfRecord>;
42 using Iterator = InstrProfIterator<GuidMemProfRecordPair, MemProfReader>;
43 Iterator end() { return Iterator(); }
44 Iterator begin() {
45 Iter = FunctionProfileData.begin();
46 return Iterator(this);
47 }
48
49 // Return a const reference to the internal Id to Frame mappings.
50 const llvm::DenseMap<FrameId, Frame> &getFrameMapping() const {
51 return IdToFrame;
52 }
53
54 // Return a const reference to the internal Id to call stacks.
55 const llvm::DenseMap<CallStackId, llvm::SmallVector<FrameId>> &
56 getCallStacks() const {
57 return CSIdToCallStack;
58 }
59
60 // Return a const reference to the internal function profile data.
61 const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> &
62 getProfileData() const {
63 return FunctionProfileData;
64 }
65
66 virtual Error
67 readNextRecord(GuidMemProfRecordPair &GuidRecord,
68 std::function<const Frame(const FrameId)> Callback = nullptr) {
69 if (FunctionProfileData.empty())
70 return make_error<InstrProfError>(Args: instrprof_error::empty_raw_profile);
71
72 if (Iter == FunctionProfileData.end())
73 return make_error<InstrProfError>(Args: instrprof_error::eof);
74
75 if (Callback == nullptr)
76 Callback =
77 std::bind(f: &MemProfReader::idToFrame, args: this, args: std::placeholders::_1);
78
79 auto CallStackCallback = [&](CallStackId CSId) {
80 llvm::SmallVector<Frame> CallStack;
81 auto Iter = CSIdToCallStack.find(Val: CSId);
82 assert(Iter != CSIdToCallStack.end());
83 for (FrameId Id : Iter->second)
84 CallStack.push_back(Elt: Callback(Id));
85 return CallStack;
86 };
87
88 const IndexedMemProfRecord &IndexedRecord = Iter->second;
89 GuidRecord = {
90 Iter->first,
91 IndexedRecord.toMemProfRecord(Callback: CallStackCallback),
92 };
93 Iter++;
94 return Error::success();
95 }
96
97 // Allow default construction for derived classes which can populate the
98 // contents after construction.
99 MemProfReader() = default;
100 virtual ~MemProfReader() = default;
101
102 // Initialize the MemProfReader with the frame mappings and profile contents.
103 MemProfReader(
104 llvm::DenseMap<FrameId, Frame> FrameIdMap,
105 llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData);
106
107 // Initialize the MemProfReader with the frame mappings, call stack mappings,
108 // and profile contents.
109 MemProfReader(
110 llvm::DenseMap<FrameId, Frame> FrameIdMap,
111 llvm::DenseMap<CallStackId, llvm::SmallVector<FrameId>> CSIdMap,
112 llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData)
113 : IdToFrame(std::move(FrameIdMap)), CSIdToCallStack(std::move(CSIdMap)),
114 FunctionProfileData(std::move(ProfData)) {}
115
116protected:
117 // A helper method to extract the frame from the IdToFrame map.
118 const Frame &idToFrame(const FrameId Id) const {
119 auto It = IdToFrame.find(Val: Id);
120 assert(It != IdToFrame.end() && "Id not found in map.");
121 return It->getSecond();
122 }
123 // A mapping from FrameId (a hash of the contents) to the frame.
124 llvm::DenseMap<FrameId, Frame> IdToFrame;
125 // A mapping from CallStackId to the call stack.
126 llvm::DenseMap<CallStackId, llvm::SmallVector<FrameId>> CSIdToCallStack;
127 // A mapping from function GUID, hash of the canonical function symbol to the
128 // memprof profile data for that function, i.e allocation and callsite info.
129 llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> FunctionProfileData;
130 // An iterator to the internal function profile data structure.
131 llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>::iterator Iter;
132};
133
134// Map from id (recorded from sanitizer stack depot) to virtual addresses for
135// each program counter address in the callstack.
136using CallStackMap = llvm::DenseMap<uint64_t, llvm::SmallVector<uint64_t>>;
137
138// Specializes the MemProfReader class to populate the contents from raw binary
139// memprof profiles from instrumentation based profiling.
140class RawMemProfReader final : public MemProfReader {
141public:
142 RawMemProfReader(const RawMemProfReader &) = delete;
143 RawMemProfReader &operator=(const RawMemProfReader &) = delete;
144 virtual ~RawMemProfReader() override = default;
145
146 // Prints the contents of the profile in YAML format.
147 void printYAML(raw_ostream &OS);
148
149 // Return true if the \p DataBuffer starts with magic bytes indicating it is
150 // a raw binary memprof profile.
151 static bool hasFormat(const MemoryBuffer &DataBuffer);
152 // Return true if the file at \p Path starts with magic bytes indicating it is
153 // a raw binary memprof profile.
154 static bool hasFormat(const StringRef Path);
155
156 // Create a RawMemProfReader after sanity checking the contents of the file at
157 // \p Path or the \p Buffer. The binary from which the profile has been
158 // collected is specified via a path in \p ProfiledBinary.
159 static Expected<std::unique_ptr<RawMemProfReader>>
160 create(const Twine &Path, StringRef ProfiledBinary, bool KeepName = false);
161 static Expected<std::unique_ptr<RawMemProfReader>>
162 create(std::unique_ptr<MemoryBuffer> Buffer, StringRef ProfiledBinary,
163 bool KeepName = false);
164
165 // Returns a list of build ids recorded in the segment information.
166 static std::vector<std::string> peekBuildIds(MemoryBuffer *DataBuffer);
167
168 virtual Error
169 readNextRecord(GuidMemProfRecordPair &GuidRecord,
170 std::function<const Frame(const FrameId)> Callback) override;
171
172 // Constructor for unittests only.
173 RawMemProfReader(std::unique_ptr<llvm::symbolize::SymbolizableModule> Sym,
174 llvm::SmallVectorImpl<SegmentEntry> &Seg,
175 llvm::MapVector<uint64_t, MemInfoBlock> &Prof,
176 CallStackMap &SM, bool KeepName = false)
177 : SegmentInfo(Seg.begin(), Seg.end()), CallstackProfileData(Prof),
178 StackMap(SM), KeepSymbolName(KeepName) {
179 // We don't call initialize here since there is no raw profile to read. The
180 // test should pass in the raw profile as structured data.
181
182 // If there is an error here then the mock symbolizer has not been
183 // initialized properly.
184 if (Error E = symbolizeAndFilterStackFrames(Symbolizer: std::move(Sym)))
185 report_fatal_error(Err: std::move(E));
186 if (Error E = mapRawProfileToRecords())
187 report_fatal_error(Err: std::move(E));
188 }
189
190private:
191 RawMemProfReader(object::OwningBinary<object::Binary> &&Bin, bool KeepName)
192 : Binary(std::move(Bin)), KeepSymbolName(KeepName) {}
193 // Initializes the RawMemProfReader with the contents in `DataBuffer`.
194 Error initialize(std::unique_ptr<MemoryBuffer> DataBuffer);
195 // Read and parse the contents of the `DataBuffer` as a binary format profile.
196 Error readRawProfile(std::unique_ptr<MemoryBuffer> DataBuffer);
197 // Initialize the segment mapping information for symbolization.
198 Error setupForSymbolization();
199 // Symbolize and cache all the virtual addresses we encounter in the
200 // callstacks from the raw profile. Also prune callstack frames which we can't
201 // symbolize or those that belong to the runtime. For profile entries where
202 // the entire callstack is pruned, we drop the entry from the profile.
203 Error symbolizeAndFilterStackFrames(
204 std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer);
205 // Construct memprof records for each function and store it in the
206 // `FunctionProfileData` map. A function may have allocation profile data or
207 // callsite data or both.
208 Error mapRawProfileToRecords();
209
210 object::SectionedAddress getModuleOffset(uint64_t VirtualAddress);
211
212 // The profiled binary.
213 object::OwningBinary<object::Binary> Binary;
214 // The preferred load address of the executable segment.
215 uint64_t PreferredTextSegmentAddress = 0;
216 // The base address of the text segment in the process during profiling.
217 uint64_t ProfiledTextSegmentStart = 0;
218 // The limit address of the text segment in the process during profiling.
219 uint64_t ProfiledTextSegmentEnd = 0;
220
221 // The memory mapped segment information for all executable segments in the
222 // profiled binary (filtered from the raw profile using the build id).
223 llvm::SmallVector<SegmentEntry, 2> SegmentInfo;
224
225 // A map from callstack id (same as key in CallStackMap below) to the heap
226 // information recorded for that allocation context.
227 llvm::MapVector<uint64_t, MemInfoBlock> CallstackProfileData;
228 CallStackMap StackMap;
229
230 // Cached symbolization from PC to Frame.
231 llvm::DenseMap<uint64_t, llvm::SmallVector<FrameId>> SymbolizedFrame;
232
233 // Whether to keep the symbol name for each frame after hashing.
234 bool KeepSymbolName = false;
235 // A mapping of the hash to symbol name, only used if KeepSymbolName is true.
236 llvm::DenseMap<uint64_t, std::string> GuidToSymbolName;
237};
238} // namespace memprof
239} // namespace llvm
240
241#endif // LLVM_PROFILEDATA_MEMPROFREADER_H_
242

// Source: llvm/include/llvm/ProfileData/MemProfReader.h