1//===- MCPseudoProbe.h - Pseudo probe encoding support ---------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the declaration of the MCPseudoProbe to support the pseudo
10// probe encoding for AutoFDO. Pseudo probes together with their inline context
11// are encoded in a DFS recursive way in the .pseudoprobe sections. For each
12// .pseudoprobe section, the encoded binary data consist of a single or mutiple
13// function records each for one outlined function. A function record has the
14// following format :
15//
16// FUNCTION BODY (one for each outlined function present in the text section)
17// GUID (uint64)
18// GUID of the function's source name which may be different from the
19// actual binary linkage name. This GUID will be used to decode and
20// generate a profile against the source function name.
21// NPROBES (ULEB128)
22// Number of probes originating from this function.
23// NUM_INLINED_FUNCTIONS (ULEB128)
24// Number of callees inlined into this function, aka number of
25// first-level inlinees
26// PROBE RECORDS
27// A list of NPROBES entries. Each entry contains:
28// INDEX (ULEB128)
29// TYPE (uint4)
30// 0 - block probe, 1 - indirect call, 2 - direct call
31// ATTRIBUTE (uint3)
32// 1 - reserved
33// 2 - Sentinel
34// 4 - HasDiscriminator
35// ADDRESS_TYPE (uint1)
36// 0 - code address for regular probes (for downwards compatibility)
37// - GUID of linkage name for sentinel probes
38// 1 - address delta
39// CODE_ADDRESS (uint64 or ULEB128)
40// code address or address delta, depending on ADDRESS_TYPE
41// DISCRIMINATOR (ULEB128) if HasDiscriminator
42// INLINED FUNCTION RECORDS
43// A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined
44// callees. Each record contains:
45// INLINE SITE
46// ID of the callsite probe (ULEB128)
47// FUNCTION BODY
48// A FUNCTION BODY entry describing the inlined function.
49//
50// TODO: retire the ADDRESS_TYPE encoding for code addresses once compatibility
51// is no longer an issue.
52//===----------------------------------------------------------------------===//
53
54#ifndef LLVM_MC_MCPSEUDOPROBE_H
55#define LLVM_MC_MCPSEUDOPROBE_H
56
57#include "llvm/ADT/DenseMap.h"
58#include "llvm/ADT/DenseSet.h"
59#include "llvm/ADT/SmallVector.h"
60#include "llvm/ADT/StringRef.h"
61#include "llvm/IR/PseudoProbe.h"
62#include "llvm/Support/ErrorOr.h"
63#include <list>
64#include <memory>
65#include <string>
66#include <tuple>
67#include <type_traits>
68#include <unordered_map>
69#include <unordered_set>
70#include <vector>
71
72namespace llvm {
73
74class MCSymbol;
75class MCObjectStreamer;
76class raw_ostream;
77
78enum class MCPseudoProbeFlag {
79 // If set, indicates that the probe is encoded as an address delta
80 // instead of a real code address.
81 AddressDelta = 0x1,
82};
83
84// Function descriptor decoded from .pseudo_probe_desc section
85struct MCPseudoProbeFuncDesc {
86 uint64_t FuncGUID = 0;
87 uint64_t FuncHash = 0;
88 std::string FuncName;
89
90 MCPseudoProbeFuncDesc(uint64_t GUID, uint64_t Hash, StringRef Name)
91 : FuncGUID(GUID), FuncHash(Hash), FuncName(Name){};
92
93 void print(raw_ostream &OS);
94};
95
96class MCDecodedPseudoProbe;
97
98// An inline frame has the form <CalleeGuid, ProbeID>
99using InlineSite = std::tuple<uint64_t, uint32_t>;
100using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>;
101// GUID to PseudoProbeFuncDesc map
102using GUIDProbeFunctionMap =
103 std::unordered_map<uint64_t, MCPseudoProbeFuncDesc>;
104// Address to pseudo probes map.
105using AddressProbesMap =
106 std::unordered_map<uint64_t, std::list<MCDecodedPseudoProbe>>;
107
108class MCDecodedPseudoProbeInlineTree;
109
110class MCPseudoProbeBase {
111protected:
112 uint64_t Guid;
113 uint64_t Index;
114 uint32_t Discriminator;
115 uint8_t Attributes;
116 uint8_t Type;
117 // The value should be equal to PseudoProbeReservedId::Last + 1 which is
118 // defined in SampleProfileProbe.h. The header file is not included here to
119 // reduce the dependency from MC to IPO.
120 const static uint32_t PseudoProbeFirstId = 1;
121
122public:
123 MCPseudoProbeBase(uint64_t G, uint64_t I, uint64_t At, uint8_t T, uint32_t D)
124 : Guid(G), Index(I), Discriminator(D), Attributes(At), Type(T) {}
125
126 bool isEntry() const { return Index == PseudoProbeFirstId; }
127
128 uint64_t getGuid() const { return Guid; }
129
130 uint64_t getIndex() const { return Index; }
131
132 uint32_t getDiscriminator() const { return Discriminator; }
133
134 uint8_t getAttributes() const { return Attributes; }
135
136 uint8_t getType() const { return Type; }
137
138 bool isBlock() const {
139 return Type == static_cast<uint8_t>(PseudoProbeType::Block);
140 }
141
142 bool isIndirectCall() const {
143 return Type == static_cast<uint8_t>(PseudoProbeType::IndirectCall);
144 }
145
146 bool isDirectCall() const {
147 return Type == static_cast<uint8_t>(PseudoProbeType::DirectCall);
148 }
149
150 bool isCall() const { return isIndirectCall() || isDirectCall(); }
151
152 void setAttributes(uint8_t Attr) { Attributes = Attr; }
153};
154
155/// Instances of this class represent a pseudo probe instance for a pseudo probe
156/// table entry, which is created during a machine instruction is assembled and
157/// uses an address from a temporary label created at the current address in the
158/// current section.
159class MCPseudoProbe : public MCPseudoProbeBase {
160 MCSymbol *Label;
161
162public:
163 MCPseudoProbe(MCSymbol *Label, uint64_t Guid, uint64_t Index, uint64_t Type,
164 uint64_t Attributes, uint32_t Discriminator)
165 : MCPseudoProbeBase(Guid, Index, Attributes, Type, Discriminator),
166 Label(Label) {
167 assert(Type <= 0xFF && "Probe type too big to encode, exceeding 2^8");
168 assert(Attributes <= 0xFF &&
169 "Probe attributes too big to encode, exceeding 2^16");
170 }
171
172 MCSymbol *getLabel() const { return Label; }
173 void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const;
174};
175
176// Represents a callsite with caller function name and probe id
177using MCPseduoProbeFrameLocation = std::pair<StringRef, uint32_t>;
178
179class MCDecodedPseudoProbe : public MCPseudoProbeBase {
180 uint64_t Address;
181 MCDecodedPseudoProbeInlineTree *InlineTree;
182
183public:
184 MCDecodedPseudoProbe(uint64_t Ad, uint64_t G, uint32_t I, PseudoProbeType K,
185 uint8_t At, uint32_t D,
186 MCDecodedPseudoProbeInlineTree *Tree)
187 : MCPseudoProbeBase(G, I, At, static_cast<uint8_t>(K), D), Address(Ad),
188 InlineTree(Tree){};
189
190 uint64_t getAddress() const { return Address; }
191
192 void setAddress(uint64_t Addr) { Address = Addr; }
193
194 MCDecodedPseudoProbeInlineTree *getInlineTreeNode() const {
195 return InlineTree;
196 }
197
198 // Get the inlined context by traversing current inline tree backwards,
199 // each tree node has its InlineSite which is taken as the context.
200 // \p ContextStack is populated in root to leaf order
201 void
202 getInlineContext(SmallVectorImpl<MCPseduoProbeFrameLocation> &ContextStack,
203 const GUIDProbeFunctionMap &GUID2FuncMAP) const;
204
205 // Helper function to get the string from context stack
206 std::string
207 getInlineContextStr(const GUIDProbeFunctionMap &GUID2FuncMAP) const;
208
209 // Print pseudo probe while disassembling
210 void print(raw_ostream &OS, const GUIDProbeFunctionMap &GUID2FuncMAP,
211 bool ShowName) const;
212};
213
214template <typename ProbeType, typename DerivedProbeInlineTreeType>
215class MCPseudoProbeInlineTreeBase {
216 struct InlineSiteHash {
217 uint64_t operator()(const InlineSite &Site) const {
218 return std::get<0>(t: Site) ^ std::get<1>(t: Site);
219 }
220 };
221
222protected:
223 // Track children (e.g. inlinees) of current context
224 using InlinedProbeTreeMap = std::unordered_map<
225 InlineSite, std::unique_ptr<DerivedProbeInlineTreeType>, InlineSiteHash>;
226 InlinedProbeTreeMap Children;
227 // Set of probes that come with the function.
228 std::vector<ProbeType> Probes;
229 MCPseudoProbeInlineTreeBase() {
230 static_assert(std::is_base_of<MCPseudoProbeInlineTreeBase,
231 DerivedProbeInlineTreeType>::value,
232 "DerivedProbeInlineTreeType must be subclass of "
233 "MCPseudoProbeInlineTreeBase");
234 }
235
236public:
237 uint64_t Guid = 0;
238
239 // Root node has a GUID 0.
240 bool isRoot() const { return Guid == 0; }
241 InlinedProbeTreeMap &getChildren() { return Children; }
242 const InlinedProbeTreeMap &getChildren() const { return Children; }
243 std::vector<ProbeType> &getProbes() { return Probes; }
244 void addProbes(ProbeType Probe) { Probes.push_back(Probe); }
245 // Caller node of the inline site
246 MCPseudoProbeInlineTreeBase<ProbeType, DerivedProbeInlineTreeType> *Parent =
247 nullptr;
248 DerivedProbeInlineTreeType *getOrAddNode(const InlineSite &Site) {
249 auto Ret = Children.emplace(
250 Site, std::make_unique<DerivedProbeInlineTreeType>(Site));
251 Ret.first->second->Parent = this;
252 return Ret.first->second.get();
253 };
254};
255
256// A Tri-tree based data structure to group probes by inline stack.
257// A tree is allocated for a standalone .text section. A fake
258// instance is created as the root of a tree.
259// A real instance of this class is created for each function, either a
260// not inlined function that has code in .text section or an inlined function.
261class MCPseudoProbeInlineTree
262 : public MCPseudoProbeInlineTreeBase<MCPseudoProbe,
263 MCPseudoProbeInlineTree> {
264public:
265 MCPseudoProbeInlineTree() = default;
266 MCPseudoProbeInlineTree(uint64_t Guid) { this->Guid = Guid; }
267 MCPseudoProbeInlineTree(const InlineSite &Site) {
268 this->Guid = std::get<0>(t: Site);
269 }
270
271 // MCPseudoProbeInlineTree method based on Inlinees
272 void addPseudoProbe(const MCPseudoProbe &Probe,
273 const MCPseudoProbeInlineStack &InlineStack);
274 void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *&LastProbe);
275};
276
277// inline tree node for the decoded pseudo probe
278class MCDecodedPseudoProbeInlineTree
279 : public MCPseudoProbeInlineTreeBase<MCDecodedPseudoProbe *,
280 MCDecodedPseudoProbeInlineTree> {
281public:
282 InlineSite ISite;
283 // Used for decoding
284 uint32_t ChildrenToProcess = 0;
285
286 MCDecodedPseudoProbeInlineTree() = default;
287 MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){};
288
289 // Return false if it's a dummy inline site
290 bool hasInlineSite() const { return !isRoot() && !Parent->isRoot(); }
291};
292
293/// Instances of this class represent the pseudo probes inserted into a compile
294/// unit.
295class MCPseudoProbeSections {
296public:
297 void addPseudoProbe(MCSymbol *FuncSym, const MCPseudoProbe &Probe,
298 const MCPseudoProbeInlineStack &InlineStack) {
299 MCProbeDivisions[FuncSym].addPseudoProbe(Probe, InlineStack);
300 }
301
302 // The addresses of MCPseudoProbeInlineTree are used by the tree structure and
303 // need to be stable.
304 using MCProbeDivisionMap = std::unordered_map<MCSymbol *, MCPseudoProbeInlineTree>;
305
306private:
307 // A collection of MCPseudoProbe for each function. The MCPseudoProbes are
308 // grouped by GUIDs due to inlining that can bring probes from different
309 // functions into one function.
310 MCProbeDivisionMap MCProbeDivisions;
311
312public:
313 const MCProbeDivisionMap &getMCProbes() const { return MCProbeDivisions; }
314
315 bool empty() const { return MCProbeDivisions.empty(); }
316
317 void emit(MCObjectStreamer *MCOS);
318};
319
320class MCPseudoProbeTable {
321 // A collection of MCPseudoProbe in the current module grouped by
322 // functions. MCPseudoProbes will be encoded into a corresponding
323 // .pseudoprobe section. With functions emitted as separate comdats,
324 // a text section really only contains the code of a function solely, and the
325 // probes associated with the text section will be emitted into a standalone
326 // .pseudoprobe section that shares the same comdat group with the function.
327 MCPseudoProbeSections MCProbeSections;
328
329public:
330 static void emit(MCObjectStreamer *MCOS);
331
332 MCPseudoProbeSections &getProbeSections() { return MCProbeSections; }
333
334#ifndef NDEBUG
335 static int DdgPrintIndent;
336#endif
337};
338
339class MCPseudoProbeDecoder {
340 // GUID to PseudoProbeFuncDesc map.
341 GUIDProbeFunctionMap GUID2FuncDescMap;
342
343 // Address to probes map.
344 AddressProbesMap Address2ProbesMap;
345
346 // The dummy root of the inline trie, all the outlined function will directly
347 // be the children of the dummy root, all the inlined function will be the
348 // children of its inlineer. So the relation would be like:
349 // DummyRoot --> OutlinedFunc --> InlinedFunc1 --> InlinedFunc2
350 MCDecodedPseudoProbeInlineTree DummyInlineRoot;
351
352 /// Points to the current location in the buffer.
353 const uint8_t *Data = nullptr;
354
355 /// Points to the end of the buffer.
356 const uint8_t *End = nullptr;
357
358 /// Whether encoding is based on a starting probe with absolute code address.
359 bool EncodingIsAddrBased = false;
360
361 // Decoding helper function
362 template <typename T> ErrorOr<T> readUnencodedNumber();
363 template <typename T> ErrorOr<T> readUnsignedNumber();
364 template <typename T> ErrorOr<T> readSignedNumber();
365 ErrorOr<StringRef> readString(uint32_t Size);
366
367public:
368 using Uint64Set = DenseSet<uint64_t>;
369 using Uint64Map = DenseMap<uint64_t, uint64_t>;
370
371 // Decode pseudo_probe_desc section to build GUID to PseudoProbeFuncDesc map.
372 bool buildGUID2FuncDescMap(const uint8_t *Start, std::size_t Size);
373
374 // Decode pseudo_probe section to build address to probes map for specifed
375 // functions only.
376 bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size,
377 const Uint64Set &GuildFilter,
378 const Uint64Map &FuncStartAddrs);
379
380 bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur,
381 uint64_t &LastAddr, const Uint64Set &GuildFilter,
382 const Uint64Map &FuncStartAddrs);
383
384 // Print pseudo_probe_desc section info
385 void printGUID2FuncDescMap(raw_ostream &OS);
386
387 // Print pseudo_probe section info, used along with show-disassembly
388 void printProbeForAddress(raw_ostream &OS, uint64_t Address);
389
390 // do printProbeForAddress for all addresses
391 void printProbesForAllAddresses(raw_ostream &OS);
392
393 // Look up the probe of a call for the input address
394 const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const;
395
396 const MCPseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) const;
397
398 // Helper function to populate one probe's inline stack into
399 // \p InlineContextStack.
400 // Current leaf location info will be added if IncludeLeaf is true
401 // Example:
402 // Current probe(bar:3) inlined at foo:2 then inlined at main:1
403 // IncludeLeaf = true, Output: [main:1, foo:2, bar:3]
404 // IncludeLeaf = false, Output: [main:1, foo:2]
405 void getInlineContextForProbe(
406 const MCDecodedPseudoProbe *Probe,
407 SmallVectorImpl<MCPseduoProbeFrameLocation> &InlineContextStack,
408 bool IncludeLeaf) const;
409
410 const AddressProbesMap &getAddress2ProbesMap() const {
411 return Address2ProbesMap;
412 }
413
414 AddressProbesMap &getAddress2ProbesMap() { return Address2ProbesMap; }
415
416 const GUIDProbeFunctionMap &getGUID2FuncDescMap() const {
417 return GUID2FuncDescMap;
418 }
419
420 const MCPseudoProbeFuncDesc *
421 getInlinerDescForProbe(const MCDecodedPseudoProbe *Probe) const;
422
423 const MCDecodedPseudoProbeInlineTree &getDummyInlineRoot() const {
424 return DummyInlineRoot;
425 }
426};
427
428} // end namespace llvm
429
430#endif // LLVM_MC_MCPSEUDOPROBE_H
431

source code of llvm/include/llvm/MC/MCPseudoProbe.h