1//===- bolt/Profile/BoltAddressTranslation.h --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef BOLT_PROFILE_BOLTADDRESSTRANSLATION_H
10#define BOLT_PROFILE_BOLTADDRESSTRANSLATION_H
11
12#include "llvm/ADT/SmallVector.h"
13#include "llvm/ADT/StringRef.h"
14#include "llvm/Support/DataExtractor.h"
15#include <cstdint>
16#include <map>
17#include <optional>
18#include <system_error>
19#include <unordered_map>
20
21namespace llvm {
22class MCSymbol;
23class raw_ostream;
24
25namespace object {
26class ELFObjectFileBase;
27} // namespace object
28
29namespace bolt {
30class BinaryBasicBlock;
31class BinaryContext;
32class BinaryFunction;
33
34/// The map of output addresses to input ones to be used when translating
35/// samples collected in a binary that was already processed by BOLT. We do not
36/// support reoptimizing a binary already processed by BOLT, but we do support
37/// collecting samples in a binary processed by BOLT. We then translate samples
38/// back to addresses from the input (original) binary, one that can be
39/// optimized. The goal is to avoid special deployments of non-bolted binaries
40/// just for the purposes of data collection.
41///
42/// The in-memory representation of the map is as follows. Each function has its
43/// own map. A function is identified by its output address. This is the key to
44/// retrieve a translation map. The translation map is a collection of ordered
45/// keys identifying the start of a region (relative to the function start) in
46/// the output address space (addresses in the binary processed by BOLT).
47///
48/// A translation then happens when perf2bolt needs to convert sample addresses
49/// in the output address space back to input addresses, valid to run BOLT in
50/// the original input binary. To convert, perf2bolt first needs to fetch the
51/// translation map for a sample recorded in a given function. It then finds
52/// the largest key that is still smaller or equal than the recorded address.
53/// It then converts this address to use the value of this key.
54///
55/// Example translation Map for function foo
56/// KEY VALUE BB?
57/// Output offset1 (first BB) Original input offset1 Y
58/// ...
59/// Output offsetN (last branch) Original input offsetN N
60///
61/// The information on whether a given entry is a BB start or an instruction
62/// that changes control flow is encoded in the last (highest) bit of VALUE.
63///
64/// Notes:
65/// Instructions that will never appear in LBR because they do not cause control
66/// flow change are omitted from this map. Basic block locations are recorded
67/// because they can be a target of a jump (To address in the LBR) and also to
68/// recreate the BB layout of this function. We use the BB layout map to
69/// recreate fall-through jumps in the profile, given an LBR trace.
70class BoltAddressTranslation {
71public:
72 // In-memory representation of the address translation table
73 using MapTy = std::map<uint32_t, uint32_t>;
74
75 // List of taken fall-throughs
76 using FallthroughListTy = SmallVector<std::pair<uint64_t, uint64_t>, 16>;
77
78 /// Name of the ELF section where the table will be serialized to in the
79 /// output binary
80 static const char *SECTION_NAME;
81
82 BoltAddressTranslation() {}
83
84 /// Write the serialized address translation tables for each reordered
85 /// function
86 void write(const BinaryContext &BC, raw_ostream &OS);
87
88 /// Read the serialized address translation tables and load them internally
89 /// in memory. Return a parse error if failed.
90 std::error_code parse(raw_ostream &OS, StringRef Buf);
91
92 /// Dump the parsed address translation tables
93 void dump(raw_ostream &OS);
94
95 /// If the maps are loaded in memory, perform the lookup to translate LBR
96 /// addresses in function located at \p FuncAddress.
97 uint64_t translate(uint64_t FuncAddress, uint64_t Offset,
98 bool IsBranchSrc) const;
99
100 /// Use the map keys containing basic block addresses to infer fall-throughs
101 /// taken in the path started at FirstLBR.To and ending at SecondLBR.From.
102 /// Return std::nullopt if trace is invalid or the list of fall-throughs
103 /// otherwise.
104 std::optional<FallthroughListTy> getFallthroughsInTrace(uint64_t FuncAddress,
105 uint64_t From,
106 uint64_t To) const;
107
108 /// If available, fetch the address of the hot part linked to the cold part
109 /// at \p Address. Return 0 otherwise.
110 uint64_t fetchParentAddress(uint64_t Address) const;
111
112 /// True if the input binary has a translation table we can use to convert
113 /// addresses when aggregating profile
114 bool enabledFor(llvm::object::ELFObjectFileBase *InputFile) const;
115
116 /// Save function and basic block hashes used for metadata dump.
117 void saveMetadata(BinaryContext &BC);
118
119 /// True if a given \p Address is a function with translation table entry.
120 bool isBATFunction(uint64_t Address) const { return Maps.count(x: Address); }
121
122 /// For a given \p Symbol in the output binary and known \p InputOffset
123 /// return a corresponding pair of parent BinaryFunction and secondary entry
124 /// point in it.
125 std::pair<const BinaryFunction *, unsigned>
126 translateSymbol(const BinaryContext &BC, const MCSymbol &Symbol,
127 uint32_t InputOffset) const;
128
129private:
130 /// Helper to update \p Map by inserting one or more BAT entries reflecting
131 /// \p BB for function located at \p FuncAddress. At least one entry will be
132 /// emitted for the start of the BB. More entries may be emitted to cover
133 /// the location of calls or any instruction that may change control flow.
134 void writeEntriesForBB(MapTy &Map, const BinaryBasicBlock &BB,
135 uint64_t FuncInputAddress, uint64_t FuncOutputAddress);
136
137 /// Write the serialized address translation table for a function.
138 template <bool Cold>
139 void writeMaps(std::map<uint64_t, MapTy> &Maps, uint64_t &PrevAddress,
140 raw_ostream &OS);
141
142 /// Read the serialized address translation table for a function.
143 /// Return a parse error if failed.
144 template <bool Cold>
145 void parseMaps(std::vector<uint64_t> &HotFuncs, uint64_t &PrevAddress,
146 DataExtractor &DE, uint64_t &Offset, Error &Err);
147
148 /// Returns the bitmask with set bits corresponding to indices of BRANCHENTRY
149 /// entries in function address translation map.
150 APInt calculateBranchEntriesBitMask(MapTy &Map, size_t EqualElems);
151
152 /// Calculate the number of equal offsets (output = input - skew) in the
153 /// beginning of the function.
154 size_t getNumEqualOffsets(const MapTy &Map, uint32_t Skew) const;
155
156 std::map<uint64_t, MapTy> Maps;
157
158 /// Map a function to its basic blocks count
159 std::unordered_map<uint64_t, size_t> NumBasicBlocksMap;
160
161 /// Map a function to its secondary entry points vector
162 std::unordered_map<uint64_t, std::vector<uint32_t>> SecondaryEntryPointsMap;
163
164 /// Return a secondary entry point ID for a function located at \p Address and
165 /// \p Offset within that function.
166 unsigned getSecondaryEntryPointId(uint64_t Address, uint32_t Offset) const;
167
168 /// Links outlined cold bocks to their original function
169 std::map<uint64_t, uint64_t> ColdPartSource;
170
171 /// Links output address of a main fragment back to input address.
172 std::unordered_map<uint64_t, uint64_t> ReverseMap;
173
174 /// Identifies the address of a control-flow changing instructions in a
175 /// translation map entry
176 const static uint32_t BRANCHENTRY = 0x1;
177
178public:
179 /// Map basic block input offset to a basic block index and hash pair.
180 class BBHashMapTy {
181 class EntryTy {
182 unsigned Index;
183 size_t Hash;
184
185 public:
186 unsigned getBBIndex() const { return Index; }
187 size_t getBBHash() const { return Hash; }
188 EntryTy(unsigned Index, size_t Hash) : Index(Index), Hash(Hash) {}
189 };
190
191 std::map<uint32_t, EntryTy> Map;
192 const EntryTy &getEntry(uint32_t BBInputOffset) const {
193 auto It = Map.find(x: BBInputOffset);
194 assert(It != Map.end());
195 return It->second;
196 }
197
198 public:
199 bool isInputBlock(uint32_t InputOffset) const {
200 return Map.count(x: InputOffset);
201 }
202
203 unsigned getBBIndex(uint32_t BBInputOffset) const {
204 return getEntry(BBInputOffset).getBBIndex();
205 }
206
207 size_t getBBHash(uint32_t BBInputOffset) const {
208 return getEntry(BBInputOffset).getBBHash();
209 }
210
211 void addEntry(uint32_t BBInputOffset, unsigned BBIndex, size_t BBHash) {
212 Map.emplace(args&: BBInputOffset, args: EntryTy(BBIndex, BBHash));
213 }
214
215 size_t getNumBasicBlocks() const { return Map.size(); }
216
217 auto begin() const { return Map.begin(); }
218 auto end() const { return Map.end(); }
219 auto upper_bound(uint32_t Offset) const { return Map.upper_bound(x: Offset); }
220 };
221
222 /// Map function output address to its hash and basic blocks hash map.
223 class FuncHashesTy {
224 class EntryTy {
225 size_t Hash;
226 BBHashMapTy BBHashMap;
227
228 public:
229 size_t getBFHash() const { return Hash; }
230 const BBHashMapTy &getBBHashMap() const { return BBHashMap; }
231 EntryTy(size_t Hash) : Hash(Hash) {}
232 };
233
234 std::unordered_map<uint64_t, EntryTy> Map;
235 const EntryTy &getEntry(uint64_t FuncOutputAddress) const {
236 auto It = Map.find(x: FuncOutputAddress);
237 assert(It != Map.end());
238 return It->second;
239 }
240
241 public:
242 size_t getBFHash(uint64_t FuncOutputAddress) const {
243 return getEntry(FuncOutputAddress).getBFHash();
244 }
245
246 const BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) const {
247 return getEntry(FuncOutputAddress).getBBHashMap();
248 }
249
250 void addEntry(uint64_t FuncOutputAddress, size_t BFHash) {
251 Map.emplace(args&: FuncOutputAddress, args: EntryTy(BFHash));
252 }
253
254 size_t getNumFunctions() const { return Map.size(); };
255
256 size_t getNumBasicBlocks() const {
257 size_t NumBasicBlocks{0};
258 for (auto &I : Map)
259 NumBasicBlocks += I.second.getBBHashMap().getNumBasicBlocks();
260 return NumBasicBlocks;
261 }
262 };
263
264 /// Returns BF hash by function output address (after BOLT).
265 size_t getBFHash(uint64_t FuncOutputAddress) const {
266 return FuncHashes.getBFHash(FuncOutputAddress);
267 }
268
269 /// Returns BBHashMap by function output address (after BOLT).
270 const BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) const {
271 return FuncHashes.getBBHashMap(FuncOutputAddress);
272 }
273
274 BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) {
275 return const_cast<BBHashMapTy &>(
276 std::as_const(t&: *this).getBBHashMap(FuncOutputAddress));
277 }
278
279 /// Returns the number of basic blocks in a function.
280 size_t getNumBasicBlocks(uint64_t OutputAddress) const {
281 return NumBasicBlocksMap.at(k: OutputAddress);
282 }
283
284private:
285 FuncHashesTy FuncHashes;
286};
287} // namespace bolt
288
289} // namespace llvm
290
291#endif
292

source code of bolt/include/bolt/Profile/BoltAddressTranslation.h