1 | //===- bolt/Profile/BoltAddressTranslation.h --------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef BOLT_PROFILE_BOLTADDRESSTRANSLATION_H |
10 | #define BOLT_PROFILE_BOLTADDRESSTRANSLATION_H |
11 | |
12 | #include "llvm/ADT/SmallVector.h" |
13 | #include "llvm/ADT/StringRef.h" |
14 | #include "llvm/Support/DataExtractor.h" |
15 | #include <cstdint> |
16 | #include <map> |
17 | #include <optional> |
18 | #include <system_error> |
19 | #include <unordered_map> |
20 | |
21 | namespace llvm { |
22 | class MCSymbol; |
23 | class raw_ostream; |
24 | |
25 | namespace object { |
26 | class ELFObjectFileBase; |
27 | } // namespace object |
28 | |
29 | namespace bolt { |
30 | class BinaryBasicBlock; |
31 | class BinaryContext; |
32 | class BinaryFunction; |
33 | |
34 | /// The map of output addresses to input ones to be used when translating |
35 | /// samples collected in a binary that was already processed by BOLT. We do not |
36 | /// support reoptimizing a binary already processed by BOLT, but we do support |
37 | /// collecting samples in a binary processed by BOLT. We then translate samples |
38 | /// back to addresses from the input (original) binary, one that can be |
39 | /// optimized. The goal is to avoid special deployments of non-bolted binaries |
40 | /// just for the purposes of data collection. |
41 | /// |
42 | /// The in-memory representation of the map is as follows. Each function has its |
43 | /// own map. A function is identified by its output address. This is the key to |
44 | /// retrieve a translation map. The translation map is a collection of ordered |
45 | /// keys identifying the start of a region (relative to the function start) in |
46 | /// the output address space (addresses in the binary processed by BOLT). |
47 | /// |
48 | /// A translation then happens when perf2bolt needs to convert sample addresses |
49 | /// in the output address space back to input addresses, valid to run BOLT in |
50 | /// the original input binary. To convert, perf2bolt first needs to fetch the |
51 | /// translation map for a sample recorded in a given function. It then finds |
52 | /// the largest key that is still smaller or equal than the recorded address. |
53 | /// It then converts this address to use the value of this key. |
54 | /// |
55 | /// Example translation Map for function foo |
56 | /// KEY VALUE BB? |
57 | /// Output offset1 (first BB) Original input offset1 Y |
58 | /// ... |
59 | /// Output offsetN (last branch) Original input offsetN N |
60 | /// |
61 | /// The information on whether a given entry is a BB start or an instruction |
62 | /// that changes control flow is encoded in the last (highest) bit of VALUE. |
63 | /// |
64 | /// Notes: |
65 | /// Instructions that will never appear in LBR because they do not cause control |
66 | /// flow change are omitted from this map. Basic block locations are recorded |
67 | /// because they can be a target of a jump (To address in the LBR) and also to |
68 | /// recreate the BB layout of this function. We use the BB layout map to |
69 | /// recreate fall-through jumps in the profile, given an LBR trace. |
70 | class BoltAddressTranslation { |
71 | public: |
72 | // In-memory representation of the address translation table |
73 | using MapTy = std::map<uint32_t, uint32_t>; |
74 | |
75 | // List of taken fall-throughs |
76 | using FallthroughListTy = SmallVector<std::pair<uint64_t, uint64_t>, 16>; |
77 | |
78 | /// Name of the ELF section where the table will be serialized to in the |
79 | /// output binary |
80 | static const char *SECTION_NAME; |
81 | |
82 | BoltAddressTranslation() {} |
83 | |
84 | /// Write the serialized address translation tables for each reordered |
85 | /// function |
86 | void write(const BinaryContext &BC, raw_ostream &OS); |
87 | |
88 | /// Read the serialized address translation tables and load them internally |
89 | /// in memory. Return a parse error if failed. |
90 | std::error_code parse(raw_ostream &OS, StringRef Buf); |
91 | |
92 | /// Dump the parsed address translation tables |
93 | void dump(raw_ostream &OS); |
94 | |
95 | /// If the maps are loaded in memory, perform the lookup to translate LBR |
96 | /// addresses in function located at \p FuncAddress. |
97 | uint64_t translate(uint64_t FuncAddress, uint64_t Offset, |
98 | bool IsBranchSrc) const; |
99 | |
100 | /// Use the map keys containing basic block addresses to infer fall-throughs |
101 | /// taken in the path started at FirstLBR.To and ending at SecondLBR.From. |
102 | /// Return std::nullopt if trace is invalid or the list of fall-throughs |
103 | /// otherwise. |
104 | std::optional<FallthroughListTy> getFallthroughsInTrace(uint64_t FuncAddress, |
105 | uint64_t From, |
106 | uint64_t To) const; |
107 | |
108 | /// If available, fetch the address of the hot part linked to the cold part |
109 | /// at \p Address. Return 0 otherwise. |
110 | uint64_t fetchParentAddress(uint64_t Address) const; |
111 | |
112 | /// True if the input binary has a translation table we can use to convert |
113 | /// addresses when aggregating profile |
114 | bool enabledFor(llvm::object::ELFObjectFileBase *InputFile) const; |
115 | |
116 | /// Save function and basic block hashes used for metadata dump. |
117 | void saveMetadata(BinaryContext &BC); |
118 | |
119 | /// True if a given \p Address is a function with translation table entry. |
120 | bool isBATFunction(uint64_t Address) const { return Maps.count(x: Address); } |
121 | |
122 | /// For a given \p Symbol in the output binary and known \p InputOffset |
123 | /// return a corresponding pair of parent BinaryFunction and secondary entry |
124 | /// point in it. |
125 | std::pair<const BinaryFunction *, unsigned> |
126 | translateSymbol(const BinaryContext &BC, const MCSymbol &Symbol, |
127 | uint32_t InputOffset) const; |
128 | |
129 | private: |
130 | /// Helper to update \p Map by inserting one or more BAT entries reflecting |
131 | /// \p BB for function located at \p FuncAddress. At least one entry will be |
132 | /// emitted for the start of the BB. More entries may be emitted to cover |
133 | /// the location of calls or any instruction that may change control flow. |
134 | void writeEntriesForBB(MapTy &Map, const BinaryBasicBlock &BB, |
135 | uint64_t FuncInputAddress, uint64_t FuncOutputAddress); |
136 | |
137 | /// Write the serialized address translation table for a function. |
138 | template <bool Cold> |
139 | void writeMaps(std::map<uint64_t, MapTy> &Maps, uint64_t &PrevAddress, |
140 | raw_ostream &OS); |
141 | |
142 | /// Read the serialized address translation table for a function. |
143 | /// Return a parse error if failed. |
144 | template <bool Cold> |
145 | void (std::vector<uint64_t> &HotFuncs, uint64_t &PrevAddress, |
146 | DataExtractor &DE, uint64_t &Offset, Error &Err); |
147 | |
148 | /// Returns the bitmask with set bits corresponding to indices of BRANCHENTRY |
149 | /// entries in function address translation map. |
150 | APInt calculateBranchEntriesBitMask(MapTy &Map, size_t EqualElems); |
151 | |
152 | /// Calculate the number of equal offsets (output = input - skew) in the |
153 | /// beginning of the function. |
154 | size_t getNumEqualOffsets(const MapTy &Map, uint32_t Skew) const; |
155 | |
156 | std::map<uint64_t, MapTy> Maps; |
157 | |
158 | /// Map a function to its basic blocks count |
159 | std::unordered_map<uint64_t, size_t> NumBasicBlocksMap; |
160 | |
161 | /// Map a function to its secondary entry points vector |
162 | std::unordered_map<uint64_t, std::vector<uint32_t>> SecondaryEntryPointsMap; |
163 | |
164 | /// Return a secondary entry point ID for a function located at \p Address and |
165 | /// \p Offset within that function. |
166 | unsigned getSecondaryEntryPointId(uint64_t Address, uint32_t Offset) const; |
167 | |
168 | /// Links outlined cold bocks to their original function |
169 | std::map<uint64_t, uint64_t> ColdPartSource; |
170 | |
171 | /// Links output address of a main fragment back to input address. |
172 | std::unordered_map<uint64_t, uint64_t> ReverseMap; |
173 | |
174 | /// Identifies the address of a control-flow changing instructions in a |
175 | /// translation map entry |
176 | const static uint32_t BRANCHENTRY = 0x1; |
177 | |
178 | public: |
179 | /// Map basic block input offset to a basic block index and hash pair. |
180 | class BBHashMapTy { |
181 | class EntryTy { |
182 | unsigned Index; |
183 | size_t Hash; |
184 | |
185 | public: |
186 | unsigned getBBIndex() const { return Index; } |
187 | size_t getBBHash() const { return Hash; } |
188 | EntryTy(unsigned Index, size_t Hash) : Index(Index), Hash(Hash) {} |
189 | }; |
190 | |
191 | std::map<uint32_t, EntryTy> Map; |
192 | const EntryTy &getEntry(uint32_t BBInputOffset) const { |
193 | auto It = Map.find(x: BBInputOffset); |
194 | assert(It != Map.end()); |
195 | return It->second; |
196 | } |
197 | |
198 | public: |
199 | bool isInputBlock(uint32_t InputOffset) const { |
200 | return Map.count(x: InputOffset); |
201 | } |
202 | |
203 | unsigned getBBIndex(uint32_t BBInputOffset) const { |
204 | return getEntry(BBInputOffset).getBBIndex(); |
205 | } |
206 | |
207 | size_t getBBHash(uint32_t BBInputOffset) const { |
208 | return getEntry(BBInputOffset).getBBHash(); |
209 | } |
210 | |
211 | void addEntry(uint32_t BBInputOffset, unsigned BBIndex, size_t BBHash) { |
212 | Map.emplace(args&: BBInputOffset, args: EntryTy(BBIndex, BBHash)); |
213 | } |
214 | |
215 | size_t getNumBasicBlocks() const { return Map.size(); } |
216 | |
217 | auto begin() const { return Map.begin(); } |
218 | auto end() const { return Map.end(); } |
219 | auto upper_bound(uint32_t Offset) const { return Map.upper_bound(x: Offset); } |
220 | }; |
221 | |
222 | /// Map function output address to its hash and basic blocks hash map. |
223 | class FuncHashesTy { |
224 | class EntryTy { |
225 | size_t Hash; |
226 | BBHashMapTy BBHashMap; |
227 | |
228 | public: |
229 | size_t getBFHash() const { return Hash; } |
230 | const BBHashMapTy &getBBHashMap() const { return BBHashMap; } |
231 | EntryTy(size_t Hash) : Hash(Hash) {} |
232 | }; |
233 | |
234 | std::unordered_map<uint64_t, EntryTy> Map; |
235 | const EntryTy &getEntry(uint64_t FuncOutputAddress) const { |
236 | auto It = Map.find(x: FuncOutputAddress); |
237 | assert(It != Map.end()); |
238 | return It->second; |
239 | } |
240 | |
241 | public: |
242 | size_t getBFHash(uint64_t FuncOutputAddress) const { |
243 | return getEntry(FuncOutputAddress).getBFHash(); |
244 | } |
245 | |
246 | const BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) const { |
247 | return getEntry(FuncOutputAddress).getBBHashMap(); |
248 | } |
249 | |
250 | void addEntry(uint64_t FuncOutputAddress, size_t BFHash) { |
251 | Map.emplace(args&: FuncOutputAddress, args: EntryTy(BFHash)); |
252 | } |
253 | |
254 | size_t getNumFunctions() const { return Map.size(); }; |
255 | |
256 | size_t getNumBasicBlocks() const { |
257 | size_t NumBasicBlocks{0}; |
258 | for (auto &I : Map) |
259 | NumBasicBlocks += I.second.getBBHashMap().getNumBasicBlocks(); |
260 | return NumBasicBlocks; |
261 | } |
262 | }; |
263 | |
264 | /// Returns BF hash by function output address (after BOLT). |
265 | size_t getBFHash(uint64_t FuncOutputAddress) const { |
266 | return FuncHashes.getBFHash(FuncOutputAddress); |
267 | } |
268 | |
269 | /// Returns BBHashMap by function output address (after BOLT). |
270 | const BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) const { |
271 | return FuncHashes.getBBHashMap(FuncOutputAddress); |
272 | } |
273 | |
274 | BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) { |
275 | return const_cast<BBHashMapTy &>( |
276 | std::as_const(t&: *this).getBBHashMap(FuncOutputAddress)); |
277 | } |
278 | |
279 | /// Returns the number of basic blocks in a function. |
280 | size_t getNumBasicBlocks(uint64_t OutputAddress) const { |
281 | return NumBasicBlocksMap.at(k: OutputAddress); |
282 | } |
283 | |
284 | private: |
285 | FuncHashesTy FuncHashes; |
286 | }; |
287 | } // namespace bolt |
288 | |
289 | } // namespace llvm |
290 | |
291 | #endif |
292 | |