1 | //===- GsymReader.h ---------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H |
10 | #define LLVM_DEBUGINFO_GSYM_GSYMREADER_H |
11 | |
12 | #include "llvm/ADT/ArrayRef.h" |
13 | #include "llvm/DebugInfo/GSYM/FileEntry.h" |
14 | #include "llvm/DebugInfo/GSYM/FunctionInfo.h" |
15 | #include "llvm/DebugInfo/GSYM/Header.h" |
16 | #include "llvm/DebugInfo/GSYM/LineEntry.h" |
17 | #include "llvm/DebugInfo/GSYM/StringTable.h" |
18 | #include "llvm/Support/DataExtractor.h" |
19 | #include "llvm/Support/Endian.h" |
20 | #include "llvm/Support/ErrorOr.h" |
21 | #include <inttypes.h> |
22 | #include <memory> |
23 | #include <stdint.h> |
24 | #include <vector> |
25 | |
26 | namespace llvm { |
27 | class MemoryBuffer; |
28 | class raw_ostream; |
29 | |
30 | namespace gsym { |
31 | |
32 | /// GsymReader is used to read GSYM data from a file or buffer. |
33 | /// |
/// This class is optimized for very quick lookups when the endianness matches
/// the host system. The Header, address table, address info offsets, and file
/// table are designed to be mmap'ed as read only into memory and used without
/// any parsing needed. If the endianness doesn't match, we swap these objects
/// and tables into GsymReader::SwappedData and then point our header and
/// ArrayRefs to this swapped internal data.
40 | /// |
41 | /// GsymReader objects must use one of the static functions to create an |
42 | /// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...). |
43 | |
44 | class GsymReader { |
45 | GsymReader(std::unique_ptr<MemoryBuffer> Buffer); |
46 | llvm::Error parse(); |
47 | |
48 | std::unique_ptr<MemoryBuffer> MemBuffer; |
49 | StringRef GsymBytes; |
50 | llvm::endianness Endian; |
51 | const Header *Hdr = nullptr; |
52 | ArrayRef<uint8_t> AddrOffsets; |
53 | ArrayRef<uint32_t> AddrInfoOffsets; |
54 | ArrayRef<FileEntry> Files; |
55 | StringTable StrTab; |
56 | /// When the GSYM file's endianness doesn't match the host system then |
57 | /// we must decode all data structures that need to be swapped into |
58 | /// local storage and set point the ArrayRef objects above to these swapped |
59 | /// copies. |
60 | struct SwappedData { |
61 | Header Hdr; |
62 | std::vector<uint8_t> AddrOffsets; |
63 | std::vector<uint32_t> AddrInfoOffsets; |
64 | std::vector<FileEntry> Files; |
65 | }; |
66 | std::unique_ptr<SwappedData> Swap; |
67 | |
68 | public: |
69 | GsymReader(GsymReader &&RHS); |
70 | ~GsymReader(); |
71 | |
72 | /// Construct a GsymReader from a file on disk. |
73 | /// |
74 | /// \param Path The file path the GSYM file to read. |
75 | /// \returns An expected GsymReader that contains the object or an error |
76 | /// object that indicates reason for failing to read the GSYM. |
77 | static llvm::Expected<GsymReader> openFile(StringRef Path); |
78 | |
79 | /// Construct a GsymReader from a buffer. |
80 | /// |
81 | /// \param Bytes A set of bytes that will be copied and owned by the |
82 | /// returned object on success. |
83 | /// \returns An expected GsymReader that contains the object or an error |
84 | /// object that indicates reason for failing to read the GSYM. |
85 | static llvm::Expected<GsymReader> copyBuffer(StringRef Bytes); |
86 | |
87 | /// Access the GSYM header. |
88 | /// \returns A native endian version of the GSYM header. |
89 | const Header &() const; |
90 | |
91 | /// Get the full function info for an address. |
92 | /// |
93 | /// This should be called when a client will store a copy of the complete |
94 | /// FunctionInfo for a given address. For one off lookups, use the lookup() |
95 | /// function below. |
96 | /// |
97 | /// Symbolication server processes might want to parse the entire function |
98 | /// info for a given address and cache it if the process stays around to |
99 | /// service many symbolication addresses, like for parsing profiling |
100 | /// information. |
101 | /// |
102 | /// \param Addr A virtual address from the orignal object file to lookup. |
103 | /// |
104 | /// \returns An expected FunctionInfo that contains the function info object |
105 | /// or an error object that indicates reason for failing to lookup the |
106 | /// address. |
107 | llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const; |
108 | |
109 | /// Get the full function info given an address index. |
110 | /// |
111 | /// \param AddrIdx A address index for an address in the address table. |
112 | /// |
113 | /// \returns An expected FunctionInfo that contains the function info object |
114 | /// or an error object that indicates reason for failing get the function |
115 | /// info object. |
116 | llvm::Expected<FunctionInfo> getFunctionInfoAtIndex(uint64_t AddrIdx) const; |
117 | |
118 | /// Lookup an address in the a GSYM. |
119 | /// |
120 | /// Lookup just the information needed for a specific address \a Addr. This |
121 | /// function is faster that calling getFunctionInfo() as it will only return |
122 | /// information that pertains to \a Addr and allows the parsing to skip any |
123 | /// extra information encoded for other addresses. For example the line table |
124 | /// parsing can stop when a matching LineEntry has been fouhnd, and the |
125 | /// InlineInfo can stop parsing early once a match has been found and also |
126 | /// skip information that doesn't match. This avoids memory allocations and |
127 | /// is much faster for lookups. |
128 | /// |
129 | /// \param Addr A virtual address from the orignal object file to lookup. |
130 | /// \returns An expected LookupResult that contains only the information |
131 | /// needed for the current address, or an error object that indicates reason |
132 | /// for failing to lookup the address. |
133 | llvm::Expected<LookupResult> lookup(uint64_t Addr) const; |
134 | |
135 | /// Get a string from the string table. |
136 | /// |
137 | /// \param Offset The string table offset for the string to retrieve. |
138 | /// \returns The string from the strin table. |
139 | StringRef getString(uint32_t Offset) const { return StrTab[Offset]; } |
140 | |
141 | /// Get the a file entry for the suppplied file index. |
142 | /// |
143 | /// Used to convert any file indexes in the FunctionInfo data back into |
144 | /// files. This function can be used for iteration, but is more commonly used |
145 | /// for random access when doing lookups. |
146 | /// |
147 | /// \param Index An index into the file table. |
148 | /// \returns An optional FileInfo that will be valid if the file index is |
149 | /// valid, or std::nullopt if the file index is out of bounds, |
150 | std::optional<FileEntry> getFile(uint32_t Index) const { |
151 | if (Index < Files.size()) |
152 | return Files[Index]; |
153 | return std::nullopt; |
154 | } |
155 | |
156 | /// Dump the entire Gsym data contained in this object. |
157 | /// |
158 | /// \param OS The output stream to dump to. |
159 | void dump(raw_ostream &OS); |
160 | |
161 | /// Dump a FunctionInfo object. |
162 | /// |
163 | /// This function will convert any string table indexes and file indexes |
164 | /// into human readable format. |
165 | /// |
166 | /// \param OS The output stream to dump to. |
167 | /// |
168 | /// \param FI The object to dump. |
169 | void dump(raw_ostream &OS, const FunctionInfo &FI); |
170 | |
171 | /// Dump a LineTable object. |
172 | /// |
173 | /// This function will convert any string table indexes and file indexes |
174 | /// into human readable format. |
175 | /// |
176 | /// |
177 | /// \param OS The output stream to dump to. |
178 | /// |
179 | /// \param LT The object to dump. |
180 | void dump(raw_ostream &OS, const LineTable <); |
181 | |
182 | /// Dump a InlineInfo object. |
183 | /// |
184 | /// This function will convert any string table indexes and file indexes |
185 | /// into human readable format. |
186 | /// |
187 | /// \param OS The output stream to dump to. |
188 | /// |
189 | /// \param II The object to dump. |
190 | /// |
191 | /// \param Indent The indentation as number of spaces. Used for recurive |
192 | /// dumping. |
193 | void dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent = 0); |
194 | |
195 | /// Dump a FileEntry object. |
196 | /// |
197 | /// This function will convert any string table indexes into human readable |
198 | /// format. |
199 | /// |
200 | /// \param OS The output stream to dump to. |
201 | /// |
202 | /// \param FE The object to dump. |
203 | void dump(raw_ostream &OS, std::optional<FileEntry> FE); |
204 | |
205 | /// Get the number of addresses in this Gsym file. |
206 | uint32_t getNumAddresses() const { |
207 | return Hdr->NumAddresses; |
208 | } |
209 | |
210 | /// Gets an address from the address table. |
211 | /// |
212 | /// Addresses are stored as offsets frrom the gsym::Header::BaseAddress. |
213 | /// |
214 | /// \param Index A index into the address table. |
215 | /// \returns A resolved virtual address for adddress in the address table |
216 | /// or std::nullopt if Index is out of bounds. |
217 | std::optional<uint64_t> getAddress(size_t Index) const; |
218 | |
219 | protected: |
220 | |
221 | /// Get an appropriate address info offsets array. |
222 | /// |
223 | /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8 |
224 | /// byte offsets from the The gsym::Header::BaseAddress. The table is stored |
225 | /// internally as a array of bytes that are in the correct endianness. When |
226 | /// we access this table we must get an array that matches those sizes. This |
227 | /// templatized helper function is used when accessing address offsets in the |
228 | /// AddrOffsets member variable. |
229 | /// |
230 | /// \returns An ArrayRef of an appropriate address offset size. |
231 | template <class T> ArrayRef<T> |
232 | getAddrOffsets() const { |
233 | return ArrayRef<T>(reinterpret_cast<const T *>(AddrOffsets.data()), |
234 | AddrOffsets.size()/sizeof(T)); |
235 | } |
236 | |
237 | /// Get an appropriate address from the address table. |
238 | /// |
239 | /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8 |
240 | /// byte address offsets from the The gsym::Header::BaseAddress. The table is |
241 | /// stored internally as a array of bytes that are in the correct endianness. |
242 | /// In order to extract an address from the address table we must access the |
243 | /// address offset using the correct size and then add it to the BaseAddress |
244 | /// in the header. |
245 | /// |
246 | /// \param Index An index into the AddrOffsets array. |
247 | /// \returns An virtual address that matches the original object file for the |
248 | /// address as the specified index, or std::nullopt if Index is out of bounds. |
249 | template <class T> |
250 | std::optional<uint64_t> addressForIndex(size_t Index) const { |
251 | ArrayRef<T> AIO = getAddrOffsets<T>(); |
252 | if (Index < AIO.size()) |
253 | return AIO[Index] + Hdr->BaseAddress; |
254 | return std::nullopt; |
255 | } |
256 | /// Lookup an address offset in the AddrOffsets table. |
257 | /// |
258 | /// Given an address offset, look it up using a binary search of the |
259 | /// AddrOffsets table. |
260 | /// |
261 | /// \param AddrOffset An address offset, that has already been computed by |
262 | /// subtracting the gsym::Header::BaseAddress. |
263 | /// \returns The matching address offset index. This index will be used to |
264 | /// extract the FunctionInfo data's offset from the AddrInfoOffsets array. |
265 | template <class T> |
266 | std::optional<uint64_t> |
267 | getAddressOffsetIndex(const uint64_t AddrOffset) const { |
268 | ArrayRef<T> AIO = getAddrOffsets<T>(); |
269 | const auto Begin = AIO.begin(); |
270 | const auto End = AIO.end(); |
271 | auto Iter = std::lower_bound(Begin, End, AddrOffset); |
272 | // Watch for addresses that fall between the gsym::Header::BaseAddress and |
273 | // the first address offset. |
274 | if (Iter == Begin && AddrOffset < *Begin) |
275 | return std::nullopt; |
276 | if (Iter == End || AddrOffset < *Iter) |
277 | --Iter; |
278 | |
279 | // GSYM files have sorted function infos with the most information (line |
280 | // table and/or inline info) first in the array of function infos, so |
281 | // always backup as much as possible as long as the address offset is the |
282 | // same as the previous entry. |
283 | while (Iter != Begin) { |
284 | auto Prev = Iter - 1; |
285 | if (*Prev == *Iter) |
286 | Iter = Prev; |
287 | else |
288 | break; |
289 | } |
290 | |
291 | return std::distance(Begin, Iter); |
292 | } |
293 | |
294 | /// Create a GSYM from a memory buffer. |
295 | /// |
296 | /// Called by both openFile() and copyBuffer(), this function does all of the |
297 | /// work of parsing the GSYM file and returning an error. |
298 | /// |
299 | /// \param MemBuffer A memory buffer that will transfer ownership into the |
300 | /// GsymReader. |
301 | /// \returns An expected GsymReader that contains the object or an error |
302 | /// object that indicates reason for failing to read the GSYM. |
303 | static llvm::Expected<llvm::gsym::GsymReader> |
304 | create(std::unique_ptr<MemoryBuffer> &MemBuffer); |
305 | |
306 | |
307 | /// Given an address, find the address index. |
308 | /// |
309 | /// Binary search the address table and find the matching address index. |
310 | /// |
311 | /// \param Addr A virtual address that matches the original object file |
312 | /// to lookup. |
313 | /// \returns An index into the address table. This index can be used to |
314 | /// extract the FunctionInfo data's offset from the AddrInfoOffsets array. |
315 | /// Returns an error if the address isn't in the GSYM with details of why. |
316 | Expected<uint64_t> getAddressIndex(const uint64_t Addr) const; |
317 | |
318 | /// Given an address index, get the offset for the FunctionInfo. |
319 | /// |
320 | /// Looking up an address is done by finding the corresponding address |
321 | /// index for the address. This index is then used to get the offset of the |
322 | /// FunctionInfo data that we will decode using this function. |
323 | /// |
324 | /// \param Index An index into the address table. |
325 | /// \returns An optional GSYM data offset for the offset of the FunctionInfo |
326 | /// that needs to be decoded. |
327 | std::optional<uint64_t> getAddressInfoOffset(size_t Index) const; |
328 | |
329 | /// Given an address, find the correct function info data and function |
330 | /// address. |
331 | /// |
332 | /// Binary search the address table and find the matching address info |
333 | /// and make sure that the function info contains the address. GSYM allows |
334 | /// functions to overlap, and the most debug info is contained in the first |
335 | /// entries due to the sorting when GSYM files are created. We can have |
336 | /// multiple function info that start at the same address only if their |
337 | /// address range doesn't match. So find the first entry that matches \a Addr |
338 | /// and iterate forward until we find one that contains the address. |
339 | /// |
340 | /// \param[in] Addr A virtual address that matches the original object file |
341 | /// to lookup. |
342 | /// |
343 | /// \param[out] FuncStartAddr A virtual address that is the base address of |
344 | /// the function that is used for decoding the FunctionInfo. |
345 | /// |
346 | /// \returns An valid data extractor on success, or an error if we fail to |
347 | /// find the address in a function info or corrrectly decode the data |
348 | llvm::Expected<llvm::DataExtractor> |
349 | getFunctionInfoDataForAddress(uint64_t Addr, uint64_t &FuncStartAddr) const; |
350 | |
351 | /// Get the function data and address given an address index. |
352 | /// |
353 | /// \param AddrIdx A address index from the address table. |
354 | /// |
355 | /// \returns An expected FunctionInfo that contains the function info object |
356 | /// or an error object that indicates reason for failing to lookup the |
357 | /// address. |
358 | llvm::Expected<llvm::DataExtractor> |
359 | getFunctionInfoDataAtIndex(uint64_t AddrIdx, uint64_t &FuncStartAddr) const; |
360 | }; |
361 | |
362 | } // namespace gsym |
363 | } // namespace llvm |
364 | |
365 | #endif // LLVM_DEBUGINFO_GSYM_GSYMREADER_H |
366 | |