1//===- GsymReader.cpp -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/DebugInfo/GSYM/GsymReader.h"
10
11#include <assert.h>
12#include <inttypes.h>
13#include <stdio.h>
14#include <stdlib.h>
15
16#include "llvm/DebugInfo/GSYM/GsymCreator.h"
17#include "llvm/DebugInfo/GSYM/InlineInfo.h"
18#include "llvm/DebugInfo/GSYM/LineTable.h"
19#include "llvm/Support/BinaryStreamReader.h"
20#include "llvm/Support/DataExtractor.h"
21#include "llvm/Support/MemoryBuffer.h"
22
23using namespace llvm;
24using namespace gsym;
25
26GsymReader::GsymReader(std::unique_ptr<MemoryBuffer> Buffer)
27 : MemBuffer(std::move(Buffer)), Endian(llvm::endianness::native) {}
28
29GsymReader::GsymReader(GsymReader &&RHS) = default;
30
31GsymReader::~GsymReader() = default;
32
33llvm::Expected<GsymReader> GsymReader::openFile(StringRef Filename) {
34 // Open the input file and return an appropriate error if needed.
35 ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
36 MemoryBuffer::getFileOrSTDIN(Filename);
37 auto Err = BuffOrErr.getError();
38 if (Err)
39 return llvm::errorCodeToError(EC: Err);
40 return create(MemBuffer&: BuffOrErr.get());
41}
42
43llvm::Expected<GsymReader> GsymReader::copyBuffer(StringRef Bytes) {
44 auto MemBuffer = MemoryBuffer::getMemBufferCopy(InputData: Bytes, BufferName: "GSYM bytes");
45 return create(MemBuffer);
46}
47
48llvm::Expected<llvm::gsym::GsymReader>
49GsymReader::create(std::unique_ptr<MemoryBuffer> &MemBuffer) {
50 if (!MemBuffer)
51 return createStringError(EC: std::errc::invalid_argument,
52 Fmt: "invalid memory buffer");
53 GsymReader GR(std::move(MemBuffer));
54 llvm::Error Err = GR.parse();
55 if (Err)
56 return std::move(Err);
57 return std::move(GR);
58}
59
60llvm::Error
61GsymReader::parse() {
62 BinaryStreamReader FileData(MemBuffer->getBuffer(), llvm::endianness::native);
63 // Check for the magic bytes. This file format is designed to be mmap'ed
64 // into a process and accessed as read only. This is done for performance
65 // and efficiency for symbolicating and parsing GSYM data.
66 if (FileData.readObject(Dest&: Hdr))
67 return createStringError(EC: std::errc::invalid_argument,
68 Fmt: "not enough data for a GSYM header");
69
70 const auto HostByteOrder = llvm::endianness::native;
71 switch (Hdr->Magic) {
72 case GSYM_MAGIC:
73 Endian = HostByteOrder;
74 break;
75 case GSYM_CIGAM:
76 // This is a GSYM file, but not native endianness.
77 Endian = sys::IsBigEndianHost ? llvm::endianness::little
78 : llvm::endianness::big;
79 Swap.reset(p: new SwappedData);
80 break;
81 default:
82 return createStringError(EC: std::errc::invalid_argument,
83 Fmt: "not a GSYM file");
84 }
85
86 bool DataIsLittleEndian = HostByteOrder != llvm::endianness::little;
87 // Read a correctly byte swapped header if we need to.
88 if (Swap) {
89 DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
90 if (auto ExpectedHdr = Header::decode(Data))
91 Swap->Hdr = ExpectedHdr.get();
92 else
93 return ExpectedHdr.takeError();
94 Hdr = &Swap->Hdr;
95 }
96
97 // Detect errors in the header and report any that are found. If we make it
98 // past this without errors, we know we have a good magic value, a supported
99 // version number, verified address offset size and a valid UUID size.
100 if (Error Err = Hdr->checkForError())
101 return Err;
102
103 if (!Swap) {
104 // This is the native endianness case that is most common and optimized for
105 // efficient lookups. Here we just grab pointers to the native data and
106 // use ArrayRef objects to allow efficient read only access.
107
108 // Read the address offsets.
109 if (FileData.padToAlignment(Align: Hdr->AddrOffSize) ||
110 FileData.readArray(Array&: AddrOffsets,
111 NumElements: Hdr->NumAddresses * Hdr->AddrOffSize))
112 return createStringError(EC: std::errc::invalid_argument,
113 Fmt: "failed to read address table");
114
115 // Read the address info offsets.
116 if (FileData.padToAlignment(Align: 4) ||
117 FileData.readArray(Array&: AddrInfoOffsets, NumElements: Hdr->NumAddresses))
118 return createStringError(EC: std::errc::invalid_argument,
119 Fmt: "failed to read address info offsets table");
120
121 // Read the file table.
122 uint32_t NumFiles = 0;
123 if (FileData.readInteger(Dest&: NumFiles) || FileData.readArray(Array&: Files, NumElements: NumFiles))
124 return createStringError(EC: std::errc::invalid_argument,
125 Fmt: "failed to read file table");
126
127 // Get the string table.
128 FileData.setOffset(Hdr->StrtabOffset);
129 if (FileData.readFixedString(Dest&: StrTab.Data, Length: Hdr->StrtabSize))
130 return createStringError(EC: std::errc::invalid_argument,
131 Fmt: "failed to read string table");
132} else {
133 // This is the non native endianness case that is not common and not
134 // optimized for lookups. Here we decode the important tables into local
135 // storage and then set the ArrayRef objects to point to these swapped
136 // copies of the read only data so lookups can be as efficient as possible.
137 DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
138
139 // Read the address offsets.
140 uint64_t Offset = alignTo(Value: sizeof(Header), Align: Hdr->AddrOffSize);
141 Swap->AddrOffsets.resize(new_size: Hdr->NumAddresses * Hdr->AddrOffSize);
142 switch (Hdr->AddrOffSize) {
143 case 1:
144 if (!Data.getU8(offset_ptr: &Offset, dst: Swap->AddrOffsets.data(), count: Hdr->NumAddresses))
145 return createStringError(EC: std::errc::invalid_argument,
146 Fmt: "failed to read address table");
147 break;
148 case 2:
149 if (!Data.getU16(offset_ptr: &Offset,
150 dst: reinterpret_cast<uint16_t *>(Swap->AddrOffsets.data()),
151 count: Hdr->NumAddresses))
152 return createStringError(EC: std::errc::invalid_argument,
153 Fmt: "failed to read address table");
154 break;
155 case 4:
156 if (!Data.getU32(offset_ptr: &Offset,
157 dst: reinterpret_cast<uint32_t *>(Swap->AddrOffsets.data()),
158 count: Hdr->NumAddresses))
159 return createStringError(EC: std::errc::invalid_argument,
160 Fmt: "failed to read address table");
161 break;
162 case 8:
163 if (!Data.getU64(offset_ptr: &Offset,
164 dst: reinterpret_cast<uint64_t *>(Swap->AddrOffsets.data()),
165 count: Hdr->NumAddresses))
166 return createStringError(EC: std::errc::invalid_argument,
167 Fmt: "failed to read address table");
168 }
169 AddrOffsets = ArrayRef<uint8_t>(Swap->AddrOffsets);
170
171 // Read the address info offsets.
172 Offset = alignTo(Value: Offset, Align: 4);
173 Swap->AddrInfoOffsets.resize(new_size: Hdr->NumAddresses);
174 if (Data.getU32(offset_ptr: &Offset, dst: Swap->AddrInfoOffsets.data(), count: Hdr->NumAddresses))
175 AddrInfoOffsets = ArrayRef<uint32_t>(Swap->AddrInfoOffsets);
176 else
177 return createStringError(EC: std::errc::invalid_argument,
178 Fmt: "failed to read address table");
179 // Read the file table.
180 const uint32_t NumFiles = Data.getU32(offset_ptr: &Offset);
181 if (NumFiles > 0) {
182 Swap->Files.resize(new_size: NumFiles);
183 if (Data.getU32(offset_ptr: &Offset, dst: &Swap->Files[0].Dir, count: NumFiles*2))
184 Files = ArrayRef<FileEntry>(Swap->Files);
185 else
186 return createStringError(EC: std::errc::invalid_argument,
187 Fmt: "failed to read file table");
188 }
189 // Get the string table.
190 StrTab.Data = MemBuffer->getBuffer().substr(Start: Hdr->StrtabOffset,
191 N: Hdr->StrtabSize);
192 if (StrTab.Data.empty())
193 return createStringError(EC: std::errc::invalid_argument,
194 Fmt: "failed to read string table");
195 }
196 return Error::success();
197
198}
199
200const Header &GsymReader::getHeader() const {
201 // The only way to get a GsymReader is from GsymReader::openFile(...) or
202 // GsymReader::copyBuffer() and the header must be valid and initialized to
203 // a valid pointer value, so the assert below should not trigger.
204 assert(Hdr);
205 return *Hdr;
206}
207
208std::optional<uint64_t> GsymReader::getAddress(size_t Index) const {
209 switch (Hdr->AddrOffSize) {
210 case 1: return addressForIndex<uint8_t>(Index);
211 case 2: return addressForIndex<uint16_t>(Index);
212 case 4: return addressForIndex<uint32_t>(Index);
213 case 8: return addressForIndex<uint64_t>(Index);
214 }
215 return std::nullopt;
216}
217
218std::optional<uint64_t> GsymReader::getAddressInfoOffset(size_t Index) const {
219 const auto NumAddrInfoOffsets = AddrInfoOffsets.size();
220 if (Index < NumAddrInfoOffsets)
221 return AddrInfoOffsets[Index];
222 return std::nullopt;
223}
224
225Expected<uint64_t>
226GsymReader::getAddressIndex(const uint64_t Addr) const {
227 if (Addr >= Hdr->BaseAddress) {
228 const uint64_t AddrOffset = Addr - Hdr->BaseAddress;
229 std::optional<uint64_t> AddrOffsetIndex;
230 switch (Hdr->AddrOffSize) {
231 case 1:
232 AddrOffsetIndex = getAddressOffsetIndex<uint8_t>(AddrOffset);
233 break;
234 case 2:
235 AddrOffsetIndex = getAddressOffsetIndex<uint16_t>(AddrOffset);
236 break;
237 case 4:
238 AddrOffsetIndex = getAddressOffsetIndex<uint32_t>(AddrOffset);
239 break;
240 case 8:
241 AddrOffsetIndex = getAddressOffsetIndex<uint64_t>(AddrOffset);
242 break;
243 default:
244 return createStringError(EC: std::errc::invalid_argument,
245 Fmt: "unsupported address offset size %u",
246 Vals: Hdr->AddrOffSize);
247 }
248 if (AddrOffsetIndex)
249 return *AddrOffsetIndex;
250 }
251 return createStringError(EC: std::errc::invalid_argument,
252 Fmt: "address 0x%" PRIx64 " is not in GSYM", Vals: Addr);
253
254}
255
256llvm::Expected<DataExtractor>
257GsymReader::getFunctionInfoDataForAddress(uint64_t Addr,
258 uint64_t &FuncStartAddr) const {
259 Expected<uint64_t> ExpectedAddrIdx = getAddressIndex(Addr);
260 if (!ExpectedAddrIdx)
261 return ExpectedAddrIdx.takeError();
262 const uint64_t FirstAddrIdx = *ExpectedAddrIdx;
263 // The AddrIdx is the first index of the function info entries that match
264 // \a Addr. We need to iterate over all function info objects that start with
265 // the same address until we find a range that contains \a Addr.
266 std::optional<uint64_t> FirstFuncStartAddr;
267 const size_t NumAddresses = getNumAddresses();
268 for (uint64_t AddrIdx = FirstAddrIdx; AddrIdx < NumAddresses; ++AddrIdx) {
269 auto ExpextedData = getFunctionInfoDataAtIndex(AddrIdx, FuncStartAddr);
270 // If there was an error, return the error.
271 if (!ExpextedData)
272 return ExpextedData;
273
274 // Remember the first function start address if it hasn't already been set.
275 // If it is already valid, check to see if it matches the first function
276 // start address and only continue if it matches.
277 if (FirstFuncStartAddr.has_value()) {
278 if (*FirstFuncStartAddr != FuncStartAddr)
279 break; // Done with consecutive function entries with same address.
280 } else {
281 FirstFuncStartAddr = FuncStartAddr;
282 }
283 // Make sure the current function address ranges contains \a Addr.
284 // Some symbols on Darwin don't have valid sizes, so if we run into a
285 // symbol with zero size, then we have found a match for our address.
286
287 // The first thing the encoding of a FunctionInfo object is the function
288 // size.
289 uint64_t Offset = 0;
290 uint32_t FuncSize = ExpextedData->getU32(offset_ptr: &Offset);
291 if (FuncSize == 0 ||
292 AddressRange(FuncStartAddr, FuncStartAddr + FuncSize).contains(Addr))
293 return ExpextedData;
294 }
295 return createStringError(EC: std::errc::invalid_argument,
296 Fmt: "address 0x%" PRIx64 " is not in GSYM", Vals: Addr);
297}
298
299llvm::Expected<DataExtractor>
300GsymReader::getFunctionInfoDataAtIndex(uint64_t AddrIdx,
301 uint64_t &FuncStartAddr) const {
302 if (AddrIdx >= getNumAddresses())
303 return createStringError(EC: std::errc::invalid_argument,
304 Fmt: "invalid address index %" PRIu64, Vals: AddrIdx);
305 const uint32_t AddrInfoOffset = AddrInfoOffsets[AddrIdx];
306 assert((Endian == endianness::big || Endian == endianness::little) &&
307 "Endian must be either big or little");
308 StringRef Bytes = MemBuffer->getBuffer().substr(Start: AddrInfoOffset);
309 if (Bytes.empty())
310 return createStringError(EC: std::errc::invalid_argument,
311 Fmt: "invalid address info offset 0x%" PRIx32,
312 Vals: AddrInfoOffset);
313 std::optional<uint64_t> OptFuncStartAddr = getAddress(Index: AddrIdx);
314 if (!OptFuncStartAddr)
315 return createStringError(EC: std::errc::invalid_argument,
316 Fmt: "failed to extract address[%" PRIu64 "]", Vals: AddrIdx);
317 FuncStartAddr = *OptFuncStartAddr;
318 return DataExtractor(Bytes, Endian == llvm::endianness::little, 4);
319}
320
321llvm::Expected<FunctionInfo> GsymReader::getFunctionInfo(uint64_t Addr) const {
322 uint64_t FuncStartAddr = 0;
323 if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
324 return FunctionInfo::decode(Data&: *ExpectedData, BaseAddr: FuncStartAddr);
325 else
326 return ExpectedData.takeError();
327}
328
329llvm::Expected<FunctionInfo>
330GsymReader::getFunctionInfoAtIndex(uint64_t Idx) const {
331 uint64_t FuncStartAddr = 0;
332 if (auto ExpectedData = getFunctionInfoDataAtIndex(AddrIdx: Idx, FuncStartAddr))
333 return FunctionInfo::decode(Data&: *ExpectedData, BaseAddr: FuncStartAddr);
334 else
335 return ExpectedData.takeError();
336}
337
338llvm::Expected<LookupResult> GsymReader::lookup(uint64_t Addr) const {
339 uint64_t FuncStartAddr = 0;
340 if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
341 return FunctionInfo::lookup(Data&: *ExpectedData, GR: *this, FuncAddr: FuncStartAddr, Addr);
342 else
343 return ExpectedData.takeError();
344}
345
346void GsymReader::dump(raw_ostream &OS) {
347 const auto &Header = getHeader();
348 // Dump the GSYM header.
349 OS << Header << "\n";
350 // Dump the address table.
351 OS << "Address Table:\n";
352 OS << "INDEX OFFSET";
353
354 switch (Hdr->AddrOffSize) {
355 case 1: OS << "8 "; break;
356 case 2: OS << "16"; break;
357 case 4: OS << "32"; break;
358 case 8: OS << "64"; break;
359 default: OS << "??"; break;
360 }
361 OS << " (ADDRESS)\n";
362 OS << "====== =============================== \n";
363 for (uint32_t I = 0; I < Header.NumAddresses; ++I) {
364 OS << format(Fmt: "[%4u] ", Vals: I);
365 switch (Hdr->AddrOffSize) {
366 case 1: OS << HEX8(getAddrOffsets<uint8_t>()[I]); break;
367 case 2: OS << HEX16(getAddrOffsets<uint16_t>()[I]); break;
368 case 4: OS << HEX32(getAddrOffsets<uint32_t>()[I]); break;
369 case 8: OS << HEX32(getAddrOffsets<uint64_t>()[I]); break;
370 default: break;
371 }
372 OS << " (" << HEX64(*getAddress(I)) << ")\n";
373 }
374 // Dump the address info offsets table.
375 OS << "\nAddress Info Offsets:\n";
376 OS << "INDEX Offset\n";
377 OS << "====== ==========\n";
378 for (uint32_t I = 0; I < Header.NumAddresses; ++I)
379 OS << format(Fmt: "[%4u] ", Vals: I) << HEX32(AddrInfoOffsets[I]) << "\n";
380 // Dump the file table.
381 OS << "\nFiles:\n";
382 OS << "INDEX DIRECTORY BASENAME PATH\n";
383 OS << "====== ========== ========== ==============================\n";
384 for (uint32_t I = 0; I < Files.size(); ++I) {
385 OS << format(Fmt: "[%4u] ", Vals: I) << HEX32(Files[I].Dir) << ' '
386 << HEX32(Files[I].Base) << ' ';
387 dump(OS, FE: getFile(Index: I));
388 OS << "\n";
389 }
390 OS << "\n" << StrTab << "\n";
391
392 for (uint32_t I = 0; I < Header.NumAddresses; ++I) {
393 OS << "FunctionInfo @ " << HEX32(AddrInfoOffsets[I]) << ": ";
394 if (auto FI = getFunctionInfoAtIndex(Idx: I))
395 dump(OS, FI: *FI);
396 else
397 logAllUnhandledErrors(E: FI.takeError(), OS, ErrorBanner: "FunctionInfo:");
398 }
399}
400
401void GsymReader::dump(raw_ostream &OS, const FunctionInfo &FI) {
402 OS << FI.Range << " \"" << getString(Offset: FI.Name) << "\"\n";
403 if (FI.OptLineTable)
404 dump(OS, LT: *FI.OptLineTable);
405 if (FI.Inline)
406 dump(OS, II: *FI.Inline);
407}
408
409void GsymReader::dump(raw_ostream &OS, const LineTable &LT) {
410 OS << "LineTable:\n";
411 for (auto &LE: LT) {
412 OS << " " << HEX64(LE.Addr) << ' ';
413 if (LE.File)
414 dump(OS, FE: getFile(Index: LE.File));
415 OS << ':' << LE.Line << '\n';
416 }
417}
418
419void GsymReader::dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent) {
420 if (Indent == 0)
421 OS << "InlineInfo:\n";
422 else
423 OS.indent(NumSpaces: Indent);
424 OS << II.Ranges << ' ' << getString(Offset: II.Name);
425 if (II.CallFile != 0) {
426 if (auto File = getFile(Index: II.CallFile)) {
427 OS << " called from ";
428 dump(OS, FE: File);
429 OS << ':' << II.CallLine;
430 }
431 }
432 OS << '\n';
433 for (const auto &ChildII: II.Children)
434 dump(OS, II: ChildII, Indent: Indent + 2);
435}
436
437void GsymReader::dump(raw_ostream &OS, std::optional<FileEntry> FE) {
438 if (FE) {
439 // IF we have the file from index 0, then don't print anything
440 if (FE->Dir == 0 && FE->Base == 0)
441 return;
442 StringRef Dir = getString(Offset: FE->Dir);
443 StringRef Base = getString(Offset: FE->Base);
444 if (!Dir.empty()) {
445 OS << Dir;
446 if (Dir.contains(C: '\\') && !Dir.contains(C: '/'))
447 OS << '\\';
448 else
449 OS << '/';
450 }
451 if (!Base.empty()) {
452 OS << Base;
453 }
454 if (!Dir.empty() || !Base.empty())
455 return;
456 }
457 OS << "<invalid-file>";
458}
459

// Source: llvm/lib/DebugInfo/GSYM/GsymReader.cpp