1 | //===- SymbolizableObjectFile.cpp -----------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Implementation of SymbolizableObjectFile class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "SymbolizableObjectFile.h" |
14 | #include "llvm/ADT/STLExtras.h" |
15 | #include "llvm/ADT/Triple.h" |
16 | #include "llvm/BinaryFormat/COFF.h" |
17 | #include "llvm/DebugInfo/DWARF/DWARFContext.h" |
18 | #include "llvm/Object/COFF.h" |
19 | #include "llvm/Object/ELFObjectFile.h" |
20 | #include "llvm/Object/ObjectFile.h" |
21 | #include "llvm/Object/SymbolSize.h" |
22 | #include "llvm/Support/Casting.h" |
23 | #include "llvm/Support/DataExtractor.h" |
24 | #include <algorithm> |
25 | |
26 | using namespace llvm; |
27 | using namespace object; |
28 | using namespace symbolize; |
29 | |
30 | Expected<std::unique_ptr<SymbolizableObjectFile>> |
31 | SymbolizableObjectFile::create(const object::ObjectFile *Obj, |
32 | std::unique_ptr<DIContext> DICtx, |
33 | bool UntagAddresses) { |
34 | assert(DICtx); |
35 | std::unique_ptr<SymbolizableObjectFile> res( |
36 | new SymbolizableObjectFile(Obj, std::move(DICtx), UntagAddresses)); |
37 | std::unique_ptr<DataExtractor> ; |
38 | uint64_t OpdAddress = 0; |
39 | // Find the .opd (function descriptor) section if any, for big-endian |
40 | // PowerPC64 ELF. |
41 | if (Obj->getArch() == Triple::ppc64) { |
42 | for (section_iterator Section : Obj->sections()) { |
43 | Expected<StringRef> NameOrErr = Section->getName(); |
44 | if (!NameOrErr) |
45 | return NameOrErr.takeError(); |
46 | |
47 | if (*NameOrErr == ".opd" ) { |
48 | Expected<StringRef> E = Section->getContents(); |
49 | if (!E) |
50 | return E.takeError(); |
51 | OpdExtractor.reset(new DataExtractor(*E, Obj->isLittleEndian(), |
52 | Obj->getBytesInAddress())); |
53 | OpdAddress = Section->getAddress(); |
54 | break; |
55 | } |
56 | } |
57 | } |
58 | std::vector<std::pair<SymbolRef, uint64_t>> Symbols = |
59 | computeSymbolSizes(*Obj); |
60 | for (auto &P : Symbols) |
61 | if (Error E = |
62 | res->addSymbol(P.first, P.second, OpdExtractor.get(), OpdAddress)) |
63 | return std::move(E); |
64 | |
65 | // If this is a COFF object and we didn't find any symbols, try the export |
66 | // table. |
67 | if (Symbols.empty()) { |
68 | if (auto *CoffObj = dyn_cast<COFFObjectFile>(Obj)) |
69 | if (Error E = res->addCoffExportSymbols(CoffObj)) |
70 | return std::move(E); |
71 | } |
72 | |
73 | std::vector<SymbolDesc> &SS = res->Symbols; |
74 | // Sort by (Addr,Size,Name). If several SymbolDescs share the same Addr, |
75 | // pick the one with the largest Size. This helps us avoid symbols with no |
76 | // size information (Size=0). |
77 | llvm::stable_sort(SS); |
78 | auto I = SS.begin(), E = SS.end(), J = SS.begin(); |
79 | while (I != E) { |
80 | auto OI = I; |
81 | while (++I != E && OI->Addr == I->Addr) { |
82 | } |
83 | *J++ = I[-1]; |
84 | } |
85 | SS.erase(J, SS.end()); |
86 | |
87 | return std::move(res); |
88 | } |
89 | |
90 | SymbolizableObjectFile::SymbolizableObjectFile(const ObjectFile *Obj, |
91 | std::unique_ptr<DIContext> DICtx, |
92 | bool UntagAddresses) |
93 | : Module(Obj), DebugInfoContext(std::move(DICtx)), |
94 | UntagAddresses(UntagAddresses) {} |
95 | |
96 | namespace { |
97 | |
98 | struct OffsetNamePair { |
99 | uint32_t Offset; |
100 | StringRef Name; |
101 | |
102 | bool operator<(const OffsetNamePair &R) const { |
103 | return Offset < R.Offset; |
104 | } |
105 | }; |
106 | |
107 | } // end anonymous namespace |
108 | |
109 | Error SymbolizableObjectFile::addCoffExportSymbols( |
110 | const COFFObjectFile *CoffObj) { |
111 | // Get all export names and offsets. |
112 | std::vector<OffsetNamePair> ExportSyms; |
113 | for (const ExportDirectoryEntryRef &Ref : CoffObj->export_directories()) { |
114 | StringRef Name; |
115 | uint32_t Offset; |
116 | if (auto EC = Ref.getSymbolName(Name)) |
117 | return EC; |
118 | if (auto EC = Ref.getExportRVA(Offset)) |
119 | return EC; |
120 | ExportSyms.push_back(OffsetNamePair{Offset, Name}); |
121 | } |
122 | if (ExportSyms.empty()) |
123 | return Error::success(); |
124 | |
125 | // Sort by ascending offset. |
126 | array_pod_sort(ExportSyms.begin(), ExportSyms.end()); |
127 | |
128 | // Approximate the symbol sizes by assuming they run to the next symbol. |
129 | // FIXME: This assumes all exports are functions. |
130 | uint64_t ImageBase = CoffObj->getImageBase(); |
131 | for (auto I = ExportSyms.begin(), E = ExportSyms.end(); I != E; ++I) { |
132 | OffsetNamePair &Export = *I; |
133 | // FIXME: The last export has a one byte size now. |
134 | uint32_t NextOffset = I != E ? I->Offset : Export.Offset + 1; |
135 | uint64_t SymbolStart = ImageBase + Export.Offset; |
136 | uint64_t SymbolSize = NextOffset - Export.Offset; |
137 | Symbols.push_back({SymbolStart, SymbolSize, Export.Name, 0}); |
138 | } |
139 | return Error::success(); |
140 | } |
141 | |
142 | Error SymbolizableObjectFile::(const SymbolRef &Symbol, |
143 | uint64_t SymbolSize, |
144 | DataExtractor *, |
145 | uint64_t OpdAddress) { |
146 | // Avoid adding symbols from an unknown/undefined section. |
147 | const ObjectFile &Obj = *Symbol.getObject(); |
148 | Expected<StringRef> SymbolNameOrErr = Symbol.getName(); |
149 | if (!SymbolNameOrErr) |
150 | return SymbolNameOrErr.takeError(); |
151 | StringRef SymbolName = *SymbolNameOrErr; |
152 | |
153 | uint32_t ELFSymIdx = |
154 | Obj.isELF() ? ELFSymbolRef(Symbol).getRawDataRefImpl().d.b : 0; |
155 | Expected<section_iterator> Sec = Symbol.getSection(); |
156 | if (!Sec || Obj.section_end() == *Sec) { |
157 | if (Obj.isELF()) { |
158 | // Store the (index, filename) pair for a file symbol. |
159 | ELFSymbolRef ESym(Symbol); |
160 | if (ESym.getELFType() == ELF::STT_FILE) |
161 | FileSymbols.emplace_back(ELFSymIdx, SymbolName); |
162 | } |
163 | return Error::success(); |
164 | } |
165 | |
166 | Expected<SymbolRef::Type> SymbolTypeOrErr = Symbol.getType(); |
167 | if (!SymbolTypeOrErr) |
168 | return SymbolTypeOrErr.takeError(); |
169 | SymbolRef::Type SymbolType = *SymbolTypeOrErr; |
170 | if (Obj.isELF()) { |
171 | // Allow function and data symbols. Additionally allow STT_NONE, which are |
172 | // common for functions defined in assembly. |
173 | uint8_t Type = ELFSymbolRef(Symbol).getELFType(); |
174 | if (Type != ELF::STT_NOTYPE && Type != ELF::STT_FUNC && |
175 | Type != ELF::STT_OBJECT && Type != ELF::STT_GNU_IFUNC) |
176 | return Error::success(); |
177 | // Some STT_NOTYPE symbols are not desired. This excludes STT_SECTION and |
178 | // ARM mapping symbols. |
179 | uint32_t Flags = cantFail(Symbol.getFlags()); |
180 | if (Flags & SymbolRef::SF_FormatSpecific) |
181 | return Error::success(); |
182 | } else if (SymbolType != SymbolRef::ST_Function && |
183 | SymbolType != SymbolRef::ST_Data) { |
184 | return Error::success(); |
185 | } |
186 | |
187 | Expected<uint64_t> SymbolAddressOrErr = Symbol.getAddress(); |
188 | if (!SymbolAddressOrErr) |
189 | return SymbolAddressOrErr.takeError(); |
190 | uint64_t SymbolAddress = *SymbolAddressOrErr; |
191 | if (UntagAddresses) { |
192 | // For kernel addresses, bits 56-63 need to be set, so we sign extend bit 55 |
193 | // into bits 56-63 instead of masking them out. |
194 | SymbolAddress &= (1ull << 56) - 1; |
195 | SymbolAddress = (int64_t(SymbolAddress) << 8) >> 8; |
196 | } |
197 | if (OpdExtractor) { |
198 | // For big-endian PowerPC64 ELF, symbols in the .opd section refer to |
199 | // function descriptors. The first word of the descriptor is a pointer to |
200 | // the function's code. |
201 | // For the purposes of symbolization, pretend the symbol's address is that |
202 | // of the function's code, not the descriptor. |
203 | uint64_t OpdOffset = SymbolAddress - OpdAddress; |
204 | if (OpdExtractor->isValidOffsetForAddress(OpdOffset)) |
205 | SymbolAddress = OpdExtractor->getAddress(&OpdOffset); |
206 | } |
207 | // Mach-O symbol table names have leading underscore, skip it. |
208 | if (Module->isMachO() && !SymbolName.empty() && SymbolName[0] == '_') |
209 | SymbolName = SymbolName.drop_front(); |
210 | |
211 | if (Obj.isELF() && ELFSymbolRef(Symbol).getBinding() != ELF::STB_LOCAL) |
212 | ELFSymIdx = 0; |
213 | Symbols.push_back({SymbolAddress, SymbolSize, SymbolName, ELFSymIdx}); |
214 | return Error::success(); |
215 | } |
216 | |
217 | // Return true if this is a 32-bit x86 PE COFF module. |
218 | bool SymbolizableObjectFile::isWin32Module() const { |
219 | auto *CoffObject = dyn_cast<COFFObjectFile>(Module); |
220 | return CoffObject && CoffObject->getMachine() == COFF::IMAGE_FILE_MACHINE_I386; |
221 | } |
222 | |
223 | uint64_t SymbolizableObjectFile::getModulePreferredBase() const { |
224 | if (auto *CoffObject = dyn_cast<COFFObjectFile>(Module)) |
225 | return CoffObject->getImageBase(); |
226 | return 0; |
227 | } |
228 | |
229 | bool SymbolizableObjectFile::getNameFromSymbolTable( |
230 | uint64_t Address, std::string &Name, uint64_t &Addr, uint64_t &Size, |
231 | std::string &FileName) const { |
232 | SymbolDesc SD{Address, UINT64_C(-1), StringRef(), 0}; |
233 | auto SymbolIterator = llvm::upper_bound(Symbols, SD); |
234 | if (SymbolIterator == Symbols.begin()) |
235 | return false; |
236 | --SymbolIterator; |
237 | if (SymbolIterator->Size != 0 && |
238 | SymbolIterator->Addr + SymbolIterator->Size <= Address) |
239 | return false; |
240 | Name = SymbolIterator->Name.str(); |
241 | Addr = SymbolIterator->Addr; |
242 | Size = SymbolIterator->Size; |
243 | |
244 | if (SymbolIterator->ELFLocalSymIdx != 0) { |
245 | // If this is an ELF local symbol, find the STT_FILE symbol preceding |
246 | // SymbolIterator to get the filename. The ELF spec requires the STT_FILE |
247 | // symbol (if present) precedes the other STB_LOCAL symbols for the file. |
248 | assert(Module->isELF()); |
249 | auto It = llvm::upper_bound( |
250 | FileSymbols, |
251 | std::make_pair(SymbolIterator->ELFLocalSymIdx, StringRef())); |
252 | if (It != FileSymbols.begin()) |
253 | FileName = It[-1].second.str(); |
254 | } |
255 | return true; |
256 | } |
257 | |
258 | bool SymbolizableObjectFile::shouldOverrideWithSymbolTable( |
259 | FunctionNameKind FNKind, bool UseSymbolTable) const { |
260 | // When DWARF is used with -gline-tables-only / -gmlt, the symbol table gives |
261 | // better answers for linkage names than the DIContext. Otherwise, we are |
262 | // probably using PEs and PDBs, and we shouldn't do the override. PE files |
263 | // generally only contain the names of exported symbols. |
264 | return FNKind == FunctionNameKind::LinkageName && UseSymbolTable && |
265 | isa<DWARFContext>(DebugInfoContext.get()); |
266 | } |
267 | |
268 | DILineInfo |
269 | SymbolizableObjectFile::symbolizeCode(object::SectionedAddress ModuleOffset, |
270 | DILineInfoSpecifier LineInfoSpecifier, |
271 | bool UseSymbolTable) const { |
272 | if (ModuleOffset.SectionIndex == object::SectionedAddress::UndefSection) |
273 | ModuleOffset.SectionIndex = |
274 | getModuleSectionIndexForAddress(ModuleOffset.Address); |
275 | DILineInfo LineInfo = |
276 | DebugInfoContext->getLineInfoForAddress(ModuleOffset, LineInfoSpecifier); |
277 | |
278 | // Override function name from symbol table if necessary. |
279 | if (shouldOverrideWithSymbolTable(LineInfoSpecifier.FNKind, UseSymbolTable)) { |
280 | std::string FunctionName, FileName; |
281 | uint64_t Start, Size; |
282 | if (getNameFromSymbolTable(ModuleOffset.Address, FunctionName, Start, Size, |
283 | FileName)) { |
284 | LineInfo.FunctionName = FunctionName; |
285 | if (LineInfo.FileName == DILineInfo::BadString && !FileName.empty()) |
286 | LineInfo.FileName = FileName; |
287 | } |
288 | } |
289 | return LineInfo; |
290 | } |
291 | |
292 | DIInliningInfo SymbolizableObjectFile::symbolizeInlinedCode( |
293 | object::SectionedAddress ModuleOffset, |
294 | DILineInfoSpecifier LineInfoSpecifier, bool UseSymbolTable) const { |
295 | if (ModuleOffset.SectionIndex == object::SectionedAddress::UndefSection) |
296 | ModuleOffset.SectionIndex = |
297 | getModuleSectionIndexForAddress(ModuleOffset.Address); |
298 | DIInliningInfo InlinedContext = DebugInfoContext->getInliningInfoForAddress( |
299 | ModuleOffset, LineInfoSpecifier); |
300 | |
301 | // Make sure there is at least one frame in context. |
302 | if (InlinedContext.getNumberOfFrames() == 0) |
303 | InlinedContext.addFrame(DILineInfo()); |
304 | |
305 | // Override the function name in lower frame with name from symbol table. |
306 | if (shouldOverrideWithSymbolTable(LineInfoSpecifier.FNKind, UseSymbolTable)) { |
307 | std::string FunctionName, FileName; |
308 | uint64_t Start, Size; |
309 | if (getNameFromSymbolTable(ModuleOffset.Address, FunctionName, Start, Size, |
310 | FileName)) { |
311 | DILineInfo *LI = InlinedContext.getMutableFrame( |
312 | InlinedContext.getNumberOfFrames() - 1); |
313 | LI->FunctionName = FunctionName; |
314 | if (LI->FileName == DILineInfo::BadString && !FileName.empty()) |
315 | LI->FileName = FileName; |
316 | } |
317 | } |
318 | |
319 | return InlinedContext; |
320 | } |
321 | |
322 | DIGlobal SymbolizableObjectFile::symbolizeData( |
323 | object::SectionedAddress ModuleOffset) const { |
324 | DIGlobal Res; |
325 | std::string FileName; |
326 | getNameFromSymbolTable(ModuleOffset.Address, Res.Name, Res.Start, Res.Size, |
327 | FileName); |
328 | return Res; |
329 | } |
330 | |
331 | std::vector<DILocal> SymbolizableObjectFile::symbolizeFrame( |
332 | object::SectionedAddress ModuleOffset) const { |
333 | if (ModuleOffset.SectionIndex == object::SectionedAddress::UndefSection) |
334 | ModuleOffset.SectionIndex = |
335 | getModuleSectionIndexForAddress(ModuleOffset.Address); |
336 | return DebugInfoContext->getLocalsForAddress(ModuleOffset); |
337 | } |
338 | |
339 | /// Search for the first occurence of specified Address in ObjectFile. |
340 | uint64_t SymbolizableObjectFile::getModuleSectionIndexForAddress( |
341 | uint64_t Address) const { |
342 | |
343 | for (SectionRef Sec : Module->sections()) { |
344 | if (!Sec.isText() || Sec.isVirtual()) |
345 | continue; |
346 | |
347 | if (Address >= Sec.getAddress() && |
348 | Address < Sec.getAddress() + Sec.getSize()) |
349 | return Sec.getIndex(); |
350 | } |
351 | |
352 | return object::SectionedAddress::UndefSection; |
353 | } |
354 | |