1 | //===- Symbols.h ------------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLD_COFF_SYMBOLS_H |
10 | #define LLD_COFF_SYMBOLS_H |
11 | |
12 | #include "Chunks.h" |
13 | #include "Config.h" |
14 | #include "lld/Common/LLVM.h" |
15 | #include "lld/Common/Memory.h" |
16 | #include "llvm/ADT/ArrayRef.h" |
17 | #include "llvm/Object/Archive.h" |
18 | #include "llvm/Object/COFF.h" |
19 | #include <atomic> |
20 | #include <memory> |
21 | #include <vector> |
22 | |
23 | namespace lld { |
24 | |
// Produces a std::string for the given COFF symbol. Only the declaration lives
// in this header.
// NOTE(review): presumably the printable name used in diagnostics -- confirm
// against the out-of-line definition.
std::string toString(coff::Symbol &b);

// There are two different ways to convert an Archive::Symbol to a string:
// One for Microsoft name mangling and one for Itanium name mangling.
// Call the functions toCOFFString and toELFString, not just toString.
std::string toCOFFString(const coff::Archive::Symbol &b);
31 | |
32 | namespace coff { |
33 | |
34 | using llvm::object::Archive; |
35 | using llvm::object::COFFSymbolRef; |
36 | using llvm::object::coff_import_header; |
37 | using llvm::object::coff_symbol_generic; |
38 | |
39 | class ArchiveFile; |
40 | class InputFile; |
41 | class ObjFile; |
42 | class SymbolTable; |
43 | |
44 | // The base class for real symbol classes. |
45 | class Symbol { |
46 | public: |
47 | enum Kind { |
48 | // The order of these is significant. We start with the regular defined |
49 | // symbols as those are the most prevalent and the zero tag is the cheapest |
50 | // to set. Among the defined kinds, the lower the kind is preferred over |
51 | // the higher kind when testing whether one symbol should take precedence |
52 | // over another. |
53 | DefinedRegularKind = 0, |
54 | DefinedCommonKind, |
55 | DefinedLocalImportKind, |
56 | DefinedImportThunkKind, |
57 | DefinedImportDataKind, |
58 | DefinedAbsoluteKind, |
59 | DefinedSyntheticKind, |
60 | |
61 | UndefinedKind, |
62 | LazyArchiveKind, |
63 | LazyObjectKind, |
64 | |
65 | LastDefinedCOFFKind = DefinedCommonKind, |
66 | LastDefinedKind = DefinedSyntheticKind, |
67 | }; |
68 | |
69 | Kind kind() const { return static_cast<Kind>(symbolKind); } |
70 | |
71 | // Returns the symbol name. |
72 | StringRef getName() { |
73 | // COFF symbol names are read lazily for a performance reason. |
74 | // Non-external symbol names are never used by the linker except for logging |
75 | // or debugging. Their internal references are resolved not by name but by |
76 | // symbol index. And because they are not external, no one can refer them by |
77 | // name. Object files contain lots of non-external symbols, and creating |
78 | // StringRefs for them (which involves lots of strlen() on the string table) |
79 | // is a waste of time. |
80 | if (nameData == nullptr) |
81 | computeName(); |
82 | return StringRef(nameData, nameSize); |
83 | } |
84 | |
85 | void replaceKeepingName(Symbol *other, size_t size); |
86 | |
87 | // Returns the file from which this symbol was created. |
88 | InputFile *getFile(); |
89 | |
90 | // Indicates that this symbol will be included in the final image. Only valid |
91 | // after calling markLive. |
92 | bool isLive() const; |
93 | |
94 | bool isLazy() const { |
95 | return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind; |
96 | } |
97 | |
98 | private: |
99 | void computeName(); |
100 | |
101 | protected: |
102 | friend SymbolTable; |
103 | explicit Symbol(Kind k, StringRef n = "" ) |
104 | : symbolKind(k), isExternal(true), isCOMDAT(false), |
105 | writtenToSymtab(false), pendingArchiveLoad(false), isGCRoot(false), |
106 | isRuntimePseudoReloc(false), deferUndefined(false), canInline(true), |
107 | nameSize(n.size()), nameData(n.empty() ? nullptr : n.data()) {} |
108 | |
109 | const unsigned symbolKind : 8; |
110 | unsigned isExternal : 1; |
111 | |
112 | public: |
113 | // This bit is used by the \c DefinedRegular subclass. |
114 | unsigned isCOMDAT : 1; |
115 | |
116 | // This bit is used by Writer::createSymbolAndStringTable() to prevent |
117 | // symbols from being written to the symbol table more than once. |
118 | unsigned writtenToSymtab : 1; |
119 | |
120 | // True if this symbol was referenced by a regular (non-bitcode) object. |
121 | unsigned isUsedInRegularObj : 1; |
122 | |
123 | // True if we've seen both a lazy and an undefined symbol with this symbol |
124 | // name, which means that we have enqueued an archive member load and should |
125 | // not load any more archive members to resolve the same symbol. |
126 | unsigned pendingArchiveLoad : 1; |
127 | |
128 | /// True if we've already added this symbol to the list of GC roots. |
129 | unsigned isGCRoot : 1; |
130 | |
131 | unsigned isRuntimePseudoReloc : 1; |
132 | |
133 | // True if we want to allow this symbol to be undefined in the early |
134 | // undefined check pass in SymbolTable::reportUnresolvable(), as it |
135 | // might be fixed up later. |
136 | unsigned deferUndefined : 1; |
137 | |
138 | // False if LTO shouldn't inline whatever this symbol points to. If a symbol |
139 | // is overwritten after LTO, LTO shouldn't inline the symbol because it |
140 | // doesn't know the final contents of the symbol. |
141 | unsigned canInline : 1; |
142 | |
143 | protected: |
144 | // Symbol name length. Assume symbol lengths fit in a 32-bit integer. |
145 | uint32_t nameSize; |
146 | |
147 | const char *nameData; |
148 | }; |
149 | |
150 | // The base class for any defined symbols, including absolute symbols, |
151 | // etc. |
152 | class Defined : public Symbol { |
153 | public: |
154 | Defined(Kind k, StringRef n) : Symbol(k, n) {} |
155 | |
156 | static bool classof(const Symbol *s) { return s->kind() <= LastDefinedKind; } |
157 | |
158 | // Returns the RVA (relative virtual address) of this symbol. The |
159 | // writer sets and uses RVAs. |
160 | uint64_t getRVA(); |
161 | |
162 | // Returns the chunk containing this symbol. Absolute symbols and __ImageBase |
163 | // do not have chunks, so this may return null. |
164 | Chunk *getChunk(); |
165 | }; |
166 | |
167 | // Symbols defined via a COFF object file or bitcode file. For COFF files, this |
168 | // stores a coff_symbol_generic*, and names of internal symbols are lazily |
169 | // loaded through that. For bitcode files, Sym is nullptr and the name is stored |
170 | // as a decomposed StringRef. |
171 | class DefinedCOFF : public Defined { |
172 | friend Symbol; |
173 | |
174 | public: |
175 | DefinedCOFF(Kind k, InputFile *f, StringRef n, const coff_symbol_generic *s) |
176 | : Defined(k, n), file(f), sym(s) {} |
177 | |
178 | static bool classof(const Symbol *s) { |
179 | return s->kind() <= LastDefinedCOFFKind; |
180 | } |
181 | |
182 | InputFile *getFile() { return file; } |
183 | |
184 | COFFSymbolRef getCOFFSymbol(); |
185 | |
186 | InputFile *file; |
187 | |
188 | protected: |
189 | const coff_symbol_generic *sym; |
190 | }; |
191 | |
192 | // Regular defined symbols read from object file symbol tables. |
193 | class DefinedRegular : public DefinedCOFF { |
194 | public: |
195 | DefinedRegular(InputFile *f, StringRef n, bool isCOMDAT, |
196 | bool isExternal = false, |
197 | const coff_symbol_generic *s = nullptr, |
198 | SectionChunk *c = nullptr) |
199 | : DefinedCOFF(DefinedRegularKind, f, n, s), data(c ? &c->repl : nullptr) { |
200 | this->isExternal = isExternal; |
201 | this->isCOMDAT = isCOMDAT; |
202 | } |
203 | |
204 | static bool classof(const Symbol *s) { |
205 | return s->kind() == DefinedRegularKind; |
206 | } |
207 | |
208 | uint64_t getRVA() const { return (*data)->getRVA() + sym->Value; } |
209 | SectionChunk *getChunk() const { return *data; } |
210 | uint32_t getValue() const { return sym->Value; } |
211 | |
212 | SectionChunk **data; |
213 | }; |
214 | |
215 | class DefinedCommon : public DefinedCOFF { |
216 | public: |
217 | DefinedCommon(InputFile *f, StringRef n, uint64_t size, |
218 | const coff_symbol_generic *s = nullptr, |
219 | CommonChunk *c = nullptr) |
220 | : DefinedCOFF(DefinedCommonKind, f, n, s), data(c), size(size) { |
221 | this->isExternal = true; |
222 | } |
223 | |
224 | static bool classof(const Symbol *s) { |
225 | return s->kind() == DefinedCommonKind; |
226 | } |
227 | |
228 | uint64_t getRVA() { return data->getRVA(); } |
229 | CommonChunk *getChunk() { return data; } |
230 | |
231 | private: |
232 | friend SymbolTable; |
233 | uint64_t getSize() const { return size; } |
234 | CommonChunk *data; |
235 | uint64_t size; |
236 | }; |
237 | |
238 | // Absolute symbols. |
239 | class DefinedAbsolute : public Defined { |
240 | public: |
241 | DefinedAbsolute(StringRef n, COFFSymbolRef s) |
242 | : Defined(DefinedAbsoluteKind, n), va(s.getValue()) { |
243 | isExternal = s.isExternal(); |
244 | } |
245 | |
246 | DefinedAbsolute(StringRef n, uint64_t v) |
247 | : Defined(DefinedAbsoluteKind, n), va(v) {} |
248 | |
249 | static bool classof(const Symbol *s) { |
250 | return s->kind() == DefinedAbsoluteKind; |
251 | } |
252 | |
253 | uint64_t getRVA() { return va - config->imageBase; } |
254 | void setVA(uint64_t v) { va = v; } |
255 | uint64_t getVA() const { return va; } |
256 | |
257 | // Section index relocations against absolute symbols resolve to |
258 | // this 16 bit number, and it is the largest valid section index |
259 | // plus one. This variable keeps it. |
260 | static uint16_t numOutputSections; |
261 | |
262 | private: |
263 | uint64_t va; |
264 | }; |
265 | |
266 | // This symbol is used for linker-synthesized symbols like __ImageBase and |
267 | // __safe_se_handler_table. |
268 | class DefinedSynthetic : public Defined { |
269 | public: |
270 | explicit DefinedSynthetic(StringRef name, Chunk *c) |
271 | : Defined(DefinedSyntheticKind, name), c(c) {} |
272 | |
273 | static bool classof(const Symbol *s) { |
274 | return s->kind() == DefinedSyntheticKind; |
275 | } |
276 | |
277 | // A null chunk indicates that this is __ImageBase. Otherwise, this is some |
278 | // other synthesized chunk, like SEHTableChunk. |
279 | uint32_t getRVA() { return c ? c->getRVA() : 0; } |
280 | Chunk *getChunk() { return c; } |
281 | |
282 | private: |
283 | Chunk *c; |
284 | }; |
285 | |
286 | // This class represents a symbol defined in an archive file. It is |
287 | // created from an archive file header, and it knows how to load an |
288 | // object file from an archive to replace itself with a defined |
289 | // symbol. If the resolver finds both Undefined and LazyArchive for |
290 | // the same name, it will ask the LazyArchive to load a file. |
291 | class LazyArchive : public Symbol { |
292 | public: |
293 | LazyArchive(ArchiveFile *f, const Archive::Symbol s) |
294 | : Symbol(LazyArchiveKind, s.getName()), file(f), sym(s) {} |
295 | |
296 | static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; } |
297 | |
298 | MemoryBufferRef getMemberBuffer(); |
299 | |
300 | ArchiveFile *file; |
301 | const Archive::Symbol sym; |
302 | }; |
303 | |
304 | class LazyObject : public Symbol { |
305 | public: |
306 | LazyObject(LazyObjFile *f, StringRef n) |
307 | : Symbol(LazyObjectKind, n), file(f) {} |
308 | static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; } |
309 | LazyObjFile *file; |
310 | }; |
311 | |
312 | // Undefined symbols. |
313 | class Undefined : public Symbol { |
314 | public: |
315 | explicit Undefined(StringRef n) : Symbol(UndefinedKind, n) {} |
316 | |
317 | static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } |
318 | |
319 | // An undefined symbol can have a fallback symbol which gives an |
320 | // undefined symbol a second chance if it would remain undefined. |
321 | // If it remains undefined, it'll be replaced with whatever the |
322 | // Alias pointer points to. |
323 | Symbol *weakAlias = nullptr; |
324 | |
325 | // If this symbol is external weak, try to resolve it to a defined |
326 | // symbol by searching the chain of fallback symbols. Returns the symbol if |
327 | // successful, otherwise returns null. |
328 | Defined *getWeakAlias(); |
329 | }; |
330 | |
331 | // Windows-specific classes. |
332 | |
333 | // This class represents a symbol imported from a DLL. This has two |
334 | // names for internal use and external use. The former is used for |
335 | // name resolution, and the latter is used for the import descriptor |
336 | // table in an output. The former has "__imp_" prefix. |
337 | class DefinedImportData : public Defined { |
338 | public: |
339 | DefinedImportData(StringRef n, ImportFile *f) |
340 | : Defined(DefinedImportDataKind, n), file(f) { |
341 | } |
342 | |
343 | static bool classof(const Symbol *s) { |
344 | return s->kind() == DefinedImportDataKind; |
345 | } |
346 | |
347 | uint64_t getRVA() { return file->location->getRVA(); } |
348 | Chunk *getChunk() { return file->location; } |
349 | void setLocation(Chunk *addressTable) { file->location = addressTable; } |
350 | |
351 | StringRef getDLLName() { return file->dllName; } |
352 | StringRef getExternalName() { return file->externalName; } |
353 | uint16_t getOrdinal() { return file->hdr->OrdinalHint; } |
354 | |
355 | ImportFile *file; |
356 | |
357 | // This is a pointer to the synthetic symbol associated with the load thunk |
358 | // for this symbol that will be called if the DLL is delay-loaded. This is |
359 | // needed for Control Flow Guard because if this DefinedImportData symbol is a |
360 | // valid call target, the corresponding load thunk must also be marked as a |
361 | // valid call target. |
362 | DefinedSynthetic *loadThunkSym = nullptr; |
363 | }; |
364 | |
365 | // This class represents a symbol for a jump table entry which jumps |
366 | // to a function in a DLL. Linker are supposed to create such symbols |
367 | // without "__imp_" prefix for all function symbols exported from |
368 | // DLLs, so that you can call DLL functions as regular functions with |
369 | // a regular name. A function pointer is given as a DefinedImportData. |
370 | class DefinedImportThunk : public Defined { |
371 | public: |
372 | DefinedImportThunk(StringRef name, DefinedImportData *s, uint16_t machine); |
373 | |
374 | static bool classof(const Symbol *s) { |
375 | return s->kind() == DefinedImportThunkKind; |
376 | } |
377 | |
378 | uint64_t getRVA() { return data->getRVA(); } |
379 | Chunk *getChunk() { return data; } |
380 | |
381 | DefinedImportData *wrappedSym; |
382 | |
383 | private: |
384 | Chunk *data; |
385 | }; |
386 | |
387 | // If you have a symbol "foo" in your object file, a symbol name |
388 | // "__imp_foo" becomes automatically available as a pointer to "foo". |
389 | // This class is for such automatically-created symbols. |
390 | // Yes, this is an odd feature. We didn't intend to implement that. |
391 | // This is here just for compatibility with MSVC. |
392 | class DefinedLocalImport : public Defined { |
393 | public: |
394 | DefinedLocalImport(StringRef n, Defined *s) |
395 | : Defined(DefinedLocalImportKind, n), data(make<LocalImportChunk>(s)) {} |
396 | |
397 | static bool classof(const Symbol *s) { |
398 | return s->kind() == DefinedLocalImportKind; |
399 | } |
400 | |
401 | uint64_t getRVA() { return data->getRVA(); } |
402 | Chunk *getChunk() { return data; } |
403 | |
404 | private: |
405 | LocalImportChunk *data; |
406 | }; |
407 | |
408 | inline uint64_t Defined::getRVA() { |
409 | switch (kind()) { |
410 | case DefinedAbsoluteKind: |
411 | return cast<DefinedAbsolute>(this)->getRVA(); |
412 | case DefinedSyntheticKind: |
413 | return cast<DefinedSynthetic>(this)->getRVA(); |
414 | case DefinedImportDataKind: |
415 | return cast<DefinedImportData>(this)->getRVA(); |
416 | case DefinedImportThunkKind: |
417 | return cast<DefinedImportThunk>(this)->getRVA(); |
418 | case DefinedLocalImportKind: |
419 | return cast<DefinedLocalImport>(this)->getRVA(); |
420 | case DefinedCommonKind: |
421 | return cast<DefinedCommon>(this)->getRVA(); |
422 | case DefinedRegularKind: |
423 | return cast<DefinedRegular>(this)->getRVA(); |
424 | case LazyArchiveKind: |
425 | case LazyObjectKind: |
426 | case UndefinedKind: |
427 | llvm_unreachable("Cannot get the address for an undefined symbol." ); |
428 | } |
429 | llvm_unreachable("unknown symbol kind" ); |
430 | } |
431 | |
432 | inline Chunk *Defined::getChunk() { |
433 | switch (kind()) { |
434 | case DefinedRegularKind: |
435 | return cast<DefinedRegular>(this)->getChunk(); |
436 | case DefinedAbsoluteKind: |
437 | return nullptr; |
438 | case DefinedSyntheticKind: |
439 | return cast<DefinedSynthetic>(this)->getChunk(); |
440 | case DefinedImportDataKind: |
441 | return cast<DefinedImportData>(this)->getChunk(); |
442 | case DefinedImportThunkKind: |
443 | return cast<DefinedImportThunk>(this)->getChunk(); |
444 | case DefinedLocalImportKind: |
445 | return cast<DefinedLocalImport>(this)->getChunk(); |
446 | case DefinedCommonKind: |
447 | return cast<DefinedCommon>(this)->getChunk(); |
448 | case LazyArchiveKind: |
449 | case LazyObjectKind: |
450 | case UndefinedKind: |
451 | llvm_unreachable("Cannot get the chunk of an undefined symbol." ); |
452 | } |
453 | llvm_unreachable("unknown symbol kind" ); |
454 | } |
455 | |
// A buffer class that is large enough to hold any Symbol-derived
// object. We allocate memory using this class and instantiate a symbol
// using the placement new.
//
// Each member is a raw char array with the size and alignment of one concrete
// symbol class, so the union as a whole is at least as large and as strictly
// aligned as the largest / most aligned of them. replaceSymbol() checks both
// properties with static_asserts. The members are never accessed by name in
// this header.
// NOTE(review): keep `a` as the first member unless callers are audited --
// value-initializing a union zero-initializes its first member only.
union SymbolUnion {
  alignas(DefinedRegular) char a[sizeof(DefinedRegular)];
  alignas(DefinedCommon) char b[sizeof(DefinedCommon)];
  alignas(DefinedAbsolute) char c[sizeof(DefinedAbsolute)];
  alignas(DefinedSynthetic) char d[sizeof(DefinedSynthetic)];
  alignas(LazyArchive) char e[sizeof(LazyArchive)];
  alignas(Undefined) char f[sizeof(Undefined)];
  alignas(DefinedImportData) char g[sizeof(DefinedImportData)];
  alignas(DefinedImportThunk) char h[sizeof(DefinedImportThunk)];
  alignas(DefinedLocalImport) char i[sizeof(DefinedLocalImport)];
  alignas(LazyObject) char j[sizeof(LazyObject)];
};
471 | |
472 | template <typename T, typename... ArgT> |
473 | void replaceSymbol(Symbol *s, ArgT &&... arg) { |
474 | static_assert(std::is_trivially_destructible<T>(), |
475 | "Symbol types must be trivially destructible" ); |
476 | static_assert(sizeof(T) <= sizeof(SymbolUnion), "Symbol too small" ); |
477 | static_assert(alignof(T) <= alignof(SymbolUnion), |
478 | "SymbolUnion not aligned enough" ); |
479 | assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr && |
480 | "Not a Symbol" ); |
481 | bool canInline = s->canInline; |
482 | new (s) T(std::forward<ArgT>(arg)...); |
483 | s->canInline = canInline; |
484 | } |
485 | } // namespace coff |
486 | |
487 | } // namespace lld |
488 | |
489 | #endif |
490 | |