1//===- Symbols.h ------------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_MACHO_SYMBOLS_H
10#define LLD_MACHO_SYMBOLS_H
11
#include "InputFiles.h"
#include "InputSection.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Strings.h"
#include "llvm/Object/Archive.h"
#include "llvm/Support/MathExtras.h"

#include <type_traits>
19
20namespace lld {
21namespace macho {
22
23class InputSection;
24class MachHeaderSection;
25
26struct StringRefZ {
27 StringRefZ(const char *s) : data(s), size(-1) {}
28 StringRefZ(StringRef s) : data(s.data()), size(s.size()) {}
29
30 const char *data;
31 const uint32_t size;
32};
33
34class Symbol {
35public:
36 enum Kind {
37 DefinedKind,
38 UndefinedKind,
39 CommonKind,
40 DylibKind,
41 LazyKind,
42 };
43
44 virtual ~Symbol() {}
45
46 Kind kind() const { return symbolKind; }
47
48 StringRef getName() const {
49 if (nameSize == (uint32_t)-1)
50 nameSize = strlen(nameData);
51 return {nameData, nameSize};
52 }
53
54 virtual uint64_t getVA() const { return 0; }
55
56 virtual uint64_t getFileOffset() const {
57 llvm_unreachable("attempt to get an offset from a non-defined symbol");
58 }
59
60 virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); }
61
62 // Only undefined or dylib symbols can be weak references. A weak reference
63 // need not be satisfied at runtime, e.g. due to the symbol not being
64 // available on a given target platform.
65 virtual bool isWeakRef() const { llvm_unreachable("cannot be a weak ref"); }
66
67 virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); }
68
69 // Whether this symbol is in the GOT or TLVPointer sections.
70 bool isInGot() const { return gotIndex != UINT32_MAX; }
71
72 // Whether this symbol is in the StubsSection.
73 bool isInStubs() const { return stubsIndex != UINT32_MAX; }
74
75 // The index of this symbol in the GOT or the TLVPointer section, depending
76 // on whether it is a thread-local. A given symbol cannot be referenced by
77 // both these sections at once.
78 uint32_t gotIndex = UINT32_MAX;
79
80 uint32_t stubsIndex = UINT32_MAX;
81
82 uint32_t symtabIndex = UINT32_MAX;
83
84 InputFile *getFile() const { return file; }
85
86protected:
87 Symbol(Kind k, StringRefZ name, InputFile *file)
88 : symbolKind(k), nameData(name.data), nameSize(name.size), file(file),
89 isUsedInRegularObj(!file || isa<ObjFile>(file)) {}
90
91 Kind symbolKind;
92 const char *nameData;
93 mutable uint32_t nameSize;
94 InputFile *file;
95
96public:
97 // True if this symbol was referenced by a regular (non-bitcode) object.
98 bool isUsedInRegularObj;
99};
100
101class Defined : public Symbol {
102public:
103 Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value,
104 uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern,
105 bool isThumb)
106 : Symbol(DefinedKind, name, file), isec(isec), value(value), size(size),
107 overridesWeakDef(false), privateExtern(isPrivateExtern),
108 includeInSymtab(true), thumb(isThumb), weakDef(isWeakDef),
109 external(isExternal) {}
110
111 bool isWeakDef() const override { return weakDef; }
112 bool isExternalWeakDef() const {
113 return isWeakDef() && isExternal() && !privateExtern;
114 }
115 bool isTlv() const override {
116 return !isAbsolute() && isThreadLocalVariables(isec->flags);
117 }
118
119 bool isExternal() const { return external; }
120 bool isAbsolute() const { return isec == nullptr; }
121
122 uint64_t getVA() const override;
123 uint64_t getFileOffset() const override;
124
125 static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }
126
127 InputSection *isec;
128 // Contains the offset from the containing subsection. Note that this is
129 // different from nlist::n_value, which is the absolute address of the symbol.
130 uint64_t value;
131 // size is only calculated for regular (non-bitcode) symbols.
132 uint64_t size;
133
134 bool overridesWeakDef : 1;
135 // Whether this symbol should appear in the output binary's export trie.
136 bool privateExtern : 1;
137 // Whether this symbol should appear in the output symbol table.
138 bool includeInSymtab : 1;
139 // Only relevant when compiling for Thumb-supporting arm32 archs.
140 bool thumb : 1;
141
142private:
143 const bool weakDef : 1;
144 const bool external : 1;
145};
146
// RefState serves two purposes. On a symbol it records whether, and how, a
// dylib symbol is referenced. On a DylibFile it records what kind of
// referenced symbols the file contains; a file that has both weak and strong
// references is counted as strongly referenced.
enum class RefState : uint8_t {
  Unreferenced = 0,
  Weak = 1,
  Strong = 2,
};
153
154class Undefined : public Symbol {
155public:
156 Undefined(StringRefZ name, InputFile *file, RefState refState)
157 : Symbol(UndefinedKind, name, file), refState(refState) {
158 assert(refState != RefState::Unreferenced);
159 }
160
161 bool isWeakRef() const override { return refState == RefState::Weak; }
162
163 static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
164
165 RefState refState : 2;
166};
167
168// On Unix, it is traditionally allowed to write variable definitions without
169// initialization expressions (such as "int foo;") to header files. These are
170// called tentative definitions.
171//
172// Using tentative definitions is usually considered a bad practice; you should
173// write only declarations (such as "extern int foo;") to header files.
174// Nevertheless, the linker and the compiler have to do something to support
175// bad code by allowing duplicate definitions for this particular case.
176//
177// The compiler creates common symbols when it sees tentative definitions.
178// (You can suppress this behavior and let the compiler create a regular
179// defined symbol by passing -fno-common. -fno-common is the default in clang
180// as of LLVM 11.0.) When linking the final binary, if there are remaining
181// common symbols after name resolution is complete, the linker converts them
182// to regular defined symbols in a __common section.
183class CommonSymbol : public Symbol {
184public:
185 CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align,
186 bool isPrivateExtern)
187 : Symbol(CommonKind, name, file), size(size),
188 align(align != 1 ? align : llvm::PowerOf2Ceil(size)),
189 privateExtern(isPrivateExtern) {
190 // TODO: cap maximum alignment
191 }
192
193 static bool classof(const Symbol *s) { return s->kind() == CommonKind; }
194
195 const uint64_t size;
196 const uint32_t align;
197 const bool privateExtern;
198};
199
200class DylibSymbol : public Symbol {
201public:
202 DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef,
203 RefState refState, bool isTlv)
204 : Symbol(DylibKind, name, file), refState(refState), weakDef(isWeakDef),
205 tlv(isTlv) {}
206
207 bool isWeakDef() const override { return weakDef; }
208 bool isWeakRef() const override { return refState == RefState::Weak; }
209 bool isReferenced() const { return refState != RefState::Unreferenced; }
210 bool isTlv() const override { return tlv; }
211 bool isDynamicLookup() const { return file == nullptr; }
212 bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; }
213
214 DylibFile *getFile() const {
215 assert(!isDynamicLookup());
216 return cast<DylibFile>(file);
217 }
218
219 static bool classof(const Symbol *s) { return s->kind() == DylibKind; }
220
221 uint32_t stubsHelperIndex = UINT32_MAX;
222 uint32_t lazyBindOffset = UINT32_MAX;
223
224 RefState refState : 2;
225
226private:
227 const bool weakDef : 1;
228 const bool tlv : 1;
229};
230
231class LazySymbol : public Symbol {
232public:
233 LazySymbol(ArchiveFile *file, const llvm::object::Archive::Symbol &sym)
234 : Symbol(LazyKind, sym.getName(), file), sym(sym) {}
235
236 ArchiveFile *getFile() const { return cast<ArchiveFile>(file); }
237 void fetchArchiveMember();
238
239 static bool classof(const Symbol *s) { return s->kind() == LazyKind; }
240
241private:
242 const llvm::object::Archive::Symbol sym;
243};
244
245union SymbolUnion {
246 alignas(Defined) char a[sizeof(Defined)];
247 alignas(Undefined) char b[sizeof(Undefined)];
248 alignas(CommonSymbol) char c[sizeof(CommonSymbol)];
249 alignas(DylibSymbol) char d[sizeof(DylibSymbol)];
250 alignas(LazySymbol) char e[sizeof(LazySymbol)];
251};
252
253template <typename T, typename... ArgT>
254T *replaceSymbol(Symbol *s, ArgT &&...arg) {
255 static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
256 static_assert(alignof(T) <= alignof(SymbolUnion),
257 "SymbolUnion not aligned enough");
258 assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
259 "Not a Symbol");
260
261 bool isUsedInRegularObj = s->isUsedInRegularObj;
262 T *sym = new (s) T(std::forward<ArgT>(arg)...);
263 sym->isUsedInRegularObj |= isUsedInRegularObj;
264 return sym;
265}
266
267} // namespace macho
268
269std::string toString(const macho::Symbol &);
270std::string toMachOString(const llvm::object::Archive::Symbol &);
271
272} // namespace lld
273
274#endif
275