1 | //===- IRSymtab.cpp - implementation of IR symbol tables ------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/Object/IRSymtab.h" |
10 | #include "llvm/ADT/ArrayRef.h" |
11 | #include "llvm/ADT/DenseMap.h" |
12 | #include "llvm/ADT/SmallPtrSet.h" |
13 | #include "llvm/ADT/SmallString.h" |
14 | #include "llvm/ADT/SmallVector.h" |
15 | #include "llvm/ADT/StringRef.h" |
16 | #include "llvm/Bitcode/BitcodeReader.h" |
17 | #include "llvm/Config/llvm-config.h" |
18 | #include "llvm/IR/Comdat.h" |
19 | #include "llvm/IR/DataLayout.h" |
20 | #include "llvm/IR/GlobalAlias.h" |
21 | #include "llvm/IR/GlobalObject.h" |
22 | #include "llvm/IR/Mangler.h" |
23 | #include "llvm/IR/Metadata.h" |
24 | #include "llvm/IR/Module.h" |
25 | #include "llvm/MC/StringTableBuilder.h" |
26 | #include "llvm/Object/ModuleSymbolTable.h" |
27 | #include "llvm/Object/SymbolicFile.h" |
28 | #include "llvm/Support/Allocator.h" |
29 | #include "llvm/Support/Casting.h" |
30 | #include "llvm/Support/CommandLine.h" |
31 | #include "llvm/Support/Error.h" |
32 | #include "llvm/Support/StringSaver.h" |
33 | #include "llvm/Support/VCSRevision.h" |
34 | #include "llvm/Support/raw_ostream.h" |
35 | #include "llvm/TargetParser/Triple.h" |
36 | #include <cassert> |
37 | #include <string> |
38 | #include <utility> |
39 | #include <vector> |
40 | |
41 | using namespace llvm; |
42 | using namespace irsymtab; |
43 | |
44 | static cl::opt<bool> DisableBitcodeVersionUpgrade( |
45 | "disable-bitcode-version-upgrade" , cl::Hidden, |
46 | cl::desc("Disable automatic bitcode upgrade for version mismatch" )); |
47 | |
48 | static const char *PreservedSymbols[] = { |
49 | #define HANDLE_LIBCALL(code, name) name, |
50 | #include "llvm/IR/RuntimeLibcalls.def" |
51 | #undef HANDLE_LIBCALL |
52 | // There are global variables, so put it here instead of in |
53 | // RuntimeLibcalls.def. |
54 | // TODO: Are there similar such variables? |
55 | "__ssp_canary_word" , |
56 | "__stack_chk_guard" , |
57 | }; |
58 | |
59 | namespace { |
60 | |
61 | const char *getExpectedProducerName() { |
62 | static char DefaultName[] = LLVM_VERSION_STRING |
63 | #ifdef LLVM_REVISION |
64 | " " LLVM_REVISION |
65 | #endif |
66 | ; |
67 | // Allows for testing of the irsymtab writer and upgrade mechanism. This |
68 | // environment variable should not be set by users. |
69 | if (char *OverrideName = getenv(name: "LLVM_OVERRIDE_PRODUCER" )) |
70 | return OverrideName; |
71 | return DefaultName; |
72 | } |
73 | |
74 | const char *kExpectedProducerName = getExpectedProducerName(); |
75 | |
76 | /// Stores the temporary state that is required to build an IR symbol table. |
77 | struct Builder { |
78 | SmallVector<char, 0> &Symtab; |
79 | StringTableBuilder &StrtabBuilder; |
80 | StringSaver Saver; |
81 | |
82 | // This ctor initializes a StringSaver using the passed in BumpPtrAllocator. |
83 | // The StringTableBuilder does not create a copy of any strings added to it, |
84 | // so this provides somewhere to store any strings that we create. |
85 | Builder(SmallVector<char, 0> &Symtab, StringTableBuilder &StrtabBuilder, |
86 | BumpPtrAllocator &Alloc) |
87 | : Symtab(Symtab), StrtabBuilder(StrtabBuilder), Saver(Alloc) {} |
88 | |
89 | DenseMap<const Comdat *, int> ComdatMap; |
90 | Mangler Mang; |
91 | Triple TT; |
92 | |
93 | std::vector<storage::Comdat> Comdats; |
94 | std::vector<storage::Module> Mods; |
95 | std::vector<storage::Symbol> Syms; |
96 | std::vector<storage::Uncommon> Uncommons; |
97 | |
98 | std::string COFFLinkerOpts; |
99 | raw_string_ostream COFFLinkerOptsOS{COFFLinkerOpts}; |
100 | |
101 | std::vector<storage::Str> DependentLibraries; |
102 | |
103 | void setStr(storage::Str &S, StringRef Value) { |
104 | S.Offset = StrtabBuilder.add(S: Value); |
105 | S.Size = Value.size(); |
106 | } |
107 | |
108 | template <typename T> |
109 | void writeRange(storage::Range<T> &R, const std::vector<T> &Objs) { |
110 | R.Offset = Symtab.size(); |
111 | R.Size = Objs.size(); |
112 | Symtab.insert(I: Symtab.end(), From: reinterpret_cast<const char *>(Objs.data()), |
113 | To: reinterpret_cast<const char *>(Objs.data() + Objs.size())); |
114 | } |
115 | |
116 | Expected<int> getComdatIndex(const Comdat *C, const Module *M); |
117 | |
118 | Error addModule(Module *M); |
119 | Error addSymbol(const ModuleSymbolTable &Msymtab, |
120 | const SmallPtrSet<GlobalValue *, 4> &Used, |
121 | ModuleSymbolTable::Symbol Sym); |
122 | |
123 | Error build(ArrayRef<Module *> Mods); |
124 | }; |
125 | |
126 | Error Builder::addModule(Module *M) { |
127 | if (M->getDataLayoutStr().empty()) |
128 | return make_error<StringError>(Args: "input module has no datalayout" , |
129 | Args: inconvertibleErrorCode()); |
130 | |
131 | // Symbols in the llvm.used list will get the FB_Used bit and will not be |
132 | // internalized. We do this for llvm.compiler.used as well: |
133 | // |
134 | // IR symbol table tracks module-level asm symbol references but not inline |
135 | // asm. A symbol only referenced by inline asm is not in the IR symbol table, |
136 | // so we may not know that the definition (in another translation unit) is |
137 | // referenced. That definition may have __attribute__((used)) (which lowers to |
138 | // llvm.compiler.used on ELF targets) to communicate to the compiler that it |
139 | // may be used by inline asm. The usage is perfectly fine, so we treat |
140 | // llvm.compiler.used conservatively as llvm.used to work around our own |
141 | // limitation. |
142 | SmallVector<GlobalValue *, 4> UsedV; |
143 | collectUsedGlobalVariables(M: *M, Vec&: UsedV, /*CompilerUsed=*/false); |
144 | collectUsedGlobalVariables(M: *M, Vec&: UsedV, /*CompilerUsed=*/true); |
145 | SmallPtrSet<GlobalValue *, 4> Used(UsedV.begin(), UsedV.end()); |
146 | |
147 | ModuleSymbolTable Msymtab; |
148 | Msymtab.addModule(M); |
149 | |
150 | storage::Module Mod; |
151 | Mod.Begin = Syms.size(); |
152 | Mod.End = Syms.size() + Msymtab.symbols().size(); |
153 | Mod.UncBegin = Uncommons.size(); |
154 | Mods.push_back(x: Mod); |
155 | |
156 | if (TT.isOSBinFormatCOFF()) { |
157 | if (auto E = M->materializeMetadata()) |
158 | return E; |
159 | if (NamedMDNode *LinkerOptions = |
160 | M->getNamedMetadata(Name: "llvm.linker.options" )) { |
161 | for (MDNode *MDOptions : LinkerOptions->operands()) |
162 | for (const MDOperand &MDOption : cast<MDNode>(Val: MDOptions)->operands()) |
163 | COFFLinkerOptsOS << " " << cast<MDString>(Val: MDOption)->getString(); |
164 | } |
165 | } |
166 | |
167 | if (TT.isOSBinFormatELF()) { |
168 | if (auto E = M->materializeMetadata()) |
169 | return E; |
170 | if (NamedMDNode *N = M->getNamedMetadata(Name: "llvm.dependent-libraries" )) { |
171 | for (MDNode *MDOptions : N->operands()) { |
172 | const auto OperandStr = |
173 | cast<MDString>(Val: cast<MDNode>(Val: MDOptions)->getOperand(I: 0))->getString(); |
174 | storage::Str Specifier; |
175 | setStr(S&: Specifier, Value: OperandStr); |
176 | DependentLibraries.emplace_back(args&: Specifier); |
177 | } |
178 | } |
179 | } |
180 | |
181 | for (ModuleSymbolTable::Symbol Msym : Msymtab.symbols()) |
182 | if (Error Err = addSymbol(Msymtab, Used, Sym: Msym)) |
183 | return Err; |
184 | |
185 | return Error::success(); |
186 | } |
187 | |
188 | Expected<int> Builder::getComdatIndex(const Comdat *C, const Module *M) { |
189 | auto P = ComdatMap.insert(KV: std::make_pair(x&: C, y: Comdats.size())); |
190 | if (P.second) { |
191 | std::string Name; |
192 | if (TT.isOSBinFormatCOFF()) { |
193 | const GlobalValue *GV = M->getNamedValue(Name: C->getName()); |
194 | if (!GV) |
195 | return make_error<StringError>(Args: "Could not find leader" , |
196 | Args: inconvertibleErrorCode()); |
197 | // Internal leaders do not affect symbol resolution, therefore they do not |
198 | // appear in the symbol table. |
199 | if (GV->hasLocalLinkage()) { |
200 | P.first->second = -1; |
201 | return -1; |
202 | } |
203 | llvm::raw_string_ostream OS(Name); |
204 | Mang.getNameWithPrefix(OS, GV, CannotUsePrivateLabel: false); |
205 | } else { |
206 | Name = std::string(C->getName()); |
207 | } |
208 | |
209 | storage::Comdat Comdat; |
210 | setStr(S&: Comdat.Name, Value: Saver.save(S: Name)); |
211 | Comdat.SelectionKind = C->getSelectionKind(); |
212 | Comdats.push_back(x: Comdat); |
213 | } |
214 | |
215 | return P.first->second; |
216 | } |
217 | |
218 | static DenseSet<StringRef> buildPreservedSymbolsSet() { |
219 | return DenseSet<StringRef>(std::begin(arr&: PreservedSymbols), |
220 | std::end(arr&: PreservedSymbols)); |
221 | } |
222 | |
223 | Error Builder::addSymbol(const ModuleSymbolTable &Msymtab, |
224 | const SmallPtrSet<GlobalValue *, 4> &Used, |
225 | ModuleSymbolTable::Symbol Msym) { |
226 | Syms.emplace_back(); |
227 | storage::Symbol &Sym = Syms.back(); |
228 | Sym = {}; |
229 | |
230 | storage::Uncommon *Unc = nullptr; |
231 | auto Uncommon = [&]() -> storage::Uncommon & { |
232 | if (Unc) |
233 | return *Unc; |
234 | Sym.Flags |= 1 << storage::Symbol::FB_has_uncommon; |
235 | Uncommons.emplace_back(); |
236 | Unc = &Uncommons.back(); |
237 | *Unc = {}; |
238 | setStr(S&: Unc->COFFWeakExternFallbackName, Value: "" ); |
239 | setStr(S&: Unc->SectionName, Value: "" ); |
240 | return *Unc; |
241 | }; |
242 | |
243 | SmallString<64> Name; |
244 | { |
245 | raw_svector_ostream OS(Name); |
246 | Msymtab.printSymbolName(OS, S: Msym); |
247 | } |
248 | setStr(S&: Sym.Name, Value: Saver.save(S: Name.str())); |
249 | |
250 | auto Flags = Msymtab.getSymbolFlags(S: Msym); |
251 | if (Flags & object::BasicSymbolRef::SF_Undefined) |
252 | Sym.Flags |= 1 << storage::Symbol::FB_undefined; |
253 | if (Flags & object::BasicSymbolRef::SF_Weak) |
254 | Sym.Flags |= 1 << storage::Symbol::FB_weak; |
255 | if (Flags & object::BasicSymbolRef::SF_Common) |
256 | Sym.Flags |= 1 << storage::Symbol::FB_common; |
257 | if (Flags & object::BasicSymbolRef::SF_Indirect) |
258 | Sym.Flags |= 1 << storage::Symbol::FB_indirect; |
259 | if (Flags & object::BasicSymbolRef::SF_Global) |
260 | Sym.Flags |= 1 << storage::Symbol::FB_global; |
261 | if (Flags & object::BasicSymbolRef::SF_FormatSpecific) |
262 | Sym.Flags |= 1 << storage::Symbol::FB_format_specific; |
263 | if (Flags & object::BasicSymbolRef::SF_Executable) |
264 | Sym.Flags |= 1 << storage::Symbol::FB_executable; |
265 | |
266 | Sym.ComdatIndex = -1; |
267 | auto *GV = dyn_cast_if_present<GlobalValue *>(Val&: Msym); |
268 | if (!GV) { |
269 | // Undefined module asm symbols act as GC roots and are implicitly used. |
270 | if (Flags & object::BasicSymbolRef::SF_Undefined) |
271 | Sym.Flags |= 1 << storage::Symbol::FB_used; |
272 | setStr(S&: Sym.IRName, Value: "" ); |
273 | return Error::success(); |
274 | } |
275 | |
276 | setStr(S&: Sym.IRName, Value: GV->getName()); |
277 | |
278 | static const DenseSet<StringRef> PreservedSymbolsSet = |
279 | buildPreservedSymbolsSet(); |
280 | bool IsPreservedSymbol = PreservedSymbolsSet.contains(V: GV->getName()); |
281 | |
282 | if (Used.count(Ptr: GV) || IsPreservedSymbol) |
283 | Sym.Flags |= 1 << storage::Symbol::FB_used; |
284 | if (GV->isThreadLocal()) |
285 | Sym.Flags |= 1 << storage::Symbol::FB_tls; |
286 | if (GV->hasGlobalUnnamedAddr()) |
287 | Sym.Flags |= 1 << storage::Symbol::FB_unnamed_addr; |
288 | if (GV->canBeOmittedFromSymbolTable()) |
289 | Sym.Flags |= 1 << storage::Symbol::FB_may_omit; |
290 | Sym.Flags |= unsigned(GV->getVisibility()) << storage::Symbol::FB_visibility; |
291 | |
292 | if (Flags & object::BasicSymbolRef::SF_Common) { |
293 | auto *GVar = dyn_cast<GlobalVariable>(Val: GV); |
294 | if (!GVar) |
295 | return make_error<StringError>(Args: "Only variables can have common linkage!" , |
296 | Args: inconvertibleErrorCode()); |
297 | Uncommon().CommonSize = |
298 | GV->getParent()->getDataLayout().getTypeAllocSize(Ty: GV->getValueType()); |
299 | Uncommon().CommonAlign = GVar->getAlign() ? GVar->getAlign()->value() : 0; |
300 | } |
301 | |
302 | const GlobalObject *GO = GV->getAliaseeObject(); |
303 | if (!GO) { |
304 | if (isa<GlobalIFunc>(Val: GV)) |
305 | GO = cast<GlobalIFunc>(Val: GV)->getResolverFunction(); |
306 | if (!GO) |
307 | return make_error<StringError>(Args: "Unable to determine comdat of alias!" , |
308 | Args: inconvertibleErrorCode()); |
309 | } |
310 | if (const Comdat *C = GO->getComdat()) { |
311 | Expected<int> ComdatIndexOrErr = getComdatIndex(C, M: GV->getParent()); |
312 | if (!ComdatIndexOrErr) |
313 | return ComdatIndexOrErr.takeError(); |
314 | Sym.ComdatIndex = *ComdatIndexOrErr; |
315 | } |
316 | |
317 | if (TT.isOSBinFormatCOFF()) { |
318 | emitLinkerFlagsForGlobalCOFF(OS&: COFFLinkerOptsOS, GV, TT, Mangler&: Mang); |
319 | |
320 | if ((Flags & object::BasicSymbolRef::SF_Weak) && |
321 | (Flags & object::BasicSymbolRef::SF_Indirect)) { |
322 | auto *Fallback = dyn_cast<GlobalValue>( |
323 | Val: cast<GlobalAlias>(Val: GV)->getAliasee()->stripPointerCasts()); |
324 | if (!Fallback) |
325 | return make_error<StringError>(Args: "Invalid weak external" , |
326 | Args: inconvertibleErrorCode()); |
327 | std::string FallbackName; |
328 | raw_string_ostream OS(FallbackName); |
329 | Msymtab.printSymbolName(OS, S: Fallback); |
330 | OS.flush(); |
331 | setStr(S&: Uncommon().COFFWeakExternFallbackName, Value: Saver.save(S: FallbackName)); |
332 | } |
333 | } |
334 | |
335 | if (!GO->getSection().empty()) |
336 | setStr(S&: Uncommon().SectionName, Value: Saver.save(S: GO->getSection())); |
337 | |
338 | return Error::success(); |
339 | } |
340 | |
341 | Error Builder::build(ArrayRef<Module *> IRMods) { |
342 | storage::Header Hdr; |
343 | |
344 | assert(!IRMods.empty()); |
345 | Hdr.Version = storage::Header::kCurrentVersion; |
346 | setStr(S&: Hdr.Producer, Value: kExpectedProducerName); |
347 | setStr(S&: Hdr.TargetTriple, Value: IRMods[0]->getTargetTriple()); |
348 | setStr(S&: Hdr.SourceFileName, Value: IRMods[0]->getSourceFileName()); |
349 | TT = Triple(IRMods[0]->getTargetTriple()); |
350 | |
351 | for (auto *M : IRMods) |
352 | if (Error Err = addModule(M)) |
353 | return Err; |
354 | |
355 | COFFLinkerOptsOS.flush(); |
356 | setStr(S&: Hdr.COFFLinkerOpts, Value: Saver.save(S: COFFLinkerOpts)); |
357 | |
358 | // We are about to fill in the header's range fields, so reserve space for it |
359 | // and copy it in afterwards. |
360 | Symtab.resize(N: sizeof(storage::Header)); |
361 | writeRange(R&: Hdr.Modules, Objs: Mods); |
362 | writeRange(R&: Hdr.Comdats, Objs: Comdats); |
363 | writeRange(R&: Hdr.Symbols, Objs: Syms); |
364 | writeRange(R&: Hdr.Uncommons, Objs: Uncommons); |
365 | writeRange(R&: Hdr.DependentLibraries, Objs: DependentLibraries); |
366 | *reinterpret_cast<storage::Header *>(Symtab.data()) = Hdr; |
367 | return Error::success(); |
368 | } |
369 | |
370 | } // end anonymous namespace |
371 | |
372 | Error irsymtab::build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab, |
373 | StringTableBuilder &StrtabBuilder, |
374 | BumpPtrAllocator &Alloc) { |
375 | return Builder(Symtab, StrtabBuilder, Alloc).build(IRMods: Mods); |
376 | } |
377 | |
378 | // Upgrade a vector of bitcode modules created by an old version of LLVM by |
379 | // creating an irsymtab for them in the current format. |
380 | static Expected<FileContents> upgrade(ArrayRef<BitcodeModule> BMs) { |
381 | FileContents FC; |
382 | |
383 | LLVMContext Ctx; |
384 | std::vector<Module *> Mods; |
385 | std::vector<std::unique_ptr<Module>> OwnedMods; |
386 | for (auto BM : BMs) { |
387 | Expected<std::unique_ptr<Module>> MOrErr = |
388 | BM.getLazyModule(Context&: Ctx, /*ShouldLazyLoadMetadata*/ true, |
389 | /*IsImporting*/ false); |
390 | if (!MOrErr) |
391 | return MOrErr.takeError(); |
392 | |
393 | Mods.push_back(x: MOrErr->get()); |
394 | OwnedMods.push_back(x: std::move(*MOrErr)); |
395 | } |
396 | |
397 | StringTableBuilder StrtabBuilder(StringTableBuilder::RAW); |
398 | BumpPtrAllocator Alloc; |
399 | if (Error E = build(Mods, Symtab&: FC.Symtab, StrtabBuilder, Alloc)) |
400 | return std::move(E); |
401 | |
402 | StrtabBuilder.finalizeInOrder(); |
403 | FC.Strtab.resize(N: StrtabBuilder.getSize()); |
404 | StrtabBuilder.write(Buf: (uint8_t *)FC.Strtab.data()); |
405 | |
406 | FC.TheReader = {{FC.Symtab.data(), FC.Symtab.size()}, |
407 | {FC.Strtab.data(), FC.Strtab.size()}}; |
408 | return std::move(FC); |
409 | } |
410 | |
411 | Expected<FileContents> irsymtab::readBitcode(const BitcodeFileContents &BFC) { |
412 | if (BFC.Mods.empty()) |
413 | return make_error<StringError>(Args: "Bitcode file does not contain any modules" , |
414 | Args: inconvertibleErrorCode()); |
415 | |
416 | if (!DisableBitcodeVersionUpgrade) { |
417 | if (BFC.StrtabForSymtab.empty() || |
418 | BFC.Symtab.size() < sizeof(storage::Header)) |
419 | return upgrade(BMs: BFC.Mods); |
420 | |
421 | // We cannot use the regular reader to read the version and producer, |
422 | // because it will expect the header to be in the current format. The only |
423 | // thing we can rely on is that the version and producer will be present as |
424 | // the first struct elements. |
425 | auto *Hdr = reinterpret_cast<const storage::Header *>(BFC.Symtab.data()); |
426 | unsigned Version = Hdr->Version; |
427 | StringRef Producer = Hdr->Producer.get(Strtab: BFC.StrtabForSymtab); |
428 | if (Version != storage::Header::kCurrentVersion || |
429 | Producer != kExpectedProducerName) |
430 | return upgrade(BMs: BFC.Mods); |
431 | } |
432 | |
433 | FileContents FC; |
434 | FC.TheReader = {{BFC.Symtab.data(), BFC.Symtab.size()}, |
435 | {BFC.StrtabForSymtab.data(), BFC.StrtabForSymtab.size()}}; |
436 | |
437 | // Finally, make sure that the number of modules in the symbol table matches |
438 | // the number of modules in the bitcode file. If they differ, it may mean that |
439 | // the bitcode file was created by binary concatenation, so we need to create |
440 | // a new symbol table from scratch. |
441 | if (FC.TheReader.getNumModules() != BFC.Mods.size()) |
442 | return upgrade(BMs: std::move(BFC.Mods)); |
443 | |
444 | return std::move(FC); |
445 | } |
446 | |