1//===- InputFiles.h ---------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_COFF_INPUT_FILES_H
10#define LLD_COFF_INPUT_FILES_H
11
12#include "Config.h"
13#include "lld/Common/LLVM.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/DenseSet.h"
17#include "llvm/BinaryFormat/Magic.h"
18#include "llvm/Object/Archive.h"
19#include "llvm/Object/COFF.h"
20#include "llvm/Support/StringSaver.h"
21#include <memory>
22#include <set>
23#include <vector>
24
// Forward declarations of the LLVM types that the declarations below refer to.
namespace llvm {
struct DILineInfo;

namespace pdb {
class DbiModuleDescriptorBuilder;
class NativeSession;
} // namespace pdb

namespace lto {
class InputFile;
} // namespace lto
} // namespace llvm
35
36namespace lld {
37class DWARFCache;
38
39namespace coff {
40
41std::vector<MemoryBufferRef> getArchiveMembers(llvm::object::Archive *file);
42
43using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN;
44using llvm::COFF::MachineTypes;
45using llvm::object::Archive;
46using llvm::object::COFFObjectFile;
47using llvm::object::COFFSymbolRef;
48using llvm::object::coff_import_header;
49using llvm::object::coff_section;
50
51class Chunk;
52class Defined;
53class DefinedImportData;
54class DefinedImportThunk;
55class DefinedRegular;
56class SectionChunk;
57class Symbol;
58class Undefined;
59class TpiSource;
60
61// The root class of input files.
62class InputFile {
63public:
64 enum Kind {
65 ArchiveKind,
66 ObjectKind,
67 LazyObjectKind,
68 PDBKind,
69 ImportKind,
70 BitcodeKind
71 };
72 Kind kind() const { return fileKind; }
73 virtual ~InputFile() {}
74
75 // Returns the filename.
76 StringRef getName() const { return mb.getBufferIdentifier(); }
77
78 // Reads a file (the constructor doesn't do that).
79 virtual void parse() = 0;
80
81 // Returns the CPU type this file was compiled to.
82 virtual MachineTypes getMachineType() { return IMAGE_FILE_MACHINE_UNKNOWN; }
83
84 MemoryBufferRef mb;
85
86 // An archive file name if this file is created from an archive.
87 StringRef parentName;
88
89 // Returns .drectve section contents if exist.
90 StringRef getDirectives() { return directives; }
91
92protected:
93 InputFile(Kind k, MemoryBufferRef m) : mb(m), fileKind(k) {}
94
95 StringRef directives;
96
97private:
98 const Kind fileKind;
99};
100
101// .lib or .a file.
102class ArchiveFile : public InputFile {
103public:
104 explicit ArchiveFile(MemoryBufferRef m);
105 static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
106 void parse() override;
107
108 // Enqueues an archive member load for the given symbol. If we've already
109 // enqueued a load for the same archive member, this function does nothing,
110 // which ensures that we don't load the same member more than once.
111 void addMember(const Archive::Symbol &sym);
112
113private:
114 std::unique_ptr<Archive> file;
115 llvm::DenseSet<uint64_t> seen;
116};
117
118// .obj or .o file between -start-lib and -end-lib.
119class LazyObjFile : public InputFile {
120public:
121 explicit LazyObjFile(MemoryBufferRef m) : InputFile(LazyObjectKind, m) {}
122 static bool classof(const InputFile *f) {
123 return f->kind() == LazyObjectKind;
124 }
125 // Makes this object file part of the link.
126 void fetch();
127 // Adds the symbols in this file to the symbol table as LazyObject symbols.
128 void parse() override;
129
130private:
131 std::vector<Symbol *> symbols;
132};
133
134// .obj or .o file. This may be a member of an archive file.
135class ObjFile : public InputFile {
136public:
137 explicit ObjFile(MemoryBufferRef m) : InputFile(ObjectKind, m) {}
138 explicit ObjFile(MemoryBufferRef m, std::vector<Symbol *> &&symbols)
139 : InputFile(ObjectKind, m), symbols(std::move(symbols)) {}
140 static bool classof(const InputFile *f) { return f->kind() == ObjectKind; }
141 void parse() override;
142 MachineTypes getMachineType() override;
143 ArrayRef<Chunk *> getChunks() { return chunks; }
144 ArrayRef<SectionChunk *> getDebugChunks() { return debugChunks; }
145 ArrayRef<SectionChunk *> getSXDataChunks() { return sxDataChunks; }
146 ArrayRef<SectionChunk *> getGuardFidChunks() { return guardFidChunks; }
147 ArrayRef<SectionChunk *> getGuardIATChunks() { return guardIATChunks; }
148 ArrayRef<SectionChunk *> getGuardLJmpChunks() { return guardLJmpChunks; }
149 ArrayRef<SectionChunk *> getGuardEHContChunks() { return guardEHContChunks; }
150 ArrayRef<Symbol *> getSymbols() { return symbols; }
151
152 MutableArrayRef<Symbol *> getMutableSymbols() { return symbols; }
153
154 ArrayRef<uint8_t> getDebugSection(StringRef secName);
155
156 // Returns a Symbol object for the symbolIndex'th symbol in the
157 // underlying object file.
158 Symbol *getSymbol(uint32_t symbolIndex) {
159 return symbols[symbolIndex];
160 }
161
162 // Returns the underlying COFF file.
163 COFFObjectFile *getCOFFObj() { return coffObj.get(); }
164
165 // Add a symbol for a range extension thunk. Return the new symbol table
166 // index. This index can be used to modify a relocation.
167 uint32_t addRangeThunkSymbol(Symbol *thunk) {
168 symbols.push_back(thunk);
169 return symbols.size() - 1;
170 }
171
172 void includeResourceChunks();
173
174 bool isResourceObjFile() const { return !resourceChunks.empty(); }
175
176 static std::vector<ObjFile *> instances;
177
178 // Flags in the absolute @feat.00 symbol if it is present. These usually
179 // indicate if an object was compiled with certain security features enabled
180 // like stack guard, safeseh, /guard:cf, or other things.
181 uint32_t feat00Flags = 0;
182
183 // True if this object file is compatible with SEH. COFF-specific and
184 // x86-only. COFF spec 5.10.1. The .sxdata section.
185 bool hasSafeSEH() { return feat00Flags & 0x1; }
186
187 // True if this file was compiled with /guard:cf.
188 bool hasGuardCF() { return feat00Flags & 0x4800; }
189
190 // Pointer to the PDB module descriptor builder. Various debug info records
191 // will reference object files by "module index", which is here. Things like
192 // source files and section contributions are also recorded here. Will be null
193 // if we are not producing a PDB.
194 llvm::pdb::DbiModuleDescriptorBuilder *moduleDBI = nullptr;
195
196 const coff_section *addrsigSec = nullptr;
197
198 const coff_section *callgraphSec = nullptr;
199
200 // When using Microsoft precompiled headers, this is the PCH's key.
201 // The same key is used by both the precompiled object, and objects using the
202 // precompiled object. Any difference indicates out-of-date objects.
203 llvm::Optional<uint32_t> pchSignature;
204
205 // Whether this file was compiled with /hotpatch.
206 bool hotPatchable = false;
207
208 // Whether the object was already merged into the final PDB.
209 bool mergedIntoPDB = false;
210
211 // If the OBJ has a .debug$T stream, this tells how it will be handled.
212 TpiSource *debugTypesObj = nullptr;
213
214 // The .debug$P or .debug$T section data if present. Empty otherwise.
215 ArrayRef<uint8_t> debugTypes;
216
217 llvm::Optional<std::pair<StringRef, uint32_t>>
218 getVariableLocation(StringRef var);
219
220 llvm::Optional<llvm::DILineInfo> getDILineInfo(uint32_t offset,
221 uint32_t sectionIndex);
222
223private:
224 const coff_section* getSection(uint32_t i);
225 const coff_section *getSection(COFFSymbolRef sym) {
226 return getSection(sym.getSectionNumber());
227 }
228
229 void initializeChunks();
230 void initializeSymbols();
231 void initializeFlags();
232 void initializeDependencies();
233
234 SectionChunk *
235 readSection(uint32_t sectionNumber,
236 const llvm::object::coff_aux_section_definition *def,
237 StringRef leaderName);
238
239 void readAssociativeDefinition(
240 COFFSymbolRef coffSym,
241 const llvm::object::coff_aux_section_definition *def);
242
243 void readAssociativeDefinition(
244 COFFSymbolRef coffSym,
245 const llvm::object::coff_aux_section_definition *def,
246 uint32_t parentSection);
247
248 void recordPrevailingSymbolForMingw(
249 COFFSymbolRef coffSym,
250 llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap);
251
252 void maybeAssociateSEHForMingw(
253 COFFSymbolRef sym, const llvm::object::coff_aux_section_definition *def,
254 const llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap);
255
256 // Given a new symbol Sym with comdat selection Selection, if the new
257 // symbol is not (yet) Prevailing and the existing comdat leader set to
258 // Leader, emits a diagnostic if the new symbol and its selection doesn't
259 // match the existing symbol and its selection. If either old or new
260 // symbol have selection IMAGE_COMDAT_SELECT_LARGEST, Sym might replace
261 // the existing leader. In that case, Prevailing is set to true.
262 void
263 handleComdatSelection(COFFSymbolRef sym, llvm::COFF::COMDATType &selection,
264 bool &prevailing, DefinedRegular *leader,
265 const llvm::object::coff_aux_section_definition *def);
266
267 llvm::Optional<Symbol *>
268 createDefined(COFFSymbolRef sym,
269 std::vector<const llvm::object::coff_aux_section_definition *>
270 &comdatDefs,
271 bool &prevailingComdat);
272 Symbol *createRegular(COFFSymbolRef sym);
273 Symbol *createUndefined(COFFSymbolRef sym);
274
275 std::unique_ptr<COFFObjectFile> coffObj;
276
277 // List of all chunks defined by this file. This includes both section
278 // chunks and non-section chunks for common symbols.
279 std::vector<Chunk *> chunks;
280
281 std::vector<SectionChunk *> resourceChunks;
282
283 // CodeView debug info sections.
284 std::vector<SectionChunk *> debugChunks;
285
286 // Chunks containing symbol table indices of exception handlers. Only used for
287 // 32-bit x86.
288 std::vector<SectionChunk *> sxDataChunks;
289
290 // Chunks containing symbol table indices of address taken symbols, address
291 // taken IAT entries, longjmp and ehcont targets. These are not linked into
292 // the final binary when /guard:cf is set.
293 std::vector<SectionChunk *> guardFidChunks;
294 std::vector<SectionChunk *> guardIATChunks;
295 std::vector<SectionChunk *> guardLJmpChunks;
296 std::vector<SectionChunk *> guardEHContChunks;
297
298 // This vector contains a list of all symbols defined or referenced by this
299 // file. They are indexed such that you can get a Symbol by symbol
300 // index. Nonexistent indices (which are occupied by auxiliary
301 // symbols in the real symbol table) are filled with null pointers.
302 std::vector<Symbol *> symbols;
303
304 // This vector contains the same chunks as Chunks, but they are
305 // indexed such that you can get a SectionChunk by section index.
306 // Nonexistent section indices are filled with null pointers.
307 // (Because section number is 1-based, the first slot is always a
308 // null pointer.) This vector is only valid during initialization.
309 std::vector<SectionChunk *> sparseChunks;
310
311 DWARFCache *dwarf = nullptr;
312};
313
314// This is a PDB type server dependency, that is not a input file per se, but
315// needs to be treated like one. Such files are discovered from the debug type
316// stream.
317class PDBInputFile : public InputFile {
318public:
319 explicit PDBInputFile(MemoryBufferRef m);
320 ~PDBInputFile();
321 static bool classof(const InputFile *f) { return f->kind() == PDBKind; }
322 void parse() override;
323
324 static void enqueue(StringRef path, ObjFile *fromFile);
325
326 static PDBInputFile *findFromRecordPath(StringRef path, ObjFile *fromFile);
327
328 static std::map<std::string, PDBInputFile *> instances;
329
330 // Record possible errors while opening the PDB file
331 llvm::Optional<Error> loadErr;
332
333 // This is the actual interface to the PDB (if it was opened successfully)
334 std::unique_ptr<llvm::pdb::NativeSession> session;
335
336 // If the PDB has a .debug$T stream, this tells how it will be handled.
337 TpiSource *debugTypesObj = nullptr;
338};
339
340// This type represents import library members that contain DLL names
341// and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7
342// for details about the format.
343class ImportFile : public InputFile {
344public:
345 explicit ImportFile(MemoryBufferRef m) : InputFile(ImportKind, m) {}
346
347 static bool classof(const InputFile *f) { return f->kind() == ImportKind; }
348
349 static std::vector<ImportFile *> instances;
350
351 Symbol *impSym = nullptr;
352 Symbol *thunkSym = nullptr;
353 std::string dllName;
354
355private:
356 void parse() override;
357
358public:
359 StringRef externalName;
360 const coff_import_header *hdr;
361 Chunk *location = nullptr;
362
363 // We want to eliminate dllimported symbols if no one actually refers to them.
364 // These "Live" bits are used to keep track of which import library members
365 // are actually in use.
366 //
367 // If the Live bit is turned off by MarkLive, Writer will ignore dllimported
368 // symbols provided by this import library member. We also track whether the
369 // imported symbol is used separately from whether the thunk is used in order
370 // to avoid creating unnecessary thunks.
371 bool live = !config->doGC;
372 bool thunkLive = !config->doGC;
373};
374
375// Used for LTO.
376class BitcodeFile : public InputFile {
377public:
378 BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
379 uint64_t offsetInArchive);
380 explicit BitcodeFile(MemoryBufferRef m, StringRef archiveName,
381 uint64_t offsetInArchive,
382 std::vector<Symbol *> &&symbols);
383 ~BitcodeFile();
384 static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
385 ArrayRef<Symbol *> getSymbols() { return symbols; }
386 MachineTypes getMachineType() override;
387 static std::vector<BitcodeFile *> instances;
388 std::unique_ptr<llvm::lto::InputFile> obj;
389
390private:
391 void parse() override;
392
393 std::vector<Symbol *> symbols;
394};
395
396inline bool isBitcode(MemoryBufferRef mb) {
397 return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode;
398}
399
400std::string replaceThinLTOSuffix(StringRef path);
401} // namespace coff
402
403std::string toString(const coff::InputFile *file);
404} // namespace lld
405
406#endif
407