1 | //===- InputFiles.h ---------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLD_MACHO_INPUT_FILES_H |
10 | #define LLD_MACHO_INPUT_FILES_H |
11 | |
12 | #include "MachOStructs.h" |
13 | #include "Target.h" |
14 | |
15 | #include "lld/Common/LLVM.h" |
16 | #include "lld/Common/Memory.h" |
17 | #include "llvm/ADT/DenseSet.h" |
18 | #include "llvm/ADT/SetVector.h" |
19 | #include "llvm/BinaryFormat/MachO.h" |
20 | #include "llvm/DebugInfo/DWARF/DWARFUnit.h" |
21 | #include "llvm/Object/Archive.h" |
22 | #include "llvm/Support/MemoryBuffer.h" |
23 | #include "llvm/TextAPI/TextAPIReader.h" |
24 | |
25 | #include <map> |
26 | #include <vector> |
27 | |
// Forward declarations of LLVM types. Within this header they appear only as
// reference parameters or as std::unique_ptr template arguments, so their
// full definitions are not required here.
namespace llvm {
namespace lto {
class InputFile;
} // namespace lto
namespace MachO {
class InterfaceFile;
} // namespace MachO
class TarWriter;
} // namespace llvm
37 | |
38 | namespace lld { |
39 | namespace macho { |
40 | |
// Forward declarations; the definitions are not part of this header.
struct PlatformInfo;
class InputSection;
class Symbol;
struct Reloc;
// Opaque enum declaration: the underlying type is fixed to uint8_t, but the
// enumerators are not visible from this header.
enum class RefState : uint8_t;
46 | |
// If the --reproduce option is given, all input files are written
// to this tar archive.
extern std::unique_ptr<llvm::TarWriter> tar;
50 | |
// If .subsections_via_symbols is set, each InputSection will be split along
// symbol boundaries. A SubsectionEntry records one of the resulting pieces.
struct SubsectionEntry {
  // Offset of the subsection from the start of the original pre-split
  // InputSection.
  uint64_t offset;
  // The InputSection associated with `offset`.
  InputSection *isec;
};
// A list of SubsectionEntry records.
using SubsectionMap = std::vector<SubsectionEntry>;
59 | |
60 | class InputFile { |
61 | public: |
62 | enum Kind { |
63 | ObjKind, |
64 | OpaqueKind, |
65 | DylibKind, |
66 | ArchiveKind, |
67 | BitcodeKind, |
68 | }; |
69 | |
70 | virtual ~InputFile() = default; |
71 | Kind kind() const { return fileKind; } |
72 | StringRef getName() const { return name; } |
73 | |
74 | MemoryBufferRef mb; |
75 | |
76 | std::vector<Symbol *> symbols; |
77 | std::vector<SubsectionMap> subsections; |
78 | // Provides an easy way to sort InputFiles deterministically. |
79 | const int id; |
80 | |
81 | // If not empty, this stores the name of the archive containing this file. |
82 | // We use this string for creating error messages. |
83 | std::string archiveName; |
84 | |
85 | protected: |
86 | InputFile(Kind kind, MemoryBufferRef mb) |
87 | : mb(mb), id(idCount++), fileKind(kind), name(mb.getBufferIdentifier()) {} |
88 | |
89 | InputFile(Kind, const llvm::MachO::InterfaceFile &); |
90 | |
91 | private: |
92 | const Kind fileKind; |
93 | const StringRef name; |
94 | |
95 | static int idCount; |
96 | }; |
97 | |
98 | // .o file |
99 | class ObjFile : public InputFile { |
100 | public: |
101 | ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName); |
102 | static bool classof(const InputFile *f) { return f->kind() == ObjKind; } |
103 | |
104 | llvm::DWARFUnit *compileUnit = nullptr; |
105 | const uint32_t modTime; |
106 | std::vector<InputSection *> debugSections; |
107 | |
108 | private: |
109 | template <class LP> void parse(); |
110 | template <class Section> void parseSections(ArrayRef<Section>); |
111 | template <class LP> |
112 | void parseSymbols(ArrayRef<typename LP::section> , |
113 | ArrayRef<typename LP::nlist> nList, const char *strtab, |
114 | bool subsectionsViaSymbols); |
115 | template <class NList> |
116 | Symbol *parseNonSectionSymbol(const NList &sym, StringRef name); |
117 | template <class Section> |
118 | void parseRelocations(ArrayRef<Section> , const Section &, |
119 | SubsectionMap &); |
120 | void parseDebugInfo(); |
121 | }; |
122 | |
123 | // command-line -sectcreate file |
124 | class OpaqueFile : public InputFile { |
125 | public: |
126 | OpaqueFile(MemoryBufferRef mb, StringRef segName, StringRef sectName); |
127 | static bool classof(const InputFile *f) { return f->kind() == OpaqueKind; } |
128 | }; |
129 | |
130 | // .dylib file |
131 | class DylibFile : public InputFile { |
132 | public: |
133 | // Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the |
134 | // symbols in those sub-libraries will be available under the umbrella |
135 | // library's namespace. Those sub-libraries can also have their own |
136 | // re-exports. When loading a re-exported dylib, `umbrella` should be set to |
137 | // the root dylib to ensure symbols in the child library are correctly bound |
138 | // to the root. On the other hand, if a dylib is being directly loaded |
139 | // (through an -lfoo flag), then `umbrella` should be a nullptr. |
140 | explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella, |
141 | bool isBundleLoader = false); |
142 | |
143 | explicit DylibFile(const llvm::MachO::InterfaceFile &interface, |
144 | DylibFile *umbrella = nullptr, |
145 | bool isBundleLoader = false); |
146 | |
147 | static bool classof(const InputFile *f) { return f->kind() == DylibKind; } |
148 | |
149 | StringRef dylibName; |
150 | uint32_t compatibilityVersion = 0; |
151 | uint32_t currentVersion = 0; |
152 | int64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel |
153 | RefState refState; |
154 | bool reexport = false; |
155 | bool forceWeakImport = false; |
156 | |
157 | // An executable can be used as a bundle loader that will load the output |
158 | // file being linked, and that contains symbols referenced, but not |
159 | // implemented in the bundle. When used like this, it is very similar |
160 | // to a Dylib, so we re-used the same class to represent it. |
161 | bool isBundleLoader; |
162 | }; |
163 | |
164 | // .a file |
165 | class ArchiveFile : public InputFile { |
166 | public: |
167 | explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file); |
168 | static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; } |
169 | void fetch(const llvm::object::Archive::Symbol &sym); |
170 | |
171 | private: |
172 | std::unique_ptr<llvm::object::Archive> file; |
173 | // Keep track of children fetched from the archive by tracking |
174 | // which address offsets have been fetched already. |
175 | llvm::DenseSet<uint64_t> seen; |
176 | }; |
177 | |
178 | class BitcodeFile : public InputFile { |
179 | public: |
180 | explicit BitcodeFile(MemoryBufferRef mb); |
181 | static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } |
182 | |
183 | std::unique_ptr<llvm::lto::InputFile> obj; |
184 | }; |
185 | |
// Set of InputFile pointers, declared here and defined elsewhere. A SetVector
// iterates in insertion order and holds each pointer at most once.
extern llvm::SetVector<InputFile *> inputFiles;

// Looks up the file at `path` and returns a reference to its buffer.
// NOTE(review): presumably yields an empty Optional when the file cannot be
// read; confirm against the definition.
llvm::Optional<MemoryBufferRef> readFile(StringRef path);
189 | |
190 | // anyHdr should be a pointer to either mach_header or mach_header_64 |
191 | template <class CommandType = llvm::MachO::load_command, class... Types> |
192 | const CommandType *findCommand(const void *anyHdr, Types... types) { |
193 | std::initializer_list<uint32_t> typesList{types...}; |
194 | const auto *hdr = reinterpret_cast<const llvm::MachO::mach_header *>(anyHdr); |
195 | const uint8_t *p = |
196 | reinterpret_cast<const uint8_t *>(hdr) + target->headerSize; |
197 | for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) { |
198 | auto *cmd = reinterpret_cast<const CommandType *>(p); |
199 | if (llvm::is_contained(typesList, cmd->cmd)) |
200 | return cmd; |
201 | p += cmd->cmdsize; |
202 | } |
203 | return nullptr; |
204 | } |
205 | |
206 | } // namespace macho |
207 | |
// String form of `file`; declared here, defined elsewhere.
// NOTE(review): presumably intended for diagnostics; confirm at the
// definition.
std::string toString(const macho::InputFile *file);
209 | } // namespace lld |
210 | |
211 | #endif |
212 | |