1 | //===- Writer.cpp ---------------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "Writer.h" |
10 | #include "ConcatOutputSection.h" |
11 | #include "Config.h" |
12 | #include "InputFiles.h" |
13 | #include "InputSection.h" |
14 | #include "MapFile.h" |
15 | #include "OutputSection.h" |
16 | #include "OutputSegment.h" |
17 | #include "SectionPriorities.h" |
18 | #include "SymbolTable.h" |
19 | #include "Symbols.h" |
20 | #include "SyntheticSections.h" |
21 | #include "Target.h" |
22 | #include "UnwindInfoSection.h" |
23 | |
24 | #include "lld/Common/Arrays.h" |
25 | #include "lld/Common/CommonLinkerContext.h" |
26 | #include "llvm/BinaryFormat/MachO.h" |
27 | #include "llvm/Config/llvm-config.h" |
28 | #include "llvm/Support/LEB128.h" |
29 | #include "llvm/Support/Parallel.h" |
30 | #include "llvm/Support/Path.h" |
31 | #include "llvm/Support/ThreadPool.h" |
32 | #include "llvm/Support/TimeProfiler.h" |
33 | #include "llvm/Support/xxhash.h" |
34 | |
35 | #include <algorithm> |
36 | |
37 | using namespace llvm; |
38 | using namespace llvm::MachO; |
39 | using namespace llvm::sys; |
40 | using namespace lld; |
41 | using namespace lld::macho; |
42 | |
43 | namespace { |
44 | class LCUuid; |
45 | |
46 | class Writer { |
47 | public: |
48 | Writer() : buffer(errorHandler().outputBuffer) {} |
49 | |
50 | void treatSpecialUndefineds(); |
51 | void scanRelocations(); |
52 | void scanSymbols(); |
53 | template <class LP> void createOutputSections(); |
54 | template <class LP> void createLoadCommands(); |
55 | void finalizeAddresses(); |
56 | void finalizeLinkEditSegment(); |
57 | void assignAddresses(OutputSegment *); |
58 | |
59 | void openFile(); |
60 | void writeSections(); |
61 | void applyOptimizationHints(); |
62 | void buildFixupChains(); |
63 | void writeUuid(); |
64 | void writeCodeSignature(); |
65 | void writeOutputFile(); |
66 | |
67 | template <class LP> void run(); |
68 | |
69 | ThreadPool threadPool; |
70 | std::unique_ptr<FileOutputBuffer> &buffer; |
71 | uint64_t addr = 0; |
72 | uint64_t fileOff = 0; |
73 | MachHeaderSection * = nullptr; |
74 | StringTableSection *stringTableSection = nullptr; |
75 | SymtabSection *symtabSection = nullptr; |
76 | IndirectSymtabSection *indirectSymtabSection = nullptr; |
77 | CodeSignatureSection *codeSignatureSection = nullptr; |
78 | DataInCodeSection *dataInCodeSection = nullptr; |
79 | FunctionStartsSection *functionStartsSection = nullptr; |
80 | |
81 | LCUuid *uuidCommand = nullptr; |
82 | OutputSegment *linkEditSegment = nullptr; |
83 | }; |
84 | |
85 | // LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information. |
86 | class LCDyldInfo final : public LoadCommand { |
87 | public: |
88 | LCDyldInfo(RebaseSection *rebaseSection, BindingSection *bindingSection, |
89 | WeakBindingSection *weakBindingSection, |
90 | LazyBindingSection *lazyBindingSection, |
91 | ExportSection *exportSection) |
92 | : rebaseSection(rebaseSection), bindingSection(bindingSection), |
93 | weakBindingSection(weakBindingSection), |
94 | lazyBindingSection(lazyBindingSection), exportSection(exportSection) {} |
95 | |
96 | uint32_t getSize() const override { return sizeof(dyld_info_command); } |
97 | |
98 | void writeTo(uint8_t *buf) const override { |
99 | auto *c = reinterpret_cast<dyld_info_command *>(buf); |
100 | c->cmd = LC_DYLD_INFO_ONLY; |
101 | c->cmdsize = getSize(); |
102 | if (rebaseSection->isNeeded()) { |
103 | c->rebase_off = rebaseSection->fileOff; |
104 | c->rebase_size = rebaseSection->getFileSize(); |
105 | } |
106 | if (bindingSection->isNeeded()) { |
107 | c->bind_off = bindingSection->fileOff; |
108 | c->bind_size = bindingSection->getFileSize(); |
109 | } |
110 | if (weakBindingSection->isNeeded()) { |
111 | c->weak_bind_off = weakBindingSection->fileOff; |
112 | c->weak_bind_size = weakBindingSection->getFileSize(); |
113 | } |
114 | if (lazyBindingSection->isNeeded()) { |
115 | c->lazy_bind_off = lazyBindingSection->fileOff; |
116 | c->lazy_bind_size = lazyBindingSection->getFileSize(); |
117 | } |
118 | if (exportSection->isNeeded()) { |
119 | c->export_off = exportSection->fileOff; |
120 | c->export_size = exportSection->getFileSize(); |
121 | } |
122 | } |
123 | |
124 | RebaseSection *rebaseSection; |
125 | BindingSection *bindingSection; |
126 | WeakBindingSection *weakBindingSection; |
127 | LazyBindingSection *lazyBindingSection; |
128 | ExportSection *exportSection; |
129 | }; |
130 | |
131 | class LCSubFramework final : public LoadCommand { |
132 | public: |
133 | LCSubFramework(StringRef umbrella) : umbrella(umbrella) {} |
134 | |
135 | uint32_t getSize() const override { |
136 | return alignToPowerOf2(Value: sizeof(sub_framework_command) + umbrella.size() + 1, |
137 | Align: target->wordSize); |
138 | } |
139 | |
140 | void writeTo(uint8_t *buf) const override { |
141 | auto *c = reinterpret_cast<sub_framework_command *>(buf); |
142 | buf += sizeof(sub_framework_command); |
143 | |
144 | c->cmd = LC_SUB_FRAMEWORK; |
145 | c->cmdsize = getSize(); |
146 | c->umbrella = sizeof(sub_framework_command); |
147 | |
148 | memcpy(dest: buf, src: umbrella.data(), n: umbrella.size()); |
149 | buf[umbrella.size()] = '\0'; |
150 | } |
151 | |
152 | private: |
153 | const StringRef umbrella; |
154 | }; |
155 | |
156 | class LCFunctionStarts final : public LoadCommand { |
157 | public: |
158 | explicit LCFunctionStarts(FunctionStartsSection *functionStartsSection) |
159 | : functionStartsSection(functionStartsSection) {} |
160 | |
161 | uint32_t getSize() const override { return sizeof(linkedit_data_command); } |
162 | |
163 | void writeTo(uint8_t *buf) const override { |
164 | auto *c = reinterpret_cast<linkedit_data_command *>(buf); |
165 | c->cmd = LC_FUNCTION_STARTS; |
166 | c->cmdsize = getSize(); |
167 | c->dataoff = functionStartsSection->fileOff; |
168 | c->datasize = functionStartsSection->getFileSize(); |
169 | } |
170 | |
171 | private: |
172 | FunctionStartsSection *functionStartsSection; |
173 | }; |
174 | |
175 | class LCDataInCode final : public LoadCommand { |
176 | public: |
177 | explicit LCDataInCode(DataInCodeSection *dataInCodeSection) |
178 | : dataInCodeSection(dataInCodeSection) {} |
179 | |
180 | uint32_t getSize() const override { return sizeof(linkedit_data_command); } |
181 | |
182 | void writeTo(uint8_t *buf) const override { |
183 | auto *c = reinterpret_cast<linkedit_data_command *>(buf); |
184 | c->cmd = LC_DATA_IN_CODE; |
185 | c->cmdsize = getSize(); |
186 | c->dataoff = dataInCodeSection->fileOff; |
187 | c->datasize = dataInCodeSection->getFileSize(); |
188 | } |
189 | |
190 | private: |
191 | DataInCodeSection *dataInCodeSection; |
192 | }; |
193 | |
194 | class LCDysymtab final : public LoadCommand { |
195 | public: |
196 | LCDysymtab(SymtabSection *symtabSection, |
197 | IndirectSymtabSection *indirectSymtabSection) |
198 | : symtabSection(symtabSection), |
199 | indirectSymtabSection(indirectSymtabSection) {} |
200 | |
201 | uint32_t getSize() const override { return sizeof(dysymtab_command); } |
202 | |
203 | void writeTo(uint8_t *buf) const override { |
204 | auto *c = reinterpret_cast<dysymtab_command *>(buf); |
205 | c->cmd = LC_DYSYMTAB; |
206 | c->cmdsize = getSize(); |
207 | |
208 | c->ilocalsym = 0; |
209 | c->iextdefsym = c->nlocalsym = symtabSection->getNumLocalSymbols(); |
210 | c->nextdefsym = symtabSection->getNumExternalSymbols(); |
211 | c->iundefsym = c->iextdefsym + c->nextdefsym; |
212 | c->nundefsym = symtabSection->getNumUndefinedSymbols(); |
213 | |
214 | c->indirectsymoff = indirectSymtabSection->fileOff; |
215 | c->nindirectsyms = indirectSymtabSection->getNumSymbols(); |
216 | } |
217 | |
218 | SymtabSection *symtabSection; |
219 | IndirectSymtabSection *indirectSymtabSection; |
220 | }; |
221 | |
222 | template <class LP> class LCSegment final : public LoadCommand { |
223 | public: |
224 | LCSegment(StringRef name, OutputSegment *seg) : name(name), seg(seg) {} |
225 | |
226 | uint32_t getSize() const override { |
227 | return sizeof(typename LP::segment_command) + |
228 | seg->numNonHiddenSections() * sizeof(typename LP::section); |
229 | } |
230 | |
231 | void writeTo(uint8_t *buf) const override { |
232 | using SegmentCommand = typename LP::segment_command; |
233 | using = typename LP::section; |
234 | |
235 | auto *c = reinterpret_cast<SegmentCommand *>(buf); |
236 | buf += sizeof(SegmentCommand); |
237 | |
238 | c->cmd = LP::segmentLCType; |
239 | c->cmdsize = getSize(); |
240 | memcpy(c->segname, name.data(), name.size()); |
241 | c->fileoff = seg->fileOff; |
242 | c->maxprot = seg->maxProt; |
243 | c->initprot = seg->initProt; |
244 | |
245 | c->vmaddr = seg->addr; |
246 | c->vmsize = seg->vmSize; |
247 | c->filesize = seg->fileSize; |
248 | c->nsects = seg->numNonHiddenSections(); |
249 | c->flags = seg->flags; |
250 | |
251 | for (const OutputSection *osec : seg->getSections()) { |
252 | if (osec->isHidden()) |
253 | continue; |
254 | |
255 | auto *sectHdr = reinterpret_cast<SectionHeader *>(buf); |
256 | buf += sizeof(SectionHeader); |
257 | |
258 | memcpy(sectHdr->sectname, osec->name.data(), osec->name.size()); |
259 | memcpy(sectHdr->segname, name.data(), name.size()); |
260 | |
261 | sectHdr->addr = osec->addr; |
262 | sectHdr->offset = osec->fileOff; |
263 | sectHdr->align = Log2_32(Value: osec->align); |
264 | sectHdr->flags = osec->flags; |
265 | sectHdr->size = osec->getSize(); |
266 | sectHdr->reserved1 = osec->reserved1; |
267 | sectHdr->reserved2 = osec->reserved2; |
268 | } |
269 | } |
270 | |
271 | private: |
272 | StringRef name; |
273 | OutputSegment *seg; |
274 | }; |
275 | |
276 | class LCMain final : public LoadCommand { |
277 | uint32_t getSize() const override { |
278 | return sizeof(structs::entry_point_command); |
279 | } |
280 | |
281 | void writeTo(uint8_t *buf) const override { |
282 | auto *c = reinterpret_cast<structs::entry_point_command *>(buf); |
283 | c->cmd = LC_MAIN; |
284 | c->cmdsize = getSize(); |
285 | |
286 | if (config->entry->isInStubs()) |
287 | c->entryoff = |
288 | in.stubs->fileOff + config->entry->stubsIndex * target->stubSize; |
289 | else |
290 | c->entryoff = config->entry->getVA() - in.header->addr; |
291 | |
292 | c->stacksize = 0; |
293 | } |
294 | }; |
295 | |
296 | class LCSymtab final : public LoadCommand { |
297 | public: |
298 | LCSymtab(SymtabSection *symtabSection, StringTableSection *stringTableSection) |
299 | : symtabSection(symtabSection), stringTableSection(stringTableSection) {} |
300 | |
301 | uint32_t getSize() const override { return sizeof(symtab_command); } |
302 | |
303 | void writeTo(uint8_t *buf) const override { |
304 | auto *c = reinterpret_cast<symtab_command *>(buf); |
305 | c->cmd = LC_SYMTAB; |
306 | c->cmdsize = getSize(); |
307 | c->symoff = symtabSection->fileOff; |
308 | c->nsyms = symtabSection->getNumSymbols(); |
309 | c->stroff = stringTableSection->fileOff; |
310 | c->strsize = stringTableSection->getFileSize(); |
311 | } |
312 | |
313 | SymtabSection *symtabSection = nullptr; |
314 | StringTableSection *stringTableSection = nullptr; |
315 | }; |
316 | |
317 | // There are several dylib load commands that share the same structure: |
318 | // * LC_LOAD_DYLIB |
319 | // * LC_ID_DYLIB |
320 | // * LC_REEXPORT_DYLIB |
321 | class LCDylib final : public LoadCommand { |
322 | public: |
323 | LCDylib(LoadCommandType type, StringRef path, |
324 | uint32_t compatibilityVersion = 0, uint32_t currentVersion = 0) |
325 | : type(type), path(path), compatibilityVersion(compatibilityVersion), |
326 | currentVersion(currentVersion) { |
327 | instanceCount++; |
328 | } |
329 | |
330 | uint32_t getSize() const override { |
331 | return alignToPowerOf2(Value: sizeof(dylib_command) + path.size() + 1, |
332 | Align: target->wordSize); |
333 | } |
334 | |
335 | void writeTo(uint8_t *buf) const override { |
336 | auto *c = reinterpret_cast<dylib_command *>(buf); |
337 | buf += sizeof(dylib_command); |
338 | |
339 | c->cmd = type; |
340 | c->cmdsize = getSize(); |
341 | c->dylib.name = sizeof(dylib_command); |
342 | c->dylib.timestamp = 0; |
343 | c->dylib.compatibility_version = compatibilityVersion; |
344 | c->dylib.current_version = currentVersion; |
345 | |
346 | memcpy(dest: buf, src: path.data(), n: path.size()); |
347 | buf[path.size()] = '\0'; |
348 | } |
349 | |
350 | static uint32_t getInstanceCount() { return instanceCount; } |
351 | static void resetInstanceCount() { instanceCount = 0; } |
352 | |
353 | private: |
354 | LoadCommandType type; |
355 | StringRef path; |
356 | uint32_t compatibilityVersion; |
357 | uint32_t currentVersion; |
358 | static uint32_t instanceCount; |
359 | }; |
360 | |
361 | uint32_t LCDylib::instanceCount = 0; |
362 | |
363 | class LCLoadDylinker final : public LoadCommand { |
364 | public: |
365 | uint32_t getSize() const override { |
366 | return alignToPowerOf2(Value: sizeof(dylinker_command) + path.size() + 1, |
367 | Align: target->wordSize); |
368 | } |
369 | |
370 | void writeTo(uint8_t *buf) const override { |
371 | auto *c = reinterpret_cast<dylinker_command *>(buf); |
372 | buf += sizeof(dylinker_command); |
373 | |
374 | c->cmd = LC_LOAD_DYLINKER; |
375 | c->cmdsize = getSize(); |
376 | c->name = sizeof(dylinker_command); |
377 | |
378 | memcpy(dest: buf, src: path.data(), n: path.size()); |
379 | buf[path.size()] = '\0'; |
380 | } |
381 | |
382 | private: |
383 | // Recent versions of Darwin won't run any binary that has dyld at a |
384 | // different location. |
385 | const StringRef path = "/usr/lib/dyld" ; |
386 | }; |
387 | |
388 | class LCRPath final : public LoadCommand { |
389 | public: |
390 | explicit LCRPath(StringRef path) : path(path) {} |
391 | |
392 | uint32_t getSize() const override { |
393 | return alignToPowerOf2(Value: sizeof(rpath_command) + path.size() + 1, |
394 | Align: target->wordSize); |
395 | } |
396 | |
397 | void writeTo(uint8_t *buf) const override { |
398 | auto *c = reinterpret_cast<rpath_command *>(buf); |
399 | buf += sizeof(rpath_command); |
400 | |
401 | c->cmd = LC_RPATH; |
402 | c->cmdsize = getSize(); |
403 | c->path = sizeof(rpath_command); |
404 | |
405 | memcpy(dest: buf, src: path.data(), n: path.size()); |
406 | buf[path.size()] = '\0'; |
407 | } |
408 | |
409 | private: |
410 | StringRef path; |
411 | }; |
412 | |
413 | class LCDyldEnv final : public LoadCommand { |
414 | public: |
415 | explicit LCDyldEnv(StringRef name) : name(name) {} |
416 | |
417 | uint32_t getSize() const override { |
418 | return alignToPowerOf2(Value: sizeof(dyld_env_command) + name.size() + 1, |
419 | Align: target->wordSize); |
420 | } |
421 | |
422 | void writeTo(uint8_t *buf) const override { |
423 | auto *c = reinterpret_cast<dyld_env_command *>(buf); |
424 | buf += sizeof(dyld_env_command); |
425 | |
426 | c->cmd = LC_DYLD_ENVIRONMENT; |
427 | c->cmdsize = getSize(); |
428 | c->name = sizeof(dyld_env_command); |
429 | |
430 | memcpy(dest: buf, src: name.data(), n: name.size()); |
431 | buf[name.size()] = '\0'; |
432 | } |
433 | |
434 | private: |
435 | StringRef name; |
436 | }; |
437 | |
438 | class LCMinVersion final : public LoadCommand { |
439 | public: |
440 | explicit LCMinVersion(const PlatformInfo &platformInfo) |
441 | : platformInfo(platformInfo) {} |
442 | |
443 | uint32_t getSize() const override { return sizeof(version_min_command); } |
444 | |
445 | void writeTo(uint8_t *buf) const override { |
446 | auto *c = reinterpret_cast<version_min_command *>(buf); |
447 | switch (platformInfo.target.Platform) { |
448 | case PLATFORM_MACOS: |
449 | c->cmd = LC_VERSION_MIN_MACOSX; |
450 | break; |
451 | case PLATFORM_IOS: |
452 | case PLATFORM_IOSSIMULATOR: |
453 | c->cmd = LC_VERSION_MIN_IPHONEOS; |
454 | break; |
455 | case PLATFORM_TVOS: |
456 | case PLATFORM_TVOSSIMULATOR: |
457 | c->cmd = LC_VERSION_MIN_TVOS; |
458 | break; |
459 | case PLATFORM_WATCHOS: |
460 | case PLATFORM_WATCHOSSIMULATOR: |
461 | c->cmd = LC_VERSION_MIN_WATCHOS; |
462 | break; |
463 | default: |
464 | llvm_unreachable("invalid platform" ); |
465 | break; |
466 | } |
467 | c->cmdsize = getSize(); |
468 | c->version = encodeVersion(version: platformInfo.target.MinDeployment); |
469 | c->sdk = encodeVersion(version: platformInfo.sdk); |
470 | } |
471 | |
472 | private: |
473 | const PlatformInfo &platformInfo; |
474 | }; |
475 | |
476 | class LCBuildVersion final : public LoadCommand { |
477 | public: |
478 | explicit LCBuildVersion(const PlatformInfo &platformInfo) |
479 | : platformInfo(platformInfo) {} |
480 | |
481 | const int ntools = 1; |
482 | |
483 | uint32_t getSize() const override { |
484 | return sizeof(build_version_command) + ntools * sizeof(build_tool_version); |
485 | } |
486 | |
487 | void writeTo(uint8_t *buf) const override { |
488 | auto *c = reinterpret_cast<build_version_command *>(buf); |
489 | c->cmd = LC_BUILD_VERSION; |
490 | c->cmdsize = getSize(); |
491 | |
492 | c->platform = static_cast<uint32_t>(platformInfo.target.Platform); |
493 | c->minos = encodeVersion(version: platformInfo.target.MinDeployment); |
494 | c->sdk = encodeVersion(version: platformInfo.sdk); |
495 | |
496 | c->ntools = ntools; |
497 | auto *t = reinterpret_cast<build_tool_version *>(&c[1]); |
498 | t->tool = TOOL_LLD; |
499 | t->version = encodeVersion(version: VersionTuple( |
500 | LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, LLVM_VERSION_PATCH)); |
501 | } |
502 | |
503 | private: |
504 | const PlatformInfo &platformInfo; |
505 | }; |
506 | |
// Stores a unique identifier for the output file based on a hash of its
// contents (a 64-bit xxhash digest rather than MD5; see writeUuid()). In order
// to hash the contents, we must first write them, but LC_UUID itself must be
// part of the written contents in order for all the offsets to be calculated
// correctly. We resolve this circular paradox by first writing an LC_UUID with
// an all-zero UUID, then updating the UUID with its real value later.
513 | class LCUuid final : public LoadCommand { |
514 | public: |
515 | uint32_t getSize() const override { return sizeof(uuid_command); } |
516 | |
517 | void writeTo(uint8_t *buf) const override { |
518 | auto *c = reinterpret_cast<uuid_command *>(buf); |
519 | c->cmd = LC_UUID; |
520 | c->cmdsize = getSize(); |
521 | uuidBuf = c->uuid; |
522 | } |
523 | |
524 | void writeUuid(uint64_t digest) const { |
525 | // xxhash only gives us 8 bytes, so put some fixed data in the other half. |
526 | static_assert(sizeof(uuid_command::uuid) == 16, "unexpected uuid size" ); |
527 | memcpy(dest: uuidBuf, src: "LLD\xa1UU1D" , n: 8); |
528 | memcpy(dest: uuidBuf + 8, src: &digest, n: 8); |
529 | |
530 | // RFC 4122 conformance. We need to fix 4 bits in byte 6 and 2 bits in |
531 | // byte 8. Byte 6 is already fine due to the fixed data we put in. We don't |
532 | // want to lose bits of the digest in byte 8, so swap that with a byte of |
533 | // fixed data that happens to have the right bits set. |
534 | std::swap(a&: uuidBuf[3], b&: uuidBuf[8]); |
535 | |
536 | // Claim that this is an MD5-based hash. It isn't, but this signals that |
537 | // this is not a time-based and not a random hash. MD5 seems like the least |
538 | // bad lie we can put here. |
539 | assert((uuidBuf[6] & 0xf0) == 0x30 && "See RFC 4122 Sections 4.2.2, 4.1.3" ); |
540 | assert((uuidBuf[8] & 0xc0) == 0x80 && "See RFC 4122 Section 4.2.2" ); |
541 | } |
542 | |
543 | mutable uint8_t *uuidBuf; |
544 | }; |
545 | |
546 | template <class LP> class LCEncryptionInfo final : public LoadCommand { |
547 | public: |
548 | uint32_t getSize() const override { |
549 | return sizeof(typename LP::encryption_info_command); |
550 | } |
551 | |
552 | void writeTo(uint8_t *buf) const override { |
553 | using EncryptionInfo = typename LP::encryption_info_command; |
554 | auto *c = reinterpret_cast<EncryptionInfo *>(buf); |
555 | buf += sizeof(EncryptionInfo); |
556 | c->cmd = LP::encryptionInfoLCType; |
557 | c->cmdsize = getSize(); |
558 | c->cryptoff = in.header->getSize(); |
559 | auto it = find_if(outputSegments, [](const OutputSegment *seg) { |
560 | return seg->name == segment_names::text; |
561 | }); |
562 | assert(it != outputSegments.end()); |
563 | c->cryptsize = (*it)->fileSize - c->cryptoff; |
564 | } |
565 | }; |
566 | |
567 | class LCCodeSignature final : public LoadCommand { |
568 | public: |
569 | LCCodeSignature(CodeSignatureSection *section) : section(section) {} |
570 | |
571 | uint32_t getSize() const override { return sizeof(linkedit_data_command); } |
572 | |
573 | void writeTo(uint8_t *buf) const override { |
574 | auto *c = reinterpret_cast<linkedit_data_command *>(buf); |
575 | c->cmd = LC_CODE_SIGNATURE; |
576 | c->cmdsize = getSize(); |
577 | c->dataoff = static_cast<uint32_t>(section->fileOff); |
578 | c->datasize = section->getSize(); |
579 | } |
580 | |
581 | CodeSignatureSection *section; |
582 | }; |
583 | |
584 | class LCExportsTrie final : public LoadCommand { |
585 | public: |
586 | LCExportsTrie(ExportSection *section) : section(section) {} |
587 | |
588 | uint32_t getSize() const override { return sizeof(linkedit_data_command); } |
589 | |
590 | void writeTo(uint8_t *buf) const override { |
591 | auto *c = reinterpret_cast<linkedit_data_command *>(buf); |
592 | c->cmd = LC_DYLD_EXPORTS_TRIE; |
593 | c->cmdsize = getSize(); |
594 | c->dataoff = section->fileOff; |
595 | c->datasize = section->getSize(); |
596 | } |
597 | |
598 | ExportSection *section; |
599 | }; |
600 | |
601 | class LCChainedFixups final : public LoadCommand { |
602 | public: |
603 | LCChainedFixups(ChainedFixupsSection *section) : section(section) {} |
604 | |
605 | uint32_t getSize() const override { return sizeof(linkedit_data_command); } |
606 | |
607 | void writeTo(uint8_t *buf) const override { |
608 | auto *c = reinterpret_cast<linkedit_data_command *>(buf); |
609 | c->cmd = LC_DYLD_CHAINED_FIXUPS; |
610 | c->cmdsize = getSize(); |
611 | c->dataoff = section->fileOff; |
612 | c->datasize = section->getSize(); |
613 | } |
614 | |
615 | ChainedFixupsSection *section; |
616 | }; |
617 | |
618 | } // namespace |
619 | |
620 | void Writer::treatSpecialUndefineds() { |
621 | if (config->entry) |
622 | if (auto *undefined = dyn_cast<Undefined>(Val: config->entry)) |
623 | treatUndefinedSymbol(*undefined, source: "the entry point" ); |
624 | |
625 | // FIXME: This prints symbols that are undefined both in input files and |
626 | // via -u flag twice. |
627 | for (const Symbol *sym : config->explicitUndefineds) { |
628 | if (const auto *undefined = dyn_cast<Undefined>(Val: sym)) |
629 | treatUndefinedSymbol(*undefined, source: "-u" ); |
630 | } |
631 | // Literal exported-symbol names must be defined, but glob |
632 | // patterns need not match. |
633 | for (const CachedHashStringRef &cachedName : |
634 | config->exportedSymbols.literals) { |
635 | if (const Symbol *sym = symtab->find(name: cachedName)) |
636 | if (const auto *undefined = dyn_cast<Undefined>(Val: sym)) |
637 | treatUndefinedSymbol(*undefined, source: "-exported_symbol(s_list)" ); |
638 | } |
639 | } |
640 | |
641 | static void prepareSymbolRelocation(Symbol *sym, const InputSection *isec, |
642 | const lld::macho::Reloc &r) { |
643 | assert(sym->isLive()); |
644 | const RelocAttrs &relocAttrs = target->getRelocAttrs(type: r.type); |
645 | |
646 | if (relocAttrs.hasAttr(b: RelocAttrBits::BRANCH)) { |
647 | if (needsBinding(sym)) |
648 | in.stubs->addEntry(sym); |
649 | } else if (relocAttrs.hasAttr(b: RelocAttrBits::GOT)) { |
650 | if (relocAttrs.hasAttr(b: RelocAttrBits::POINTER) || needsBinding(sym)) |
651 | in.got->addEntry(sym); |
652 | } else if (relocAttrs.hasAttr(b: RelocAttrBits::TLV)) { |
653 | if (needsBinding(sym)) |
654 | in.tlvPointers->addEntry(sym); |
655 | } else if (relocAttrs.hasAttr(b: RelocAttrBits::UNSIGNED)) { |
656 | // References from thread-local variable sections are treated as offsets |
657 | // relative to the start of the referent section, and therefore have no |
658 | // need of rebase opcodes. |
659 | if (!(isThreadLocalVariables(flags: isec->getFlags()) && isa<Defined>(Val: sym))) |
660 | addNonLazyBindingEntries(sym, isec, offset: r.offset, addend: r.addend); |
661 | } |
662 | } |
663 | |
664 | void Writer::scanRelocations() { |
665 | TimeTraceScope timeScope("Scan relocations" ); |
666 | |
667 | // This can't use a for-each loop: It calls treatUndefinedSymbol(), which can |
668 | // add to inputSections, which invalidates inputSections's iterators. |
669 | for (size_t i = 0; i < inputSections.size(); ++i) { |
670 | ConcatInputSection *isec = inputSections[i]; |
671 | |
672 | if (isec->shouldOmitFromOutput()) |
673 | continue; |
674 | |
675 | for (auto it = isec->relocs.begin(); it != isec->relocs.end(); ++it) { |
676 | lld::macho::Reloc &r = *it; |
677 | |
678 | // Canonicalize the referent so that later accesses in Writer won't |
679 | // have to worry about it. |
680 | if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) |
681 | r.referent = referentIsec->canonical(); |
682 | |
683 | if (target->hasAttr(type: r.type, bit: RelocAttrBits::SUBTRAHEND)) { |
684 | // Skip over the following UNSIGNED relocation -- it's just there as the |
685 | // minuend, and doesn't have the usual UNSIGNED semantics. We don't want |
686 | // to emit rebase opcodes for it. |
687 | ++it; |
688 | // Canonicalize the referent so that later accesses in Writer won't |
689 | // have to worry about it. |
690 | if (auto *referentIsec = it->referent.dyn_cast<InputSection *>()) |
691 | it->referent = referentIsec->canonical(); |
692 | continue; |
693 | } |
694 | if (auto *sym = r.referent.dyn_cast<Symbol *>()) { |
695 | if (auto *undefined = dyn_cast<Undefined>(Val: sym)) |
696 | treatUndefinedSymbol(*undefined, isec, offset: r.offset); |
697 | // treatUndefinedSymbol() can replace sym with a DylibSymbol; re-check. |
698 | if (!isa<Undefined>(Val: sym) && validateSymbolRelocation(sym, isec, r)) |
699 | prepareSymbolRelocation(sym, isec, r); |
700 | } else { |
701 | if (!r.pcrel) { |
702 | if (config->emitChainedFixups) |
703 | in.chainedFixups->addRebase(isec, offset: r.offset); |
704 | else |
705 | in.rebase->addEntry(isec, offset: r.offset); |
706 | } |
707 | } |
708 | } |
709 | } |
710 | |
711 | in.unwindInfo->prepare(); |
712 | } |
713 | |
714 | static void addNonWeakDefinition(const Defined *defined) { |
715 | if (config->emitChainedFixups) |
716 | in.chainedFixups->setHasNonWeakDefinition(); |
717 | else |
718 | in.weakBinding->addNonWeakDefinition(defined); |
719 | } |
720 | |
721 | void Writer::scanSymbols() { |
722 | TimeTraceScope timeScope("Scan symbols" ); |
723 | in.objcStubs->initialize(); |
724 | for (Symbol *sym : symtab->getSymbols()) { |
725 | if (auto *defined = dyn_cast<Defined>(Val: sym)) { |
726 | if (!defined->isLive()) |
727 | continue; |
728 | defined->canonicalize(); |
729 | if (defined->overridesWeakDef) |
730 | addNonWeakDefinition(defined); |
731 | if (!defined->isAbsolute() && isCodeSection(defined->isec)) |
732 | in.unwindInfo->addSymbol(defined); |
733 | } else if (const auto *dysym = dyn_cast<DylibSymbol>(Val: sym)) { |
734 | // This branch intentionally doesn't check isLive(). |
735 | if (dysym->isDynamicLookup()) |
736 | continue; |
737 | dysym->getFile()->refState = |
738 | std::max(a: dysym->getFile()->refState, b: dysym->getRefState()); |
739 | } else if (isa<Undefined>(Val: sym)) { |
740 | if (ObjCStubsSection::isObjCStubSymbol(sym)) { |
741 | // When -dead_strip is enabled, we don't want to emit any dead stubs. |
742 | // Although this stub symbol is yet undefined, addSym() was called |
743 | // during MarkLive. |
744 | if (config->deadStrip) { |
745 | if (!sym->isLive()) |
746 | continue; |
747 | } |
748 | in.objcStubs->addEntry(sym); |
749 | } |
750 | } |
751 | } |
752 | |
753 | for (const InputFile *file : inputFiles) { |
754 | if (auto *objFile = dyn_cast<ObjFile>(Val: file)) |
755 | for (Symbol *sym : objFile->symbols) { |
756 | if (auto *defined = dyn_cast_or_null<Defined>(Val: sym)) { |
757 | if (!defined->isLive()) |
758 | continue; |
759 | defined->canonicalize(); |
760 | if (!defined->isExternal() && !defined->isAbsolute() && |
761 | isCodeSection(defined->isec)) |
762 | in.unwindInfo->addSymbol(defined); |
763 | } |
764 | } |
765 | } |
766 | } |
767 | |
768 | // TODO: ld64 enforces the old load commands in a few other cases. |
769 | static bool useLCBuildVersion(const PlatformInfo &platformInfo) { |
770 | static const std::array<std::pair<PlatformType, VersionTuple>, 7> minVersion = |
771 | {._M_elems: {{PLATFORM_MACOS, VersionTuple(10, 14)}, |
772 | {PLATFORM_IOS, VersionTuple(12, 0)}, |
773 | {PLATFORM_IOSSIMULATOR, VersionTuple(13, 0)}, |
774 | {PLATFORM_TVOS, VersionTuple(12, 0)}, |
775 | {PLATFORM_TVOSSIMULATOR, VersionTuple(13, 0)}, |
776 | {PLATFORM_WATCHOS, VersionTuple(5, 0)}, |
777 | {PLATFORM_WATCHOSSIMULATOR, VersionTuple(6, 0)}}}; |
778 | auto it = llvm::find_if(Range: minVersion, P: [&](const auto &p) { |
779 | return p.first == platformInfo.target.Platform; |
780 | }); |
781 | return it == minVersion.end() |
782 | ? true |
783 | : platformInfo.target.MinDeployment >= it->second; |
784 | } |
785 | |
786 | template <class LP> void Writer::createLoadCommands() { |
787 | uint8_t segIndex = 0; |
788 | for (OutputSegment *seg : outputSegments) { |
789 | in.header->addLoadCommand(make<LCSegment<LP>>(seg->name, seg)); |
790 | seg->index = segIndex++; |
791 | } |
792 | |
793 | if (config->emitChainedFixups) { |
794 | in.header->addLoadCommand(make<LCChainedFixups>(args&: in.chainedFixups)); |
795 | in.header->addLoadCommand(make<LCExportsTrie>(args&: in.exports)); |
796 | } else { |
797 | in.header->addLoadCommand(make<LCDyldInfo>( |
798 | args&: in.rebase, args&: in.binding, args&: in.weakBinding, args&: in.lazyBinding, args&: in.exports)); |
799 | } |
800 | in.header->addLoadCommand(make<LCSymtab>(args&: symtabSection, args&: stringTableSection)); |
801 | in.header->addLoadCommand( |
802 | make<LCDysymtab>(args&: symtabSection, args&: indirectSymtabSection)); |
803 | if (!config->umbrella.empty()) |
804 | in.header->addLoadCommand(make<LCSubFramework>(args&: config->umbrella)); |
805 | if (config->emitEncryptionInfo) |
806 | in.header->addLoadCommand(make<LCEncryptionInfo<LP>>()); |
807 | for (StringRef path : config->runtimePaths) |
808 | in.header->addLoadCommand(make<LCRPath>(args&: path)); |
809 | |
810 | switch (config->outputType) { |
811 | case MH_EXECUTE: |
812 | in.header->addLoadCommand(make<LCLoadDylinker>()); |
813 | break; |
814 | case MH_DYLIB: |
815 | in.header->addLoadCommand(make<LCDylib>(args: LC_ID_DYLIB, args&: config->installName, |
816 | args&: config->dylibCompatibilityVersion, |
817 | args&: config->dylibCurrentVersion)); |
818 | break; |
819 | case MH_BUNDLE: |
820 | break; |
821 | default: |
822 | llvm_unreachable("unhandled output file type" ); |
823 | } |
824 | |
825 | if (config->generateUuid) { |
826 | uuidCommand = make<LCUuid>(); |
827 | in.header->addLoadCommand(uuidCommand); |
828 | } |
829 | |
830 | if (useLCBuildVersion(platformInfo: config->platformInfo)) |
831 | in.header->addLoadCommand(make<LCBuildVersion>(args&: config->platformInfo)); |
832 | else |
833 | in.header->addLoadCommand(make<LCMinVersion>(args&: config->platformInfo)); |
834 | |
835 | if (config->secondaryPlatformInfo) { |
836 | in.header->addLoadCommand( |
837 | make<LCBuildVersion>(args&: *config->secondaryPlatformInfo)); |
838 | } |
839 | |
840 | // This is down here to match ld64's load command order. |
841 | if (config->outputType == MH_EXECUTE) |
842 | in.header->addLoadCommand(make<LCMain>()); |
843 | |
844 | // See ld64's OutputFile::buildDylibOrdinalMapping for the corresponding |
845 | // library ordinal computation code in ld64. |
846 | int64_t dylibOrdinal = 1; |
847 | DenseMap<StringRef, int64_t> ordinalForInstallName; |
848 | |
849 | std::vector<DylibFile *> dylibFiles; |
850 | for (InputFile *file : inputFiles) { |
851 | if (auto *dylibFile = dyn_cast<DylibFile>(Val: file)) |
852 | dylibFiles.push_back(x: dylibFile); |
853 | } |
854 | for (size_t i = 0; i < dylibFiles.size(); ++i) |
855 | dylibFiles.insert(position: dylibFiles.end(), first: dylibFiles[i]->extraDylibs.begin(), |
856 | last: dylibFiles[i]->extraDylibs.end()); |
857 | |
858 | for (DylibFile *dylibFile : dylibFiles) { |
859 | if (dylibFile->isBundleLoader) { |
860 | dylibFile->ordinal = BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE; |
861 | // Shortcut since bundle-loader does not re-export the symbols. |
862 | |
863 | dylibFile->reexport = false; |
864 | continue; |
865 | } |
866 | |
867 | // Don't emit load commands for a dylib that is not referenced if: |
868 | // - it was added implicitly (via a reexport, an LC_LOAD_DYLINKER -- |
869 | // if it's on the linker command line, it's explicit) |
870 | // - or it's marked MH_DEAD_STRIPPABLE_DYLIB |
871 | // - or the flag -dead_strip_dylibs is used |
872 | // FIXME: `isReferenced()` is currently computed before dead code |
873 | // stripping, so references from dead code keep a dylib alive. This |
874 | // matches ld64, but it's something we should do better. |
875 | if (!dylibFile->isReferenced() && !dylibFile->forceNeeded && |
876 | (!dylibFile->isExplicitlyLinked() || dylibFile->deadStrippable || |
877 | config->deadStripDylibs)) |
878 | continue; |
879 | |
880 | // Several DylibFiles can have the same installName. Only emit a single |
881 | // load command for that installName and give all these DylibFiles the |
882 | // same ordinal. |
883 | // This can happen in several cases: |
884 | // - a new framework could change its installName to an older |
885 | // framework name via an $ld$ symbol depending on platform_version |
886 | // - symlinks (for example, libpthread.tbd is a symlink to libSystem.tbd; |
887 | // Foo.framework/Foo.tbd is usually a symlink to |
888 | // Foo.framework/Versions/Current/Foo.tbd, where |
889 | // Foo.framework/Versions/Current is usually a symlink to |
890 | // Foo.framework/Versions/A) |
891 | // - a framework can be linked both explicitly on the linker |
892 | // command line and implicitly as a reexport from a different |
893 | // framework. The re-export will usually point to the tbd file |
894 | // in Foo.framework/Versions/A/Foo.tbd, while the explicit link will |
895 | // usually find Foo.framework/Foo.tbd. These are usually symlinks, |
896 | // but in a --reproduce archive they will be identical but distinct |
897 | // files. |
898 | // In the first case, *semantically distinct* DylibFiles will have the |
899 | // same installName. |
900 | int64_t &ordinal = ordinalForInstallName[dylibFile->installName]; |
901 | if (ordinal) { |
902 | dylibFile->ordinal = ordinal; |
903 | continue; |
904 | } |
905 | |
906 | ordinal = dylibFile->ordinal = dylibOrdinal++; |
907 | LoadCommandType lcType = |
908 | dylibFile->forceWeakImport || dylibFile->refState == RefState::Weak |
909 | ? LC_LOAD_WEAK_DYLIB |
910 | : LC_LOAD_DYLIB; |
911 | in.header->addLoadCommand(make<LCDylib>(args&: lcType, args&: dylibFile->installName, |
912 | args&: dylibFile->compatibilityVersion, |
913 | args&: dylibFile->currentVersion)); |
914 | |
915 | if (dylibFile->reexport) |
916 | in.header->addLoadCommand( |
917 | make<LCDylib>(args: LC_REEXPORT_DYLIB, args&: dylibFile->installName)); |
918 | } |
919 | |
920 | for (const auto &dyldEnv : config->dyldEnvs) |
921 | in.header->addLoadCommand(make<LCDyldEnv>(args: dyldEnv)); |
922 | |
923 | if (functionStartsSection) |
924 | in.header->addLoadCommand(make<LCFunctionStarts>(args&: functionStartsSection)); |
925 | if (dataInCodeSection) |
926 | in.header->addLoadCommand(make<LCDataInCode>(args&: dataInCodeSection)); |
927 | if (codeSignatureSection) |
928 | in.header->addLoadCommand(make<LCCodeSignature>(args&: codeSignatureSection)); |
929 | |
930 | const uint32_t MACOS_MAXPATHLEN = 1024; |
931 | config->headerPad = std::max( |
932 | a: config->headerPad, b: (config->headerPadMaxInstallNames |
933 | ? LCDylib::getInstanceCount() * MACOS_MAXPATHLEN |
934 | : 0)); |
935 | } |
936 | |
937 | // Sorting only can happen once all outputs have been collected. Here we sort |
938 | // segments, output sections within each segment, and input sections within each |
939 | // output segment. |
940 | static void sortSegmentsAndSections() { |
941 | TimeTraceScope timeScope("Sort segments and sections" ); |
942 | sortOutputSegments(); |
943 | |
944 | DenseMap<const InputSection *, size_t> isecPriorities = |
945 | priorityBuilder.buildInputSectionPriorities(); |
946 | |
947 | uint32_t sectionIndex = 0; |
948 | for (OutputSegment *seg : outputSegments) { |
949 | seg->sortOutputSections(); |
950 | // References from thread-local variable sections are treated as offsets |
951 | // relative to the start of the thread-local data memory area, which |
952 | // is initialized via copying all the TLV data sections (which are all |
953 | // contiguous). If later data sections require a greater alignment than |
954 | // earlier ones, the offsets of data within those sections won't be |
955 | // guaranteed to aligned unless we normalize alignments. We therefore use |
956 | // the largest alignment for all TLV data sections. |
957 | uint32_t tlvAlign = 0; |
958 | for (const OutputSection *osec : seg->getSections()) |
959 | if (isThreadLocalData(flags: osec->flags) && osec->align > tlvAlign) |
960 | tlvAlign = osec->align; |
961 | |
962 | for (OutputSection *osec : seg->getSections()) { |
963 | // Now that the output sections are sorted, assign the final |
964 | // output section indices. |
965 | if (!osec->isHidden()) |
966 | osec->index = ++sectionIndex; |
967 | if (isThreadLocalData(flags: osec->flags)) { |
968 | if (!firstTLVDataSection) |
969 | firstTLVDataSection = osec; |
970 | osec->align = tlvAlign; |
971 | } |
972 | |
973 | if (!isecPriorities.empty()) { |
974 | if (auto *merged = dyn_cast<ConcatOutputSection>(Val: osec)) { |
975 | llvm::stable_sort( |
976 | Range&: merged->inputs, C: [&](InputSection *a, InputSection *b) { |
977 | return isecPriorities.lookup(Val: a) > isecPriorities.lookup(Val: b); |
978 | }); |
979 | } |
980 | } |
981 | } |
982 | } |
983 | } |
984 | |
985 | template <class LP> void Writer::createOutputSections() { |
986 | TimeTraceScope timeScope("Create output sections" ); |
987 | // First, create hidden sections |
988 | stringTableSection = make<StringTableSection>(); |
989 | symtabSection = makeSymtabSection<LP>(*stringTableSection); |
990 | indirectSymtabSection = make<IndirectSymtabSection>(); |
991 | if (config->adhocCodesign) |
992 | codeSignatureSection = make<CodeSignatureSection>(); |
993 | if (config->emitDataInCodeInfo) |
994 | dataInCodeSection = make<DataInCodeSection>(); |
995 | if (config->emitFunctionStarts) |
996 | functionStartsSection = make<FunctionStartsSection>(); |
997 | |
998 | switch (config->outputType) { |
999 | case MH_EXECUTE: |
1000 | make<PageZeroSection>(); |
1001 | break; |
1002 | case MH_DYLIB: |
1003 | case MH_BUNDLE: |
1004 | break; |
1005 | default: |
1006 | llvm_unreachable("unhandled output file type" ); |
1007 | } |
1008 | |
1009 | // Then add input sections to output sections. |
1010 | for (ConcatInputSection *isec : inputSections) { |
1011 | if (isec->shouldOmitFromOutput()) |
1012 | continue; |
1013 | ConcatOutputSection *osec = cast<ConcatOutputSection>(Val: isec->parent); |
1014 | osec->addInput(input: isec); |
1015 | osec->inputOrder = |
1016 | std::min(a: osec->inputOrder, b: static_cast<int>(isec->outSecOff)); |
1017 | } |
1018 | |
1019 | // Once all the inputs are added, we can finalize the output section |
1020 | // properties and create the corresponding output segments. |
1021 | for (const auto &it : concatOutputSections) { |
1022 | StringRef segname = it.first.first; |
1023 | ConcatOutputSection *osec = it.second; |
1024 | assert(segname != segment_names::ld); |
1025 | if (osec->isNeeded()) { |
1026 | // See comment in ObjFile::splitEhFrames() |
1027 | if (osec->name == section_names::ehFrame && |
1028 | segname == segment_names::text) |
1029 | osec->align = target->wordSize; |
1030 | |
1031 | // MC keeps the default 1-byte alignment for __thread_vars, even though it |
1032 | // contains pointers that are fixed up by dyld, which requires proper |
1033 | // alignment. |
1034 | if (isThreadLocalVariables(flags: osec->flags)) |
1035 | osec->align = std::max<uint32_t>(a: osec->align, b: target->wordSize); |
1036 | |
1037 | getOrCreateOutputSegment(name: segname)->addOutputSection(os: osec); |
1038 | } |
1039 | } |
1040 | |
1041 | for (SyntheticSection *ssec : syntheticSections) { |
1042 | auto it = concatOutputSections.find(Key: {ssec->segname, ssec->name}); |
1043 | // We add all LinkEdit sections here because we don't know if they are |
1044 | // needed until their finalizeContents() methods get called later. While |
1045 | // this means that we add some redundant sections to __LINKEDIT, there is |
1046 | // is no redundancy in the output, as we do not emit section headers for |
1047 | // any LinkEdit sections. |
1048 | if (ssec->isNeeded() || ssec->segname == segment_names::linkEdit) { |
1049 | if (it == concatOutputSections.end()) { |
1050 | getOrCreateOutputSegment(name: ssec->segname)->addOutputSection(os: ssec); |
1051 | } else { |
1052 | fatal(msg: "section from " + |
1053 | toString(file: it->second->firstSection()->getFile()) + |
1054 | " conflicts with synthetic section " + ssec->segname + "," + |
1055 | ssec->name); |
1056 | } |
1057 | } |
1058 | } |
1059 | |
1060 | // dyld requires __LINKEDIT segment to always exist (even if empty). |
1061 | linkEditSegment = getOrCreateOutputSegment(name: segment_names::linkEdit); |
1062 | } |
1063 | |
1064 | void Writer::finalizeAddresses() { |
1065 | TimeTraceScope timeScope("Finalize addresses" ); |
1066 | uint64_t pageSize = target->getPageSize(); |
1067 | |
1068 | // We could parallelize this loop, but local benchmarking indicates it is |
1069 | // faster to do it all in the main thread. |
1070 | for (OutputSegment *seg : outputSegments) { |
1071 | if (seg == linkEditSegment) |
1072 | continue; |
1073 | for (OutputSection *osec : seg->getSections()) { |
1074 | if (!osec->isNeeded()) |
1075 | continue; |
1076 | // Other kinds of OutputSections have already been finalized. |
1077 | if (auto *concatOsec = dyn_cast<ConcatOutputSection>(Val: osec)) |
1078 | concatOsec->finalizeContents(); |
1079 | } |
1080 | } |
1081 | |
1082 | // Ensure that segments (and the sections they contain) are allocated |
1083 | // addresses in ascending order, which dyld requires. |
1084 | // |
1085 | // Note that at this point, __LINKEDIT sections are empty, but we need to |
1086 | // determine addresses of other segments/sections before generating its |
1087 | // contents. |
1088 | for (OutputSegment *seg : outputSegments) { |
1089 | if (seg == linkEditSegment) |
1090 | continue; |
1091 | seg->addr = addr; |
1092 | assignAddresses(seg); |
1093 | // codesign / libstuff checks for segment ordering by verifying that |
1094 | // `fileOff + fileSize == next segment fileOff`. So we call |
1095 | // alignToPowerOf2() before (instead of after) computing fileSize to ensure |
1096 | // that the segments are contiguous. We handle addr / vmSize similarly for |
1097 | // the same reason. |
1098 | fileOff = alignToPowerOf2(Value: fileOff, Align: pageSize); |
1099 | addr = alignToPowerOf2(Value: addr, Align: pageSize); |
1100 | seg->vmSize = addr - seg->addr; |
1101 | seg->fileSize = fileOff - seg->fileOff; |
1102 | seg->assignAddressesToStartEndSymbols(); |
1103 | } |
1104 | } |
1105 | |
1106 | void Writer::finalizeLinkEditSegment() { |
1107 | TimeTraceScope timeScope("Finalize __LINKEDIT segment" ); |
1108 | // Fill __LINKEDIT contents. |
1109 | std::array<LinkEditSection *, 10> linkEditSections{ |
1110 | in.rebase, in.binding, |
1111 | in.weakBinding, in.lazyBinding, |
1112 | in.exports, in.chainedFixups, |
1113 | symtabSection, indirectSymtabSection, |
1114 | dataInCodeSection, functionStartsSection, |
1115 | }; |
1116 | SmallVector<std::shared_future<void>> threadFutures; |
1117 | threadFutures.reserve(N: linkEditSections.size()); |
1118 | for (LinkEditSection *osec : linkEditSections) |
1119 | if (osec) |
1120 | threadFutures.emplace_back(Args: threadPool.async( |
1121 | F: [](LinkEditSection *osec) { osec->finalizeContents(); }, ArgList&: osec)); |
1122 | for (std::shared_future<void> &future : threadFutures) |
1123 | future.wait(); |
1124 | |
1125 | // Now that __LINKEDIT is filled out, do a proper calculation of its |
1126 | // addresses and offsets. |
1127 | linkEditSegment->addr = addr; |
1128 | assignAddresses(linkEditSegment); |
1129 | // No need to page-align fileOff / addr here since this is the last segment. |
1130 | linkEditSegment->vmSize = addr - linkEditSegment->addr; |
1131 | linkEditSegment->fileSize = fileOff - linkEditSegment->fileOff; |
1132 | } |
1133 | |
1134 | void Writer::assignAddresses(OutputSegment *seg) { |
1135 | seg->fileOff = fileOff; |
1136 | |
1137 | for (OutputSection *osec : seg->getSections()) { |
1138 | if (!osec->isNeeded()) |
1139 | continue; |
1140 | addr = alignToPowerOf2(Value: addr, Align: osec->align); |
1141 | fileOff = alignToPowerOf2(Value: fileOff, Align: osec->align); |
1142 | osec->addr = addr; |
1143 | osec->fileOff = isZeroFill(flags: osec->flags) ? 0 : fileOff; |
1144 | osec->finalize(); |
1145 | osec->assignAddressesToStartEndSymbols(); |
1146 | |
1147 | addr += osec->getSize(); |
1148 | fileOff += osec->getFileSize(); |
1149 | } |
1150 | } |
1151 | |
1152 | void Writer::openFile() { |
1153 | Expected<std::unique_ptr<FileOutputBuffer>> bufferOrErr = |
1154 | FileOutputBuffer::create(FilePath: config->outputFile, Size: fileOff, |
1155 | Flags: FileOutputBuffer::F_executable); |
1156 | |
1157 | if (!bufferOrErr) |
1158 | fatal(msg: "failed to open " + config->outputFile + ": " + |
1159 | llvm::toString(E: bufferOrErr.takeError())); |
1160 | buffer = std::move(*bufferOrErr); |
1161 | in.bufferStart = buffer->getBufferStart(); |
1162 | } |
1163 | |
1164 | void Writer::writeSections() { |
1165 | uint8_t *buf = buffer->getBufferStart(); |
1166 | std::vector<const OutputSection *> osecs; |
1167 | for (const OutputSegment *seg : outputSegments) |
1168 | append_range(C&: osecs, R: seg->getSections()); |
1169 | |
1170 | parallelForEach(Begin: osecs.begin(), End: osecs.end(), Fn: [&](const OutputSection *osec) { |
1171 | osec->writeTo(buf: buf + osec->fileOff); |
1172 | }); |
1173 | } |
1174 | |
1175 | void Writer::applyOptimizationHints() { |
1176 | if (config->arch() != AK_arm64 || config->ignoreOptimizationHints) |
1177 | return; |
1178 | |
1179 | uint8_t *buf = buffer->getBufferStart(); |
1180 | TimeTraceScope timeScope("Apply linker optimization hints" ); |
1181 | parallelForEach(R&: inputFiles, Fn: [buf](const InputFile *file) { |
1182 | if (const auto *objFile = dyn_cast<ObjFile>(Val: file)) |
1183 | target->applyOptimizationHints(buf, *objFile); |
1184 | }); |
1185 | } |
1186 | |
1187 | // In order to utilize multiple cores, we first split the buffer into chunks, |
1188 | // compute a hash for each chunk, and then compute a hash value of the hash |
1189 | // values. |
1190 | void Writer::writeUuid() { |
1191 | TimeTraceScope timeScope("Computing UUID" ); |
1192 | |
1193 | ArrayRef<uint8_t> data{buffer->getBufferStart(), buffer->getBufferEnd()}; |
1194 | std::vector<ArrayRef<uint8_t>> chunks = split(arr: data, chunkSize: 1024 * 1024); |
1195 | // Leave one slot for filename |
1196 | std::vector<uint64_t> hashes(chunks.size() + 1); |
1197 | SmallVector<std::shared_future<void>> threadFutures; |
1198 | threadFutures.reserve(N: chunks.size()); |
1199 | for (size_t i = 0; i < chunks.size(); ++i) |
1200 | threadFutures.emplace_back(Args: threadPool.async( |
1201 | F: [&](size_t j) { hashes[j] = xxh3_64bits(data: chunks[j]); }, ArgList&: i)); |
1202 | for (std::shared_future<void> &future : threadFutures) |
1203 | future.wait(); |
1204 | // Append the output filename so that identical binaries with different names |
1205 | // don't get the same UUID. |
1206 | hashes[chunks.size()] = xxh3_64bits(data: sys::path::filename(path: config->finalOutput)); |
1207 | uint64_t digest = xxh3_64bits(data: {reinterpret_cast<uint8_t *>(hashes.data()), |
1208 | hashes.size() * sizeof(uint64_t)}); |
1209 | uuidCommand->writeUuid(digest); |
1210 | } |
1211 | |
1212 | // This is step 5 of the algorithm described in the class comment of |
1213 | // ChainedFixupsSection. |
1214 | void Writer::buildFixupChains() { |
1215 | if (!config->emitChainedFixups) |
1216 | return; |
1217 | |
1218 | const std::vector<Location> &loc = in.chainedFixups->getLocations(); |
1219 | if (loc.empty()) |
1220 | return; |
1221 | |
1222 | TimeTraceScope timeScope("Build fixup chains" ); |
1223 | |
1224 | const uint64_t pageSize = target->getPageSize(); |
1225 | constexpr uint32_t stride = 4; // for DYLD_CHAINED_PTR_64 |
1226 | |
1227 | for (size_t i = 0, count = loc.size(); i < count;) { |
1228 | const OutputSegment *oseg = loc[i].isec->parent->parent; |
1229 | uint8_t *buf = buffer->getBufferStart() + oseg->fileOff; |
1230 | uint64_t pageIdx = loc[i].offset / pageSize; |
1231 | ++i; |
1232 | |
1233 | while (i < count && loc[i].isec->parent->parent == oseg && |
1234 | (loc[i].offset / pageSize) == pageIdx) { |
1235 | uint64_t offset = loc[i].offset - loc[i - 1].offset; |
1236 | |
1237 | auto fail = [&](Twine message) { |
1238 | error(msg: loc[i].isec->getSegName() + "," + loc[i].isec->getName() + |
1239 | ", offset " + |
1240 | Twine(loc[i].offset - loc[i].isec->parent->getSegmentOffset()) + |
1241 | ": " + message); |
1242 | }; |
1243 | |
1244 | if (offset < target->wordSize) |
1245 | return fail("fixups overlap" ); |
1246 | if (offset % stride != 0) |
1247 | return fail( |
1248 | "fixups are unaligned (offset " + Twine(offset) + |
1249 | " is not a multiple of the stride). Re-link with -no_fixup_chains" ); |
1250 | |
1251 | // The "next" field is in the same location for bind and rebase entries. |
1252 | reinterpret_cast<dyld_chained_ptr_64_bind *>(buf + loc[i - 1].offset) |
1253 | ->next = offset / stride; |
1254 | ++i; |
1255 | } |
1256 | } |
1257 | } |
1258 | |
1259 | void Writer::writeCodeSignature() { |
1260 | if (codeSignatureSection) { |
1261 | TimeTraceScope timeScope("Write code signature" ); |
1262 | codeSignatureSection->writeHashes(buf: buffer->getBufferStart()); |
1263 | } |
1264 | } |
1265 | |
1266 | void Writer::writeOutputFile() { |
1267 | TimeTraceScope timeScope("Write output file" ); |
1268 | openFile(); |
1269 | reportPendingUndefinedSymbols(); |
1270 | if (errorCount()) |
1271 | return; |
1272 | writeSections(); |
1273 | applyOptimizationHints(); |
1274 | buildFixupChains(); |
1275 | if (config->generateUuid) |
1276 | writeUuid(); |
1277 | writeCodeSignature(); |
1278 | |
1279 | if (auto e = buffer->commit()) |
1280 | fatal(msg: "failed to write output '" + buffer->getPath() + |
1281 | "': " + toString(E: std::move(e))); |
1282 | } |
1283 | |
1284 | template <class LP> void Writer::run() { |
1285 | treatSpecialUndefineds(); |
1286 | if (config->entry && needsBinding(sym: config->entry)) |
1287 | in.stubs->addEntry(config->entry); |
1288 | |
1289 | // Canonicalization of all pointers to InputSections should be handled by |
1290 | // these two scan* methods. I.e. from this point onward, for all live |
1291 | // InputSections, we should have `isec->canonical() == isec`. |
1292 | scanSymbols(); |
1293 | if (in.objcStubs->isNeeded()) |
1294 | in.objcStubs->setUp(); |
1295 | scanRelocations(); |
1296 | if (in.initOffsets->isNeeded()) |
1297 | in.initOffsets->setUp(); |
1298 | |
1299 | // Do not proceed if there were undefined or duplicate symbols. |
1300 | reportPendingUndefinedSymbols(); |
1301 | reportPendingDuplicateSymbols(); |
1302 | if (errorCount()) |
1303 | return; |
1304 | |
1305 | if (in.stubHelper && in.stubHelper->isNeeded()) |
1306 | in.stubHelper->setUp(); |
1307 | |
1308 | if (in.objCImageInfo->isNeeded()) |
1309 | in.objCImageInfo->finalizeContents(); |
1310 | |
1311 | // At this point, we should know exactly which output sections are needed, |
1312 | // courtesy of scanSymbols() and scanRelocations(). |
1313 | createOutputSections<LP>(); |
1314 | |
1315 | // After this point, we create no new segments; HOWEVER, we might |
1316 | // yet create branch-range extension thunks for architectures whose |
1317 | // hardware call instructions have limited range, e.g., ARM(64). |
1318 | // The thunks are created as InputSections interspersed among |
1319 | // the ordinary __TEXT,_text InputSections. |
1320 | sortSegmentsAndSections(); |
1321 | createLoadCommands<LP>(); |
1322 | finalizeAddresses(); |
1323 | threadPool.async([&] { |
1324 | if (LLVM_ENABLE_THREADS && config->timeTraceEnabled) |
1325 | timeTraceProfilerInitialize(TimeTraceGranularity: config->timeTraceGranularity, ProcName: "writeMapFile" ); |
1326 | writeMapFile(); |
1327 | if (LLVM_ENABLE_THREADS && config->timeTraceEnabled) |
1328 | timeTraceProfilerFinishThread(); |
1329 | }); |
1330 | finalizeLinkEditSegment(); |
1331 | writeOutputFile(); |
1332 | } |
1333 | |
1334 | template <class LP> void macho::writeResult() { Writer().run<LP>(); } |
1335 | |
1336 | void macho::resetWriter() { LCDylib::resetInstanceCount(); } |
1337 | |
1338 | void macho::createSyntheticSections() { |
1339 | in.header = make<MachHeaderSection>(); |
1340 | if (config->dedupStrings) |
1341 | in.cStringSection = |
1342 | make<DeduplicatedCStringSection>(args: section_names::cString); |
1343 | else |
1344 | in.cStringSection = make<CStringSection>(args: section_names::cString); |
1345 | in.objcMethnameSection = |
1346 | make<DeduplicatedCStringSection>(args: section_names::objcMethname); |
1347 | in.wordLiteralSection = make<WordLiteralSection>(); |
1348 | if (config->emitChainedFixups) { |
1349 | in.chainedFixups = make<ChainedFixupsSection>(); |
1350 | } else { |
1351 | in.rebase = make<RebaseSection>(); |
1352 | in.binding = make<BindingSection>(); |
1353 | in.weakBinding = make<WeakBindingSection>(); |
1354 | in.lazyBinding = make<LazyBindingSection>(); |
1355 | in.lazyPointers = make<LazyPointerSection>(); |
1356 | in.stubHelper = make<StubHelperSection>(); |
1357 | } |
1358 | in.exports = make<ExportSection>(); |
1359 | in.got = make<GotSection>(); |
1360 | in.tlvPointers = make<TlvPointerSection>(); |
1361 | in.stubs = make<StubsSection>(); |
1362 | in.objcStubs = make<ObjCStubsSection>(); |
1363 | in.unwindInfo = makeUnwindInfoSection(); |
1364 | in.objCImageInfo = make<ObjCImageInfoSection>(); |
1365 | in.initOffsets = make<InitOffsetsSection>(); |
1366 | |
1367 | // This section contains space for just a single word, and will be used by |
1368 | // dyld to cache an address to the image loader it uses. |
1369 | uint8_t *arr = bAlloc().Allocate<uint8_t>(Num: target->wordSize); |
1370 | memset(s: arr, c: 0, n: target->wordSize); |
1371 | in.imageLoaderCache = makeSyntheticInputSection( |
1372 | segName: segment_names::data, sectName: section_names::data, flags: S_REGULAR, |
1373 | data: ArrayRef<uint8_t>{arr, target->wordSize}, |
1374 | /*align=*/target->wordSize); |
1375 | // References from dyld are not visible to us, so ensure this section is |
1376 | // always treated as live. |
1377 | in.imageLoaderCache->live = true; |
1378 | } |
1379 | |
1380 | OutputSection *macho::firstTLVDataSection = nullptr; |
1381 | |
1382 | template void macho::writeResult<LP64>(); |
1383 | template void macho::writeResult<ILP32>(); |
1384 | |