1 | //===- MachO.h - MachO object file implementation ---------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
// This file declares the MachOObjectFile class, which implements the
// ObjectFile interface for MachO files.
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_OBJECT_MACHO_H |
15 | #define LLVM_OBJECT_MACHO_H |
16 | |
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/BinaryFormat/Swift.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/SymbolicFile.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/SubtargetFeature.h"
#include "llvm/TargetParser/Triple.h"
#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <system_error>
#include <utility>
#include <vector>
38 | |
39 | namespace llvm { |
40 | namespace object { |
41 | |
42 | /// DiceRef - This is a value type class that represents a single |
43 | /// data in code entry in the table in a Mach-O object file. |
44 | class DiceRef { |
45 | DataRefImpl DicePimpl; |
46 | const ObjectFile *OwningObject = nullptr; |
47 | |
48 | public: |
49 | DiceRef() = default; |
50 | DiceRef(DataRefImpl DiceP, const ObjectFile *Owner); |
51 | |
52 | bool operator==(const DiceRef &Other) const; |
53 | bool operator<(const DiceRef &Other) const; |
54 | |
55 | void moveNext(); |
56 | |
57 | std::error_code getOffset(uint32_t &Result) const; |
58 | std::error_code getLength(uint16_t &Result) const; |
59 | std::error_code getKind(uint16_t &Result) const; |
60 | |
61 | DataRefImpl getRawDataRefImpl() const; |
62 | const ObjectFile *getObjectFile() const; |
63 | }; |
64 | using dice_iterator = content_iterator<DiceRef>; |
65 | |
66 | /// ExportEntry encapsulates the current-state-of-the-walk used when doing a |
67 | /// non-recursive walk of the trie data structure. This allows you to iterate |
68 | /// across all exported symbols using: |
69 | /// Error Err = Error::success(); |
70 | /// for (const llvm::object::ExportEntry &AnExport : Obj->exports(&Err)) { |
71 | /// } |
72 | /// if (Err) { report error ... |
73 | class ExportEntry { |
74 | public: |
75 | ExportEntry(Error *Err, const MachOObjectFile *O, ArrayRef<uint8_t> Trie); |
76 | |
77 | StringRef name() const; |
78 | uint64_t flags() const; |
79 | uint64_t address() const; |
80 | uint64_t other() const; |
81 | StringRef otherName() const; |
82 | uint32_t nodeOffset() const; |
83 | |
84 | bool operator==(const ExportEntry &) const; |
85 | |
86 | void moveNext(); |
87 | |
88 | private: |
89 | friend class MachOObjectFile; |
90 | |
91 | void moveToFirst(); |
92 | void moveToEnd(); |
93 | uint64_t readULEB128(const uint8_t *&p, const char **error); |
94 | void pushDownUntilBottom(); |
95 | void pushNode(uint64_t Offset); |
96 | |
97 | // Represents a node in the mach-o exports trie. |
98 | struct NodeState { |
99 | NodeState(const uint8_t *Ptr); |
100 | |
101 | const uint8_t *Start; |
102 | const uint8_t *Current; |
103 | uint64_t Flags = 0; |
104 | uint64_t Address = 0; |
105 | uint64_t Other = 0; |
106 | const char *ImportName = nullptr; |
107 | unsigned ChildCount = 0; |
108 | unsigned NextChildIndex = 0; |
109 | unsigned ParentStringLength = 0; |
110 | bool IsExportNode = false; |
111 | }; |
112 | using NodeList = SmallVector<NodeState, 16>; |
113 | using node_iterator = NodeList::const_iterator; |
114 | |
115 | Error *E; |
116 | const MachOObjectFile *O; |
117 | ArrayRef<uint8_t> Trie; |
118 | SmallString<256> CumulativeString; |
119 | NodeList Stack; |
120 | bool Done = false; |
121 | |
122 | iterator_range<node_iterator> nodes() const { |
123 | return make_range(x: Stack.begin(), y: Stack.end()); |
124 | } |
125 | }; |
126 | using export_iterator = content_iterator<ExportEntry>; |
127 | |
128 | // Segment info so SegIndex/SegOffset pairs in a Mach-O Bind or Rebase entry |
129 | // can be checked and translated. Only the SegIndex/SegOffset pairs from |
130 | // checked entries are to be used with the segmentName(), sectionName() and |
131 | // address() methods below. |
132 | class BindRebaseSegInfo { |
133 | public: |
134 | BindRebaseSegInfo(const MachOObjectFile *Obj); |
135 | |
136 | // Used to check a Mach-O Bind or Rebase entry for errors when iterating. |
137 | const char* checkSegAndOffsets(int32_t SegIndex, uint64_t SegOffset, |
138 | uint8_t PointerSize, uint32_t Count=1, |
139 | uint32_t Skip=0); |
140 | // Used with valid SegIndex/SegOffset values from checked entries. |
141 | StringRef segmentName(int32_t SegIndex); |
142 | StringRef sectionName(int32_t SegIndex, uint64_t SegOffset); |
143 | uint64_t address(uint32_t SegIndex, uint64_t SegOffset); |
144 | |
145 | private: |
146 | struct SectionInfo { |
147 | uint64_t Address; |
148 | uint64_t Size; |
149 | StringRef SectionName; |
150 | StringRef SegmentName; |
151 | uint64_t OffsetInSegment; |
152 | uint64_t SegmentStartAddress; |
153 | int32_t SegmentIndex; |
154 | }; |
155 | const SectionInfo &findSection(int32_t SegIndex, uint64_t SegOffset); |
156 | |
157 | SmallVector<SectionInfo, 32> Sections; |
158 | int32_t MaxSegIndex; |
159 | }; |
160 | |
161 | /// MachORebaseEntry encapsulates the current state in the decompression of |
162 | /// rebasing opcodes. This allows you to iterate through the compressed table of |
163 | /// rebasing using: |
164 | /// Error Err = Error::success(); |
165 | /// for (const llvm::object::MachORebaseEntry &Entry : Obj->rebaseTable(&Err)) { |
166 | /// } |
167 | /// if (Err) { report error ... |
168 | class MachORebaseEntry { |
169 | public: |
170 | MachORebaseEntry(Error *Err, const MachOObjectFile *O, |
171 | ArrayRef<uint8_t> opcodes, bool is64Bit); |
172 | |
173 | int32_t segmentIndex() const; |
174 | uint64_t segmentOffset() const; |
175 | StringRef typeName() const; |
176 | StringRef segmentName() const; |
177 | StringRef sectionName() const; |
178 | uint64_t address() const; |
179 | |
180 | bool operator==(const MachORebaseEntry &) const; |
181 | |
182 | void moveNext(); |
183 | |
184 | private: |
185 | friend class MachOObjectFile; |
186 | |
187 | void moveToFirst(); |
188 | void moveToEnd(); |
189 | uint64_t readULEB128(const char **error); |
190 | |
191 | Error *E; |
192 | const MachOObjectFile *O; |
193 | ArrayRef<uint8_t> Opcodes; |
194 | const uint8_t *Ptr; |
195 | uint64_t SegmentOffset = 0; |
196 | int32_t SegmentIndex = -1; |
197 | uint64_t RemainingLoopCount = 0; |
198 | uint64_t AdvanceAmount = 0; |
199 | uint8_t RebaseType = 0; |
200 | uint8_t PointerSize; |
201 | bool Done = false; |
202 | }; |
203 | using rebase_iterator = content_iterator<MachORebaseEntry>; |
204 | |
205 | /// MachOBindEntry encapsulates the current state in the decompression of |
206 | /// binding opcodes. This allows you to iterate through the compressed table of |
207 | /// bindings using: |
208 | /// Error Err = Error::success(); |
209 | /// for (const llvm::object::MachOBindEntry &Entry : Obj->bindTable(&Err)) { |
210 | /// } |
211 | /// if (Err) { report error ... |
212 | class MachOBindEntry { |
213 | public: |
214 | enum class Kind { Regular, Lazy, Weak }; |
215 | |
216 | MachOBindEntry(Error *Err, const MachOObjectFile *O, |
217 | ArrayRef<uint8_t> Opcodes, bool is64Bit, MachOBindEntry::Kind); |
218 | |
219 | int32_t segmentIndex() const; |
220 | uint64_t segmentOffset() const; |
221 | StringRef typeName() const; |
222 | StringRef symbolName() const; |
223 | uint32_t flags() const; |
224 | int64_t addend() const; |
225 | int ordinal() const; |
226 | |
227 | StringRef segmentName() const; |
228 | StringRef sectionName() const; |
229 | uint64_t address() const; |
230 | |
231 | bool operator==(const MachOBindEntry &) const; |
232 | |
233 | void moveNext(); |
234 | |
235 | private: |
236 | friend class MachOObjectFile; |
237 | |
238 | void moveToFirst(); |
239 | void moveToEnd(); |
240 | uint64_t readULEB128(const char **error); |
241 | int64_t readSLEB128(const char **error); |
242 | |
243 | Error *E; |
244 | const MachOObjectFile *O; |
245 | ArrayRef<uint8_t> Opcodes; |
246 | const uint8_t *Ptr; |
247 | uint64_t SegmentOffset = 0; |
248 | int32_t SegmentIndex = -1; |
249 | StringRef SymbolName; |
250 | bool LibraryOrdinalSet = false; |
251 | int Ordinal = 0; |
252 | uint32_t Flags = 0; |
253 | int64_t Addend = 0; |
254 | uint64_t RemainingLoopCount = 0; |
255 | uint64_t AdvanceAmount = 0; |
256 | uint8_t BindType = 0; |
257 | uint8_t PointerSize; |
258 | Kind TableKind; |
259 | bool Done = false; |
260 | }; |
261 | using bind_iterator = content_iterator<MachOBindEntry>; |
262 | |
263 | /// ChainedFixupTarget holds all the information about an external symbol |
264 | /// necessary to bind this binary to that symbol. These values are referenced |
265 | /// indirectly by chained fixup binds. This structure captures values from all |
266 | /// import and symbol formats. |
267 | /// |
268 | /// Be aware there are two notions of weak here: |
269 | /// WeakImport == true |
270 | /// The associated bind may be set to 0 if this symbol is missing from its |
271 | /// parent library. This is called a "weak import." |
272 | /// LibOrdinal == BIND_SPECIAL_DYLIB_WEAK_LOOKUP |
273 | /// This symbol may be coalesced with other libraries vending the same |
274 | /// symbol. E.g., C++'s "operator new". This is called a "weak bind." |
275 | struct ChainedFixupTarget { |
276 | public: |
277 | ChainedFixupTarget(int LibOrdinal, uint32_t NameOffset, StringRef Symbol, |
278 | uint64_t Addend, bool WeakImport) |
279 | : LibOrdinal(LibOrdinal), NameOffset(NameOffset), SymbolName(Symbol), |
280 | Addend(Addend), WeakImport(WeakImport) {} |
281 | |
282 | int libOrdinal() { return LibOrdinal; } |
283 | uint32_t nameOffset() { return NameOffset; } |
284 | StringRef symbolName() { return SymbolName; } |
285 | uint64_t addend() { return Addend; } |
286 | bool weakImport() { return WeakImport; } |
287 | bool weakBind() { |
288 | return LibOrdinal == MachO::BIND_SPECIAL_DYLIB_WEAK_LOOKUP; |
289 | } |
290 | |
291 | private: |
292 | int LibOrdinal; |
293 | uint32_t NameOffset; |
294 | StringRef SymbolName; |
295 | uint64_t Addend; |
296 | bool WeakImport; |
297 | }; |
298 | |
299 | struct ChainedFixupsSegment { |
300 | ChainedFixupsSegment(uint8_t SegIdx, uint32_t Offset, |
301 | const MachO::dyld_chained_starts_in_segment &, |
302 | std::vector<uint16_t> &&PageStarts) |
303 | : SegIdx(SegIdx), Offset(Offset), Header(Header), |
304 | PageStarts(PageStarts){}; |
305 | |
306 | uint32_t SegIdx; |
307 | uint32_t Offset; // dyld_chained_starts_in_image::seg_info_offset[SegIdx] |
308 | MachO::dyld_chained_starts_in_segment ; |
309 | std::vector<uint16_t> PageStarts; // page_start[] entries, host endianness |
310 | }; |
311 | |
312 | /// MachOAbstractFixupEntry is an abstract class representing a fixup in a |
313 | /// MH_DYLDLINK file. Fixups generally represent rebases and binds. Binds also |
314 | /// subdivide into additional subtypes (weak, lazy, reexport). |
315 | /// |
316 | /// The two concrete subclasses of MachOAbstractFixupEntry are: |
317 | /// |
318 | /// MachORebaseBindEntry - for dyld opcode-based tables, including threaded- |
319 | /// rebase, where rebases are mixed in with other |
320 | /// bind opcodes. |
321 | /// MachOChainedFixupEntry - for pointer chains embedded in data pages. |
322 | class MachOAbstractFixupEntry { |
323 | public: |
324 | MachOAbstractFixupEntry(Error *Err, const MachOObjectFile *O); |
325 | |
326 | int32_t segmentIndex() const; |
327 | uint64_t segmentOffset() const; |
328 | uint64_t segmentAddress() const; |
329 | StringRef segmentName() const; |
330 | StringRef sectionName() const; |
331 | StringRef typeName() const; |
332 | StringRef symbolName() const; |
333 | uint32_t flags() const; |
334 | int64_t addend() const; |
335 | int ordinal() const; |
336 | |
337 | /// \return the location of this fixup as a VM Address. For the VM |
338 | /// Address this fixup is pointing to, use pointerValue(). |
339 | uint64_t address() const; |
340 | |
341 | /// \return the VM Address pointed to by this fixup. Use |
342 | /// pointerValue() to compare against other VM Addresses, such as |
343 | /// section addresses or segment vmaddrs. |
344 | uint64_t pointerValue() const { return PointerValue; } |
345 | |
346 | /// \return the raw "on-disk" representation of the fixup. For |
347 | /// Threaded rebases and Chained pointers these values are generally |
348 | /// encoded into various different pointer formats. This value is |
349 | /// exposed in API for tools that want to display and annotate the |
350 | /// raw bits. |
351 | uint64_t rawValue() const { return RawValue; } |
352 | |
353 | void moveNext(); |
354 | |
355 | protected: |
356 | Error *E; |
357 | const MachOObjectFile *O; |
358 | uint64_t SegmentOffset = 0; |
359 | int32_t SegmentIndex = -1; |
360 | StringRef SymbolName; |
361 | int32_t Ordinal = 0; |
362 | uint32_t Flags = 0; |
363 | int64_t Addend = 0; |
364 | uint64_t PointerValue = 0; |
365 | uint64_t RawValue = 0; |
366 | bool Done = false; |
367 | |
368 | void moveToFirst(); |
369 | void moveToEnd(); |
370 | |
371 | /// \return the vm address of the start of __TEXT segment. |
372 | uint64_t textAddress() const { return TextAddress; } |
373 | |
374 | private: |
375 | uint64_t TextAddress; |
376 | }; |
377 | |
378 | class MachOChainedFixupEntry : public MachOAbstractFixupEntry { |
379 | public: |
380 | enum class FixupKind { Bind, Rebase }; |
381 | |
382 | MachOChainedFixupEntry(Error *Err, const MachOObjectFile *O, bool Parse); |
383 | |
384 | bool operator==(const MachOChainedFixupEntry &) const; |
385 | |
386 | bool isBind() const { return Kind == FixupKind::Bind; } |
387 | bool isRebase() const { return Kind == FixupKind::Rebase; } |
388 | |
389 | void moveNext(); |
390 | void moveToFirst(); |
391 | void moveToEnd(); |
392 | |
393 | private: |
394 | void findNextPageWithFixups(); |
395 | |
396 | std::vector<ChainedFixupTarget> FixupTargets; |
397 | std::vector<ChainedFixupsSegment> Segments; |
398 | ArrayRef<uint8_t> SegmentData; |
399 | FixupKind Kind; |
400 | uint32_t InfoSegIndex = 0; // Index into Segments |
401 | uint32_t PageIndex = 0; // Index into Segments[InfoSegIdx].PageStarts |
402 | uint32_t PageOffset = 0; // Page offset of the current fixup |
403 | }; |
404 | using fixup_iterator = content_iterator<MachOChainedFixupEntry>; |
405 | |
406 | class MachOObjectFile : public ObjectFile { |
407 | public: |
408 | struct LoadCommandInfo { |
409 | const char *Ptr; // Where in memory the load command is. |
410 | MachO::load_command C; // The command itself. |
411 | }; |
412 | using LoadCommandList = SmallVector<LoadCommandInfo, 4>; |
413 | using load_command_iterator = LoadCommandList::const_iterator; |
414 | |
415 | static Expected<std::unique_ptr<MachOObjectFile>> |
416 | create(MemoryBufferRef Object, bool IsLittleEndian, bool Is64Bits, |
417 | uint32_t UniversalCputype = 0, uint32_t UniversalIndex = 0, |
418 | size_t MachOFilesetEntryOffset = 0); |
419 | |
420 | static bool isMachOPairedReloc(uint64_t RelocType, uint64_t Arch); |
421 | |
422 | void moveSymbolNext(DataRefImpl &Symb) const override; |
423 | |
424 | uint64_t getNValue(DataRefImpl Sym) const; |
425 | Expected<StringRef> getSymbolName(DataRefImpl Symb) const override; |
426 | |
427 | // MachO specific. |
428 | Error checkSymbolTable() const; |
429 | |
430 | std::error_code getIndirectName(DataRefImpl Symb, StringRef &Res) const; |
431 | unsigned getSectionType(SectionRef Sec) const; |
432 | |
433 | Expected<uint64_t> getSymbolAddress(DataRefImpl Symb) const override; |
434 | uint32_t getSymbolAlignment(DataRefImpl Symb) const override; |
435 | uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override; |
436 | Expected<SymbolRef::Type> getSymbolType(DataRefImpl Symb) const override; |
437 | Expected<uint32_t> getSymbolFlags(DataRefImpl Symb) const override; |
438 | Expected<section_iterator> getSymbolSection(DataRefImpl Symb) const override; |
439 | unsigned getSymbolSectionID(SymbolRef Symb) const; |
440 | unsigned getSectionID(SectionRef Sec) const; |
441 | |
442 | void moveSectionNext(DataRefImpl &Sec) const override; |
443 | Expected<StringRef> getSectionName(DataRefImpl Sec) const override; |
444 | uint64_t getSectionAddress(DataRefImpl Sec) const override; |
445 | uint64_t getSectionIndex(DataRefImpl Sec) const override; |
446 | uint64_t getSectionSize(DataRefImpl Sec) const override; |
447 | ArrayRef<uint8_t> getSectionContents(uint32_t Offset, uint64_t Size) const; |
448 | Expected<ArrayRef<uint8_t>> |
449 | getSectionContents(DataRefImpl Sec) const override; |
450 | uint64_t getSectionAlignment(DataRefImpl Sec) const override; |
451 | Expected<SectionRef> getSection(unsigned SectionIndex) const; |
452 | Expected<SectionRef> getSection(StringRef SectionName) const; |
453 | bool isSectionCompressed(DataRefImpl Sec) const override; |
454 | bool isSectionText(DataRefImpl Sec) const override; |
455 | bool isSectionData(DataRefImpl Sec) const override; |
456 | bool isSectionBSS(DataRefImpl Sec) const override; |
457 | bool isSectionVirtual(DataRefImpl Sec) const override; |
458 | bool isSectionBitcode(DataRefImpl Sec) const override; |
459 | bool isDebugSection(DataRefImpl Sec) const override; |
460 | |
461 | /// Return the raw contents of an entire segment. |
462 | ArrayRef<uint8_t> getSegmentContents(StringRef SegmentName) const; |
463 | ArrayRef<uint8_t> getSegmentContents(size_t SegmentIndex) const; |
464 | |
465 | /// When dsymutil generates the companion file, it strips all unnecessary |
466 | /// sections (e.g. everything in the _TEXT segment) by omitting their body |
467 | /// and setting the offset in their corresponding load command to zero. |
468 | /// |
469 | /// While the load command itself is valid, reading the section corresponds |
470 | /// to reading the number of bytes specified in the load command, starting |
471 | /// from offset 0 (i.e. the Mach-O header at the beginning of the file). |
472 | bool isSectionStripped(DataRefImpl Sec) const override; |
473 | |
474 | relocation_iterator section_rel_begin(DataRefImpl Sec) const override; |
475 | relocation_iterator section_rel_end(DataRefImpl Sec) const override; |
476 | |
477 | relocation_iterator extrel_begin() const; |
478 | relocation_iterator extrel_end() const; |
479 | iterator_range<relocation_iterator> external_relocations() const { |
480 | return make_range(x: extrel_begin(), y: extrel_end()); |
481 | } |
482 | |
483 | relocation_iterator locrel_begin() const; |
484 | relocation_iterator locrel_end() const; |
485 | |
486 | void moveRelocationNext(DataRefImpl &Rel) const override; |
487 | uint64_t getRelocationOffset(DataRefImpl Rel) const override; |
488 | symbol_iterator getRelocationSymbol(DataRefImpl Rel) const override; |
489 | section_iterator getRelocationSection(DataRefImpl Rel) const; |
490 | uint64_t getRelocationType(DataRefImpl Rel) const override; |
491 | void getRelocationTypeName(DataRefImpl Rel, |
492 | SmallVectorImpl<char> &Result) const override; |
493 | uint8_t getRelocationLength(DataRefImpl Rel) const; |
494 | |
495 | // MachO specific. |
496 | std::error_code getLibraryShortNameByIndex(unsigned Index, StringRef &) const; |
497 | uint32_t getLibraryCount() const; |
498 | |
499 | section_iterator getRelocationRelocatedSection(relocation_iterator Rel) const; |
500 | |
501 | // TODO: Would be useful to have an iterator based version |
502 | // of the load command interface too. |
503 | |
504 | basic_symbol_iterator symbol_begin() const override; |
505 | basic_symbol_iterator symbol_end() const override; |
506 | |
507 | bool is64Bit() const override; |
508 | |
509 | // MachO specific. |
510 | symbol_iterator getSymbolByIndex(unsigned Index) const; |
511 | uint64_t getSymbolIndex(DataRefImpl Symb) const; |
512 | |
513 | section_iterator section_begin() const override; |
514 | section_iterator section_end() const override; |
515 | |
516 | uint8_t getBytesInAddress() const override; |
517 | |
518 | StringRef getFileFormatName() const override; |
519 | Triple::ArchType getArch() const override; |
520 | Expected<SubtargetFeatures> getFeatures() const override { |
521 | return SubtargetFeatures(); |
522 | } |
523 | Triple getArchTriple(const char **McpuDefault = nullptr) const; |
524 | |
525 | relocation_iterator section_rel_begin(unsigned Index) const; |
526 | relocation_iterator section_rel_end(unsigned Index) const; |
527 | |
528 | dice_iterator begin_dices() const; |
529 | dice_iterator end_dices() const; |
530 | |
531 | load_command_iterator begin_load_commands() const; |
532 | load_command_iterator end_load_commands() const; |
533 | iterator_range<load_command_iterator> load_commands() const; |
534 | |
535 | /// For use iterating over all exported symbols. |
536 | iterator_range<export_iterator> exports(Error &Err) const; |
537 | |
538 | /// For use examining a trie not in a MachOObjectFile. |
539 | static iterator_range<export_iterator> exports(Error &Err, |
540 | ArrayRef<uint8_t> Trie, |
541 | const MachOObjectFile *O = |
542 | nullptr); |
543 | |
544 | /// For use iterating over all rebase table entries. |
545 | iterator_range<rebase_iterator> rebaseTable(Error &Err); |
546 | |
547 | /// For use examining rebase opcodes in a MachOObjectFile. |
548 | static iterator_range<rebase_iterator> rebaseTable(Error &Err, |
549 | MachOObjectFile *O, |
550 | ArrayRef<uint8_t> Opcodes, |
551 | bool is64); |
552 | |
553 | /// For use iterating over all bind table entries. |
554 | iterator_range<bind_iterator> bindTable(Error &Err); |
555 | |
556 | /// For iterating over all chained fixups. |
557 | iterator_range<fixup_iterator> fixupTable(Error &Err); |
558 | |
559 | /// For use iterating over all lazy bind table entries. |
560 | iterator_range<bind_iterator> lazyBindTable(Error &Err); |
561 | |
562 | /// For use iterating over all weak bind table entries. |
563 | iterator_range<bind_iterator> weakBindTable(Error &Err); |
564 | |
565 | /// For use examining bind opcodes in a MachOObjectFile. |
566 | static iterator_range<bind_iterator> bindTable(Error &Err, |
567 | MachOObjectFile *O, |
568 | ArrayRef<uint8_t> Opcodes, |
569 | bool is64, |
570 | MachOBindEntry::Kind); |
571 | |
572 | // Given a SegIndex, SegOffset, and PointerSize, verify a valid section exists |
573 | // that fully contains a pointer at that location. Multiple fixups in a bind |
574 | // (such as with the BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB opcode) can |
575 | // be tested via the Count and Skip parameters. |
576 | // |
577 | // This is used by MachOBindEntry::moveNext() to validate a MachOBindEntry. |
578 | const char *BindEntryCheckSegAndOffsets(int32_t SegIndex, uint64_t SegOffset, |
579 | uint8_t PointerSize, uint32_t Count=1, |
580 | uint32_t Skip=0) const { |
581 | return BindRebaseSectionTable->checkSegAndOffsets(SegIndex, SegOffset, |
582 | PointerSize, Count, Skip); |
583 | } |
584 | |
585 | // Given a SegIndex, SegOffset, and PointerSize, verify a valid section exists |
586 | // that fully contains a pointer at that location. Multiple fixups in a rebase |
587 | // (such as with the REBASE_OPCODE_DO_*_TIMES* opcodes) can be tested via the |
588 | // Count and Skip parameters. |
589 | // |
590 | // This is used by MachORebaseEntry::moveNext() to validate a MachORebaseEntry |
591 | const char *RebaseEntryCheckSegAndOffsets(int32_t SegIndex, |
592 | uint64_t SegOffset, |
593 | uint8_t PointerSize, |
594 | uint32_t Count=1, |
595 | uint32_t Skip=0) const { |
596 | return BindRebaseSectionTable->checkSegAndOffsets(SegIndex, SegOffset, |
597 | PointerSize, Count, Skip); |
598 | } |
599 | |
600 | /// For use with the SegIndex of a checked Mach-O Bind or Rebase entry to |
601 | /// get the segment name. |
602 | StringRef BindRebaseSegmentName(int32_t SegIndex) const { |
603 | return BindRebaseSectionTable->segmentName(SegIndex); |
604 | } |
605 | |
606 | /// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or |
607 | /// Rebase entry to get the section name. |
608 | StringRef BindRebaseSectionName(uint32_t SegIndex, uint64_t SegOffset) const { |
609 | return BindRebaseSectionTable->sectionName(SegIndex, SegOffset); |
610 | } |
611 | |
612 | /// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or |
613 | /// Rebase entry to get the address. |
614 | uint64_t BindRebaseAddress(uint32_t SegIndex, uint64_t SegOffset) const { |
615 | return BindRebaseSectionTable->address(SegIndex, SegOffset); |
616 | } |
617 | |
618 | // In a MachO file, sections have a segment name. This is used in the .o |
619 | // files. They have a single segment, but this field specifies which segment |
620 | // a section should be put in the final object. |
621 | StringRef getSectionFinalSegmentName(DataRefImpl Sec) const; |
622 | |
623 | // Names are stored as 16 bytes. These returns the raw 16 bytes without |
624 | // interpreting them as a C string. |
625 | ArrayRef<char> getSectionRawName(DataRefImpl Sec) const; |
626 | ArrayRef<char> getSectionRawFinalSegmentName(DataRefImpl Sec) const; |
627 | |
628 | // MachO specific Info about relocations. |
629 | bool isRelocationScattered(const MachO::any_relocation_info &RE) const; |
630 | unsigned getPlainRelocationSymbolNum( |
631 | const MachO::any_relocation_info &RE) const; |
632 | bool getPlainRelocationExternal(const MachO::any_relocation_info &RE) const; |
633 | bool getScatteredRelocationScattered( |
634 | const MachO::any_relocation_info &RE) const; |
635 | uint32_t getScatteredRelocationValue( |
636 | const MachO::any_relocation_info &RE) const; |
637 | uint32_t getScatteredRelocationType( |
638 | const MachO::any_relocation_info &RE) const; |
639 | unsigned getAnyRelocationAddress(const MachO::any_relocation_info &RE) const; |
640 | unsigned getAnyRelocationPCRel(const MachO::any_relocation_info &RE) const; |
641 | unsigned getAnyRelocationLength(const MachO::any_relocation_info &RE) const; |
642 | unsigned getAnyRelocationType(const MachO::any_relocation_info &RE) const; |
643 | SectionRef getAnyRelocationSection(const MachO::any_relocation_info &RE) const; |
644 | |
645 | // MachO specific structures. |
646 | MachO::section getSection(DataRefImpl DRI) const; |
647 | MachO::section_64 getSection64(DataRefImpl DRI) const; |
648 | MachO::section getSection(const LoadCommandInfo &L, unsigned Index) const; |
649 | MachO::section_64 getSection64(const LoadCommandInfo &L,unsigned Index) const; |
650 | MachO::nlist getSymbolTableEntry(DataRefImpl DRI) const; |
651 | MachO::nlist_64 getSymbol64TableEntry(DataRefImpl DRI) const; |
652 | |
653 | MachO::linkedit_data_command |
654 | getLinkeditDataLoadCommand(const LoadCommandInfo &L) const; |
655 | MachO::segment_command |
656 | getSegmentLoadCommand(const LoadCommandInfo &L) const; |
657 | MachO::segment_command_64 |
658 | getSegment64LoadCommand(const LoadCommandInfo &L) const; |
659 | MachO::linker_option_command |
660 | getLinkerOptionLoadCommand(const LoadCommandInfo &L) const; |
661 | MachO::version_min_command |
662 | getVersionMinLoadCommand(const LoadCommandInfo &L) const; |
663 | MachO::note_command |
664 | getNoteLoadCommand(const LoadCommandInfo &L) const; |
665 | MachO::build_version_command |
666 | getBuildVersionLoadCommand(const LoadCommandInfo &L) const; |
667 | MachO::build_tool_version |
668 | getBuildToolVersion(unsigned index) const; |
669 | MachO::dylib_command |
670 | getDylibIDLoadCommand(const LoadCommandInfo &L) const; |
671 | MachO::dyld_info_command |
672 | getDyldInfoLoadCommand(const LoadCommandInfo &L) const; |
673 | MachO::dylinker_command |
674 | getDylinkerCommand(const LoadCommandInfo &L) const; |
675 | MachO::uuid_command |
676 | getUuidCommand(const LoadCommandInfo &L) const; |
677 | MachO::rpath_command |
678 | getRpathCommand(const LoadCommandInfo &L) const; |
679 | MachO::source_version_command |
680 | getSourceVersionCommand(const LoadCommandInfo &L) const; |
681 | MachO::entry_point_command |
682 | getEntryPointCommand(const LoadCommandInfo &L) const; |
683 | MachO::encryption_info_command |
684 | getEncryptionInfoCommand(const LoadCommandInfo &L) const; |
685 | MachO::encryption_info_command_64 |
686 | getEncryptionInfoCommand64(const LoadCommandInfo &L) const; |
687 | MachO::sub_framework_command |
688 | getSubFrameworkCommand(const LoadCommandInfo &L) const; |
689 | MachO::sub_umbrella_command |
690 | getSubUmbrellaCommand(const LoadCommandInfo &L) const; |
691 | MachO::sub_library_command |
692 | getSubLibraryCommand(const LoadCommandInfo &L) const; |
693 | MachO::sub_client_command |
694 | getSubClientCommand(const LoadCommandInfo &L) const; |
695 | MachO::routines_command |
696 | getRoutinesCommand(const LoadCommandInfo &L) const; |
697 | MachO::routines_command_64 |
698 | getRoutinesCommand64(const LoadCommandInfo &L) const; |
699 | MachO::thread_command |
700 | getThreadCommand(const LoadCommandInfo &L) const; |
701 | MachO::fileset_entry_command |
702 | getFilesetEntryLoadCommand(const LoadCommandInfo &L) const; |
703 | |
704 | MachO::any_relocation_info getRelocation(DataRefImpl Rel) const; |
705 | MachO::data_in_code_entry getDice(DataRefImpl Rel) const; |
706 | const MachO::mach_header &() const; |
707 | const MachO::mach_header_64 &() const; |
708 | uint32_t |
709 | getIndirectSymbolTableEntry(const MachO::dysymtab_command &DLC, |
710 | unsigned Index) const; |
711 | MachO::data_in_code_entry getDataInCodeTableEntry(uint32_t DataOffset, |
712 | unsigned Index) const; |
713 | MachO::symtab_command getSymtabLoadCommand() const; |
714 | MachO::dysymtab_command getDysymtabLoadCommand() const; |
715 | MachO::linkedit_data_command getDataInCodeLoadCommand() const; |
716 | MachO::linkedit_data_command getLinkOptHintsLoadCommand() const; |
717 | ArrayRef<uint8_t> getDyldInfoRebaseOpcodes() const; |
718 | ArrayRef<uint8_t> getDyldInfoBindOpcodes() const; |
719 | ArrayRef<uint8_t> getDyldInfoWeakBindOpcodes() const; |
720 | ArrayRef<uint8_t> getDyldInfoLazyBindOpcodes() const; |
721 | ArrayRef<uint8_t> getDyldInfoExportsTrie() const; |
722 | |
723 | /// If the optional is std::nullopt, no header was found, but the object was |
724 | /// well-formed. |
725 | Expected<std::optional<MachO::dyld_chained_fixups_header>> |
726 | () const; |
727 | Expected<std::vector<ChainedFixupTarget>> getDyldChainedFixupTargets() const; |
728 | |
729 | // Note: This is a limited, temporary API, which will be removed when Apple |
730 | // upstreams their implementation. Please do not rely on this. |
731 | Expected<std::optional<MachO::linkedit_data_command>> |
732 | getChainedFixupsLoadCommand() const; |
733 | // Returns the number of sections listed in dyld_chained_starts_in_image, and |
734 | // a ChainedFixupsSegment for each segment that has fixups. |
735 | Expected<std::pair<size_t, std::vector<ChainedFixupsSegment>>> |
736 | getChainedFixupsSegments() const; |
737 | ArrayRef<uint8_t> getDyldExportsTrie() const; |
738 | |
// Returns a list of 64-bit values by value.
// NOTE(review): presumably decoded from the function-starts load command --
// confirm against the implementation.
SmallVector<uint64_t> getFunctionStarts() const;
// Returns an ArrayRef<uint8_t>.
// NOTE(review): presumably the UUID bytes from the UUID load command -- confirm.
ArrayRef<uint8_t> getUuid() const;

// Returns the string table contents as a StringRef.
StringRef getStringTableData() const;

// Appends results to the caller-supplied vector Out.
// NOTE(review): what Index is relative to is not visible here -- confirm.
void ReadULEB128s(uint64_t Index, SmallVectorImpl<uint64_t> &Out) const;

// isFramework and Suffix are passed by non-const reference, so the callee can
// write to them in addition to returning a StringRef.
static StringRef guessLibraryShortName(StringRef Name, bool &isFramework,
StringRef &Suffix);

// Static helpers keyed by a Mach-O CPU type / subtype pair or an arch flag.
// McpuDefault and ArchFlag default to nullptr.
// NOTE(review): they look like optional out-parameters -- confirm.
static Triple::ArchType getArch(uint32_t CPUType, uint32_t CPUSubType);
static Triple getArchTriple(uint32_t CPUType, uint32_t CPUSubType,
const char **McpuDefault = nullptr,
const char **ArchFlag = nullptr);
static bool isValidArch(StringRef ArchFlag);
static ArrayRef<StringRef> getValidArchs();
static Triple getHostArch();
756 | |
// The three declarations below are marked `override`: they replace virtuals
// declared in a base class (the base class itself is outside this chunk).
bool isRelocatableObject() const override;

// Maps a debug section name to another name, returned as a StringRef.
StringRef mapDebugSectionName(StringRef Name) const override;

// Maps a section name to a Swift5ReflectionSectionKind enumerator.
llvm::binaryformat::Swift5ReflectionSectionKind
mapReflectionSectionNameToEnumValue(StringRef SectionName) const override;
763 | |
764 | bool hasPageZeroSegment() const { return HasPageZeroSegment; } |
765 | |
766 | size_t getMachOFilesetEntryOffset() const { return MachOFilesetEntryOffset; } |
767 | |
768 | static bool classof(const Binary *v) { |
769 | return v->isMachO(); |
770 | } |
771 | |
772 | static uint32_t |
773 | getVersionMinMajor(MachO::version_min_command &C, bool SDK) { |
774 | uint32_t VersionOrSDK = (SDK) ? C.sdk : C.version; |
775 | return (VersionOrSDK >> 16) & 0xffff; |
776 | } |
777 | |
778 | static uint32_t |
779 | getVersionMinMinor(MachO::version_min_command &C, bool SDK) { |
780 | uint32_t VersionOrSDK = (SDK) ? C.sdk : C.version; |
781 | return (VersionOrSDK >> 8) & 0xff; |
782 | } |
783 | |
784 | static uint32_t |
785 | getVersionMinUpdate(MachO::version_min_command &C, bool SDK) { |
786 | uint32_t VersionOrSDK = (SDK) ? C.sdk : C.version; |
787 | return VersionOrSDK & 0xff; |
788 | } |
789 | |
790 | static std::string getBuildPlatform(uint32_t platform) { |
791 | switch (platform) { |
792 | #define PLATFORM(platform, id, name, build_name, target, tapi_target, \ |
793 | marketing) \ |
794 | case MachO::PLATFORM_##platform: \ |
795 | return #name; |
796 | #include "llvm/BinaryFormat/MachO.def" |
797 | default: |
798 | std::string ret; |
799 | raw_string_ostream ss(ret); |
800 | ss << format_hex(N: platform, Width: 8, Upper: true); |
801 | return ss.str(); |
802 | } |
803 | } |
804 | |
805 | static std::string getBuildTool(uint32_t tools) { |
806 | switch (tools) { |
807 | case MachO::TOOL_CLANG: return "clang" ; |
808 | case MachO::TOOL_SWIFT: return "swift" ; |
809 | case MachO::TOOL_LD: return "ld" ; |
810 | case MachO::TOOL_LLD: |
811 | return "lld" ; |
812 | default: |
813 | std::string ret; |
814 | raw_string_ostream ss(ret); |
815 | ss << format_hex(N: tools, Width: 8, Upper: true); |
816 | return ss.str(); |
817 | } |
818 | } |
819 | |
/// Formats a packed version number as "major.minor[.update]".
///
/// The packed layout is: major in bits 16-31, minor in bits 8-15, update in
/// bits 0-7. The ".update" suffix is emitted only when update is non-zero.
///
/// \param version the packed 32-bit version value.
/// \returns the decimal dotted representation.
static std::string getVersionString(uint32_t version) {
  const uint32_t Major = (version >> 16) & 0xffff;
  const uint32_t Minor = (version >> 8) & 0xff;
  const uint32_t Update = version & 0xff;

  // Build the result directly as a std::string; no intermediate buffer or
  // re-conversion is needed.
  std::string Version = std::to_string(Major) + "." + std::to_string(Minor);
  if (Update != 0)
    Version += "." + std::to_string(Update);
  return Version;
}
831 | |
832 | /// If the input path is a .dSYM bundle (as created by the dsymutil tool), |
833 | /// return the paths to the object files found in the bundle, otherwise return |
834 | /// an empty vector. If the path appears to be a .dSYM bundle but no objects |
835 | /// were found or there was a filesystem error, then return an error. |
836 | static Expected<std::vector<std::string>> |
837 | findDsymObjectMembers(StringRef Path); |
838 | |
839 | private: |
// Private constructor. Takes an Error by reference in addition to the buffer
// and format flags; the last three parameters default to 0.
// NOTE(review): presumably construction failures are reported through Err and
// callers go through a public factory -- confirm against the implementation.
MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, bool Is64Bits,
Error &Err, uint32_t UniversalCputype = 0,
uint32_t UniversalIndex = 0,
size_t MachOFilesetEntryOffset = 0);

// Override of a base-class virtual; returns a 64-bit value for the symbol
// identified by Symb.
uint64_t getSymbolValueImpl(DataRefImpl Symb) const override;
846 | |
847 | union { |
848 | MachO::mach_header_64 ; |
849 | MachO::mach_header ; |
850 | }; |
// Per-file lists. Sections, Libraries and BuildTools hold `const char *`
// elements.
// NOTE(review): presumably these point into the mapped file buffer -- confirm.
using SectionList = SmallVector<const char*, 1>;
SectionList Sections;
using LibraryList = SmallVector<const char*, 1>;
LibraryList Libraries;
LoadCommandList LoadCommands;
using LibraryShortName = SmallVector<StringRef, 1>;
using BuildToolList = SmallVector<const char*, 1>;
BuildToolList BuildTools;
// Declared mutable, so const member functions may modify it.
// NOTE(review): looks like a lazily filled cache -- confirm.
mutable LibraryShortName LibrariesShortNames;
std::unique_ptr<BindRebaseSegInfo> BindRebaseSectionTable;
// One pointer per load command of interest; each starts out as nullptr.
const char *SymtabLoadCmd = nullptr;
const char *DysymtabLoadCmd = nullptr;
const char *DataInCodeLoadCmd = nullptr;
const char *LinkOptHintsLoadCmd = nullptr;
const char *DyldInfoLoadCmd = nullptr;
const char *FuncStartsLoadCmd = nullptr;
const char *DyldChainedFixupsLoadCmd = nullptr;
const char *DyldExportsTrieLoadCmd = nullptr;
const char *UuidLoadCmd = nullptr;
// Value returned by hasPageZeroSegment(); false by default.
bool HasPageZeroSegment = false;
// Value returned by getMachOFilesetEntryOffset(); 0 by default.
size_t MachOFilesetEntryOffset = 0;
872 | }; |
873 | |
874 | /// DiceRef |
875 | inline DiceRef::DiceRef(DataRefImpl DiceP, const ObjectFile *Owner) |
876 | : DicePimpl(DiceP) , OwningObject(Owner) {} |
877 | |
878 | inline bool DiceRef::operator==(const DiceRef &Other) const { |
879 | return DicePimpl == Other.DicePimpl; |
880 | } |
881 | |
882 | inline bool DiceRef::operator<(const DiceRef &Other) const { |
883 | return DicePimpl < Other.DicePimpl; |
884 | } |
885 | |
886 | inline void DiceRef::moveNext() { |
887 | const MachO::data_in_code_entry *P = |
888 | reinterpret_cast<const MachO::data_in_code_entry *>(DicePimpl.p); |
889 | DicePimpl.p = reinterpret_cast<uintptr_t>(P + 1); |
890 | } |
891 | |
// A DiceRef (a Mach-O data-in-code reference) can only be created when its
// OwningObject is a MachOObjectFile, so the methods that read the fields of
// the reference use static_cast<> to recover the MachOObjectFile.
895 | |
896 | inline std::error_code DiceRef::getOffset(uint32_t &Result) const { |
897 | const MachOObjectFile *MachOOF = |
898 | static_cast<const MachOObjectFile *>(OwningObject); |
899 | MachO::data_in_code_entry Dice = MachOOF->getDice(Rel: DicePimpl); |
900 | Result = Dice.offset; |
901 | return std::error_code(); |
902 | } |
903 | |
904 | inline std::error_code DiceRef::getLength(uint16_t &Result) const { |
905 | const MachOObjectFile *MachOOF = |
906 | static_cast<const MachOObjectFile *>(OwningObject); |
907 | MachO::data_in_code_entry Dice = MachOOF->getDice(Rel: DicePimpl); |
908 | Result = Dice.length; |
909 | return std::error_code(); |
910 | } |
911 | |
912 | inline std::error_code DiceRef::getKind(uint16_t &Result) const { |
913 | const MachOObjectFile *MachOOF = |
914 | static_cast<const MachOObjectFile *>(OwningObject); |
915 | MachO::data_in_code_entry Dice = MachOOF->getDice(Rel: DicePimpl); |
916 | Result = Dice.kind; |
917 | return std::error_code(); |
918 | } |
919 | |
920 | inline DataRefImpl DiceRef::getRawDataRefImpl() const { |
921 | return DicePimpl; |
922 | } |
923 | |
924 | inline const ObjectFile *DiceRef::getObjectFile() const { |
925 | return OwningObject; |
926 | } |
927 | |
928 | } // end namespace object |
929 | } // end namespace llvm |
930 | |
931 | #endif // LLVM_OBJECT_MACHO_H |
932 | |