1 | //===- InputSection.cpp ---------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "InputSection.h" |
10 | #include "ConcatOutputSection.h" |
11 | #include "Config.h" |
12 | #include "InputFiles.h" |
13 | #include "OutputSegment.h" |
14 | #include "Symbols.h" |
15 | #include "SyntheticSections.h" |
16 | #include "Target.h" |
17 | #include "UnwindInfoSection.h" |
18 | #include "Writer.h" |
19 | |
20 | #include "lld/Common/ErrorHandler.h" |
21 | #include "lld/Common/Memory.h" |
22 | #include "llvm/Support/Endian.h" |
23 | #include "llvm/Support/xxhash.h" |
24 | |
25 | using namespace llvm; |
26 | using namespace llvm::MachO; |
27 | using namespace llvm::support; |
28 | using namespace lld; |
29 | using namespace lld::macho; |
30 | |
31 | // Verify ConcatInputSection's size on 64-bit builds. The size of std::vector |
32 | // can differ based on STL debug levels (e.g. iterator debugging on MSVC's STL), |
33 | // so account for that. |
34 | static_assert(sizeof(void *) != 8 || |
35 | sizeof(ConcatInputSection) == sizeof(std::vector<Reloc>) + 88, |
36 | "Try to minimize ConcatInputSection's size, we create many " |
37 | "instances of it" ); |
38 | |
39 | std::vector<ConcatInputSection *> macho::inputSections; |
40 | |
41 | uint64_t InputSection::getFileSize() const { |
42 | return isZeroFill(flags: getFlags()) ? 0 : getSize(); |
43 | } |
44 | |
45 | uint64_t InputSection::getVA(uint64_t off) const { |
46 | return parent->addr + getOffset(off); |
47 | } |
48 | |
49 | static uint64_t resolveSymbolVA(const Symbol *sym, uint8_t type) { |
50 | const RelocAttrs &relocAttrs = target->getRelocAttrs(type); |
51 | if (relocAttrs.hasAttr(b: RelocAttrBits::BRANCH)) |
52 | return sym->resolveBranchVA(); |
53 | if (relocAttrs.hasAttr(b: RelocAttrBits::GOT)) |
54 | return sym->resolveGotVA(); |
55 | if (relocAttrs.hasAttr(b: RelocAttrBits::TLV)) |
56 | return sym->resolveTlvVA(); |
57 | return sym->getVA(); |
58 | } |
59 | |
60 | const Defined *InputSection::getContainingSymbol(uint64_t off) const { |
61 | auto *nextSym = llvm::upper_bound( |
62 | Range: symbols, Value&: off, C: [](uint64_t a, const Defined *b) { return a < b->value; }); |
63 | if (nextSym == symbols.begin()) |
64 | return nullptr; |
65 | return *std::prev(x: nextSym); |
66 | } |
67 | |
68 | std::string InputSection::getLocation(uint64_t off) const { |
69 | // First, try to find a symbol that's near the offset. Use it as a reference |
70 | // point. |
71 | if (auto *sym = getContainingSymbol(off)) |
72 | return (toString(file: getFile()) + ":(symbol " + toString(*sym) + "+0x" + |
73 | Twine::utohexstr(Val: off - sym->value) + ")" ) |
74 | .str(); |
75 | |
76 | // If that fails, use the section itself as a reference point. |
77 | for (const Subsection &subsec : section.subsections) { |
78 | if (subsec.isec == this) { |
79 | off += subsec.offset; |
80 | break; |
81 | } |
82 | } |
83 | |
84 | return (toString(file: getFile()) + ":(" + getName() + "+0x" + |
85 | Twine::utohexstr(Val: off) + ")" ) |
86 | .str(); |
87 | } |
88 | |
89 | std::string InputSection::getSourceLocation(uint64_t off) const { |
90 | auto *obj = dyn_cast_or_null<ObjFile>(Val: getFile()); |
91 | if (!obj) |
92 | return {}; |
93 | |
94 | DWARFCache *dwarf = obj->getDwarf(); |
95 | if (!dwarf) |
96 | return std::string(); |
97 | |
98 | for (const Subsection &subsec : section.subsections) { |
99 | if (subsec.isec == this) { |
100 | off += subsec.offset; |
101 | break; |
102 | } |
103 | } |
104 | |
105 | auto createMsg = [&](StringRef path, unsigned line) { |
106 | std::string filename = sys::path::filename(path).str(); |
107 | std::string lineStr = (":" + Twine(line)).str(); |
108 | if (filename == path) |
109 | return filename + lineStr; |
110 | return (filename + lineStr + " (" + path + lineStr + ")" ).str(); |
111 | }; |
112 | |
113 | // First, look up a function for a given offset. |
114 | if (std::optional<DILineInfo> li = dwarf->getDILineInfo( |
115 | offset: section.addr + off, sectionIndex: object::SectionedAddress::UndefSection)) |
116 | return createMsg(li->FileName, li->Line); |
117 | |
118 | // If it failed, look up again as a variable. |
119 | if (const Defined *sym = getContainingSymbol(off)) { |
120 | // Symbols are generally prefixed with an underscore, which is not included |
121 | // in the debug information. |
122 | StringRef symName = sym->getName(); |
123 | if (!symName.empty() && symName[0] == '_') |
124 | symName = symName.substr(Start: 1); |
125 | |
126 | if (std::optional<std::pair<std::string, unsigned>> fileLine = |
127 | dwarf->getVariableLoc(name: symName)) |
128 | return createMsg(fileLine->first, fileLine->second); |
129 | } |
130 | |
131 | // Try to get the source file's name from the DWARF information. |
132 | if (obj->compileUnit) |
133 | return obj->sourceFile(); |
134 | |
135 | return {}; |
136 | } |
137 | |
138 | const Reloc *InputSection::getRelocAt(uint32_t off) const { |
139 | auto it = llvm::find_if( |
140 | Range: relocs, P: [=](const macho::Reloc &r) { return r.offset == off; }); |
141 | if (it == relocs.end()) |
142 | return nullptr; |
143 | return &*it; |
144 | } |
145 | |
146 | void ConcatInputSection::foldIdentical(ConcatInputSection *copy) { |
147 | align = std::max(a: align, b: copy->align); |
148 | copy->live = false; |
149 | copy->wasCoalesced = true; |
150 | copy->replacement = this; |
151 | for (auto ©Sym : copy->symbols) { |
152 | copySym->wasIdenticalCodeFolded = true; |
153 | copySym->size = 0; |
154 | } |
155 | |
156 | symbols.insert(I: symbols.end(), From: copy->symbols.begin(), To: copy->symbols.end()); |
157 | copy->symbols.clear(); |
158 | |
159 | // Remove duplicate compact unwind info for symbols at the same address. |
160 | if (symbols.empty()) |
161 | return; |
162 | for (auto it = symbols.begin() + 1; it != symbols.end(); ++it) { |
163 | assert((*it)->value == 0); |
164 | (*it)->unwindEntry = nullptr; |
165 | } |
166 | } |
167 | |
168 | void ConcatInputSection::writeTo(uint8_t *buf) { |
169 | assert(!shouldOmitFromOutput()); |
170 | |
171 | if (getFileSize() == 0) |
172 | return; |
173 | |
174 | memcpy(dest: buf, src: data.data(), n: data.size()); |
175 | |
176 | for (size_t i = 0; i < relocs.size(); i++) { |
177 | const Reloc &r = relocs[i]; |
178 | uint8_t *loc = buf + r.offset; |
179 | uint64_t referentVA = 0; |
180 | |
181 | const bool needsFixup = config->emitChainedFixups && |
182 | target->hasAttr(type: r.type, bit: RelocAttrBits::UNSIGNED); |
183 | if (target->hasAttr(type: r.type, bit: RelocAttrBits::SUBTRAHEND)) { |
184 | const Symbol *fromSym = r.referent.get<Symbol *>(); |
185 | const Reloc &minuend = relocs[++i]; |
186 | uint64_t minuendVA; |
187 | if (const Symbol *toSym = minuend.referent.dyn_cast<Symbol *>()) |
188 | minuendVA = toSym->getVA() + minuend.addend; |
189 | else { |
190 | auto *referentIsec = minuend.referent.get<InputSection *>(); |
191 | assert(!::shouldOmitFromOutput(referentIsec)); |
192 | minuendVA = referentIsec->getVA(off: minuend.addend); |
193 | } |
194 | referentVA = minuendVA - fromSym->getVA(); |
195 | } else if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) { |
196 | if (target->hasAttr(type: r.type, bit: RelocAttrBits::LOAD) && |
197 | !referentSym->isInGot()) |
198 | target->relaxGotLoad(loc, type: r.type); |
199 | // For dtrace symbols, do not handle them as normal undefined symbols |
200 | if (referentSym->getName().starts_with(Prefix: "___dtrace_" )) { |
201 | // Change dtrace call site to pre-defined instructions |
202 | target->handleDtraceReloc(sym: referentSym, r, loc); |
203 | continue; |
204 | } |
205 | referentVA = resolveSymbolVA(sym: referentSym, type: r.type) + r.addend; |
206 | |
207 | if (isThreadLocalVariables(flags: getFlags()) && isa<Defined>(Val: referentSym)) { |
208 | // References from thread-local variable sections are treated as offsets |
209 | // relative to the start of the thread-local data memory area, which |
210 | // is initialized via copying all the TLV data sections (which are all |
211 | // contiguous). |
212 | referentVA -= firstTLVDataSection->addr; |
213 | } else if (needsFixup) { |
214 | writeChainedFixup(buf: loc, sym: referentSym, addend: r.addend); |
215 | continue; |
216 | } |
217 | } else if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) { |
218 | assert(!::shouldOmitFromOutput(referentIsec)); |
219 | referentVA = referentIsec->getVA(off: r.addend); |
220 | |
221 | if (needsFixup) { |
222 | writeChainedRebase(buf: loc, targetVA: referentVA); |
223 | continue; |
224 | } |
225 | } |
226 | target->relocateOne(loc, r, va: referentVA, relocVA: getVA() + r.offset); |
227 | } |
228 | } |
229 | |
230 | ConcatInputSection *macho::makeSyntheticInputSection(StringRef segName, |
231 | StringRef sectName, |
232 | uint32_t flags, |
233 | ArrayRef<uint8_t> data, |
234 | uint32_t align) { |
235 | Section §ion = |
236 | *make<Section>(/*file=*/args: nullptr, args&: segName, args&: sectName, args&: flags, /*addr=*/args: 0); |
237 | auto isec = make<ConcatInputSection>(args&: section, args&: data, args&: align); |
238 | section.subsections.push_back(x: {.offset: 0, .isec: isec}); |
239 | return isec; |
240 | } |
241 | |
242 | void CStringInputSection::splitIntoPieces() { |
243 | size_t off = 0; |
244 | StringRef s = toStringRef(Input: data); |
245 | while (!s.empty()) { |
246 | size_t end = s.find(C: 0); |
247 | if (end == StringRef::npos) |
248 | fatal(msg: getLocation(off) + ": string is not null terminated" ); |
249 | uint32_t hash = deduplicateLiterals ? xxh3_64bits(data: s.take_front(N: end)) : 0; |
250 | pieces.emplace_back(args&: off, args&: hash); |
251 | size_t size = end + 1; // include null terminator |
252 | s = s.substr(Start: size); |
253 | off += size; |
254 | } |
255 | } |
256 | |
257 | StringPiece &CStringInputSection::getStringPiece(uint64_t off) { |
258 | if (off >= data.size()) |
259 | fatal(msg: toString(this) + ": offset is outside the section" ); |
260 | |
261 | auto it = |
262 | partition_point(Range&: pieces, P: [=](StringPiece p) { return p.inSecOff <= off; }); |
263 | return it[-1]; |
264 | } |
265 | |
266 | const StringPiece &CStringInputSection::getStringPiece(uint64_t off) const { |
267 | return const_cast<CStringInputSection *>(this)->getStringPiece(off); |
268 | } |
269 | |
270 | size_t CStringInputSection::getStringPieceIndex(uint64_t off) const { |
271 | if (off >= data.size()) |
272 | fatal(msg: toString(this) + ": offset is outside the section" ); |
273 | |
274 | auto it = |
275 | partition_point(Range: pieces, P: [=](StringPiece p) { return p.inSecOff <= off; }); |
276 | return std::distance(first: pieces.begin(), last: it) - 1; |
277 | } |
278 | |
279 | uint64_t CStringInputSection::getOffset(uint64_t off) const { |
280 | const StringPiece &piece = getStringPiece(off); |
281 | uint64_t addend = off - piece.inSecOff; |
282 | return piece.outSecOff + addend; |
283 | } |
284 | |
285 | WordLiteralInputSection::WordLiteralInputSection(const Section §ion, |
286 | ArrayRef<uint8_t> data, |
287 | uint32_t align) |
288 | : InputSection(WordLiteralKind, section, data, align) { |
289 | switch (sectionType(flags: getFlags())) { |
290 | case S_4BYTE_LITERALS: |
291 | power2LiteralSize = 2; |
292 | break; |
293 | case S_8BYTE_LITERALS: |
294 | power2LiteralSize = 3; |
295 | break; |
296 | case S_16BYTE_LITERALS: |
297 | power2LiteralSize = 4; |
298 | break; |
299 | default: |
300 | llvm_unreachable("invalid literal section type" ); |
301 | } |
302 | |
303 | live.resize(N: data.size() >> power2LiteralSize, t: !config->deadStrip); |
304 | } |
305 | |
306 | uint64_t WordLiteralInputSection::getOffset(uint64_t off) const { |
307 | auto *osec = cast<WordLiteralSection>(Val: parent); |
308 | const uintptr_t buf = reinterpret_cast<uintptr_t>(data.data()); |
309 | switch (sectionType(flags: getFlags())) { |
310 | case S_4BYTE_LITERALS: |
311 | return osec->getLiteral4Offset(buf: buf + (off & ~3LLU)) | (off & 3); |
312 | case S_8BYTE_LITERALS: |
313 | return osec->getLiteral8Offset(buf: buf + (off & ~7LLU)) | (off & 7); |
314 | case S_16BYTE_LITERALS: |
315 | return osec->getLiteral16Offset(buf: buf + (off & ~15LLU)) | (off & 15); |
316 | default: |
317 | llvm_unreachable("invalid literal section type" ); |
318 | } |
319 | } |
320 | |
321 | bool macho::isCodeSection(const InputSection *isec) { |
322 | uint32_t type = sectionType(flags: isec->getFlags()); |
323 | if (type != S_REGULAR && type != S_COALESCED) |
324 | return false; |
325 | |
326 | uint32_t attr = isec->getFlags() & SECTION_ATTRIBUTES_USR; |
327 | if (attr == S_ATTR_PURE_INSTRUCTIONS) |
328 | return true; |
329 | |
330 | if (isec->getSegName() == segment_names::text) |
331 | return StringSwitch<bool>(isec->getName()) |
332 | .Cases(S0: section_names::textCoalNt, S1: section_names::staticInit, Value: true) |
333 | .Default(Value: false); |
334 | |
335 | return false; |
336 | } |
337 | |
338 | bool macho::isCfStringSection(const InputSection *isec) { |
339 | return isec->getName() == section_names::cfString && |
340 | isec->getSegName() == segment_names::data; |
341 | } |
342 | |
343 | bool macho::isClassRefsSection(const InputSection *isec) { |
344 | return isec->getName() == section_names::objcClassRefs && |
345 | isec->getSegName() == segment_names::data; |
346 | } |
347 | |
348 | bool macho::isSelRefsSection(const InputSection *isec) { |
349 | return isec->getName() == section_names::objcSelrefs && |
350 | isec->getSegName() == segment_names::data; |
351 | } |
352 | |
353 | bool macho::isEhFrameSection(const InputSection *isec) { |
354 | return isec->getName() == section_names::ehFrame && |
355 | isec->getSegName() == segment_names::text; |
356 | } |
357 | |
358 | bool macho::isGccExceptTabSection(const InputSection *isec) { |
359 | return isec->getName() == section_names::gccExceptTab && |
360 | isec->getSegName() == segment_names::text; |
361 | } |
362 | |
363 | std::string lld::toString(const InputSection *isec) { |
364 | return (toString(file: isec->getFile()) + ":(" + isec->getName() + ")" ).str(); |
365 | } |
366 | |