1//==- include/llvm/CodeGen/AccelTable.h - Accelerator Tables -----*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file contains support for writing accelerator tables.
10///
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_CODEGEN_ACCELTABLE_H
14#define LLVM_CODEGEN_ACCELTABLE_H
15
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/MapVector.h"
18#include "llvm/ADT/STLFunctionalExtras.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/BinaryFormat/Dwarf.h"
21#include "llvm/CodeGen/DIE.h"
22#include "llvm/CodeGen/DwarfStringPoolEntry.h"
23#include "llvm/Support/Allocator.h"
24#include "llvm/Support/DJB.h"
25#include "llvm/Support/Debug.h"
26#include <cstdint>
27#include <variant>
28#include <vector>
29
30/// \file
31/// The DWARF and Apple accelerator tables are an indirect hash table optimized
32/// for null lookup rather than access to known data. The Apple accelerator
33/// tables are a precursor of the newer DWARF v5 accelerator tables. Both
34/// formats share common design ideas.
35///
36/// The Apple accelerator table are output into an on-disk format that looks
37/// like this:
38///
39/// .------------------.
40/// | HEADER |
41/// |------------------|
42/// | BUCKETS |
43/// |------------------|
44/// | HASHES |
45/// |------------------|
46/// | OFFSETS |
47/// |------------------|
48/// | DATA |
49/// `------------------'
50///
51/// The header contains a magic number, version, type of hash function,
52/// the number of buckets, total number of hashes, and room for a special struct
53/// of data and the length of that struct.
54///
55/// The buckets contain an index (e.g. 6) into the hashes array. The hashes
56/// section contains all of the 32-bit hash values in contiguous memory, and the
57/// offsets contain the offset into the data area for the particular hash.
58///
59/// For a lookup example, we could hash a function name and take it modulo the
60/// number of buckets giving us our bucket. From there we take the bucket value
61/// as an index into the hashes table and look at each successive hash as long
62/// as the hash value is still the same modulo result (bucket value) as earlier.
63/// If we have a match we look at that same entry in the offsets table and grab
64/// the offset in the data for our final match.
65///
66/// The DWARF v5 accelerator table consists of zero or more name indices that
67/// are output into an on-disk format that looks like this:
68///
69/// .------------------.
70/// | HEADER |
71/// |------------------|
72/// | CU LIST |
73/// |------------------|
74/// | LOCAL TU LIST |
75/// |------------------|
76/// | FOREIGN TU LIST |
77/// |------------------|
78/// | HASH TABLE |
79/// |------------------|
80/// | NAME TABLE |
81/// |------------------|
82/// | ABBREV TABLE |
83/// |------------------|
84/// | ENTRY POOL |
85/// `------------------'
86///
87/// For the full documentation please refer to the DWARF 5 standard.
88///
89///
90/// This file defines the class template AccelTable, which is represents an
91/// abstract view of an Accelerator table, without any notion of an on-disk
92/// layout. This class is parameterized by an entry type, which should derive
93/// from AccelTableData. This is the type of individual entries in the table,
94/// and it should store the data necessary to emit them. AppleAccelTableData is
95/// the base class for Apple Accelerator Table entries, which have a uniform
96/// structure based on a sequence of Atoms. There are different sub-classes
97/// derived from AppleAccelTable, which differ in the set of Atoms and how they
98/// obtain their values.
99///
100/// An Apple Accelerator Table can be serialized by calling emitAppleAccelTable
101/// function.
102
103namespace llvm {
104
105class AsmPrinter;
106class DwarfDebug;
107class DwarfTypeUnit;
108class MCSymbol;
109class raw_ostream;
110
111/// Interface which the different types of accelerator table data have to
112/// conform. It serves as a base class for different values of the template
113/// argument of the AccelTable class template.
114class AccelTableData {
115public:
116 virtual ~AccelTableData() = default;
117
118 bool operator<(const AccelTableData &Other) const {
119 return order() < Other.order();
120 }
121
122 // Subclasses should implement:
123 // static uint32_t hash(StringRef Name);
124
125#ifndef NDEBUG
126 virtual void print(raw_ostream &OS) const = 0;
127#endif
128protected:
129 virtual uint64_t order() const = 0;
130};
131
132/// A base class holding non-template-dependant functionality of the AccelTable
133/// class. Clients should not use this class directly but rather instantiate
134/// AccelTable with a type derived from AccelTableData.
135class AccelTableBase {
136public:
137 using HashFn = uint32_t(StringRef);
138
139 /// Represents a group of entries with identical name (and hence, hash value).
140 struct HashData {
141 DwarfStringPoolEntryRef Name;
142 uint32_t HashValue;
143 std::vector<AccelTableData *> Values;
144 MCSymbol *Sym;
145
146 /// Get all AccelTableData cast as a `T`.
147 template <typename T = AccelTableData *> auto getValues() const {
148 static_assert(std::is_pointer<T>());
149 static_assert(
150 std::is_base_of<AccelTableData, std::remove_pointer_t<T>>());
151 return map_range(
152 Values, [](AccelTableData *Data) { return static_cast<T>(Data); });
153 }
154
155#ifndef NDEBUG
156 void print(raw_ostream &OS) const;
157 void dump() const { print(OS&: dbgs()); }
158#endif
159 };
160 using HashList = std::vector<HashData *>;
161 using BucketList = std::vector<HashList>;
162
163protected:
164 /// Allocator for HashData and Values.
165 BumpPtrAllocator Allocator;
166
167 using StringEntries = MapVector<StringRef, HashData>;
168 StringEntries Entries;
169
170 HashFn *Hash;
171 uint32_t BucketCount = 0;
172 uint32_t UniqueHashCount = 0;
173
174 HashList Hashes;
175 BucketList Buckets;
176
177 void computeBucketCount();
178
179 AccelTableBase(HashFn *Hash) : Hash(Hash) {}
180
181public:
182 void finalize(AsmPrinter *Asm, StringRef Prefix);
183 ArrayRef<HashList> getBuckets() const { return Buckets; }
184 uint32_t getBucketCount() const { return BucketCount; }
185 uint32_t getUniqueHashCount() const { return UniqueHashCount; }
186 uint32_t getUniqueNameCount() const { return Entries.size(); }
187
188#ifndef NDEBUG
189 void print(raw_ostream &OS) const;
190 void dump() const { print(OS&: dbgs()); }
191#endif
192
193 AccelTableBase(const AccelTableBase &) = delete;
194 void operator=(const AccelTableBase &) = delete;
195};
196
197/// This class holds an abstract representation of an Accelerator Table,
198/// consisting of a sequence of buckets, each bucket containint a sequence of
199/// HashData entries. The class is parameterized by the type of entries it
200/// holds. The type template parameter also defines the hash function to use for
201/// hashing names.
202template <typename DataT> class AccelTable : public AccelTableBase {
203public:
204 AccelTable() : AccelTableBase(DataT::hash) {}
205
206 template <typename... Types>
207 void addName(DwarfStringPoolEntryRef Name, Types &&... Args);
208 void clear() { Entries.clear(); }
209 void addEntries(AccelTable<DataT> &Table);
210 const StringEntries getEntries() const { return Entries; }
211};
212
213template <typename AccelTableDataT>
214template <typename... Types>
215void AccelTable<AccelTableDataT>::addName(DwarfStringPoolEntryRef Name,
216 Types &&... Args) {
217 assert(Buckets.empty() && "Already finalized!");
218 // If the string is in the list already then add this die to the list
219 // otherwise add a new one.
220 auto &It = Entries[Name.getString()];
221 if (It.Values.empty()) {
222 It.Name = Name;
223 It.HashValue = Hash(Name.getString());
224 }
225 It.Values.push_back(new (Allocator)
226 AccelTableDataT(std::forward<Types>(Args)...));
227}
228
229/// A base class for different implementations of Data classes for Apple
230/// Accelerator Tables. The columns in the table are defined by the static Atoms
231/// variable defined on the subclasses.
232class AppleAccelTableData : public AccelTableData {
233public:
234 /// An Atom defines the form of the data in an Apple accelerator table.
235 /// Conceptually it is a column in the accelerator consisting of a type and a
236 /// specification of the form of its data.
237 struct Atom {
238 /// Atom Type.
239 const uint16_t Type;
240 /// DWARF Form.
241 const uint16_t Form;
242
243 constexpr Atom(uint16_t Type, uint16_t Form) : Type(Type), Form(Form) {}
244
245#ifndef NDEBUG
246 void print(raw_ostream &OS) const;
247 void dump() const { print(OS&: dbgs()); }
248#endif
249 };
250 // Subclasses should define:
251 // static constexpr Atom Atoms[];
252
253 virtual void emit(AsmPrinter *Asm) const = 0;
254
255 static uint32_t hash(StringRef Buffer) { return djbHash(Buffer); }
256};
257
258/// Helper class to identify an entry in DWARF5AccelTable based on their DIE
259/// offset and UnitID.
260struct OffsetAndUnitID : std::pair<uint64_t, uint32_t> {
261 using Base = std::pair<uint64_t, uint32_t>;
262 OffsetAndUnitID(Base B) : Base(B) {}
263
264 OffsetAndUnitID(uint64_t Offset, uint32_t UnitID) : Base(Offset, UnitID) {}
265 uint64_t offset() const { return first; };
266 uint32_t unitID() const { return second; };
267};
268
269template <>
270struct DenseMapInfo<OffsetAndUnitID> : DenseMapInfo<OffsetAndUnitID::Base> {};
271
272/// The Data class implementation for DWARF v5 accelerator table. Unlike the
273/// Apple Data classes, this class is just a DIE wrapper, and does not know to
274/// serialize itself. The complete serialization logic is in the
275/// emitDWARF5AccelTable function.
276class DWARF5AccelTableData : public AccelTableData {
277public:
278 static uint32_t hash(StringRef Name) { return caseFoldingDjbHash(Buffer: Name); }
279
280 DWARF5AccelTableData(const DIE &Die, const uint32_t UnitID,
281 const bool IsTU = false);
282 DWARF5AccelTableData(const uint64_t DieOffset,
283 const std::optional<uint64_t> DefiningParentOffset,
284 const unsigned DieTag, const unsigned UnitID,
285 const bool IsTU = false)
286 : OffsetVal(DieOffset), ParentOffset(DefiningParentOffset),
287 DieTag(DieTag), AbbrevNumber(0), IsTU(IsTU), UnitID(UnitID) {}
288
289#ifndef NDEBUG
290 void print(raw_ostream &OS) const override;
291#endif
292
293 uint64_t getDieOffset() const {
294 assert(isNormalized() && "Accessing DIE Offset before normalizing.");
295 return std::get<uint64_t>(v: OffsetVal);
296 }
297
298 OffsetAndUnitID getDieOffsetAndUnitID() const {
299 return {getDieOffset(), UnitID};
300 }
301
302 unsigned getDieTag() const { return DieTag; }
303 unsigned getUnitID() const { return UnitID; }
304 bool isTU() const { return IsTU; }
305 void normalizeDIEToOffset() {
306 assert(!isNormalized() && "Accessing offset after normalizing.");
307 const DIE *Entry = std::get<const DIE *>(v&: OffsetVal);
308 ParentOffset = getDefiningParentDieOffset(Die: *Entry);
309 OffsetVal = Entry->getOffset();
310 }
311 bool isNormalized() const {
312 return std::holds_alternative<uint64_t>(v: OffsetVal);
313 }
314
315 std::optional<uint64_t> getParentDieOffset() const {
316 if (auto OffsetAndId = getParentDieOffsetAndUnitID())
317 return OffsetAndId->offset();
318 return {};
319 }
320
321 std::optional<OffsetAndUnitID> getParentDieOffsetAndUnitID() const {
322 assert(isNormalized() && "Accessing DIE Offset before normalizing.");
323 if (!ParentOffset)
324 return std::nullopt;
325 return OffsetAndUnitID(*ParentOffset, getUnitID());
326 }
327
328 /// Sets AbbrevIndex for an Entry.
329 void setAbbrevNumber(uint16_t AbbrevNum) { AbbrevNumber = AbbrevNum; }
330
331 /// Returns AbbrevIndex for an Entry.
332 uint16_t getAbbrevNumber() const { return AbbrevNumber; }
333
334 /// If `Die` has a non-null parent and the parent is not a declaration,
335 /// return its offset.
336 static std::optional<uint64_t> getDefiningParentDieOffset(const DIE &Die);
337
338protected:
339 std::variant<const DIE *, uint64_t> OffsetVal;
340 std::optional<uint64_t> ParentOffset;
341 uint32_t DieTag : 16;
342 uint32_t AbbrevNumber : 15;
343 uint32_t IsTU : 1;
344 uint32_t UnitID;
345 uint64_t order() const override { return getDieOffset(); }
346};
347
348class DebugNamesAbbrev : public FoldingSetNode {
349public:
350 uint32_t DieTag;
351 uint32_t Number;
352 struct AttributeEncoding {
353 dwarf::Index Index;
354 dwarf::Form Form;
355 };
356 DebugNamesAbbrev(uint32_t DieTag) : DieTag(DieTag) {}
357 /// Add attribute encoding to an abbreviation.
358 void addAttribute(const DebugNamesAbbrev::AttributeEncoding &Attr) {
359 AttrVect.push_back(Elt: Attr);
360 }
361 /// Set abbreviation tag index.
362 void setNumber(uint32_t AbbrevNumber) { Number = AbbrevNumber; }
363 /// Get abbreviation tag index.
364 uint32_t getNumber() const { return Number; }
365 /// Get DIE Tag.
366 uint32_t getDieTag() const { return DieTag; }
367 /// Used to gather unique data for the abbreviation folding set.
368 void Profile(FoldingSetNodeID &ID) const;
369 /// Returns attributes for an abbreviation.
370 const SmallVector<AttributeEncoding, 1> &getAttributes() const {
371 return AttrVect;
372 }
373
374private:
375 SmallVector<AttributeEncoding, 1> AttrVect;
376};
377
378struct TypeUnitMetaInfo {
379 // Symbol for start of the TU section or signature if this is SplitDwarf.
380 std::variant<MCSymbol *, uint64_t> LabelOrSignature;
381 // Unique ID of Type Unit.
382 unsigned UniqueID;
383};
384using TUVectorTy = SmallVector<TypeUnitMetaInfo, 1>;
385class DWARF5AccelTable : public AccelTable<DWARF5AccelTableData> {
386 // Symbols to start of all the TU sections that were generated.
387 TUVectorTy TUSymbolsOrHashes;
388
389public:
390 struct UnitIndexAndEncoding {
391 unsigned Index;
392 DebugNamesAbbrev::AttributeEncoding Encoding;
393 };
394 /// Returns type units that were constructed.
395 const TUVectorTy &getTypeUnitsSymbols() { return TUSymbolsOrHashes; }
396 /// Add a type unit start symbol.
397 void addTypeUnitSymbol(DwarfTypeUnit &U);
398 /// Add a type unit Signature.
399 void addTypeUnitSignature(DwarfTypeUnit &U);
400 /// Convert DIE entries to explicit offset.
401 /// Needs to be called after DIE offsets are computed.
402 void convertDieToOffset() {
403 for (auto &Entry : Entries) {
404 for (auto *Data : Entry.second.getValues<DWARF5AccelTableData *>()) {
405 // For TU we normalize as each Unit is emitted.
406 // So when this is invoked after CU construction we will be in mixed
407 // state.
408 if (!Data->isNormalized())
409 Data->normalizeDIEToOffset();
410 }
411 }
412 }
413
414 void addTypeEntries(DWARF5AccelTable &Table) {
415 for (auto &Entry : Table.getEntries()) {
416 for (auto *Data : Entry.second.getValues<DWARF5AccelTableData *>()) {
417 addName(Name: Entry.second.Name, Args: Data->getDieOffset(),
418 Args: Data->getParentDieOffset(), Args: Data->getDieTag(),
419 Args: Data->getUnitID(), Args: true);
420 }
421 }
422 }
423};
424
425void emitAppleAccelTableImpl(AsmPrinter *Asm, AccelTableBase &Contents,
426 StringRef Prefix, const MCSymbol *SecBegin,
427 ArrayRef<AppleAccelTableData::Atom> Atoms);
428
429/// Emit an Apple Accelerator Table consisting of entries in the specified
430/// AccelTable. The DataT template parameter should be derived from
431/// AppleAccelTableData.
432template <typename DataT>
433void emitAppleAccelTable(AsmPrinter *Asm, AccelTable<DataT> &Contents,
434 StringRef Prefix, const MCSymbol *SecBegin) {
435 static_assert(std::is_convertible<DataT *, AppleAccelTableData *>::value);
436 emitAppleAccelTableImpl(Asm, Contents, Prefix, SecBegin, DataT::Atoms);
437}
438
439void emitDWARF5AccelTable(AsmPrinter *Asm, DWARF5AccelTable &Contents,
440 const DwarfDebug &DD,
441 ArrayRef<std::unique_ptr<DwarfCompileUnit>> CUs);
442
443/// Emit a DWARFv5 Accelerator Table consisting of entries in the specified
444/// AccelTable. The \p CUs contains either symbols keeping offsets to the
445/// start of compilation unit, either offsets to the start of compilation
446/// unit themselves.
447void emitDWARF5AccelTable(
448 AsmPrinter *Asm, DWARF5AccelTable &Contents,
449 ArrayRef<std::variant<MCSymbol *, uint64_t>> CUs,
450 llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(
451 const DWARF5AccelTableData &)>
452 getIndexForEntry);
453
454/// Accelerator table data implementation for simple Apple accelerator tables
455/// with just a DIE reference.
456class AppleAccelTableOffsetData : public AppleAccelTableData {
457public:
458 AppleAccelTableOffsetData(const DIE &D) : Die(D) {}
459
460 void emit(AsmPrinter *Asm) const override;
461
462 static constexpr Atom Atoms[] = {
463 Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)};
464
465#ifndef NDEBUG
466 void print(raw_ostream &OS) const override;
467#endif
468protected:
469 uint64_t order() const override { return Die.getOffset(); }
470
471 const DIE &Die;
472};
473
474/// Accelerator table data implementation for Apple type accelerator tables.
475class AppleAccelTableTypeData : public AppleAccelTableOffsetData {
476public:
477 AppleAccelTableTypeData(const DIE &D) : AppleAccelTableOffsetData(D) {}
478
479 void emit(AsmPrinter *Asm) const override;
480
481 static constexpr Atom Atoms[] = {
482 Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
483 Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
484 Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)};
485
486#ifndef NDEBUG
487 void print(raw_ostream &OS) const override;
488#endif
489};
490
491/// Accelerator table data implementation for simple Apple accelerator tables
492/// with a DIE offset but no actual DIE pointer.
493class AppleAccelTableStaticOffsetData : public AppleAccelTableData {
494public:
495 AppleAccelTableStaticOffsetData(uint32_t Offset) : Offset(Offset) {}
496
497 void emit(AsmPrinter *Asm) const override;
498
499 static constexpr Atom Atoms[] = {
500 Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)};
501
502#ifndef NDEBUG
503 void print(raw_ostream &OS) const override;
504#endif
505protected:
506 uint64_t order() const override { return Offset; }
507
508 uint32_t Offset;
509};
510
511/// Accelerator table data implementation for type accelerator tables with
512/// a DIE offset but no actual DIE pointer.
513class AppleAccelTableStaticTypeData : public AppleAccelTableStaticOffsetData {
514public:
515 AppleAccelTableStaticTypeData(uint32_t Offset, uint16_t Tag,
516 bool ObjCClassIsImplementation,
517 uint32_t QualifiedNameHash)
518 : AppleAccelTableStaticOffsetData(Offset),
519 QualifiedNameHash(QualifiedNameHash), Tag(Tag),
520 ObjCClassIsImplementation(ObjCClassIsImplementation) {}
521
522 void emit(AsmPrinter *Asm) const override;
523
524 static constexpr Atom Atoms[] = {
525 Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
526 Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
527 Atom(5, dwarf::DW_FORM_data1), Atom(6, dwarf::DW_FORM_data4)};
528
529#ifndef NDEBUG
530 void print(raw_ostream &OS) const override;
531#endif
532protected:
533 uint64_t order() const override { return Offset; }
534
535 uint32_t QualifiedNameHash;
536 uint16_t Tag;
537 bool ObjCClassIsImplementation;
538};
539
540} // end namespace llvm
541
542#endif // LLVM_CODEGEN_ACCELTABLE_H
543

source code of llvm/include/llvm/CodeGen/AccelTable.h