1 | //===-- LVBinaryReader.h ----------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the LVBinaryReader class, which is used to describe a |
10 | // binary reader. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_DEBUGINFO_LOGICALVIEW_READERS_LVBINARYREADER_H |
15 | #define LLVM_DEBUGINFO_LOGICALVIEW_READERS_LVBINARYREADER_H |
16 | |
17 | #include "llvm/DebugInfo/LogicalView/Core/LVReader.h" |
18 | #include "llvm/MC/MCAsmInfo.h" |
19 | #include "llvm/MC/MCContext.h" |
20 | #include "llvm/MC/MCDisassembler/MCDisassembler.h" |
21 | #include "llvm/MC/MCInstPrinter.h" |
22 | #include "llvm/MC/MCInstrInfo.h" |
23 | #include "llvm/MC/MCObjectFileInfo.h" |
24 | #include "llvm/MC/MCRegisterInfo.h" |
25 | #include "llvm/MC/MCSubtargetInfo.h" |
26 | #include "llvm/MC/TargetRegistry.h" |
27 | #include "llvm/Object/COFF.h" |
28 | #include "llvm/Object/ObjectFile.h" |
29 | |
30 | namespace llvm { |
31 | namespace logicalview { |
32 | |
// Compile-time switch; it is disabled here and its consumers live outside this
// header.
// NOTE(review): presumably selects whether the high address of a range is
// updated while it is being processed -- confirm against the users of the flag.
// Declared 'inline' so that all translation units including this header share
// a single definition instead of each getting an internal-linkage copy.
inline constexpr bool UpdateHighAddress = false;
34 | |
35 | // Logical scope, Section address, Section index, IsComdat. |
36 | struct LVSymbolTableEntry final { |
37 | LVScope *Scope = nullptr; |
38 | LVAddress Address = 0; |
39 | LVSectionIndex SectionIndex = 0; |
40 | bool IsComdat = false; |
41 | LVSymbolTableEntry() = default; |
42 | LVSymbolTableEntry(LVScope *Scope, LVAddress Address, |
43 | LVSectionIndex SectionIndex, bool IsComdat) |
44 | : Scope(Scope), Address(Address), SectionIndex(SectionIndex), |
45 | IsComdat(IsComdat) {} |
46 | }; |
47 | |
48 | // Function names extracted from the object symbol table. |
49 | class LVSymbolTable final { |
50 | using LVSymbolNames = std::map<std::string, LVSymbolTableEntry>; |
51 | LVSymbolNames SymbolNames; |
52 | |
53 | public: |
54 | LVSymbolTable() = default; |
55 | |
56 | void add(StringRef Name, LVScope *Function, LVSectionIndex SectionIndex = 0); |
57 | void add(StringRef Name, LVAddress Address, LVSectionIndex SectionIndex, |
58 | bool IsComdat); |
59 | LVSectionIndex update(LVScope *Function); |
60 | |
61 | const LVSymbolTableEntry &getEntry(StringRef Name); |
62 | LVAddress getAddress(StringRef Name); |
63 | LVSectionIndex getIndex(StringRef Name); |
64 | bool getIsComdat(StringRef Name); |
65 | |
66 | void print(raw_ostream &OS); |
67 | }; |
68 | |
69 | class LVBinaryReader : public LVReader { |
70 | // Function names extracted from the object symbol table. |
71 | LVSymbolTable SymbolTable; |
72 | |
73 | // It contains the LVLineDebug elements representing the inlined logical |
74 | // lines for the current compile unit, created by parsing the CodeView |
75 | // S_INLINESITE symbol annotation data. |
76 | using LVInlineeLine = std::map<LVScope *, std::unique_ptr<LVLines>>; |
77 | LVInlineeLine CUInlineeLines; |
78 | |
79 | // Instruction lines for a logical scope. These instructions are fetched |
80 | // during its merge with the debug lines. |
81 | LVDoubleMap<LVSectionIndex, LVScope *, LVLines *> ScopeInstructions; |
82 | |
83 | // Links the scope with its first assembler address line. |
84 | LVDoubleMap<LVSectionIndex, LVAddress, LVScope *> AssemblerMappings; |
85 | |
86 | // Mapping from virtual address to section. |
87 | // The virtual address refers to the address where the section is loaded. |
88 | using LVSectionAddresses = std::map<LVSectionIndex, object::SectionRef>; |
89 | LVSectionAddresses SectionAddresses; |
90 | |
91 | void addSectionAddress(const object::SectionRef &Section) { |
92 | if (SectionAddresses.find(x: Section.getAddress()) == SectionAddresses.end()) |
93 | SectionAddresses.emplace(args: Section.getAddress(), args: Section); |
94 | } |
95 | |
96 | // Scopes with ranges for current compile unit. It is used to find a line |
97 | // giving its exact or closest address. To support comdat functions, all |
98 | // addresses for the same section are recorded in the same map. |
99 | using LVSectionRanges = std::map<LVSectionIndex, std::unique_ptr<LVRange>>; |
100 | LVSectionRanges SectionRanges; |
101 | |
102 | // Image base and virtual address for Executable file. |
103 | uint64_t ImageBaseAddress = 0; |
104 | uint64_t VirtualAddress = 0; |
105 | |
106 | // Object sections with machine code. |
107 | using LVSections = std::map<LVSectionIndex, object::SectionRef>; |
108 | LVSections Sections; |
109 | |
110 | std::vector<std::unique_ptr<LVLines>> DiscoveredLines; |
111 | |
112 | protected: |
113 | // It contains the LVLineDebug elements representing the logical lines for |
114 | // the current compile unit, created by parsing the debug line section. |
115 | LVLines CULines; |
116 | |
117 | std::unique_ptr<const MCRegisterInfo> MRI; |
118 | std::unique_ptr<const MCAsmInfo> MAI; |
119 | std::unique_ptr<const MCSubtargetInfo> STI; |
120 | std::unique_ptr<const MCInstrInfo> MII; |
121 | std::unique_ptr<const MCDisassembler> MD; |
122 | std::unique_ptr<MCContext> MC; |
123 | std::unique_ptr<MCInstPrinter> MIP; |
124 | |
125 | // https://yurydelendik.github.io/webassembly-dwarf/ |
126 | // 2. Consuming and Generating DWARF for WebAssembly Code |
127 | // Note: Some DWARF constructs don't map one-to-one onto WebAssembly |
128 | // constructs. We strive to enumerate and resolve any ambiguities here. |
129 | // |
130 | // 2.1. Code Addresses |
131 | // Note: DWARF associates various bits of debug info |
132 | // with particular locations in the program via its code address (instruction |
133 | // pointer or PC). However, WebAssembly's linear memory address space does not |
134 | // contain WebAssembly instructions. |
135 | // |
136 | // Wherever a code address (see 2.17 of [DWARF]) is used in DWARF for |
137 | // WebAssembly, it must be the offset of an instruction relative within the |
138 | // Code section of the WebAssembly file. The DWARF is considered malformed if |
139 | // a PC offset is between instruction boundaries within the Code section. |
140 | // |
141 | // Note: It is expected that a DWARF consumer does not know how to decode |
142 | // WebAssembly instructions. The instruction pointer is selected as the offset |
143 | // in the binary file of the first byte of the instruction, and it is |
144 | // consistent with the WebAssembly Web API conventions definition of the code |
145 | // location. |
146 | // |
147 | // EXAMPLE: .DEBUG_LINE INSTRUCTION POINTERS |
148 | // The .debug_line DWARF section maps instruction pointers to source |
149 | // locations. With WebAssembly, the .debug_line section maps Code |
150 | // section-relative instruction offsets to source locations. |
151 | // |
152 | // EXAMPLE: DW_AT_* ATTRIBUTES |
153 | // For entities with a single associated code address, DWARF uses |
154 | // the DW_AT_low_pc attribute to specify the associated code address value. |
155 | // For WebAssembly, the DW_AT_low_pc's value is a Code section-relative |
156 | // instruction offset. |
157 | // |
158 | // For entities with a single contiguous range of code, DWARF uses a |
159 | // pair of DW_AT_low_pc and DW_AT_high_pc attributes to specify the associated |
160 | // contiguous range of code address values. For WebAssembly, these attributes |
161 | // are Code section-relative instruction offsets. |
162 | // |
163 | // For entities with multiple ranges of code, DWARF uses the DW_AT_ranges |
164 | // attribute, which refers to the array located at the .debug_ranges section. |
165 | LVAddress WasmCodeSectionOffset = 0; |
166 | |
167 | // Loads all info for the architecture of the provided object file. |
168 | Error loadGenericTargetInfo(StringRef TheTriple, StringRef TheFeatures); |
169 | |
170 | virtual void mapRangeAddress(const object::ObjectFile &Obj) {} |
171 | virtual void mapRangeAddress(const object::ObjectFile &Obj, |
172 | const object::SectionRef &Section, |
173 | bool IsComdat) {} |
174 | |
175 | // Create a mapping from virtual address to section. |
176 | void mapVirtualAddress(const object::ObjectFile &Obj); |
177 | void mapVirtualAddress(const object::COFFObjectFile &COFFObj); |
178 | |
179 | Expected<std::pair<LVSectionIndex, object::SectionRef>> |
180 | getSection(LVScope *Scope, LVAddress Address, LVSectionIndex SectionIndex); |
181 | |
182 | void addSectionRange(LVSectionIndex SectionIndex, LVScope *Scope); |
183 | void addSectionRange(LVSectionIndex SectionIndex, LVScope *Scope, |
184 | LVAddress LowerAddress, LVAddress UpperAddress); |
185 | LVRange *getSectionRanges(LVSectionIndex SectionIndex); |
186 | |
187 | void includeInlineeLines(LVSectionIndex SectionIndex, LVScope *Function); |
188 | |
189 | Error createInstructions(); |
190 | Error createInstructions(LVScope *Function, LVSectionIndex SectionIndex); |
191 | Error createInstructions(LVScope *Function, LVSectionIndex SectionIndex, |
192 | const LVNameInfo &NameInfo); |
193 | |
194 | void processLines(LVLines *DebugLines, LVSectionIndex SectionIndex); |
195 | void processLines(LVLines *DebugLines, LVSectionIndex SectionIndex, |
196 | LVScope *Function); |
197 | |
198 | public: |
199 | LVBinaryReader() = delete; |
200 | LVBinaryReader(StringRef Filename, StringRef FileFormatName, ScopedPrinter &W, |
201 | LVBinaryType BinaryType) |
202 | : LVReader(Filename, FileFormatName, W, BinaryType) {} |
203 | LVBinaryReader(const LVBinaryReader &) = delete; |
204 | LVBinaryReader &operator=(const LVBinaryReader &) = delete; |
205 | virtual ~LVBinaryReader() = default; |
206 | |
207 | void addInlineeLines(LVScope *Scope, LVLines &Lines) { |
208 | CUInlineeLines.emplace(args&: Scope, args: std::make_unique<LVLines>(args: std::move(Lines))); |
209 | } |
210 | |
211 | // Convert Segment::Offset pair to absolute address. |
212 | LVAddress linearAddress(uint16_t Segment, uint32_t Offset, |
213 | LVAddress Addendum = 0) { |
214 | return ImageBaseAddress + (Segment * VirtualAddress) + Offset + Addendum; |
215 | } |
216 | |
217 | void addToSymbolTable(StringRef Name, LVScope *Function, |
218 | LVSectionIndex SectionIndex = 0); |
219 | void addToSymbolTable(StringRef Name, LVAddress Address, |
220 | LVSectionIndex SectionIndex, bool IsComdat); |
221 | LVSectionIndex updateSymbolTable(LVScope *Function); |
222 | |
223 | const LVSymbolTableEntry &getSymbolTableEntry(StringRef Name); |
224 | LVAddress getSymbolTableAddress(StringRef Name); |
225 | LVSectionIndex getSymbolTableIndex(StringRef Name); |
226 | bool getSymbolTableIsComdat(StringRef Name); |
227 | |
228 | LVSectionIndex getSectionIndex(LVScope *Scope) override { |
229 | return Scope ? getSymbolTableIndex(Name: Scope->getLinkageName()) |
230 | : DotTextSectionIndex; |
231 | } |
232 | |
233 | void print(raw_ostream &OS) const; |
234 | |
235 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
236 | void dump() const { print(OS&: dbgs()); } |
237 | #endif |
238 | }; |
239 | |
240 | } // end namespace logicalview |
241 | } // end namespace llvm |
242 | |
243 | #endif // LLVM_DEBUGINFO_LOGICALVIEW_READERS_LVBINARYREADER_H |
244 | |