1 | //===- DWARFDebugLine.h -----------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_DEBUGINFO_DWARF_DWARFDEBUGLINE_H |
10 | #define LLVM_DEBUGINFO_DWARF_DWARFDEBUGLINE_H |
11 | |
12 | #include "llvm/ADT/StringRef.h" |
13 | #include "llvm/BinaryFormat/Dwarf.h" |
14 | #include "llvm/DebugInfo/DIContext.h" |
15 | #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" |
16 | #include "llvm/DebugInfo/DWARF/DWARFUnit.h" |
17 | #include "llvm/Support/MD5.h" |
18 | #include "llvm/Support/Path.h" |
19 | #include <cstdint> |
20 | #include <map> |
21 | #include <string> |
22 | #include <vector> |
23 | |
24 | namespace llvm { |
25 | |
26 | class raw_ostream; |
27 | |
28 | class DWARFDebugLine { |
29 | public: |
30 | struct FileNameEntry { |
31 | FileNameEntry() = default; |
32 | |
33 | DWARFFormValue Name; |
34 | uint64_t DirIdx = 0; |
35 | uint64_t ModTime = 0; |
36 | uint64_t Length = 0; |
37 | MD5::MD5Result Checksum; |
38 | DWARFFormValue Source; |
39 | }; |
40 | |
41 | /// Tracks which optional content types are present in a DWARF file name |
42 | /// entry format. |
43 | struct ContentTypeTracker { |
44 | ContentTypeTracker() = default; |
45 | |
46 | /// Whether filename entries provide a modification timestamp. |
47 | bool HasModTime = false; |
48 | /// Whether filename entries provide a file size. |
49 | bool HasLength = false; |
50 | /// For v5, whether filename entries provide an MD5 checksum. |
51 | bool HasMD5 = false; |
52 | /// For v5, whether filename entries provide source text. |
53 | bool HasSource = false; |
54 | |
55 | /// Update tracked content types with \p ContentType. |
56 | void trackContentType(dwarf::LineNumberEntryFormat ContentType); |
57 | }; |
58 | |
59 | struct Prologue { |
60 | Prologue(); |
61 | |
62 | /// The size in bytes of the statement information for this compilation unit |
63 | /// (not including the total_length field itself). |
64 | uint64_t TotalLength; |
65 | /// Version, address size (starting in v5), and DWARF32/64 format; these |
66 | /// parameters affect interpretation of forms (used in the directory and |
67 | /// file tables starting with v5). |
68 | dwarf::FormParams FormParams; |
69 | /// The number of bytes following the prologue_length field to the beginning |
70 | /// of the first byte of the statement program itself. |
71 | uint64_t PrologueLength; |
72 | /// In v5, size in bytes of a segment selector. |
73 | uint8_t SegSelectorSize; |
74 | /// The size in bytes of the smallest target machine instruction. Statement |
75 | /// program opcodes that alter the address register first multiply their |
76 | /// operands by this value. |
77 | uint8_t MinInstLength; |
78 | /// The maximum number of individual operations that may be encoded in an |
79 | /// instruction. |
80 | uint8_t MaxOpsPerInst; |
81 | /// The initial value of theis_stmtregister. |
82 | uint8_t DefaultIsStmt; |
83 | /// This parameter affects the meaning of the special opcodes. See below. |
84 | int8_t LineBase; |
85 | /// This parameter affects the meaning of the special opcodes. See below. |
86 | uint8_t LineRange; |
87 | /// The number assigned to the first special opcode. |
88 | uint8_t OpcodeBase; |
89 | /// This tracks which optional file format content types are present. |
90 | ContentTypeTracker ContentTypes; |
91 | std::vector<uint8_t> StandardOpcodeLengths; |
92 | std::vector<DWARFFormValue> IncludeDirectories; |
93 | std::vector<FileNameEntry> FileNames; |
94 | |
95 | const dwarf::FormParams getFormParams() const { return FormParams; } |
96 | uint16_t getVersion() const { return FormParams.Version; } |
97 | uint8_t getAddressSize() const { return FormParams.AddrSize; } |
98 | bool isDWARF64() const { return FormParams.Format == dwarf::DWARF64; } |
99 | |
100 | uint32_t sizeofTotalLength() const { return isDWARF64() ? 12 : 4; } |
101 | |
102 | uint32_t sizeofPrologueLength() const { return isDWARF64() ? 8 : 4; } |
103 | |
104 | bool totalLengthIsValid() const; |
105 | |
106 | /// Length of the prologue in bytes. |
107 | uint64_t getLength() const; |
108 | |
109 | /// Get DWARF-version aware access to the file name entry at the provided |
110 | /// index. |
111 | const llvm::DWARFDebugLine::FileNameEntry & |
112 | getFileNameEntry(uint64_t Index) const; |
113 | |
114 | bool hasFileAtIndex(uint64_t FileIndex) const; |
115 | |
116 | std::optional<uint64_t> getLastValidFileIndex() const; |
117 | |
118 | bool |
119 | getFileNameByIndex(uint64_t FileIndex, StringRef CompDir, |
120 | DILineInfoSpecifier::FileLineInfoKind Kind, |
121 | std::string &Result, |
122 | sys::path::Style Style = sys::path::Style::native) const; |
123 | |
124 | void clear(); |
125 | void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const; |
126 | Error (DWARFDataExtractor Data, uint64_t *OffsetPtr, |
127 | function_ref<void(Error)> RecoverableErrorHandler, |
128 | const DWARFContext &Ctx, const DWARFUnit *U = nullptr); |
129 | }; |
130 | |
131 | /// Standard .debug_line state machine structure. |
132 | struct Row { |
133 | explicit Row(bool DefaultIsStmt = false); |
134 | |
135 | /// Called after a row is appended to the matrix. |
136 | void postAppend(); |
137 | void reset(bool DefaultIsStmt); |
138 | void dump(raw_ostream &OS) const; |
139 | |
140 | static void (raw_ostream &OS, unsigned Indent); |
141 | |
142 | static bool orderByAddress(const Row &LHS, const Row &RHS) { |
143 | return std::tie(args: LHS.Address.SectionIndex, args: LHS.Address.Address) < |
144 | std::tie(args: RHS.Address.SectionIndex, args: RHS.Address.Address); |
145 | } |
146 | |
147 | /// The program-counter value corresponding to a machine instruction |
148 | /// generated by the compiler and section index pointing to the section |
149 | /// containg this PC. If relocation information is present then section |
150 | /// index is the index of the section which contains above address. |
151 | /// Otherwise this is object::SectionedAddress::Undef value. |
152 | object::SectionedAddress Address; |
153 | /// An unsigned integer indicating a source line number. Lines are numbered |
154 | /// beginning at 1. The compiler may emit the value 0 in cases where an |
155 | /// instruction cannot be attributed to any source line. |
156 | uint32_t Line; |
157 | /// An unsigned integer indicating a column number within a source line. |
158 | /// Columns are numbered beginning at 1. The value 0 is reserved to indicate |
159 | /// that a statement begins at the 'left edge' of the line. |
160 | uint16_t Column; |
161 | /// An unsigned integer indicating the identity of the source file |
162 | /// corresponding to a machine instruction. |
163 | uint16_t File; |
164 | /// An unsigned integer representing the DWARF path discriminator value |
165 | /// for this location. |
166 | uint32_t Discriminator; |
167 | /// An unsigned integer whose value encodes the applicable instruction set |
168 | /// architecture for the current instruction. |
169 | uint8_t Isa; |
170 | /// An unsigned integer representing the index of an operation within a |
171 | /// VLIW instruction. The index of the first operation is 0. |
172 | /// For non-VLIW architectures, this register will always be 0. |
173 | uint8_t OpIndex; |
174 | /// A boolean indicating that the current instruction is the beginning of a |
175 | /// statement. |
176 | uint8_t IsStmt : 1, |
177 | /// A boolean indicating that the current instruction is the |
178 | /// beginning of a basic block. |
179 | BasicBlock : 1, |
180 | /// A boolean indicating that the current address is that of the |
181 | /// first byte after the end of a sequence of target machine |
182 | /// instructions. |
183 | EndSequence : 1, |
184 | /// A boolean indicating that the current address is one (of possibly |
185 | /// many) where execution should be suspended for an entry breakpoint |
186 | /// of a function. |
187 | PrologueEnd : 1, |
188 | /// A boolean indicating that the current address is one (of possibly |
189 | /// many) where execution should be suspended for an exit breakpoint |
190 | /// of a function. |
191 | EpilogueBegin : 1; |
192 | }; |
193 | |
194 | /// Represents a series of contiguous machine instructions. Line table for |
195 | /// each compilation unit may consist of multiple sequences, which are not |
196 | /// guaranteed to be in the order of ascending instruction address. |
197 | struct Sequence { |
198 | Sequence(); |
199 | |
200 | /// Sequence describes instructions at address range [LowPC, HighPC) |
201 | /// and is described by line table rows [FirstRowIndex, LastRowIndex). |
202 | uint64_t LowPC; |
203 | uint64_t HighPC; |
204 | /// If relocation information is present then this is the index of the |
205 | /// section which contains above addresses. Otherwise this is |
206 | /// object::SectionedAddress::Undef value. |
207 | uint64_t SectionIndex; |
208 | unsigned FirstRowIndex; |
209 | unsigned LastRowIndex; |
210 | bool Empty; |
211 | |
212 | void reset(); |
213 | |
214 | static bool orderByHighPC(const Sequence &LHS, const Sequence &RHS) { |
215 | return std::tie(args: LHS.SectionIndex, args: LHS.HighPC) < |
216 | std::tie(args: RHS.SectionIndex, args: RHS.HighPC); |
217 | } |
218 | |
219 | bool isValid() const { |
220 | return !Empty && (LowPC < HighPC) && (FirstRowIndex < LastRowIndex); |
221 | } |
222 | |
223 | bool containsPC(object::SectionedAddress PC) const { |
224 | return SectionIndex == PC.SectionIndex && |
225 | (LowPC <= PC.Address && PC.Address < HighPC); |
226 | } |
227 | }; |
228 | |
229 | struct LineTable { |
230 | LineTable(); |
231 | |
232 | /// Represents an invalid row |
233 | const uint32_t UnknownRowIndex = UINT32_MAX; |
234 | |
235 | void appendRow(const DWARFDebugLine::Row &R) { Rows.push_back(x: R); } |
236 | |
237 | void appendSequence(const DWARFDebugLine::Sequence &S) { |
238 | Sequences.push_back(x: S); |
239 | } |
240 | |
241 | /// Returns the index of the row with file/line info for a given address, |
242 | /// or UnknownRowIndex if there is no such row. |
243 | uint32_t lookupAddress(object::SectionedAddress Address) const; |
244 | |
245 | bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size, |
246 | std::vector<uint32_t> &Result) const; |
247 | |
248 | bool hasFileAtIndex(uint64_t FileIndex) const { |
249 | return Prologue.hasFileAtIndex(FileIndex); |
250 | } |
251 | |
252 | std::optional<uint64_t> getLastValidFileIndex() const { |
253 | return Prologue.getLastValidFileIndex(); |
254 | } |
255 | |
256 | /// Extracts filename by its index in filename table in prologue. |
257 | /// In Dwarf 4, the files are 1-indexed and the current compilation file |
258 | /// name is not represented in the list. In DWARF v5, the files are |
259 | /// 0-indexed and the primary source file has the index 0. |
260 | /// Returns true on success. |
261 | bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir, |
262 | DILineInfoSpecifier::FileLineInfoKind Kind, |
263 | std::string &Result) const { |
264 | return Prologue.getFileNameByIndex(FileIndex, CompDir, Kind, Result); |
265 | } |
266 | |
267 | /// Fills the Result argument with the file and line information |
268 | /// corresponding to Address. Returns true on success. |
269 | bool getFileLineInfoForAddress(object::SectionedAddress Address, |
270 | const char *CompDir, |
271 | DILineInfoSpecifier::FileLineInfoKind Kind, |
272 | DILineInfo &Result) const; |
273 | |
274 | /// Extracts directory name by its Entry in include directories table |
275 | /// in prologue. Returns true on success. |
276 | bool getDirectoryForEntry(const FileNameEntry &Entry, |
277 | std::string &Directory) const; |
278 | |
279 | void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const; |
280 | void clear(); |
281 | |
282 | /// Parse prologue and all rows. |
283 | Error (DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr, |
284 | const DWARFContext &Ctx, const DWARFUnit *U, |
285 | function_ref<void(Error)> RecoverableErrorHandler, |
286 | raw_ostream *OS = nullptr, bool Verbose = false); |
287 | |
288 | using RowVector = std::vector<Row>; |
289 | using RowIter = RowVector::const_iterator; |
290 | using SequenceVector = std::vector<Sequence>; |
291 | using SequenceIter = SequenceVector::const_iterator; |
292 | |
293 | struct Prologue Prologue; |
294 | RowVector Rows; |
295 | SequenceVector Sequences; |
296 | |
297 | private: |
298 | uint32_t findRowInSeq(const DWARFDebugLine::Sequence &Seq, |
299 | object::SectionedAddress Address) const; |
300 | std::optional<StringRef> |
301 | getSourceByIndex(uint64_t FileIndex, |
302 | DILineInfoSpecifier::FileLineInfoKind Kind) const; |
303 | |
304 | uint32_t lookupAddressImpl(object::SectionedAddress Address) const; |
305 | |
306 | bool lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size, |
307 | std::vector<uint32_t> &Result) const; |
308 | }; |
309 | |
310 | const LineTable *getLineTable(uint64_t Offset) const; |
311 | Expected<const LineTable *> |
312 | (DWARFDataExtractor &DebugLineData, uint64_t Offset, |
313 | const DWARFContext &Ctx, const DWARFUnit *U, |
314 | function_ref<void(Error)> RecoverableErrorHandler); |
315 | void clearLineTable(uint64_t Offset); |
316 | |
317 | /// Helper to allow for parsing of an entire .debug_line section in sequence. |
318 | class SectionParser { |
319 | public: |
320 | using LineToUnitMap = std::map<uint64_t, DWARFUnit *>; |
321 | |
322 | (DWARFDataExtractor &Data, const DWARFContext &C, |
323 | DWARFUnitVector::iterator_range Units); |
324 | |
325 | /// Get the next line table from the section. Report any issues via the |
326 | /// handlers. |
327 | /// |
328 | /// \param RecoverableErrorHandler - any issues that don't prevent further |
329 | /// parsing of the table will be reported through this handler. |
330 | /// \param UnrecoverableErrorHandler - any issues that prevent further |
331 | /// parsing of the table will be reported through this handler. |
332 | /// \param OS - if not null, the parser will print information about the |
333 | /// table as it parses it. |
334 | /// \param Verbose - if true, the parser will print verbose information when |
335 | /// printing to the output. |
336 | LineTable parseNext(function_ref<void(Error)> RecoverableErrorHandler, |
337 | function_ref<void(Error)> UnrecoverableErrorHandler, |
338 | raw_ostream *OS = nullptr, bool Verbose = false); |
339 | |
340 | /// Skip the current line table and go to the following line table (if |
341 | /// present) immediately. |
342 | /// |
343 | /// \param RecoverableErrorHandler - report any recoverable prologue |
344 | /// parsing issues via this handler. |
345 | /// \param UnrecoverableErrorHandler - report any unrecoverable prologue |
346 | /// parsing issues via this handler. |
347 | void skip(function_ref<void(Error)> RecoverableErrorHandler, |
348 | function_ref<void(Error)> UnrecoverableErrorHandler); |
349 | |
350 | /// Indicates if the parser has parsed as much as possible. |
351 | /// |
352 | /// \note Certain problems with the line table structure might mean that |
353 | /// parsing stops before the end of the section is reached. |
354 | bool done() const { return Done; } |
355 | |
356 | /// Get the offset the parser has reached. |
357 | uint64_t getOffset() const { return Offset; } |
358 | |
359 | private: |
360 | DWARFUnit *prepareToParse(uint64_t Offset); |
361 | void moveToNextTable(uint64_t OldOffset, const Prologue &P); |
362 | bool hasValidVersion(uint64_t Offset); |
363 | |
364 | LineToUnitMap LineToUnit; |
365 | |
366 | DWARFDataExtractor &DebugLineData; |
367 | const DWARFContext &Context; |
368 | uint64_t Offset = 0; |
369 | bool Done = false; |
370 | }; |
371 | |
372 | private: |
373 | struct ParsingState { |
374 | ParsingState(struct LineTable *LT, uint64_t TableOffset, |
375 | function_ref<void(Error)> ErrorHandler); |
376 | |
377 | void resetRowAndSequence(); |
378 | void appendRowToMatrix(); |
379 | |
380 | struct AddrOpIndexDelta { |
381 | uint64_t AddrOffset; |
382 | int16_t OpIndexDelta; |
383 | }; |
384 | |
385 | /// Advance the address and op-index by the \p OperationAdvance value. |
386 | /// \returns the amount advanced by. |
387 | AddrOpIndexDelta advanceAddrOpIndex(uint64_t OperationAdvance, |
388 | uint8_t Opcode, uint64_t OpcodeOffset); |
389 | |
390 | struct OpcodeAdvanceResults { |
391 | uint64_t AddrDelta; |
392 | int16_t OpIndexDelta; |
393 | uint8_t AdjustedOpcode; |
394 | }; |
395 | |
396 | /// Advance the address and op-index as required by the specified \p Opcode. |
397 | /// \returns the amount advanced by and the calculated adjusted opcode. |
398 | OpcodeAdvanceResults advanceForOpcode(uint8_t Opcode, |
399 | uint64_t OpcodeOffset); |
400 | |
401 | struct SpecialOpcodeDelta { |
402 | uint64_t Address; |
403 | int32_t Line; |
404 | int16_t OpIndex; |
405 | }; |
406 | |
407 | /// Advance the line, address and op-index as required by the specified |
408 | /// special \p Opcode. \returns the address, op-index and line delta. |
409 | SpecialOpcodeDelta handleSpecialOpcode(uint8_t Opcode, |
410 | uint64_t OpcodeOffset); |
411 | |
412 | /// Line table we're currently parsing. |
413 | struct LineTable *LineTable; |
414 | struct Row Row; |
415 | struct Sequence Sequence; |
416 | |
417 | private: |
418 | uint64_t LineTableOffset; |
419 | |
420 | bool ReportAdvanceAddrProblem = true; |
421 | bool ReportBadLineRange = true; |
422 | function_ref<void(Error)> ErrorHandler; |
423 | }; |
424 | |
425 | using LineTableMapTy = std::map<uint64_t, LineTable>; |
426 | using LineTableIter = LineTableMapTy::iterator; |
427 | using LineTableConstIter = LineTableMapTy::const_iterator; |
428 | |
429 | LineTableMapTy LineTableMap; |
430 | }; |
431 | |
432 | } // end namespace llvm |
433 | |
434 | #endif // LLVM_DEBUGINFO_DWARF_DWARFDEBUGLINE_H |
435 | |