1//===- DWARFDebugLine.h -----------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_DEBUGINFO_DWARF_DWARFDEBUGLINE_H
10#define LLVM_DEBUGINFO_DWARF_DWARFDEBUGLINE_H
11
12#include "llvm/ADT/Optional.h"
13#include "llvm/ADT/StringRef.h"
14#include "llvm/DebugInfo/DIContext.h"
15#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
16#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
17#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
18#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
19#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h"
20#include "llvm/Support/MD5.h"
21#include "llvm/Support/Path.h"
22#include <cstdint>
23#include <map>
24#include <string>
25#include <vector>
26
27namespace llvm {
28
29class DWARFUnit;
30class raw_ostream;
31
32class DWARFDebugLine {
33public:
34 struct FileNameEntry {
35 FileNameEntry() = default;
36
37 DWARFFormValue Name;
38 uint64_t DirIdx = 0;
39 uint64_t ModTime = 0;
40 uint64_t Length = 0;
41 MD5::MD5Result Checksum;
42 DWARFFormValue Source;
43 };
44
45 /// Tracks which optional content types are present in a DWARF file name
46 /// entry format.
47 struct ContentTypeTracker {
48 ContentTypeTracker() = default;
49
50 /// Whether filename entries provide a modification timestamp.
51 bool HasModTime = false;
52 /// Whether filename entries provide a file size.
53 bool HasLength = false;
54 /// For v5, whether filename entries provide an MD5 checksum.
55 bool HasMD5 = false;
56 /// For v5, whether filename entries provide source text.
57 bool HasSource = false;
58
59 /// Update tracked content types with \p ContentType.
60 void trackContentType(dwarf::LineNumberEntryFormat ContentType);
61 };
62
63 struct Prologue {
64 Prologue();
65
66 /// The size in bytes of the statement information for this compilation unit
67 /// (not including the total_length field itself).
68 uint64_t TotalLength;
69 /// Version, address size (starting in v5), and DWARF32/64 format; these
70 /// parameters affect interpretation of forms (used in the directory and
71 /// file tables starting with v5).
72 dwarf::FormParams FormParams;
73 /// The number of bytes following the prologue_length field to the beginning
74 /// of the first byte of the statement program itself.
75 uint64_t PrologueLength;
76 /// In v5, size in bytes of a segment selector.
77 uint8_t SegSelectorSize;
78 /// The size in bytes of the smallest target machine instruction. Statement
79 /// program opcodes that alter the address register first multiply their
80 /// operands by this value.
81 uint8_t MinInstLength;
82 /// The maximum number of individual operations that may be encoded in an
83 /// instruction.
84 uint8_t MaxOpsPerInst;
85 /// The initial value of theis_stmtregister.
86 uint8_t DefaultIsStmt;
87 /// This parameter affects the meaning of the special opcodes. See below.
88 int8_t LineBase;
89 /// This parameter affects the meaning of the special opcodes. See below.
90 uint8_t LineRange;
91 /// The number assigned to the first special opcode.
92 uint8_t OpcodeBase;
93 /// This tracks which optional file format content types are present.
94 ContentTypeTracker ContentTypes;
95 std::vector<uint8_t> StandardOpcodeLengths;
96 std::vector<DWARFFormValue> IncludeDirectories;
97 std::vector<FileNameEntry> FileNames;
98
99 const dwarf::FormParams getFormParams() const { return FormParams; }
100 uint16_t getVersion() const { return FormParams.Version; }
101 uint8_t getAddressSize() const { return FormParams.AddrSize; }
102 bool isDWARF64() const { return FormParams.Format == dwarf::DWARF64; }
103
104 uint32_t sizeofTotalLength() const { return isDWARF64() ? 12 : 4; }
105
106 uint32_t sizeofPrologueLength() const { return isDWARF64() ? 8 : 4; }
107
108 bool totalLengthIsValid() const;
109
110 /// Length of the prologue in bytes.
111 uint64_t getLength() const;
112
113 int32_t getMaxLineIncrementForSpecialOpcode() const {
114 return LineBase + (int8_t)LineRange - 1;
115 }
116
117 /// Get DWARF-version aware access to the file name entry at the provided
118 /// index.
119 const llvm::DWARFDebugLine::FileNameEntry &
120 getFileNameEntry(uint64_t Index) const;
121
122 bool hasFileAtIndex(uint64_t FileIndex) const;
123
124 Optional<uint64_t> getLastValidFileIndex() const;
125
126 bool
127 getFileNameByIndex(uint64_t FileIndex, StringRef CompDir,
128 DILineInfoSpecifier::FileLineInfoKind Kind,
129 std::string &Result,
130 sys::path::Style Style = sys::path::Style::native) const;
131
132 void clear();
133 void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const;
134 Error parse(DWARFDataExtractor Data, uint64_t *OffsetPtr,
135 function_ref<void(Error)> RecoverableErrorHandler,
136 const DWARFContext &Ctx, const DWARFUnit *U = nullptr);
137 };
138
139 /// Standard .debug_line state machine structure.
140 struct Row {
141 explicit Row(bool DefaultIsStmt = false);
142
143 /// Called after a row is appended to the matrix.
144 void postAppend();
145 void reset(bool DefaultIsStmt);
146 void dump(raw_ostream &OS) const;
147
148 static void dumpTableHeader(raw_ostream &OS, unsigned Indent);
149
150 static bool orderByAddress(const Row &LHS, const Row &RHS) {
151 return std::tie(LHS.Address.SectionIndex, LHS.Address.Address) <
152 std::tie(RHS.Address.SectionIndex, RHS.Address.Address);
153 }
154
155 /// The program-counter value corresponding to a machine instruction
156 /// generated by the compiler and section index pointing to the section
157 /// containg this PC. If relocation information is present then section
158 /// index is the index of the section which contains above address.
159 /// Otherwise this is object::SectionedAddress::Undef value.
160 object::SectionedAddress Address;
161 /// An unsigned integer indicating a source line number. Lines are numbered
162 /// beginning at 1. The compiler may emit the value 0 in cases where an
163 /// instruction cannot be attributed to any source line.
164 uint32_t Line;
165 /// An unsigned integer indicating a column number within a source line.
166 /// Columns are numbered beginning at 1. The value 0 is reserved to indicate
167 /// that a statement begins at the 'left edge' of the line.
168 uint16_t Column;
169 /// An unsigned integer indicating the identity of the source file
170 /// corresponding to a machine instruction.
171 uint16_t File;
172 /// An unsigned integer representing the DWARF path discriminator value
173 /// for this location.
174 uint32_t Discriminator;
175 /// An unsigned integer whose value encodes the applicable instruction set
176 /// architecture for the current instruction.
177 uint8_t Isa;
178 /// A boolean indicating that the current instruction is the beginning of a
179 /// statement.
180 uint8_t IsStmt : 1,
181 /// A boolean indicating that the current instruction is the
182 /// beginning of a basic block.
183 BasicBlock : 1,
184 /// A boolean indicating that the current address is that of the
185 /// first byte after the end of a sequence of target machine
186 /// instructions.
187 EndSequence : 1,
188 /// A boolean indicating that the current address is one (of possibly
189 /// many) where execution should be suspended for an entry breakpoint
190 /// of a function.
191 PrologueEnd : 1,
192 /// A boolean indicating that the current address is one (of possibly
193 /// many) where execution should be suspended for an exit breakpoint
194 /// of a function.
195 EpilogueBegin : 1;
196 };
197
198 /// Represents a series of contiguous machine instructions. Line table for
199 /// each compilation unit may consist of multiple sequences, which are not
200 /// guaranteed to be in the order of ascending instruction address.
201 struct Sequence {
202 Sequence();
203
204 /// Sequence describes instructions at address range [LowPC, HighPC)
205 /// and is described by line table rows [FirstRowIndex, LastRowIndex).
206 uint64_t LowPC;
207 uint64_t HighPC;
208 /// If relocation information is present then this is the index of the
209 /// section which contains above addresses. Otherwise this is
210 /// object::SectionedAddress::Undef value.
211 uint64_t SectionIndex;
212 unsigned FirstRowIndex;
213 unsigned LastRowIndex;
214 bool Empty;
215
216 void reset();
217
218 static bool orderByHighPC(const Sequence &LHS, const Sequence &RHS) {
219 return std::tie(LHS.SectionIndex, LHS.HighPC) <
220 std::tie(RHS.SectionIndex, RHS.HighPC);
221 }
222
223 bool isValid() const {
224 return !Empty && (LowPC < HighPC) && (FirstRowIndex < LastRowIndex);
225 }
226
227 bool containsPC(object::SectionedAddress PC) const {
228 return SectionIndex == PC.SectionIndex &&
229 (LowPC <= PC.Address && PC.Address < HighPC);
230 }
231 };
232
233 struct LineTable {
234 LineTable();
235
236 /// Represents an invalid row
237 const uint32_t UnknownRowIndex = UINT32_MAX;
238
239 void appendRow(const DWARFDebugLine::Row &R) { Rows.push_back(R); }
240
241 void appendSequence(const DWARFDebugLine::Sequence &S) {
242 Sequences.push_back(S);
243 }
244
245 /// Returns the index of the row with file/line info for a given address,
246 /// or UnknownRowIndex if there is no such row.
247 uint32_t lookupAddress(object::SectionedAddress Address) const;
248
249 bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size,
250 std::vector<uint32_t> &Result) const;
251
252 bool hasFileAtIndex(uint64_t FileIndex) const {
253 return Prologue.hasFileAtIndex(FileIndex);
254 }
255
256 Optional<uint64_t> getLastValidFileIndex() const {
257 return Prologue.getLastValidFileIndex();
258 }
259
260 /// Extracts filename by its index in filename table in prologue.
261 /// In Dwarf 4, the files are 1-indexed and the current compilation file
262 /// name is not represented in the list. In DWARF v5, the files are
263 /// 0-indexed and the primary source file has the index 0.
264 /// Returns true on success.
265 bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir,
266 DILineInfoSpecifier::FileLineInfoKind Kind,
267 std::string &Result) const {
268 return Prologue.getFileNameByIndex(FileIndex, CompDir, Kind, Result);
269 }
270
271 /// Fills the Result argument with the file and line information
272 /// corresponding to Address. Returns true on success.
273 bool getFileLineInfoForAddress(object::SectionedAddress Address,
274 const char *CompDir,
275 DILineInfoSpecifier::FileLineInfoKind Kind,
276 DILineInfo &Result) const;
277
278 void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const;
279 void clear();
280
281 /// Parse prologue and all rows.
282 Error parse(DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr,
283 const DWARFContext &Ctx, const DWARFUnit *U,
284 function_ref<void(Error)> RecoverableErrorHandler,
285 raw_ostream *OS = nullptr, bool Verbose = false);
286
287 using RowVector = std::vector<Row>;
288 using RowIter = RowVector::const_iterator;
289 using SequenceVector = std::vector<Sequence>;
290 using SequenceIter = SequenceVector::const_iterator;
291
292 struct Prologue Prologue;
293 RowVector Rows;
294 SequenceVector Sequences;
295
296 private:
297 uint32_t findRowInSeq(const DWARFDebugLine::Sequence &Seq,
298 object::SectionedAddress Address) const;
299 Optional<StringRef>
300 getSourceByIndex(uint64_t FileIndex,
301 DILineInfoSpecifier::FileLineInfoKind Kind) const;
302
303 uint32_t lookupAddressImpl(object::SectionedAddress Address) const;
304
305 bool lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size,
306 std::vector<uint32_t> &Result) const;
307 };
308
309 const LineTable *getLineTable(uint64_t Offset) const;
310 Expected<const LineTable *>
311 getOrParseLineTable(DWARFDataExtractor &DebugLineData, uint64_t Offset,
312 const DWARFContext &Ctx, const DWARFUnit *U,
313 function_ref<void(Error)> RecoverableErrorHandler);
314
315 /// Helper to allow for parsing of an entire .debug_line section in sequence.
316 class SectionParser {
317 public:
318 using LineToUnitMap = std::map<uint64_t, DWARFUnit *>;
319
320 SectionParser(DWARFDataExtractor &Data, const DWARFContext &C,
321 DWARFUnitVector::iterator_range Units);
322
323 /// Get the next line table from the section. Report any issues via the
324 /// handlers.
325 ///
326 /// \param RecoverableErrorHandler - any issues that don't prevent further
327 /// parsing of the table will be reported through this handler.
328 /// \param UnrecoverableErrorHandler - any issues that prevent further
329 /// parsing of the table will be reported through this handler.
330 /// \param OS - if not null, the parser will print information about the
331 /// table as it parses it.
332 /// \param Verbose - if true, the parser will print verbose information when
333 /// printing to the output.
334 LineTable parseNext(function_ref<void(Error)> RecoverableErrorHandler,
335 function_ref<void(Error)> UnrecoverableErrorHandler,
336 raw_ostream *OS = nullptr, bool Verbose = false);
337
338 /// Skip the current line table and go to the following line table (if
339 /// present) immediately.
340 ///
341 /// \param RecoverableErrorHandler - report any recoverable prologue
342 /// parsing issues via this handler.
343 /// \param UnrecoverableErrorHandler - report any unrecoverable prologue
344 /// parsing issues via this handler.
345 void skip(function_ref<void(Error)> RecoverableErrorHandler,
346 function_ref<void(Error)> UnrecoverableErrorHandler);
347
348 /// Indicates if the parser has parsed as much as possible.
349 ///
350 /// \note Certain problems with the line table structure might mean that
351 /// parsing stops before the end of the section is reached.
352 bool done() const { return Done; }
353
354 /// Get the offset the parser has reached.
355 uint64_t getOffset() const { return Offset; }
356
357 private:
358 DWARFUnit *prepareToParse(uint64_t Offset);
359 void moveToNextTable(uint64_t OldOffset, const Prologue &P);
360
361 LineToUnitMap LineToUnit;
362
363 DWARFDataExtractor &DebugLineData;
364 const DWARFContext &Context;
365 uint64_t Offset = 0;
366 bool Done = false;
367 };
368
369private:
370 struct ParsingState {
371 ParsingState(struct LineTable *LT, uint64_t TableOffset,
372 function_ref<void(Error)> ErrorHandler);
373
374 void resetRowAndSequence();
375 void appendRowToMatrix();
376
377 /// Advance the address by the \p OperationAdvance value. \returns the
378 /// amount advanced by.
379 uint64_t advanceAddr(uint64_t OperationAdvance, uint8_t Opcode,
380 uint64_t OpcodeOffset);
381
382 struct AddrAndAdjustedOpcode {
383 uint64_t AddrDelta;
384 uint8_t AdjustedOpcode;
385 };
386
387 /// Advance the address as required by the specified \p Opcode.
388 /// \returns the amount advanced by and the calculated adjusted opcode.
389 AddrAndAdjustedOpcode advanceAddrForOpcode(uint8_t Opcode,
390 uint64_t OpcodeOffset);
391
392 struct AddrAndLineDelta {
393 uint64_t Address;
394 int32_t Line;
395 };
396
397 /// Advance the line and address as required by the specified special \p
398 /// Opcode. \returns the address and line delta.
399 AddrAndLineDelta handleSpecialOpcode(uint8_t Opcode, uint64_t OpcodeOffset);
400
401 /// Line table we're currently parsing.
402 struct LineTable *LineTable;
403 struct Row Row;
404 struct Sequence Sequence;
405
406 private:
407 uint64_t LineTableOffset;
408
409 bool ReportAdvanceAddrProblem = true;
410 bool ReportBadLineRange = true;
411 function_ref<void(Error)> ErrorHandler;
412 };
413
414 using LineTableMapTy = std::map<uint64_t, LineTable>;
415 using LineTableIter = LineTableMapTy::iterator;
416 using LineTableConstIter = LineTableMapTy::const_iterator;
417
418 LineTableMapTy LineTableMap;
419};
420
421} // end namespace llvm
422
423#endif // LLVM_DEBUGINFO_DWARF_DWARFDEBUGLINE_H
424