1 | //===-- Disassembler.h ------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLDB_CORE_DISASSEMBLER_H |
10 | #define LLDB_CORE_DISASSEMBLER_H |
11 | |
12 | #include "lldb/Core/Address.h" |
13 | #include "lldb/Core/EmulateInstruction.h" |
14 | #include "lldb/Core/FormatEntity.h" |
15 | #include "lldb/Core/Opcode.h" |
16 | #include "lldb/Core/PluginInterface.h" |
17 | #include "lldb/Interpreter/OptionValue.h" |
18 | #include "lldb/Symbol/LineEntry.h" |
19 | #include "lldb/Target/ExecutionContext.h" |
20 | #include "lldb/Utility/ArchSpec.h" |
21 | #include "lldb/Utility/ConstString.h" |
22 | #include "lldb/Utility/FileSpec.h" |
23 | #include "lldb/lldb-defines.h" |
24 | #include "lldb/lldb-forward.h" |
25 | #include "lldb/lldb-private-enumerations.h" |
26 | #include "lldb/lldb-types.h" |
27 | |
28 | #include "llvm/ADT/StringRef.h" |
29 | |
30 | #include <functional> |
31 | #include <map> |
32 | #include <memory> |
33 | #include <set> |
34 | #include <string> |
35 | #include <vector> |
36 | |
37 | #include <stddef.h> |
38 | #include <stdint.h> |
39 | #include <stdio.h> |
40 | |
// Forward declaration only: this header refers to
// llvm::SmallVectorImpl<T> by reference (see Instruction::ParseOperands), so
// the full definition is not required here.
namespace llvm {
template <typename T> class SmallVectorImpl;
}
44 | |
45 | namespace lldb_private { |
// Forward declarations for types that this header only uses through
// pointers or references.
class AddressRange;
class DataExtractor;
class Debugger;
class Disassembler;
class Module;
class StackFrame;
class Stream;
class SymbolContext;
class SymbolContextList;
class Target;
struct RegisterInfo;
57 | |
58 | class Instruction { |
59 | public: |
60 | Instruction(const Address &address, |
61 | AddressClass addr_class = AddressClass::eInvalid); |
62 | |
63 | virtual ~Instruction(); |
64 | |
65 | const Address &GetAddress() const { return m_address; } |
66 | |
67 | const char *GetMnemonic(const ExecutionContext *exe_ctx) { |
68 | CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); |
69 | return m_opcode_name.c_str(); |
70 | } |
71 | |
72 | const char *GetOperands(const ExecutionContext *exe_ctx) { |
73 | CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); |
74 | return m_mnemonics.c_str(); |
75 | } |
76 | |
77 | const char *(const ExecutionContext *exe_ctx) { |
78 | CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); |
79 | return m_comment.c_str(); |
80 | } |
81 | |
82 | virtual void |
83 | CalculateMnemonicOperandsAndComment(const ExecutionContext *exe_ctx) = 0; |
84 | |
85 | AddressClass GetAddressClass(); |
86 | |
87 | void SetAddress(const Address &addr) { |
88 | // Invalidate the address class to lazily discover it if we need to. |
89 | m_address_class = AddressClass::eInvalid; |
90 | m_address = addr; |
91 | } |
92 | |
93 | /// Dump the text representation of this Instruction to a Stream |
94 | /// |
95 | /// Print the (optional) address, (optional) bytes, opcode, |
96 | /// operands, and instruction comments to a stream. |
97 | /// |
98 | /// \param[in] s |
99 | /// The Stream to add the text to. |
100 | /// |
101 | /// \param[in] show_address |
102 | /// Whether the address (using disassembly_addr_format_spec formatting) |
103 | /// should be printed. |
104 | /// |
105 | /// \param[in] show_bytes |
106 | /// Whether the bytes of the assembly instruction should be printed. |
107 | /// |
108 | /// \param[in] max_opcode_byte_size |
109 | /// The size (in bytes) of the largest instruction in the list that |
110 | /// we are printing (for text justification/alignment purposes) |
111 | /// Only needed if show_bytes is true. |
112 | /// |
113 | /// \param[in] exe_ctx |
114 | /// The current execution context, if available. May be used in |
115 | /// the assembling of the operands+comments for this instruction. |
116 | /// Pass NULL if not applicable. |
117 | /// |
118 | /// \param[in] sym_ctx |
119 | /// The SymbolContext for this instruction. |
120 | /// Pass NULL if not available/computed. |
121 | /// Only needed if show_address is true. |
122 | /// |
123 | /// \param[in] prev_sym_ctx |
124 | /// The SymbolContext for the previous instruction. Depending on |
125 | /// the disassembly address format specification, a change in |
126 | /// Symbol / Function may mean that a line is printed with the new |
127 | /// symbol/function name. |
128 | /// Pass NULL if unavailable, or if this is the first instruction of |
129 | /// the InstructionList. |
130 | /// Only needed if show_address is true. |
131 | /// |
132 | /// \param[in] disassembly_addr_format |
133 | /// The format specification for how addresses are printed. |
134 | /// Only needed if show_address is true. |
135 | /// |
136 | /// \param[in] max_address_text_size |
137 | /// The length of the longest address string at the start of the |
138 | /// disassembly line that will be printed (the |
139 | /// Debugger::FormatDisassemblerAddress() string) |
140 | /// so this method can properly align the instruction opcodes. |
141 | /// May be 0 to indicate no indentation/alignment of the opcodes. |
142 | virtual void Dump(Stream *s, uint32_t max_opcode_byte_size, bool show_address, |
143 | bool show_bytes, const ExecutionContext *exe_ctx, |
144 | const SymbolContext *sym_ctx, |
145 | const SymbolContext *prev_sym_ctx, |
146 | const FormatEntity::Entry *disassembly_addr_format, |
147 | size_t max_address_text_size); |
148 | |
149 | virtual bool DoesBranch() = 0; |
150 | |
151 | virtual bool HasDelaySlot(); |
152 | |
153 | bool CanSetBreakpoint (); |
154 | |
155 | virtual size_t (const Disassembler &disassembler, |
156 | const DataExtractor &data, |
157 | lldb::offset_t data_offset) = 0; |
158 | |
159 | virtual void SetDescription(llvm::StringRef) { |
160 | } // May be overridden in sub-classes that have descriptions. |
161 | |
162 | lldb::OptionValueSP ReadArray(FILE *in_file, Stream *out_stream, |
163 | OptionValue::Type data_type); |
164 | |
165 | lldb::OptionValueSP ReadDictionary(FILE *in_file, Stream *out_stream); |
166 | |
167 | bool DumpEmulation(const ArchSpec &arch); |
168 | |
169 | virtual bool TestEmulation(Stream *stream, const char *test_file_name); |
170 | |
171 | bool Emulate(const ArchSpec &arch, uint32_t evaluate_options, void *baton, |
172 | EmulateInstruction::ReadMemoryCallback read_mem_callback, |
173 | EmulateInstruction::WriteMemoryCallback write_mem_calback, |
174 | EmulateInstruction::ReadRegisterCallback read_reg_callback, |
175 | EmulateInstruction::WriteRegisterCallback write_reg_callback); |
176 | |
177 | const Opcode &GetOpcode() const { return m_opcode; } |
178 | |
179 | uint32_t (DataExtractor &data); |
180 | |
181 | struct Operand { |
182 | enum class Type { |
183 | Invalid = 0, |
184 | Register, |
185 | Immediate, |
186 | Dereference, |
187 | Sum, |
188 | Product |
189 | } m_type = Type::Invalid; |
190 | std::vector<Operand> m_children; |
191 | lldb::addr_t m_immediate = 0; |
192 | ConstString m_register; |
193 | bool m_negative = false; |
194 | bool m_clobbered = false; |
195 | |
196 | bool IsValid() { return m_type != Type::Invalid; } |
197 | |
198 | static Operand BuildRegister(ConstString &r); |
199 | static Operand BuildImmediate(lldb::addr_t imm, bool neg); |
200 | static Operand BuildImmediate(int64_t imm); |
201 | static Operand BuildDereference(const Operand &ref); |
202 | static Operand BuildSum(const Operand &lhs, const Operand &rhs); |
203 | static Operand BuildProduct(const Operand &lhs, const Operand &rhs); |
204 | }; |
205 | |
206 | virtual bool ParseOperands(llvm::SmallVectorImpl<Operand> &operands) { |
207 | return false; |
208 | } |
209 | |
210 | virtual bool IsCall() { return false; } |
211 | |
212 | protected: |
213 | Address m_address; // The section offset address of this instruction |
214 | // We include an address class in the Instruction class to |
215 | // allow the instruction specify the |
216 | // AddressClass::eCodeAlternateISA (currently used for |
217 | // thumb), and also to specify data (AddressClass::eData). |
218 | // The usual value will be AddressClass::eCode, but often |
219 | // when disassembling memory, you might run into data. |
220 | // This can help us to disassemble appropriately. |
221 | private: |
222 | AddressClass m_address_class; // Use GetAddressClass () accessor function! |
223 | |
224 | protected: |
225 | Opcode m_opcode; // The opcode for this instruction |
226 | std::string m_opcode_name; |
227 | std::string m_mnemonics; |
228 | std::string ; |
229 | bool m_calculated_strings; |
230 | |
231 | void |
232 | CalculateMnemonicOperandsAndCommentIfNeeded(const ExecutionContext *exe_ctx) { |
233 | if (!m_calculated_strings) { |
234 | m_calculated_strings = true; |
235 | CalculateMnemonicOperandsAndComment(exe_ctx); |
236 | } |
237 | } |
238 | }; |
239 | |
240 | namespace OperandMatchers { |
241 | std::function<bool(const Instruction::Operand &)> |
242 | MatchBinaryOp(std::function<bool(const Instruction::Operand &)> base, |
243 | std::function<bool(const Instruction::Operand &)> left, |
244 | std::function<bool(const Instruction::Operand &)> right); |
245 | |
246 | std::function<bool(const Instruction::Operand &)> |
247 | MatchUnaryOp(std::function<bool(const Instruction::Operand &)> base, |
248 | std::function<bool(const Instruction::Operand &)> child); |
249 | |
250 | std::function<bool(const Instruction::Operand &)> |
251 | MatchRegOp(const RegisterInfo &info); |
252 | |
253 | std::function<bool(const Instruction::Operand &)> FetchRegOp(ConstString ®); |
254 | |
255 | std::function<bool(const Instruction::Operand &)> MatchImmOp(int64_t imm); |
256 | |
257 | std::function<bool(const Instruction::Operand &)> FetchImmOp(int64_t &imm); |
258 | |
259 | std::function<bool(const Instruction::Operand &)> |
260 | MatchOpType(Instruction::Operand::Type type); |
261 | } |
262 | |
263 | class InstructionList { |
264 | public: |
265 | InstructionList(); |
266 | ~InstructionList(); |
267 | |
268 | size_t GetSize() const; |
269 | |
270 | uint32_t GetMaxOpcocdeByteSize() const; |
271 | |
272 | lldb::InstructionSP GetInstructionAtIndex(size_t idx) const; |
273 | |
274 | /// Get the instruction at the given address. |
275 | /// |
276 | /// \return |
277 | /// A valid \a InstructionSP if the address could be found, or null |
278 | /// otherwise. |
279 | lldb::InstructionSP GetInstructionAtAddress(const Address &addr); |
280 | |
281 | //------------------------------------------------------------------ |
282 | /// Get the index of the next branch instruction. |
283 | /// |
284 | /// Given a list of instructions, find the next branch instruction |
285 | /// in the list by returning an index. |
286 | /// |
287 | /// @param[in] start |
288 | /// The instruction index of the first instruction to check. |
289 | /// |
290 | /// @param[in] ignore_calls |
291 | /// It true, then fine the first branch instruction that isn't |
292 | /// a function call (a branch that calls and returns to the next |
293 | /// instruction). If false, find the instruction index of any |
294 | /// branch in the list. |
295 | /// |
296 | /// @param[out] found_calls |
297 | /// If non-null, this will be set to true if any calls were found in |
298 | /// extending the range. |
299 | /// |
300 | /// @return |
301 | /// The instruction index of the first branch that is at or past |
302 | /// \a start. Returns UINT32_MAX if no matching branches are |
303 | /// found. |
304 | //------------------------------------------------------------------ |
305 | uint32_t GetIndexOfNextBranchInstruction(uint32_t start, |
306 | bool ignore_calls, |
307 | bool *found_calls) const; |
308 | |
309 | uint32_t GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr, |
310 | Target &target); |
311 | |
312 | uint32_t GetIndexOfInstructionAtAddress(const Address &addr); |
313 | |
314 | void Clear(); |
315 | |
316 | void Append(lldb::InstructionSP &inst_sp); |
317 | |
318 | void Dump(Stream *s, bool show_address, bool show_bytes, |
319 | const ExecutionContext *exe_ctx); |
320 | |
321 | private: |
322 | typedef std::vector<lldb::InstructionSP> collection; |
323 | typedef collection::iterator iterator; |
324 | typedef collection::const_iterator const_iterator; |
325 | |
326 | collection m_instructions; |
327 | }; |
328 | |
329 | class PseudoInstruction : public Instruction { |
330 | public: |
331 | PseudoInstruction(); |
332 | |
333 | ~PseudoInstruction() override; |
334 | |
335 | bool DoesBranch() override; |
336 | |
337 | bool HasDelaySlot() override; |
338 | |
339 | void CalculateMnemonicOperandsAndComment( |
340 | const ExecutionContext *exe_ctx) override { |
341 | // TODO: fill this in and put opcode name into Instruction::m_opcode_name, |
342 | // mnemonic into Instruction::m_mnemonics, and any comment into |
343 | // Instruction::m_comment |
344 | } |
345 | |
346 | size_t (const Disassembler &disassembler, const DataExtractor &data, |
347 | lldb::offset_t data_offset) override; |
348 | |
349 | void SetOpcode(size_t opcode_size, void *opcode_data); |
350 | |
351 | void SetDescription(llvm::StringRef description) override; |
352 | |
353 | protected: |
354 | std::string m_description; |
355 | |
356 | PseudoInstruction(const PseudoInstruction &) = delete; |
357 | const PseudoInstruction &operator=(const PseudoInstruction &) = delete; |
358 | }; |
359 | |
360 | class Disassembler : public std::enable_shared_from_this<Disassembler>, |
361 | public PluginInterface { |
362 | public: |
363 | enum { |
364 | eOptionNone = 0u, |
365 | eOptionShowBytes = (1u << 0), |
366 | eOptionRawOuput = (1u << 1), |
367 | eOptionMarkPCSourceLine = (1u << 2), // Mark the source line that contains |
368 | // the current PC (mixed mode only) |
369 | eOptionMarkPCAddress = |
370 | (1u << 3) // Mark the disassembly line the contains the PC |
371 | }; |
372 | |
373 | enum HexImmediateStyle { |
374 | eHexStyleC, |
375 | eHexStyleAsm, |
376 | }; |
377 | |
378 | // FindPlugin should be lax about the flavor string (it is too annoying to |
379 | // have various internal uses of the disassembler fail because the global |
380 | // flavor string gets set wrong. Instead, if you get a flavor string you |
381 | // don't understand, use the default. Folks who care to check can use the |
382 | // FlavorValidForArchSpec method on the disassembler they got back. |
383 | static lldb::DisassemblerSP |
384 | FindPlugin(const ArchSpec &arch, const char *flavor, const char *plugin_name); |
385 | |
386 | // This version will use the value in the Target settings if flavor is NULL; |
387 | static lldb::DisassemblerSP FindPluginForTarget(const Target &target, |
388 | const ArchSpec &arch, |
389 | const char *flavor, |
390 | const char *plugin_name); |
391 | |
392 | struct Limit { |
393 | enum { Bytes, Instructions } kind; |
394 | lldb::addr_t value; |
395 | }; |
396 | |
397 | static lldb::DisassemblerSP DisassembleRange(const ArchSpec &arch, |
398 | const char *plugin_name, |
399 | const char *flavor, |
400 | Target &target, |
401 | const AddressRange &disasm_range, |
402 | bool force_live_memory = false); |
403 | |
404 | static lldb::DisassemblerSP |
405 | DisassembleBytes(const ArchSpec &arch, const char *plugin_name, |
406 | const char *flavor, const Address &start, const void *bytes, |
407 | size_t length, uint32_t max_num_instructions, |
408 | bool data_from_file); |
409 | |
410 | static bool Disassemble(Debugger &debugger, const ArchSpec &arch, |
411 | const char *plugin_name, const char *flavor, |
412 | const ExecutionContext &exe_ctx, const Address &start, |
413 | Limit limit, bool mixed_source_and_assembly, |
414 | uint32_t num_mixed_context_lines, uint32_t options, |
415 | Stream &strm); |
416 | |
417 | static bool Disassemble(Debugger &debugger, const ArchSpec &arch, |
418 | StackFrame &frame, Stream &strm); |
419 | |
420 | // Constructors and Destructors |
421 | Disassembler(const ArchSpec &arch, const char *flavor); |
422 | ~Disassembler() override; |
423 | |
424 | void PrintInstructions(Debugger &debugger, const ArchSpec &arch, |
425 | const ExecutionContext &exe_ctx, |
426 | bool mixed_source_and_assembly, |
427 | uint32_t num_mixed_context_lines, uint32_t options, |
428 | Stream &strm); |
429 | |
430 | size_t ParseInstructions(Target &target, Address address, Limit limit, |
431 | Stream *error_strm_ptr, |
432 | bool force_live_memory = false); |
433 | |
434 | virtual size_t (const Address &base_addr, |
435 | const DataExtractor &data, |
436 | lldb::offset_t data_offset, |
437 | size_t num_instructions, bool append, |
438 | bool data_from_file) = 0; |
439 | |
440 | InstructionList &GetInstructionList(); |
441 | |
442 | const InstructionList &GetInstructionList() const; |
443 | |
444 | const ArchSpec &GetArchitecture() const { return m_arch; } |
445 | |
446 | const char *GetFlavor() const { return m_flavor.c_str(); } |
447 | |
448 | virtual bool FlavorValidForArchSpec(const lldb_private::ArchSpec &arch, |
449 | const char *flavor) = 0; |
450 | |
451 | protected: |
452 | // SourceLine and SourceLinesToDisplay structures are only used in the mixed |
453 | // source and assembly display methods internal to this class. |
454 | |
455 | struct SourceLine { |
456 | FileSpec file; |
457 | uint32_t line; |
458 | uint32_t column; |
459 | |
460 | SourceLine() : file(), line(LLDB_INVALID_LINE_NUMBER), column(0) {} |
461 | |
462 | bool operator==(const SourceLine &rhs) const { |
463 | return file == rhs.file && line == rhs.line && rhs.column == column; |
464 | } |
465 | |
466 | bool operator!=(const SourceLine &rhs) const { |
467 | return file != rhs.file || line != rhs.line || column != rhs.column; |
468 | } |
469 | |
470 | bool IsValid() const { return line != LLDB_INVALID_LINE_NUMBER; } |
471 | }; |
472 | |
473 | struct SourceLinesToDisplay { |
474 | std::vector<SourceLine> lines; |
475 | |
476 | // index of the "current" source line, if we want to highlight that when |
477 | // displaying the source lines. (as opposed to the surrounding source |
478 | // lines provided to give context) |
479 | size_t current_source_line; |
480 | |
481 | // Whether to print a blank line at the end of the source lines. |
482 | bool print_source_context_end_eol; |
483 | |
484 | SourceLinesToDisplay() |
485 | : lines(), current_source_line(-1), print_source_context_end_eol(true) { |
486 | } |
487 | }; |
488 | |
489 | // Get the function's declaration line number, hopefully a line number |
490 | // earlier than the opening curly brace at the start of the function body. |
491 | static SourceLine GetFunctionDeclLineEntry(const SymbolContext &sc); |
492 | |
493 | // Add the provided SourceLine to the map of filenames-to-source-lines-seen. |
494 | static void AddLineToSourceLineTables( |
495 | SourceLine &line, |
496 | std::map<FileSpec, std::set<uint32_t>> &source_lines_seen); |
497 | |
498 | // Given a source line, determine if we should print it when we're doing |
499 | // mixed source & assembly output. We're currently using the |
500 | // target.process.thread.step-avoid-regexp setting (which is used for |
501 | // stepping over inlined STL functions by default) to determine what source |
502 | // lines to avoid showing. |
503 | // |
504 | // Returns true if this source line should be elided (if the source line |
505 | // should not be displayed). |
506 | static bool |
507 | ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx, |
508 | const SymbolContext &sc, SourceLine &line); |
509 | |
510 | static bool |
511 | ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx, |
512 | const SymbolContext &sc, LineEntry &line) { |
513 | SourceLine sl; |
514 | sl.file = line.file; |
515 | sl.line = line.line; |
516 | sl.column = line.column; |
517 | return ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, sl); |
518 | }; |
519 | |
520 | // Classes that inherit from Disassembler can see and modify these |
521 | ArchSpec m_arch; |
522 | InstructionList m_instruction_list; |
523 | lldb::addr_t m_base_addr; |
524 | std::string m_flavor; |
525 | |
526 | private: |
527 | // For Disassembler only |
528 | Disassembler(const Disassembler &) = delete; |
529 | const Disassembler &operator=(const Disassembler &) = delete; |
530 | }; |
531 | |
532 | } // namespace lldb_private |
533 | |
534 | #endif // LLDB_CORE_DISASSEMBLER_H |
535 | |