1 | //===-- Disassembler.h ------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLDB_CORE_DISASSEMBLER_H |
10 | #define LLDB_CORE_DISASSEMBLER_H |
11 | |
12 | #include "lldb/Core/Address.h" |
13 | #include "lldb/Core/EmulateInstruction.h" |
14 | #include "lldb/Core/FormatEntity.h" |
15 | #include "lldb/Core/Opcode.h" |
16 | #include "lldb/Core/PluginInterface.h" |
17 | #include "lldb/Interpreter/OptionValue.h" |
18 | #include "lldb/Symbol/LineEntry.h" |
19 | #include "lldb/Target/ExecutionContext.h" |
20 | #include "lldb/Utility/ArchSpec.h" |
21 | #include "lldb/Utility/ConstString.h" |
22 | #include "lldb/Utility/FileSpec.h" |
23 | #include "lldb/lldb-defines.h" |
24 | #include "lldb/lldb-forward.h" |
25 | #include "lldb/lldb-private-enumerations.h" |
26 | #include "lldb/lldb-types.h" |
27 | |
28 | #include "llvm/ADT/StringRef.h" |
29 | |
30 | #include <functional> |
31 | #include <map> |
32 | #include <memory> |
33 | #include <set> |
34 | #include <string> |
35 | #include <vector> |
36 | |
37 | #include <cstddef> |
38 | #include <cstdint> |
39 | #include <cstdio> |
40 | |
// Forward declaration only: this header refers to llvm::SmallVectorImpl by
// reference (see Instruction::ParseOperands), so the full definition is not
// required here.
namespace llvm {
template <typename ElementT> class SmallVectorImpl;
} // namespace llvm
44 | |
45 | namespace lldb_private { |
// Forward declarations of types that this header only uses through
// pointers or references.
class AddressRange;
class DataExtractor;
class Debugger;
class Disassembler;
class Module;
class StackFrame;
class Stream;
class SymbolContext;
class SymbolContextList;
class Target;
struct RegisterInfo;
57 | |
58 | class Instruction { |
59 | public: |
60 | Instruction(const Address &address, |
61 | AddressClass addr_class = AddressClass::eInvalid); |
62 | |
63 | virtual ~Instruction(); |
64 | |
65 | const Address &GetAddress() const { return m_address; } |
66 | |
67 | const char *GetMnemonic(const ExecutionContext *exe_ctx, |
68 | bool markup = false) { |
69 | CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); |
70 | return markup ? m_markup_opcode_name.c_str() : m_opcode_name.c_str(); |
71 | } |
72 | |
73 | const char *GetOperands(const ExecutionContext *exe_ctx, |
74 | bool markup = false) { |
75 | CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); |
76 | return markup ? m_markup_mnemonics.c_str() : m_mnemonics.c_str(); |
77 | } |
78 | |
79 | const char *(const ExecutionContext *exe_ctx) { |
80 | CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); |
81 | return m_comment.c_str(); |
82 | } |
83 | |
84 | /// \return |
85 | /// The control flow kind of this instruction, or |
86 | /// eInstructionControlFlowKindUnknown if the instruction |
87 | /// can't be classified. |
88 | virtual lldb::InstructionControlFlowKind |
89 | GetControlFlowKind(const ExecutionContext *exe_ctx) { |
90 | return lldb::eInstructionControlFlowKindUnknown; |
91 | } |
92 | |
93 | virtual void |
94 | CalculateMnemonicOperandsAndComment(const ExecutionContext *exe_ctx) = 0; |
95 | |
96 | AddressClass GetAddressClass(); |
97 | |
98 | void SetAddress(const Address &addr) { |
99 | // Invalidate the address class to lazily discover it if we need to. |
100 | m_address_class = AddressClass::eInvalid; |
101 | m_address = addr; |
102 | } |
103 | |
104 | /// Dump the text representation of this Instruction to a Stream |
105 | /// |
106 | /// Print the (optional) address, (optional) bytes, opcode, |
107 | /// operands, and instruction comments to a stream. |
108 | /// |
109 | /// \param[in] s |
110 | /// The Stream to add the text to. |
111 | /// |
112 | /// \param[in] show_address |
113 | /// Whether the address (using disassembly_addr_format_spec formatting) |
114 | /// should be printed. |
115 | /// |
116 | /// \param[in] show_bytes |
117 | /// Whether the bytes of the assembly instruction should be printed. |
118 | /// |
119 | /// \param[in] show_control_flow_kind |
120 | /// Whether the control flow kind of the instruction should be printed. |
121 | /// |
122 | /// \param[in] max_opcode_byte_size |
123 | /// The size (in bytes) of the largest instruction in the list that |
124 | /// we are printing (for text justification/alignment purposes) |
125 | /// Only needed if show_bytes is true. |
126 | /// |
127 | /// \param[in] exe_ctx |
128 | /// The current execution context, if available. May be used in |
129 | /// the assembling of the operands+comments for this instruction. |
130 | /// Pass NULL if not applicable. |
131 | /// |
132 | /// \param[in] sym_ctx |
133 | /// The SymbolContext for this instruction. |
134 | /// Pass NULL if not available/computed. |
135 | /// Only needed if show_address is true. |
136 | /// |
137 | /// \param[in] prev_sym_ctx |
138 | /// The SymbolContext for the previous instruction. Depending on |
139 | /// the disassembly address format specification, a change in |
140 | /// Symbol / Function may mean that a line is printed with the new |
141 | /// symbol/function name. |
142 | /// Pass NULL if unavailable, or if this is the first instruction of |
143 | /// the InstructionList. |
144 | /// Only needed if show_address is true. |
145 | /// |
146 | /// \param[in] disassembly_addr_format |
147 | /// The format specification for how addresses are printed. |
148 | /// Only needed if show_address is true. |
149 | /// |
150 | /// \param[in] max_address_text_size |
151 | /// The length of the longest address string at the start of the |
152 | /// disassembly line that will be printed (the |
153 | /// Debugger::FormatDisassemblerAddress() string) |
154 | /// so this method can properly align the instruction opcodes. |
155 | /// May be 0 to indicate no indentation/alignment of the opcodes. |
156 | virtual void Dump(Stream *s, uint32_t max_opcode_byte_size, bool show_address, |
157 | bool show_bytes, bool show_control_flow_kind, |
158 | const ExecutionContext *exe_ctx, |
159 | const SymbolContext *sym_ctx, |
160 | const SymbolContext *prev_sym_ctx, |
161 | const FormatEntity::Entry *disassembly_addr_format, |
162 | size_t max_address_text_size); |
163 | |
164 | virtual bool DoesBranch() = 0; |
165 | |
166 | virtual bool HasDelaySlot(); |
167 | |
168 | virtual bool IsLoad() = 0; |
169 | |
170 | virtual bool IsAuthenticated() = 0; |
171 | |
172 | bool CanSetBreakpoint (); |
173 | |
174 | virtual size_t (const Disassembler &disassembler, |
175 | const DataExtractor &data, |
176 | lldb::offset_t data_offset) = 0; |
177 | |
178 | virtual void SetDescription(llvm::StringRef) { |
179 | } // May be overridden in sub-classes that have descriptions. |
180 | |
181 | lldb::OptionValueSP ReadArray(FILE *in_file, Stream &out_stream, |
182 | OptionValue::Type data_type); |
183 | |
184 | lldb::OptionValueSP ReadDictionary(FILE *in_file, Stream &out_stream); |
185 | |
186 | bool DumpEmulation(const ArchSpec &arch); |
187 | |
188 | virtual bool TestEmulation(Stream &stream, const char *test_file_name); |
189 | |
190 | bool Emulate(const ArchSpec &arch, uint32_t evaluate_options, void *baton, |
191 | EmulateInstruction::ReadMemoryCallback read_mem_callback, |
192 | EmulateInstruction::WriteMemoryCallback write_mem_calback, |
193 | EmulateInstruction::ReadRegisterCallback read_reg_callback, |
194 | EmulateInstruction::WriteRegisterCallback write_reg_callback); |
195 | |
196 | const Opcode &GetOpcode() const { return m_opcode; } |
197 | |
198 | uint32_t (DataExtractor &data); |
199 | |
200 | struct Operand { |
201 | enum class Type { |
202 | Invalid = 0, |
203 | Register, |
204 | Immediate, |
205 | Dereference, |
206 | Sum, |
207 | Product |
208 | } m_type = Type::Invalid; |
209 | std::vector<Operand> m_children; |
210 | lldb::addr_t m_immediate = 0; |
211 | ConstString m_register; |
212 | bool m_negative = false; |
213 | bool m_clobbered = false; |
214 | |
215 | bool IsValid() { return m_type != Type::Invalid; } |
216 | |
217 | static Operand BuildRegister(ConstString &r); |
218 | static Operand BuildImmediate(lldb::addr_t imm, bool neg); |
219 | static Operand BuildImmediate(int64_t imm); |
220 | static Operand BuildDereference(const Operand &ref); |
221 | static Operand BuildSum(const Operand &lhs, const Operand &rhs); |
222 | static Operand BuildProduct(const Operand &lhs, const Operand &rhs); |
223 | }; |
224 | |
225 | virtual bool ParseOperands(llvm::SmallVectorImpl<Operand> &operands) { |
226 | return false; |
227 | } |
228 | |
229 | virtual bool IsCall() { return false; } |
230 | |
231 | static const char *GetNameForInstructionControlFlowKind( |
232 | lldb::InstructionControlFlowKind instruction_control_flow_kind); |
233 | |
234 | protected: |
235 | Address m_address; // The section offset address of this instruction |
236 | // We include an address class in the Instruction class to |
237 | // allow the instruction specify the |
238 | // AddressClass::eCodeAlternateISA (currently used for |
239 | // thumb), and also to specify data (AddressClass::eData). |
240 | // The usual value will be AddressClass::eCode, but often |
241 | // when disassembling memory, you might run into data. |
242 | // This can help us to disassemble appropriately. |
243 | private: |
244 | AddressClass m_address_class; // Use GetAddressClass () accessor function! |
245 | |
246 | protected: |
247 | Opcode m_opcode; // The opcode for this instruction |
248 | std::string m_opcode_name; |
249 | std::string m_markup_opcode_name; |
250 | std::string m_mnemonics; |
251 | std::string m_markup_mnemonics; |
252 | std::string ; |
253 | bool m_calculated_strings; |
254 | |
255 | void |
256 | CalculateMnemonicOperandsAndCommentIfNeeded(const ExecutionContext *exe_ctx) { |
257 | if (!m_calculated_strings) { |
258 | m_calculated_strings = true; |
259 | CalculateMnemonicOperandsAndComment(exe_ctx); |
260 | } |
261 | } |
262 | }; |
263 | |
264 | namespace OperandMatchers { |
265 | std::function<bool(const Instruction::Operand &)> |
266 | MatchBinaryOp(std::function<bool(const Instruction::Operand &)> base, |
267 | std::function<bool(const Instruction::Operand &)> left, |
268 | std::function<bool(const Instruction::Operand &)> right); |
269 | |
270 | std::function<bool(const Instruction::Operand &)> |
271 | MatchUnaryOp(std::function<bool(const Instruction::Operand &)> base, |
272 | std::function<bool(const Instruction::Operand &)> child); |
273 | |
274 | std::function<bool(const Instruction::Operand &)> |
275 | MatchRegOp(const RegisterInfo &info); |
276 | |
277 | std::function<bool(const Instruction::Operand &)> FetchRegOp(ConstString ®); |
278 | |
279 | std::function<bool(const Instruction::Operand &)> MatchImmOp(int64_t imm); |
280 | |
281 | std::function<bool(const Instruction::Operand &)> FetchImmOp(int64_t &imm); |
282 | |
283 | std::function<bool(const Instruction::Operand &)> |
284 | MatchOpType(Instruction::Operand::Type type); |
285 | } |
286 | |
287 | class InstructionList { |
288 | public: |
289 | InstructionList(); |
290 | ~InstructionList(); |
291 | |
292 | size_t GetSize() const; |
293 | |
294 | uint32_t GetMaxOpcocdeByteSize() const; |
295 | |
296 | lldb::InstructionSP GetInstructionAtIndex(size_t idx) const; |
297 | |
298 | /// Get the instruction at the given address. |
299 | /// |
300 | /// \return |
301 | /// A valid \a InstructionSP if the address could be found, or null |
302 | /// otherwise. |
303 | lldb::InstructionSP GetInstructionAtAddress(const Address &addr); |
304 | |
305 | //------------------------------------------------------------------ |
306 | /// Get the index of the next branch instruction. |
307 | /// |
308 | /// Given a list of instructions, find the next branch instruction |
309 | /// in the list by returning an index. |
310 | /// |
311 | /// @param[in] start |
312 | /// The instruction index of the first instruction to check. |
313 | /// |
314 | /// @param[in] ignore_calls |
315 | /// It true, then fine the first branch instruction that isn't |
316 | /// a function call (a branch that calls and returns to the next |
317 | /// instruction). If false, find the instruction index of any |
318 | /// branch in the list. |
319 | /// |
320 | /// @param[out] found_calls |
321 | /// If non-null, this will be set to true if any calls were found in |
322 | /// extending the range. |
323 | /// |
324 | /// @return |
325 | /// The instruction index of the first branch that is at or past |
326 | /// \a start. Returns UINT32_MAX if no matching branches are |
327 | /// found. |
328 | //------------------------------------------------------------------ |
329 | uint32_t GetIndexOfNextBranchInstruction(uint32_t start, |
330 | bool ignore_calls, |
331 | bool *found_calls) const; |
332 | |
333 | uint32_t GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr, |
334 | Target &target); |
335 | |
336 | uint32_t GetIndexOfInstructionAtAddress(const Address &addr); |
337 | |
338 | void Clear(); |
339 | |
340 | void Append(lldb::InstructionSP &inst_sp); |
341 | |
342 | void Dump(Stream *s, bool show_address, bool show_bytes, |
343 | bool show_control_flow_kind, const ExecutionContext *exe_ctx); |
344 | |
345 | private: |
346 | typedef std::vector<lldb::InstructionSP> collection; |
347 | typedef collection::iterator iterator; |
348 | typedef collection::const_iterator const_iterator; |
349 | |
350 | collection m_instructions; |
351 | }; |
352 | |
353 | class PseudoInstruction : public Instruction { |
354 | public: |
355 | PseudoInstruction(); |
356 | |
357 | ~PseudoInstruction() override; |
358 | |
359 | bool DoesBranch() override; |
360 | |
361 | bool HasDelaySlot() override; |
362 | |
363 | bool IsLoad() override; |
364 | |
365 | bool IsAuthenticated() override; |
366 | |
367 | void CalculateMnemonicOperandsAndComment( |
368 | const ExecutionContext *exe_ctx) override { |
369 | // TODO: fill this in and put opcode name into Instruction::m_opcode_name, |
370 | // mnemonic into Instruction::m_mnemonics, and any comment into |
371 | // Instruction::m_comment |
372 | } |
373 | |
374 | size_t (const Disassembler &disassembler, const DataExtractor &data, |
375 | lldb::offset_t data_offset) override; |
376 | |
377 | void SetOpcode(size_t opcode_size, void *opcode_data); |
378 | |
379 | void SetDescription(llvm::StringRef description) override; |
380 | |
381 | protected: |
382 | std::string m_description; |
383 | |
384 | PseudoInstruction(const PseudoInstruction &) = delete; |
385 | const PseudoInstruction &operator=(const PseudoInstruction &) = delete; |
386 | }; |
387 | |
388 | class Disassembler : public std::enable_shared_from_this<Disassembler>, |
389 | public PluginInterface { |
390 | public: |
391 | enum { |
392 | eOptionNone = 0u, |
393 | eOptionShowBytes = (1u << 0), |
394 | eOptionRawOuput = (1u << 1), |
395 | eOptionMarkPCSourceLine = (1u << 2), // Mark the source line that contains |
396 | // the current PC (mixed mode only) |
397 | eOptionMarkPCAddress = |
398 | (1u << 3), // Mark the disassembly line the contains the PC |
399 | eOptionShowControlFlowKind = (1u << 4), |
400 | }; |
401 | |
402 | enum HexImmediateStyle { |
403 | eHexStyleC, |
404 | eHexStyleAsm, |
405 | }; |
406 | |
407 | // FindPlugin should be lax about the flavor string (it is too annoying to |
408 | // have various internal uses of the disassembler fail because the global |
409 | // flavor string gets set wrong. Instead, if you get a flavor string you |
410 | // don't understand, use the default. Folks who care to check can use the |
411 | // FlavorValidForArchSpec method on the disassembler they got back. |
412 | static lldb::DisassemblerSP |
413 | FindPlugin(const ArchSpec &arch, const char *flavor, const char *plugin_name); |
414 | |
415 | // This version will use the value in the Target settings if flavor is NULL; |
416 | static lldb::DisassemblerSP FindPluginForTarget(const Target &target, |
417 | const ArchSpec &arch, |
418 | const char *flavor, |
419 | const char *plugin_name); |
420 | |
421 | struct Limit { |
422 | enum { Bytes, Instructions } kind; |
423 | lldb::addr_t value; |
424 | }; |
425 | |
426 | static lldb::DisassemblerSP DisassembleRange(const ArchSpec &arch, |
427 | const char *plugin_name, |
428 | const char *flavor, |
429 | Target &target, |
430 | const AddressRange &disasm_range, |
431 | bool force_live_memory = false); |
432 | |
433 | static lldb::DisassemblerSP |
434 | DisassembleBytes(const ArchSpec &arch, const char *plugin_name, |
435 | const char *flavor, const Address &start, const void *bytes, |
436 | size_t length, uint32_t max_num_instructions, |
437 | bool data_from_file); |
438 | |
439 | static bool Disassemble(Debugger &debugger, const ArchSpec &arch, |
440 | const char *plugin_name, const char *flavor, |
441 | const ExecutionContext &exe_ctx, const Address &start, |
442 | Limit limit, bool mixed_source_and_assembly, |
443 | uint32_t num_mixed_context_lines, uint32_t options, |
444 | Stream &strm); |
445 | |
446 | static bool Disassemble(Debugger &debugger, const ArchSpec &arch, |
447 | StackFrame &frame, Stream &strm); |
448 | |
449 | // Constructors and Destructors |
450 | Disassembler(const ArchSpec &arch, const char *flavor); |
451 | ~Disassembler() override; |
452 | |
453 | void PrintInstructions(Debugger &debugger, const ArchSpec &arch, |
454 | const ExecutionContext &exe_ctx, |
455 | bool mixed_source_and_assembly, |
456 | uint32_t num_mixed_context_lines, uint32_t options, |
457 | Stream &strm); |
458 | |
459 | size_t ParseInstructions(Target &target, Address address, Limit limit, |
460 | Stream *error_strm_ptr, |
461 | bool force_live_memory = false); |
462 | |
463 | virtual size_t (const Address &base_addr, |
464 | const DataExtractor &data, |
465 | lldb::offset_t data_offset, |
466 | size_t num_instructions, bool append, |
467 | bool data_from_file) = 0; |
468 | |
469 | InstructionList &GetInstructionList(); |
470 | |
471 | const InstructionList &GetInstructionList() const; |
472 | |
473 | const ArchSpec &GetArchitecture() const { return m_arch; } |
474 | |
475 | const char *GetFlavor() const { return m_flavor.c_str(); } |
476 | |
477 | virtual bool FlavorValidForArchSpec(const lldb_private::ArchSpec &arch, |
478 | const char *flavor) = 0; |
479 | |
480 | protected: |
481 | // SourceLine and SourceLinesToDisplay structures are only used in the mixed |
482 | // source and assembly display methods internal to this class. |
483 | |
484 | struct SourceLine { |
485 | FileSpec file; |
486 | uint32_t line = LLDB_INVALID_LINE_NUMBER; |
487 | uint32_t column = 0; |
488 | |
489 | SourceLine() = default; |
490 | |
491 | bool operator==(const SourceLine &rhs) const { |
492 | return file == rhs.file && line == rhs.line && rhs.column == column; |
493 | } |
494 | |
495 | bool operator!=(const SourceLine &rhs) const { |
496 | return file != rhs.file || line != rhs.line || column != rhs.column; |
497 | } |
498 | |
499 | bool IsValid() const { return line != LLDB_INVALID_LINE_NUMBER; } |
500 | }; |
501 | |
502 | struct SourceLinesToDisplay { |
503 | std::vector<SourceLine> lines; |
504 | |
505 | // index of the "current" source line, if we want to highlight that when |
506 | // displaying the source lines. (as opposed to the surrounding source |
507 | // lines provided to give context) |
508 | size_t current_source_line = -1; |
509 | |
510 | // Whether to print a blank line at the end of the source lines. |
511 | bool print_source_context_end_eol = true; |
512 | |
513 | SourceLinesToDisplay() = default; |
514 | }; |
515 | |
516 | // Get the function's declaration line number, hopefully a line number |
517 | // earlier than the opening curly brace at the start of the function body. |
518 | static SourceLine GetFunctionDeclLineEntry(const SymbolContext &sc); |
519 | |
520 | // Add the provided SourceLine to the map of filenames-to-source-lines-seen. |
521 | static void AddLineToSourceLineTables( |
522 | SourceLine &line, |
523 | std::map<FileSpec, std::set<uint32_t>> &source_lines_seen); |
524 | |
525 | // Given a source line, determine if we should print it when we're doing |
526 | // mixed source & assembly output. We're currently using the |
527 | // target.process.thread.step-avoid-regexp setting (which is used for |
528 | // stepping over inlined STL functions by default) to determine what source |
529 | // lines to avoid showing. |
530 | // |
531 | // Returns true if this source line should be elided (if the source line |
532 | // should not be displayed). |
533 | static bool |
534 | ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx, |
535 | const SymbolContext &sc, SourceLine &line); |
536 | |
537 | static bool |
538 | ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx, |
539 | const SymbolContext &sc, LineEntry &line) { |
540 | SourceLine sl; |
541 | sl.file = line.file; |
542 | sl.line = line.line; |
543 | sl.column = line.column; |
544 | return ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, line&: sl); |
545 | }; |
546 | |
547 | // Classes that inherit from Disassembler can see and modify these |
548 | ArchSpec m_arch; |
549 | InstructionList m_instruction_list; |
550 | lldb::addr_t m_base_addr; |
551 | std::string m_flavor; |
552 | |
553 | private: |
554 | // For Disassembler only |
555 | Disassembler(const Disassembler &) = delete; |
556 | const Disassembler &operator=(const Disassembler &) = delete; |
557 | }; |
558 | |
559 | } // namespace lldb_private |
560 | |
561 | #endif // LLDB_CORE_DISASSEMBLER_H |
562 | |