1//===-- Disassembler.h ------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLDB_CORE_DISASSEMBLER_H
10#define LLDB_CORE_DISASSEMBLER_H
11
12#include "lldb/Core/Address.h"
13#include "lldb/Core/EmulateInstruction.h"
14#include "lldb/Core/FormatEntity.h"
15#include "lldb/Core/Opcode.h"
16#include "lldb/Core/PluginInterface.h"
17#include "lldb/Interpreter/OptionValue.h"
18#include "lldb/Symbol/LineEntry.h"
19#include "lldb/Target/ExecutionContext.h"
20#include "lldb/Utility/ArchSpec.h"
21#include "lldb/Utility/ConstString.h"
22#include "lldb/Utility/FileSpec.h"
23#include "lldb/lldb-defines.h"
24#include "lldb/lldb-forward.h"
25#include "lldb/lldb-private-enumerations.h"
26#include "lldb/lldb-types.h"
27
28#include "llvm/ADT/StringRef.h"
29
30#include <functional>
31#include <map>
32#include <memory>
33#include <set>
34#include <string>
35#include <vector>
36
37#include <cstddef>
38#include <cstdint>
39#include <cstdio>
40
// Forward declaration only: this header refers to llvm::SmallVectorImpl
// solely as a reference parameter type (see Instruction::ParseOperands).
namespace llvm {
template <typename T> class SmallVectorImpl;
} // namespace llvm
44
45namespace lldb_private {
// Forward declarations of types that are declared elsewhere. Where this
// header uses them, it does so only through pointers or references.
class AddressRange;
class DataExtractor;
class Debugger;
class Disassembler;
class Module;
class StackFrame;
class Stream;
class SymbolContext;
class SymbolContextList;
class Target;
struct RegisterInfo;
57
58class Instruction {
59public:
60 Instruction(const Address &address,
61 AddressClass addr_class = AddressClass::eInvalid);
62
63 virtual ~Instruction();
64
65 const Address &GetAddress() const { return m_address; }
66
67 const char *GetMnemonic(const ExecutionContext *exe_ctx,
68 bool markup = false) {
69 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
70 return markup ? m_markup_opcode_name.c_str() : m_opcode_name.c_str();
71 }
72
73 const char *GetOperands(const ExecutionContext *exe_ctx,
74 bool markup = false) {
75 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
76 return markup ? m_markup_mnemonics.c_str() : m_mnemonics.c_str();
77 }
78
79 const char *GetComment(const ExecutionContext *exe_ctx) {
80 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
81 return m_comment.c_str();
82 }
83
84 /// \return
85 /// The control flow kind of this instruction, or
86 /// eInstructionControlFlowKindUnknown if the instruction
87 /// can't be classified.
88 virtual lldb::InstructionControlFlowKind
89 GetControlFlowKind(const ExecutionContext *exe_ctx) {
90 return lldb::eInstructionControlFlowKindUnknown;
91 }
92
93 virtual void
94 CalculateMnemonicOperandsAndComment(const ExecutionContext *exe_ctx) = 0;
95
96 AddressClass GetAddressClass();
97
98 void SetAddress(const Address &addr) {
99 // Invalidate the address class to lazily discover it if we need to.
100 m_address_class = AddressClass::eInvalid;
101 m_address = addr;
102 }
103
104 /// Dump the text representation of this Instruction to a Stream
105 ///
106 /// Print the (optional) address, (optional) bytes, opcode,
107 /// operands, and instruction comments to a stream.
108 ///
109 /// \param[in] s
110 /// The Stream to add the text to.
111 ///
112 /// \param[in] show_address
113 /// Whether the address (using disassembly_addr_format_spec formatting)
114 /// should be printed.
115 ///
116 /// \param[in] show_bytes
117 /// Whether the bytes of the assembly instruction should be printed.
118 ///
119 /// \param[in] show_control_flow_kind
120 /// Whether the control flow kind of the instruction should be printed.
121 ///
122 /// \param[in] max_opcode_byte_size
123 /// The size (in bytes) of the largest instruction in the list that
124 /// we are printing (for text justification/alignment purposes)
125 /// Only needed if show_bytes is true.
126 ///
127 /// \param[in] exe_ctx
128 /// The current execution context, if available. May be used in
129 /// the assembling of the operands+comments for this instruction.
130 /// Pass NULL if not applicable.
131 ///
132 /// \param[in] sym_ctx
133 /// The SymbolContext for this instruction.
134 /// Pass NULL if not available/computed.
135 /// Only needed if show_address is true.
136 ///
137 /// \param[in] prev_sym_ctx
138 /// The SymbolContext for the previous instruction. Depending on
139 /// the disassembly address format specification, a change in
140 /// Symbol / Function may mean that a line is printed with the new
141 /// symbol/function name.
142 /// Pass NULL if unavailable, or if this is the first instruction of
143 /// the InstructionList.
144 /// Only needed if show_address is true.
145 ///
146 /// \param[in] disassembly_addr_format
147 /// The format specification for how addresses are printed.
148 /// Only needed if show_address is true.
149 ///
150 /// \param[in] max_address_text_size
151 /// The length of the longest address string at the start of the
152 /// disassembly line that will be printed (the
153 /// Debugger::FormatDisassemblerAddress() string)
154 /// so this method can properly align the instruction opcodes.
155 /// May be 0 to indicate no indentation/alignment of the opcodes.
156 virtual void Dump(Stream *s, uint32_t max_opcode_byte_size, bool show_address,
157 bool show_bytes, bool show_control_flow_kind,
158 const ExecutionContext *exe_ctx,
159 const SymbolContext *sym_ctx,
160 const SymbolContext *prev_sym_ctx,
161 const FormatEntity::Entry *disassembly_addr_format,
162 size_t max_address_text_size);
163
164 virtual bool DoesBranch() = 0;
165
166 virtual bool HasDelaySlot();
167
168 virtual bool IsLoad() = 0;
169
170 virtual bool IsAuthenticated() = 0;
171
172 bool CanSetBreakpoint ();
173
174 virtual size_t Decode(const Disassembler &disassembler,
175 const DataExtractor &data,
176 lldb::offset_t data_offset) = 0;
177
178 virtual void SetDescription(llvm::StringRef) {
179 } // May be overridden in sub-classes that have descriptions.
180
181 lldb::OptionValueSP ReadArray(FILE *in_file, Stream &out_stream,
182 OptionValue::Type data_type);
183
184 lldb::OptionValueSP ReadDictionary(FILE *in_file, Stream &out_stream);
185
186 bool DumpEmulation(const ArchSpec &arch);
187
188 virtual bool TestEmulation(Stream &stream, const char *test_file_name);
189
190 bool Emulate(const ArchSpec &arch, uint32_t evaluate_options, void *baton,
191 EmulateInstruction::ReadMemoryCallback read_mem_callback,
192 EmulateInstruction::WriteMemoryCallback write_mem_calback,
193 EmulateInstruction::ReadRegisterCallback read_reg_callback,
194 EmulateInstruction::WriteRegisterCallback write_reg_callback);
195
196 const Opcode &GetOpcode() const { return m_opcode; }
197
198 uint32_t GetData(DataExtractor &data);
199
200 struct Operand {
201 enum class Type {
202 Invalid = 0,
203 Register,
204 Immediate,
205 Dereference,
206 Sum,
207 Product
208 } m_type = Type::Invalid;
209 std::vector<Operand> m_children;
210 lldb::addr_t m_immediate = 0;
211 ConstString m_register;
212 bool m_negative = false;
213 bool m_clobbered = false;
214
215 bool IsValid() { return m_type != Type::Invalid; }
216
217 static Operand BuildRegister(ConstString &r);
218 static Operand BuildImmediate(lldb::addr_t imm, bool neg);
219 static Operand BuildImmediate(int64_t imm);
220 static Operand BuildDereference(const Operand &ref);
221 static Operand BuildSum(const Operand &lhs, const Operand &rhs);
222 static Operand BuildProduct(const Operand &lhs, const Operand &rhs);
223 };
224
225 virtual bool ParseOperands(llvm::SmallVectorImpl<Operand> &operands) {
226 return false;
227 }
228
229 virtual bool IsCall() { return false; }
230
231 static const char *GetNameForInstructionControlFlowKind(
232 lldb::InstructionControlFlowKind instruction_control_flow_kind);
233
234protected:
235 Address m_address; // The section offset address of this instruction
236 // We include an address class in the Instruction class to
237 // allow the instruction specify the
238 // AddressClass::eCodeAlternateISA (currently used for
239 // thumb), and also to specify data (AddressClass::eData).
240 // The usual value will be AddressClass::eCode, but often
241 // when disassembling memory, you might run into data.
242 // This can help us to disassemble appropriately.
243private:
244 AddressClass m_address_class; // Use GetAddressClass () accessor function!
245
246protected:
247 Opcode m_opcode; // The opcode for this instruction
248 std::string m_opcode_name;
249 std::string m_markup_opcode_name;
250 std::string m_mnemonics;
251 std::string m_markup_mnemonics;
252 std::string m_comment;
253 bool m_calculated_strings;
254
255 void
256 CalculateMnemonicOperandsAndCommentIfNeeded(const ExecutionContext *exe_ctx) {
257 if (!m_calculated_strings) {
258 m_calculated_strings = true;
259 CalculateMnemonicOperandsAndComment(exe_ctx);
260 }
261 }
262};
263
// Factory functions that each return a predicate over Instruction::Operand
// (a std::function taking a const Operand reference and returning bool).
// The implementations are not visible in this header, so the notes below
// describe only what the signatures show.
namespace OperandMatchers {
// Builds a predicate from three other predicates: base, left and right.
// NOTE(review): presumably base is applied to the operand itself and
// left/right to its two children -- confirm against the implementation.
std::function<bool(const Instruction::Operand &)>
MatchBinaryOp(std::function<bool(const Instruction::Operand &)> base,
              std::function<bool(const Instruction::Operand &)> left,
              std::function<bool(const Instruction::Operand &)> right);

// Builds a predicate from two other predicates: base and child.
// NOTE(review): presumably the single-child counterpart of MatchBinaryOp --
// confirm against the implementation.
std::function<bool(const Instruction::Operand &)>
MatchUnaryOp(std::function<bool(const Instruction::Operand &)> base,
             std::function<bool(const Instruction::Operand &)> child);

// Builds a predicate from a RegisterInfo.
std::function<bool(const Instruction::Operand &)>
MatchRegOp(const RegisterInfo &info);

// Takes reg by non-const reference.
// NOTE(review): presumably the returned predicate writes the operand's
// register name into reg, so reg must outlive the predicate -- confirm.
std::function<bool(const Instruction::Operand &)> FetchRegOp(ConstString &reg);

// Builds a predicate from an immediate value passed by value.
std::function<bool(const Instruction::Operand &)> MatchImmOp(int64_t imm);

// Takes imm by non-const reference.
// NOTE(review): presumably the returned predicate writes the operand's
// immediate into imm, so imm must outlive the predicate -- confirm.
std::function<bool(const Instruction::Operand &)> FetchImmOp(int64_t &imm);

// Builds a predicate from an operand type.
std::function<bool(const Instruction::Operand &)>
MatchOpType(Instruction::Operand::Type type);
} // namespace OperandMatchers
286
287class InstructionList {
288public:
289 InstructionList();
290 ~InstructionList();
291
292 size_t GetSize() const;
293
294 uint32_t GetMaxOpcocdeByteSize() const;
295
296 lldb::InstructionSP GetInstructionAtIndex(size_t idx) const;
297
298 /// Get the instruction at the given address.
299 ///
300 /// \return
301 /// A valid \a InstructionSP if the address could be found, or null
302 /// otherwise.
303 lldb::InstructionSP GetInstructionAtAddress(const Address &addr);
304
305 //------------------------------------------------------------------
306 /// Get the index of the next branch instruction.
307 ///
308 /// Given a list of instructions, find the next branch instruction
309 /// in the list by returning an index.
310 ///
311 /// @param[in] start
312 /// The instruction index of the first instruction to check.
313 ///
314 /// @param[in] ignore_calls
315 /// It true, then fine the first branch instruction that isn't
316 /// a function call (a branch that calls and returns to the next
317 /// instruction). If false, find the instruction index of any
318 /// branch in the list.
319 ///
320 /// @param[out] found_calls
321 /// If non-null, this will be set to true if any calls were found in
322 /// extending the range.
323 ///
324 /// @return
325 /// The instruction index of the first branch that is at or past
326 /// \a start. Returns UINT32_MAX if no matching branches are
327 /// found.
328 //------------------------------------------------------------------
329 uint32_t GetIndexOfNextBranchInstruction(uint32_t start,
330 bool ignore_calls,
331 bool *found_calls) const;
332
333 uint32_t GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr,
334 Target &target);
335
336 uint32_t GetIndexOfInstructionAtAddress(const Address &addr);
337
338 void Clear();
339
340 void Append(lldb::InstructionSP &inst_sp);
341
342 void Dump(Stream *s, bool show_address, bool show_bytes,
343 bool show_control_flow_kind, const ExecutionContext *exe_ctx);
344
345private:
346 typedef std::vector<lldb::InstructionSP> collection;
347 typedef collection::iterator iterator;
348 typedef collection::const_iterator const_iterator;
349
350 collection m_instructions;
351};
352
// An Instruction subclass that carries a description string and can have
// its opcode set directly from raw bytes via SetOpcode(). It overrides every
// pure virtual method of Instruction; only
// CalculateMnemonicOperandsAndComment() is defined here (as a no-op), the
// rest are defined out of line.
class PseudoInstruction : public Instruction {
public:
  PseudoInstruction();

  ~PseudoInstruction() override;

  bool DoesBranch() override;

  bool HasDelaySlot() override;

  bool IsLoad() override;

  bool IsAuthenticated() override;

  // Currently a no-op: the opcode name, operand and comment strings of the
  // base class are left as they are.
  void CalculateMnemonicOperandsAndComment(
      const ExecutionContext *exe_ctx) override {
    // TODO: fill this in and put opcode name into Instruction::m_opcode_name,
    // mnemonic into Instruction::m_mnemonics, and any comment into
    // Instruction::m_comment
  }

  size_t Decode(const Disassembler &disassembler, const DataExtractor &data,
                lldb::offset_t data_offset) override;

  // Takes a byte count and a pointer to the opcode bytes.
  // NOTE(review): presumably copies opcode_size bytes from opcode_data into
  // m_opcode -- confirm against the implementation.
  void SetOpcode(size_t opcode_size, void *opcode_data);

  // NOTE(review): presumably stores the text in m_description -- confirm.
  void SetDescription(llvm::StringRef description) override;

protected:
  std::string m_description;

  // Copying is disabled.
  PseudoInstruction(const PseudoInstruction &) = delete;
  const PseudoInstruction &operator=(const PseudoInstruction &) = delete;
};
387
388class Disassembler : public std::enable_shared_from_this<Disassembler>,
389 public PluginInterface {
390public:
391 enum {
392 eOptionNone = 0u,
393 eOptionShowBytes = (1u << 0),
394 eOptionRawOuput = (1u << 1),
395 eOptionMarkPCSourceLine = (1u << 2), // Mark the source line that contains
396 // the current PC (mixed mode only)
397 eOptionMarkPCAddress =
398 (1u << 3), // Mark the disassembly line the contains the PC
399 eOptionShowControlFlowKind = (1u << 4),
400 };
401
402 enum HexImmediateStyle {
403 eHexStyleC,
404 eHexStyleAsm,
405 };
406
407 // FindPlugin should be lax about the flavor string (it is too annoying to
408 // have various internal uses of the disassembler fail because the global
409 // flavor string gets set wrong. Instead, if you get a flavor string you
410 // don't understand, use the default. Folks who care to check can use the
411 // FlavorValidForArchSpec method on the disassembler they got back.
412 static lldb::DisassemblerSP
413 FindPlugin(const ArchSpec &arch, const char *flavor, const char *plugin_name);
414
415 // This version will use the value in the Target settings if flavor is NULL;
416 static lldb::DisassemblerSP FindPluginForTarget(const Target &target,
417 const ArchSpec &arch,
418 const char *flavor,
419 const char *plugin_name);
420
421 struct Limit {
422 enum { Bytes, Instructions } kind;
423 lldb::addr_t value;
424 };
425
426 static lldb::DisassemblerSP DisassembleRange(const ArchSpec &arch,
427 const char *plugin_name,
428 const char *flavor,
429 Target &target,
430 const AddressRange &disasm_range,
431 bool force_live_memory = false);
432
433 static lldb::DisassemblerSP
434 DisassembleBytes(const ArchSpec &arch, const char *plugin_name,
435 const char *flavor, const Address &start, const void *bytes,
436 size_t length, uint32_t max_num_instructions,
437 bool data_from_file);
438
439 static bool Disassemble(Debugger &debugger, const ArchSpec &arch,
440 const char *plugin_name, const char *flavor,
441 const ExecutionContext &exe_ctx, const Address &start,
442 Limit limit, bool mixed_source_and_assembly,
443 uint32_t num_mixed_context_lines, uint32_t options,
444 Stream &strm);
445
446 static bool Disassemble(Debugger &debugger, const ArchSpec &arch,
447 StackFrame &frame, Stream &strm);
448
449 // Constructors and Destructors
450 Disassembler(const ArchSpec &arch, const char *flavor);
451 ~Disassembler() override;
452
453 void PrintInstructions(Debugger &debugger, const ArchSpec &arch,
454 const ExecutionContext &exe_ctx,
455 bool mixed_source_and_assembly,
456 uint32_t num_mixed_context_lines, uint32_t options,
457 Stream &strm);
458
459 size_t ParseInstructions(Target &target, Address address, Limit limit,
460 Stream *error_strm_ptr,
461 bool force_live_memory = false);
462
463 virtual size_t DecodeInstructions(const Address &base_addr,
464 const DataExtractor &data,
465 lldb::offset_t data_offset,
466 size_t num_instructions, bool append,
467 bool data_from_file) = 0;
468
469 InstructionList &GetInstructionList();
470
471 const InstructionList &GetInstructionList() const;
472
473 const ArchSpec &GetArchitecture() const { return m_arch; }
474
475 const char *GetFlavor() const { return m_flavor.c_str(); }
476
477 virtual bool FlavorValidForArchSpec(const lldb_private::ArchSpec &arch,
478 const char *flavor) = 0;
479
480protected:
481 // SourceLine and SourceLinesToDisplay structures are only used in the mixed
482 // source and assembly display methods internal to this class.
483
484 struct SourceLine {
485 FileSpec file;
486 uint32_t line = LLDB_INVALID_LINE_NUMBER;
487 uint32_t column = 0;
488
489 SourceLine() = default;
490
491 bool operator==(const SourceLine &rhs) const {
492 return file == rhs.file && line == rhs.line && rhs.column == column;
493 }
494
495 bool operator!=(const SourceLine &rhs) const {
496 return file != rhs.file || line != rhs.line || column != rhs.column;
497 }
498
499 bool IsValid() const { return line != LLDB_INVALID_LINE_NUMBER; }
500 };
501
502 struct SourceLinesToDisplay {
503 std::vector<SourceLine> lines;
504
505 // index of the "current" source line, if we want to highlight that when
506 // displaying the source lines. (as opposed to the surrounding source
507 // lines provided to give context)
508 size_t current_source_line = -1;
509
510 // Whether to print a blank line at the end of the source lines.
511 bool print_source_context_end_eol = true;
512
513 SourceLinesToDisplay() = default;
514 };
515
516 // Get the function's declaration line number, hopefully a line number
517 // earlier than the opening curly brace at the start of the function body.
518 static SourceLine GetFunctionDeclLineEntry(const SymbolContext &sc);
519
520 // Add the provided SourceLine to the map of filenames-to-source-lines-seen.
521 static void AddLineToSourceLineTables(
522 SourceLine &line,
523 std::map<FileSpec, std::set<uint32_t>> &source_lines_seen);
524
525 // Given a source line, determine if we should print it when we're doing
526 // mixed source & assembly output. We're currently using the
527 // target.process.thread.step-avoid-regexp setting (which is used for
528 // stepping over inlined STL functions by default) to determine what source
529 // lines to avoid showing.
530 //
531 // Returns true if this source line should be elided (if the source line
532 // should not be displayed).
533 static bool
534 ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx,
535 const SymbolContext &sc, SourceLine &line);
536
537 static bool
538 ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx,
539 const SymbolContext &sc, LineEntry &line) {
540 SourceLine sl;
541 sl.file = line.file;
542 sl.line = line.line;
543 sl.column = line.column;
544 return ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, line&: sl);
545 };
546
547 // Classes that inherit from Disassembler can see and modify these
548 ArchSpec m_arch;
549 InstructionList m_instruction_list;
550 lldb::addr_t m_base_addr;
551 std::string m_flavor;
552
553private:
554 // For Disassembler only
555 Disassembler(const Disassembler &) = delete;
556 const Disassembler &operator=(const Disassembler &) = delete;
557};
558
559} // namespace lldb_private
560
561#endif // LLDB_CORE_DISASSEMBLER_H
562

// Source: lldb/include/lldb/Core/Disassembler.h