1//===-- Disassembler.h ------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLDB_CORE_DISASSEMBLER_H
10#define LLDB_CORE_DISASSEMBLER_H
11
12#include "lldb/Core/Address.h"
13#include "lldb/Core/EmulateInstruction.h"
14#include "lldb/Core/FormatEntity.h"
15#include "lldb/Core/Opcode.h"
16#include "lldb/Core/PluginInterface.h"
17#include "lldb/Interpreter/OptionValue.h"
18#include "lldb/Symbol/LineEntry.h"
19#include "lldb/Target/ExecutionContext.h"
20#include "lldb/Utility/ArchSpec.h"
21#include "lldb/Utility/ConstString.h"
22#include "lldb/Utility/FileSpec.h"
23#include "lldb/lldb-defines.h"
24#include "lldb/lldb-forward.h"
25#include "lldb/lldb-private-enumerations.h"
26#include "lldb/lldb-types.h"
27
28#include "llvm/ADT/StringRef.h"
29
30#include <functional>
31#include <map>
32#include <memory>
33#include <set>
34#include <string>
35#include <vector>
36
37#include <stddef.h>
38#include <stdint.h>
39#include <stdio.h>
40
// Forward declaration only: this header names llvm::SmallVectorImpl solely as
// a reference parameter (Instruction::ParseOperands), so the full definition
// is not required here.
namespace llvm {
template <typename T> class SmallVectorImpl;
}
44
45namespace lldb_private {
// Forward declarations. They let the declarations below name these types in
// pointer and reference parameters before (or without) seeing their full
// definitions.
class AddressRange;
class DataExtractor;
class Debugger;
class Disassembler;
class Module;
class StackFrame;
class Stream;
class SymbolContext;
class SymbolContextList;
class Target;
struct RegisterInfo;
57
58class Instruction {
59public:
60 Instruction(const Address &address,
61 AddressClass addr_class = AddressClass::eInvalid);
62
63 virtual ~Instruction();
64
65 const Address &GetAddress() const { return m_address; }
66
67 const char *GetMnemonic(const ExecutionContext *exe_ctx) {
68 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
69 return m_opcode_name.c_str();
70 }
71
72 const char *GetOperands(const ExecutionContext *exe_ctx) {
73 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
74 return m_mnemonics.c_str();
75 }
76
77 const char *GetComment(const ExecutionContext *exe_ctx) {
78 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
79 return m_comment.c_str();
80 }
81
82 virtual void
83 CalculateMnemonicOperandsAndComment(const ExecutionContext *exe_ctx) = 0;
84
85 AddressClass GetAddressClass();
86
87 void SetAddress(const Address &addr) {
88 // Invalidate the address class to lazily discover it if we need to.
89 m_address_class = AddressClass::eInvalid;
90 m_address = addr;
91 }
92
93 /// Dump the text representation of this Instruction to a Stream
94 ///
95 /// Print the (optional) address, (optional) bytes, opcode,
96 /// operands, and instruction comments to a stream.
97 ///
98 /// \param[in] s
99 /// The Stream to add the text to.
100 ///
101 /// \param[in] show_address
102 /// Whether the address (using disassembly_addr_format_spec formatting)
103 /// should be printed.
104 ///
105 /// \param[in] show_bytes
106 /// Whether the bytes of the assembly instruction should be printed.
107 ///
108 /// \param[in] max_opcode_byte_size
109 /// The size (in bytes) of the largest instruction in the list that
110 /// we are printing (for text justification/alignment purposes)
111 /// Only needed if show_bytes is true.
112 ///
113 /// \param[in] exe_ctx
114 /// The current execution context, if available. May be used in
115 /// the assembling of the operands+comments for this instruction.
116 /// Pass NULL if not applicable.
117 ///
118 /// \param[in] sym_ctx
119 /// The SymbolContext for this instruction.
120 /// Pass NULL if not available/computed.
121 /// Only needed if show_address is true.
122 ///
123 /// \param[in] prev_sym_ctx
124 /// The SymbolContext for the previous instruction. Depending on
125 /// the disassembly address format specification, a change in
126 /// Symbol / Function may mean that a line is printed with the new
127 /// symbol/function name.
128 /// Pass NULL if unavailable, or if this is the first instruction of
129 /// the InstructionList.
130 /// Only needed if show_address is true.
131 ///
132 /// \param[in] disassembly_addr_format
133 /// The format specification for how addresses are printed.
134 /// Only needed if show_address is true.
135 ///
136 /// \param[in] max_address_text_size
137 /// The length of the longest address string at the start of the
138 /// disassembly line that will be printed (the
139 /// Debugger::FormatDisassemblerAddress() string)
140 /// so this method can properly align the instruction opcodes.
141 /// May be 0 to indicate no indentation/alignment of the opcodes.
142 virtual void Dump(Stream *s, uint32_t max_opcode_byte_size, bool show_address,
143 bool show_bytes, const ExecutionContext *exe_ctx,
144 const SymbolContext *sym_ctx,
145 const SymbolContext *prev_sym_ctx,
146 const FormatEntity::Entry *disassembly_addr_format,
147 size_t max_address_text_size);
148
149 virtual bool DoesBranch() = 0;
150
151 virtual bool HasDelaySlot();
152
153 bool CanSetBreakpoint ();
154
155 virtual size_t Decode(const Disassembler &disassembler,
156 const DataExtractor &data,
157 lldb::offset_t data_offset) = 0;
158
159 virtual void SetDescription(llvm::StringRef) {
160 } // May be overridden in sub-classes that have descriptions.
161
162 lldb::OptionValueSP ReadArray(FILE *in_file, Stream *out_stream,
163 OptionValue::Type data_type);
164
165 lldb::OptionValueSP ReadDictionary(FILE *in_file, Stream *out_stream);
166
167 bool DumpEmulation(const ArchSpec &arch);
168
169 virtual bool TestEmulation(Stream *stream, const char *test_file_name);
170
171 bool Emulate(const ArchSpec &arch, uint32_t evaluate_options, void *baton,
172 EmulateInstruction::ReadMemoryCallback read_mem_callback,
173 EmulateInstruction::WriteMemoryCallback write_mem_calback,
174 EmulateInstruction::ReadRegisterCallback read_reg_callback,
175 EmulateInstruction::WriteRegisterCallback write_reg_callback);
176
177 const Opcode &GetOpcode() const { return m_opcode; }
178
179 uint32_t GetData(DataExtractor &data);
180
181 struct Operand {
182 enum class Type {
183 Invalid = 0,
184 Register,
185 Immediate,
186 Dereference,
187 Sum,
188 Product
189 } m_type = Type::Invalid;
190 std::vector<Operand> m_children;
191 lldb::addr_t m_immediate = 0;
192 ConstString m_register;
193 bool m_negative = false;
194 bool m_clobbered = false;
195
196 bool IsValid() { return m_type != Type::Invalid; }
197
198 static Operand BuildRegister(ConstString &r);
199 static Operand BuildImmediate(lldb::addr_t imm, bool neg);
200 static Operand BuildImmediate(int64_t imm);
201 static Operand BuildDereference(const Operand &ref);
202 static Operand BuildSum(const Operand &lhs, const Operand &rhs);
203 static Operand BuildProduct(const Operand &lhs, const Operand &rhs);
204 };
205
206 virtual bool ParseOperands(llvm::SmallVectorImpl<Operand> &operands) {
207 return false;
208 }
209
210 virtual bool IsCall() { return false; }
211
212protected:
213 Address m_address; // The section offset address of this instruction
214 // We include an address class in the Instruction class to
215 // allow the instruction specify the
216 // AddressClass::eCodeAlternateISA (currently used for
217 // thumb), and also to specify data (AddressClass::eData).
218 // The usual value will be AddressClass::eCode, but often
219 // when disassembling memory, you might run into data.
220 // This can help us to disassemble appropriately.
221private:
222 AddressClass m_address_class; // Use GetAddressClass () accessor function!
223
224protected:
225 Opcode m_opcode; // The opcode for this instruction
226 std::string m_opcode_name;
227 std::string m_mnemonics;
228 std::string m_comment;
229 bool m_calculated_strings;
230
231 void
232 CalculateMnemonicOperandsAndCommentIfNeeded(const ExecutionContext *exe_ctx) {
233 if (!m_calculated_strings) {
234 m_calculated_strings = true;
235 CalculateMnemonicOperandsAndComment(exe_ctx);
236 }
237 }
238};
239
240namespace OperandMatchers {
241std::function<bool(const Instruction::Operand &)>
242MatchBinaryOp(std::function<bool(const Instruction::Operand &)> base,
243 std::function<bool(const Instruction::Operand &)> left,
244 std::function<bool(const Instruction::Operand &)> right);
245
246std::function<bool(const Instruction::Operand &)>
247MatchUnaryOp(std::function<bool(const Instruction::Operand &)> base,
248 std::function<bool(const Instruction::Operand &)> child);
249
250std::function<bool(const Instruction::Operand &)>
251MatchRegOp(const RegisterInfo &info);
252
253std::function<bool(const Instruction::Operand &)> FetchRegOp(ConstString &reg);
254
255std::function<bool(const Instruction::Operand &)> MatchImmOp(int64_t imm);
256
257std::function<bool(const Instruction::Operand &)> FetchImmOp(int64_t &imm);
258
259std::function<bool(const Instruction::Operand &)>
260MatchOpType(Instruction::Operand::Type type);
261}
262
263class InstructionList {
264public:
265 InstructionList();
266 ~InstructionList();
267
268 size_t GetSize() const;
269
270 uint32_t GetMaxOpcocdeByteSize() const;
271
272 lldb::InstructionSP GetInstructionAtIndex(size_t idx) const;
273
274 /// Get the instruction at the given address.
275 ///
276 /// \return
277 /// A valid \a InstructionSP if the address could be found, or null
278 /// otherwise.
279 lldb::InstructionSP GetInstructionAtAddress(const Address &addr);
280
281 //------------------------------------------------------------------
282 /// Get the index of the next branch instruction.
283 ///
284 /// Given a list of instructions, find the next branch instruction
285 /// in the list by returning an index.
286 ///
287 /// @param[in] start
288 /// The instruction index of the first instruction to check.
289 ///
290 /// @param[in] ignore_calls
291 /// It true, then fine the first branch instruction that isn't
292 /// a function call (a branch that calls and returns to the next
293 /// instruction). If false, find the instruction index of any
294 /// branch in the list.
295 ///
296 /// @param[out] found_calls
297 /// If non-null, this will be set to true if any calls were found in
298 /// extending the range.
299 ///
300 /// @return
301 /// The instruction index of the first branch that is at or past
302 /// \a start. Returns UINT32_MAX if no matching branches are
303 /// found.
304 //------------------------------------------------------------------
305 uint32_t GetIndexOfNextBranchInstruction(uint32_t start,
306 bool ignore_calls,
307 bool *found_calls) const;
308
309 uint32_t GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr,
310 Target &target);
311
312 uint32_t GetIndexOfInstructionAtAddress(const Address &addr);
313
314 void Clear();
315
316 void Append(lldb::InstructionSP &inst_sp);
317
318 void Dump(Stream *s, bool show_address, bool show_bytes,
319 const ExecutionContext *exe_ctx);
320
321private:
322 typedef std::vector<lldb::InstructionSP> collection;
323 typedef collection::iterator iterator;
324 typedef collection::const_iterator const_iterator;
325
326 collection m_instructions;
327};
328
329class PseudoInstruction : public Instruction {
330public:
331 PseudoInstruction();
332
333 ~PseudoInstruction() override;
334
335 bool DoesBranch() override;
336
337 bool HasDelaySlot() override;
338
339 void CalculateMnemonicOperandsAndComment(
340 const ExecutionContext *exe_ctx) override {
341 // TODO: fill this in and put opcode name into Instruction::m_opcode_name,
342 // mnemonic into Instruction::m_mnemonics, and any comment into
343 // Instruction::m_comment
344 }
345
346 size_t Decode(const Disassembler &disassembler, const DataExtractor &data,
347 lldb::offset_t data_offset) override;
348
349 void SetOpcode(size_t opcode_size, void *opcode_data);
350
351 void SetDescription(llvm::StringRef description) override;
352
353protected:
354 std::string m_description;
355
356 PseudoInstruction(const PseudoInstruction &) = delete;
357 const PseudoInstruction &operator=(const PseudoInstruction &) = delete;
358};
359
360class Disassembler : public std::enable_shared_from_this<Disassembler>,
361 public PluginInterface {
362public:
363 enum {
364 eOptionNone = 0u,
365 eOptionShowBytes = (1u << 0),
366 eOptionRawOuput = (1u << 1),
367 eOptionMarkPCSourceLine = (1u << 2), // Mark the source line that contains
368 // the current PC (mixed mode only)
369 eOptionMarkPCAddress =
370 (1u << 3) // Mark the disassembly line the contains the PC
371 };
372
373 enum HexImmediateStyle {
374 eHexStyleC,
375 eHexStyleAsm,
376 };
377
378 // FindPlugin should be lax about the flavor string (it is too annoying to
379 // have various internal uses of the disassembler fail because the global
380 // flavor string gets set wrong. Instead, if you get a flavor string you
381 // don't understand, use the default. Folks who care to check can use the
382 // FlavorValidForArchSpec method on the disassembler they got back.
383 static lldb::DisassemblerSP
384 FindPlugin(const ArchSpec &arch, const char *flavor, const char *plugin_name);
385
386 // This version will use the value in the Target settings if flavor is NULL;
387 static lldb::DisassemblerSP FindPluginForTarget(const Target &target,
388 const ArchSpec &arch,
389 const char *flavor,
390 const char *plugin_name);
391
392 struct Limit {
393 enum { Bytes, Instructions } kind;
394 lldb::addr_t value;
395 };
396
397 static lldb::DisassemblerSP DisassembleRange(const ArchSpec &arch,
398 const char *plugin_name,
399 const char *flavor,
400 Target &target,
401 const AddressRange &disasm_range,
402 bool force_live_memory = false);
403
404 static lldb::DisassemblerSP
405 DisassembleBytes(const ArchSpec &arch, const char *plugin_name,
406 const char *flavor, const Address &start, const void *bytes,
407 size_t length, uint32_t max_num_instructions,
408 bool data_from_file);
409
410 static bool Disassemble(Debugger &debugger, const ArchSpec &arch,
411 const char *plugin_name, const char *flavor,
412 const ExecutionContext &exe_ctx, const Address &start,
413 Limit limit, bool mixed_source_and_assembly,
414 uint32_t num_mixed_context_lines, uint32_t options,
415 Stream &strm);
416
417 static bool Disassemble(Debugger &debugger, const ArchSpec &arch,
418 StackFrame &frame, Stream &strm);
419
420 // Constructors and Destructors
421 Disassembler(const ArchSpec &arch, const char *flavor);
422 ~Disassembler() override;
423
424 void PrintInstructions(Debugger &debugger, const ArchSpec &arch,
425 const ExecutionContext &exe_ctx,
426 bool mixed_source_and_assembly,
427 uint32_t num_mixed_context_lines, uint32_t options,
428 Stream &strm);
429
430 size_t ParseInstructions(Target &target, Address address, Limit limit,
431 Stream *error_strm_ptr,
432 bool force_live_memory = false);
433
434 virtual size_t DecodeInstructions(const Address &base_addr,
435 const DataExtractor &data,
436 lldb::offset_t data_offset,
437 size_t num_instructions, bool append,
438 bool data_from_file) = 0;
439
440 InstructionList &GetInstructionList();
441
442 const InstructionList &GetInstructionList() const;
443
444 const ArchSpec &GetArchitecture() const { return m_arch; }
445
446 const char *GetFlavor() const { return m_flavor.c_str(); }
447
448 virtual bool FlavorValidForArchSpec(const lldb_private::ArchSpec &arch,
449 const char *flavor) = 0;
450
451protected:
452 // SourceLine and SourceLinesToDisplay structures are only used in the mixed
453 // source and assembly display methods internal to this class.
454
455 struct SourceLine {
456 FileSpec file;
457 uint32_t line;
458 uint32_t column;
459
460 SourceLine() : file(), line(LLDB_INVALID_LINE_NUMBER), column(0) {}
461
462 bool operator==(const SourceLine &rhs) const {
463 return file == rhs.file && line == rhs.line && rhs.column == column;
464 }
465
466 bool operator!=(const SourceLine &rhs) const {
467 return file != rhs.file || line != rhs.line || column != rhs.column;
468 }
469
470 bool IsValid() const { return line != LLDB_INVALID_LINE_NUMBER; }
471 };
472
473 struct SourceLinesToDisplay {
474 std::vector<SourceLine> lines;
475
476 // index of the "current" source line, if we want to highlight that when
477 // displaying the source lines. (as opposed to the surrounding source
478 // lines provided to give context)
479 size_t current_source_line;
480
481 // Whether to print a blank line at the end of the source lines.
482 bool print_source_context_end_eol;
483
484 SourceLinesToDisplay()
485 : lines(), current_source_line(-1), print_source_context_end_eol(true) {
486 }
487 };
488
489 // Get the function's declaration line number, hopefully a line number
490 // earlier than the opening curly brace at the start of the function body.
491 static SourceLine GetFunctionDeclLineEntry(const SymbolContext &sc);
492
493 // Add the provided SourceLine to the map of filenames-to-source-lines-seen.
494 static void AddLineToSourceLineTables(
495 SourceLine &line,
496 std::map<FileSpec, std::set<uint32_t>> &source_lines_seen);
497
498 // Given a source line, determine if we should print it when we're doing
499 // mixed source & assembly output. We're currently using the
500 // target.process.thread.step-avoid-regexp setting (which is used for
501 // stepping over inlined STL functions by default) to determine what source
502 // lines to avoid showing.
503 //
504 // Returns true if this source line should be elided (if the source line
505 // should not be displayed).
506 static bool
507 ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx,
508 const SymbolContext &sc, SourceLine &line);
509
510 static bool
511 ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx,
512 const SymbolContext &sc, LineEntry &line) {
513 SourceLine sl;
514 sl.file = line.file;
515 sl.line = line.line;
516 sl.column = line.column;
517 return ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, sl);
518 };
519
520 // Classes that inherit from Disassembler can see and modify these
521 ArchSpec m_arch;
522 InstructionList m_instruction_list;
523 lldb::addr_t m_base_addr;
524 std::string m_flavor;
525
526private:
527 // For Disassembler only
528 Disassembler(const Disassembler &) = delete;
529 const Disassembler &operator=(const Disassembler &) = delete;
530};
531
532} // namespace lldb_private
533
534#endif // LLDB_CORE_DISASSEMBLER_H
535