1//===-- Disassembler.cpp --------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "lldb/Core/Disassembler.h"
10
11#include "lldb/Core/AddressRange.h"
12#include "lldb/Core/Debugger.h"
13#include "lldb/Core/EmulateInstruction.h"
14#include "lldb/Core/Mangled.h"
15#include "lldb/Core/Module.h"
16#include "lldb/Core/ModuleList.h"
17#include "lldb/Core/PluginManager.h"
18#include "lldb/Core/SourceManager.h"
19#include "lldb/Host/FileSystem.h"
20#include "lldb/Interpreter/OptionValue.h"
21#include "lldb/Interpreter/OptionValueArray.h"
22#include "lldb/Interpreter/OptionValueDictionary.h"
23#include "lldb/Interpreter/OptionValueRegex.h"
24#include "lldb/Interpreter/OptionValueString.h"
25#include "lldb/Interpreter/OptionValueUInt64.h"
26#include "lldb/Symbol/Function.h"
27#include "lldb/Symbol/Symbol.h"
28#include "lldb/Symbol/SymbolContext.h"
29#include "lldb/Target/ExecutionContext.h"
30#include "lldb/Target/SectionLoadList.h"
31#include "lldb/Target/StackFrame.h"
32#include "lldb/Target/Target.h"
33#include "lldb/Target/Thread.h"
34#include "lldb/Utility/DataBufferHeap.h"
35#include "lldb/Utility/DataExtractor.h"
36#include "lldb/Utility/RegularExpression.h"
37#include "lldb/Utility/Status.h"
38#include "lldb/Utility/Stream.h"
39#include "lldb/Utility/StreamString.h"
40#include "lldb/Utility/Timer.h"
41#include "lldb/lldb-private-enumerations.h"
42#include "lldb/lldb-private-interfaces.h"
43#include "lldb/lldb-private-types.h"
44#include "llvm/ADT/Triple.h"
45#include "llvm/Support/Compiler.h"
46
47#include <cstdint>
48#include <cstring>
49#include <utility>
50
51#include <assert.h>
52
53#define DEFAULT_DISASM_BYTE_SIZE 32
54
55using namespace lldb;
56using namespace lldb_private;
57
58DisassemblerSP Disassembler::FindPlugin(const ArchSpec &arch,
59 const char *flavor,
60 const char *plugin_name) {
61 LLDB_SCOPED_TIMERF("Disassembler::FindPlugin (arch = %s, plugin_name = %s)",
62 arch.GetArchitectureName(), plugin_name);
63
64 DisassemblerCreateInstance create_callback = nullptr;
65
66 if (plugin_name) {
67 ConstString const_plugin_name(plugin_name);
68 create_callback = PluginManager::GetDisassemblerCreateCallbackForPluginName(
69 const_plugin_name);
70 if (create_callback) {
71 DisassemblerSP disassembler_sp(create_callback(arch, flavor));
72
73 if (disassembler_sp)
74 return disassembler_sp;
75 }
76 } else {
77 for (uint32_t idx = 0;
78 (create_callback = PluginManager::GetDisassemblerCreateCallbackAtIndex(
79 idx)) != nullptr;
80 ++idx) {
81 DisassemblerSP disassembler_sp(create_callback(arch, flavor));
82
83 if (disassembler_sp)
84 return disassembler_sp;
85 }
86 }
87 return DisassemblerSP();
88}
89
90DisassemblerSP Disassembler::FindPluginForTarget(const Target &target,
91 const ArchSpec &arch,
92 const char *flavor,
93 const char *plugin_name) {
94 if (flavor == nullptr) {
95 // FIXME - we don't have the mechanism in place to do per-architecture
96 // settings. But since we know that for now we only support flavors on x86
97 // & x86_64,
98 if (arch.GetTriple().getArch() == llvm::Triple::x86 ||
99 arch.GetTriple().getArch() == llvm::Triple::x86_64)
100 flavor = target.GetDisassemblyFlavor();
101 }
102 return FindPlugin(arch, flavor, plugin_name);
103}
104
105static Address ResolveAddress(Target &target, const Address &addr) {
106 if (!addr.IsSectionOffset()) {
107 Address resolved_addr;
108 // If we weren't passed in a section offset address range, try and resolve
109 // it to something
110 bool is_resolved = target.GetSectionLoadList().IsEmpty()
111 ? target.GetImages().ResolveFileAddress(
112 addr.GetOffset(), resolved_addr)
113 : target.GetSectionLoadList().ResolveLoadAddress(
114 addr.GetOffset(), resolved_addr);
115
116 // We weren't able to resolve the address, just treat it as a raw address
117 if (is_resolved && resolved_addr.IsValid())
118 return resolved_addr;
119 }
120 return addr;
121}
122
123lldb::DisassemblerSP Disassembler::DisassembleRange(
124 const ArchSpec &arch, const char *plugin_name, const char *flavor,
125 Target &target, const AddressRange &range, bool force_live_memory) {
126 if (range.GetByteSize() <= 0)
127 return {};
128
129 if (!range.GetBaseAddress().IsValid())
130 return {};
131
132 lldb::DisassemblerSP disasm_sp =
133 Disassembler::FindPluginForTarget(target, arch, flavor, plugin_name);
134
135 if (!disasm_sp)
136 return {};
137
138 const size_t bytes_disassembled = disasm_sp->ParseInstructions(
139 target, range.GetBaseAddress(), {Limit::Bytes, range.GetByteSize()},
140 nullptr, force_live_memory);
141 if (bytes_disassembled == 0)
142 return {};
143
144 return disasm_sp;
145}
146
147lldb::DisassemblerSP
148Disassembler::DisassembleBytes(const ArchSpec &arch, const char *plugin_name,
149 const char *flavor, const Address &start,
150 const void *src, size_t src_len,
151 uint32_t num_instructions, bool data_from_file) {
152 if (!src)
153 return {};
154
155 lldb::DisassemblerSP disasm_sp =
156 Disassembler::FindPlugin(arch, flavor, plugin_name);
157
158 if (!disasm_sp)
159 return {};
160
161 DataExtractor data(src, src_len, arch.GetByteOrder(),
162 arch.GetAddressByteSize());
163
164 (void)disasm_sp->DecodeInstructions(start, data, 0, num_instructions, false,
165 data_from_file);
166 return disasm_sp;
167}
168
169bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch,
170 const char *plugin_name, const char *flavor,
171 const ExecutionContext &exe_ctx,
172 const Address &address, Limit limit,
173 bool mixed_source_and_assembly,
174 uint32_t num_mixed_context_lines,
175 uint32_t options, Stream &strm) {
176 if (!exe_ctx.GetTargetPtr())
177 return false;
178
179 lldb::DisassemblerSP disasm_sp(Disassembler::FindPluginForTarget(
180 exe_ctx.GetTargetRef(), arch, flavor, plugin_name));
181 if (!disasm_sp)
182 return false;
183
184 const bool force_live_memory = true;
185 size_t bytes_disassembled = disasm_sp->ParseInstructions(
186 exe_ctx.GetTargetRef(), address, limit, &strm, force_live_memory);
187 if (bytes_disassembled == 0)
188 return false;
189
190 disasm_sp->PrintInstructions(debugger, arch, exe_ctx,
191 mixed_source_and_assembly,
192 num_mixed_context_lines, options, strm);
193 return true;
194}
195
196Disassembler::SourceLine
197Disassembler::GetFunctionDeclLineEntry(const SymbolContext &sc) {
198 if (!sc.function)
199 return {};
200
201 if (!sc.line_entry.IsValid())
202 return {};
203
204 LineEntry prologue_end_line = sc.line_entry;
205 FileSpec func_decl_file;
206 uint32_t func_decl_line;
207 sc.function->GetStartLineSourceInfo(func_decl_file, func_decl_line);
208
209 if (func_decl_file != prologue_end_line.file &&
210 func_decl_file != prologue_end_line.original_file)
211 return {};
212
213 SourceLine decl_line;
214 decl_line.file = func_decl_file;
215 decl_line.line = func_decl_line;
216 // TODO: Do we care about column on these entries? If so, we need to plumb
217 // that through GetStartLineSourceInfo.
218 decl_line.column = 0;
219 return decl_line;
220}
221
222void Disassembler::AddLineToSourceLineTables(
223 SourceLine &line,
224 std::map<FileSpec, std::set<uint32_t>> &source_lines_seen) {
225 if (line.IsValid()) {
226 auto source_lines_seen_pos = source_lines_seen.find(line.file);
227 if (source_lines_seen_pos == source_lines_seen.end()) {
228 std::set<uint32_t> lines;
229 lines.insert(line.line);
230 source_lines_seen.emplace(line.file, lines);
231 } else {
232 source_lines_seen_pos->second.insert(line.line);
233 }
234 }
235}
236
237bool Disassembler::ElideMixedSourceAndDisassemblyLine(
238 const ExecutionContext &exe_ctx, const SymbolContext &sc,
239 SourceLine &line) {
240
241 // TODO: should we also check target.process.thread.step-avoid-libraries ?
242
243 const RegularExpression *avoid_regex = nullptr;
244
245 // Skip any line #0 entries - they are implementation details
246 if (line.line == 0)
247 return false;
248
249 ThreadSP thread_sp = exe_ctx.GetThreadSP();
250 if (thread_sp) {
251 avoid_regex = thread_sp->GetSymbolsToAvoidRegexp();
252 } else {
253 TargetSP target_sp = exe_ctx.GetTargetSP();
254 if (target_sp) {
255 Status error;
256 OptionValueSP value_sp = target_sp->GetDebugger().GetPropertyValue(
257 &exe_ctx, "target.process.thread.step-avoid-regexp", false, error);
258 if (value_sp && value_sp->GetType() == OptionValue::eTypeRegex) {
259 OptionValueRegex *re = value_sp->GetAsRegex();
260 if (re) {
261 avoid_regex = re->GetCurrentValue();
262 }
263 }
264 }
265 }
266 if (avoid_regex && sc.symbol != nullptr) {
267 const char *function_name =
268 sc.GetFunctionName(Mangled::ePreferDemangledWithoutArguments)
269 .GetCString();
270 if (function_name && avoid_regex->Execute(function_name)) {
271 // skip this source line
272 return true;
273 }
274 }
275 // don't skip this source line
276 return false;
277}
278
279void Disassembler::PrintInstructions(Debugger &debugger, const ArchSpec &arch,
280 const ExecutionContext &exe_ctx,
281 bool mixed_source_and_assembly,
282 uint32_t num_mixed_context_lines,
283 uint32_t options, Stream &strm) {
284 // We got some things disassembled...
285 size_t num_instructions_found = GetInstructionList().GetSize();
286
287 const uint32_t max_opcode_byte_size =
288 GetInstructionList().GetMaxOpcocdeByteSize();
289 SymbolContext sc;
290 SymbolContext prev_sc;
291 AddressRange current_source_line_range;
292 const Address *pc_addr_ptr = nullptr;
293 StackFrame *frame = exe_ctx.GetFramePtr();
294
295 TargetSP target_sp(exe_ctx.GetTargetSP());
296 SourceManager &source_manager =
297 target_sp ? target_sp->GetSourceManager() : debugger.GetSourceManager();
298
299 if (frame) {
300 pc_addr_ptr = &frame->GetFrameCodeAddress();
301 }
302 const uint32_t scope =
303 eSymbolContextLineEntry | eSymbolContextFunction | eSymbolContextSymbol;
304 const bool use_inline_block_range = false;
305
306 const FormatEntity::Entry *disassembly_format = nullptr;
307 FormatEntity::Entry format;
308 if (exe_ctx.HasTargetScope()) {
309 disassembly_format =
310 exe_ctx.GetTargetRef().GetDebugger().GetDisassemblyFormat();
311 } else {
312 FormatEntity::Parse("${addr}: ", format);
313 disassembly_format = &format;
314 }
315
316 // First pass: step through the list of instructions, find how long the
317 // initial addresses strings are, insert padding in the second pass so the
318 // opcodes all line up nicely.
319
320 // Also build up the source line mapping if this is mixed source & assembly
321 // mode. Calculate the source line for each assembly instruction (eliding
322 // inlined functions which the user wants to skip).
323
324 std::map<FileSpec, std::set<uint32_t>> source_lines_seen;
325 Symbol *previous_symbol = nullptr;
326
327 size_t address_text_size = 0;
328 for (size_t i = 0; i < num_instructions_found; ++i) {
329 Instruction *inst = GetInstructionList().GetInstructionAtIndex(i).get();
330 if (inst) {
331 const Address &addr = inst->GetAddress();
332 ModuleSP module_sp(addr.GetModule());
333 if (module_sp) {
334 const SymbolContextItem resolve_mask = eSymbolContextFunction |
335 eSymbolContextSymbol |
336 eSymbolContextLineEntry;
337 uint32_t resolved_mask =
338 module_sp->ResolveSymbolContextForAddress(addr, resolve_mask, sc);
339 if (resolved_mask) {
340 StreamString strmstr;
341 Debugger::FormatDisassemblerAddress(disassembly_format, &sc, nullptr,
342 &exe_ctx, &addr, strmstr);
343 size_t cur_line = strmstr.GetSizeOfLastLine();
344 if (cur_line > address_text_size)
345 address_text_size = cur_line;
346
347 // Add entries to our "source_lines_seen" map+set which list which
348 // sources lines occur in this disassembly session. We will print
349 // lines of context around a source line, but we don't want to print
350 // a source line that has a line table entry of its own - we'll leave
351 // that source line to be printed when it actually occurs in the
352 // disassembly.
353
354 if (mixed_source_and_assembly && sc.line_entry.IsValid()) {
355 if (sc.symbol != previous_symbol) {
356 SourceLine decl_line = GetFunctionDeclLineEntry(sc);
357 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, decl_line))
358 AddLineToSourceLineTables(decl_line, source_lines_seen);
359 }
360 if (sc.line_entry.IsValid()) {
361 SourceLine this_line;
362 this_line.file = sc.line_entry.file;
363 this_line.line = sc.line_entry.line;
364 this_line.column = sc.line_entry.column;
365 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, this_line))
366 AddLineToSourceLineTables(this_line, source_lines_seen);
367 }
368 }
369 }
370 sc.Clear(false);
371 }
372 }
373 }
374
375 previous_symbol = nullptr;
376 SourceLine previous_line;
377 for (size_t i = 0; i < num_instructions_found; ++i) {
378 Instruction *inst = GetInstructionList().GetInstructionAtIndex(i).get();
379
380 if (inst) {
381 const Address &addr = inst->GetAddress();
382 const bool inst_is_at_pc = pc_addr_ptr && addr == *pc_addr_ptr;
383 SourceLinesToDisplay source_lines_to_display;
384
385 prev_sc = sc;
386
387 ModuleSP module_sp(addr.GetModule());
388 if (module_sp) {
389 uint32_t resolved_mask = module_sp->ResolveSymbolContextForAddress(
390 addr, eSymbolContextEverything, sc);
391 if (resolved_mask) {
392 if (mixed_source_and_assembly) {
393
394 // If we've started a new function (non-inlined), print all of the
395 // source lines from the function declaration until the first line
396 // table entry - typically the opening curly brace of the function.
397 if (previous_symbol != sc.symbol) {
398 // The default disassembly format puts an extra blank line
399 // between functions - so when we're displaying the source
400 // context for a function, we don't want to add a blank line
401 // after the source context or we'll end up with two of them.
402 if (previous_symbol != nullptr)
403 source_lines_to_display.print_source_context_end_eol = false;
404
405 previous_symbol = sc.symbol;
406 if (sc.function && sc.line_entry.IsValid()) {
407 LineEntry prologue_end_line = sc.line_entry;
408 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc,
409 prologue_end_line)) {
410 FileSpec func_decl_file;
411 uint32_t func_decl_line;
412 sc.function->GetStartLineSourceInfo(func_decl_file,
413 func_decl_line);
414 if (func_decl_file == prologue_end_line.file ||
415 func_decl_file == prologue_end_line.original_file) {
416 // Add all the lines between the function declaration and
417 // the first non-prologue source line to the list of lines
418 // to print.
419 for (uint32_t lineno = func_decl_line;
420 lineno <= prologue_end_line.line; lineno++) {
421 SourceLine this_line;
422 this_line.file = func_decl_file;
423 this_line.line = lineno;
424 source_lines_to_display.lines.push_back(this_line);
425 }
426 // Mark the last line as the "current" one. Usually this
427 // is the open curly brace.
428 if (source_lines_to_display.lines.size() > 0)
429 source_lines_to_display.current_source_line =
430 source_lines_to_display.lines.size() - 1;
431 }
432 }
433 }
434 sc.GetAddressRange(scope, 0, use_inline_block_range,
435 current_source_line_range);
436 }
437
438 // If we've left a previous source line's address range, print a
439 // new source line
440 if (!current_source_line_range.ContainsFileAddress(addr)) {
441 sc.GetAddressRange(scope, 0, use_inline_block_range,
442 current_source_line_range);
443
444 if (sc != prev_sc && sc.comp_unit && sc.line_entry.IsValid()) {
445 SourceLine this_line;
446 this_line.file = sc.line_entry.file;
447 this_line.line = sc.line_entry.line;
448
449 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc,
450 this_line)) {
451 // Only print this source line if it is different from the
452 // last source line we printed. There may have been inlined
453 // functions between these lines that we elided, resulting in
454 // the same line being printed twice in a row for a
455 // contiguous block of assembly instructions.
456 if (this_line != previous_line) {
457
458 std::vector<uint32_t> previous_lines;
459 for (uint32_t i = 0;
460 i < num_mixed_context_lines &&
461 (this_line.line - num_mixed_context_lines) > 0;
462 i++) {
463 uint32_t line =
464 this_line.line - num_mixed_context_lines + i;
465 auto pos = source_lines_seen.find(this_line.file);
466 if (pos != source_lines_seen.end()) {
467 if (pos->second.count(line) == 1) {
468 previous_lines.clear();
469 } else {
470 previous_lines.push_back(line);
471 }
472 }
473 }
474 for (size_t i = 0; i < previous_lines.size(); i++) {
475 SourceLine previous_line;
476 previous_line.file = this_line.file;
477 previous_line.line = previous_lines[i];
478 auto pos = source_lines_seen.find(previous_line.file);
479 if (pos != source_lines_seen.end()) {
480 pos->second.insert(previous_line.line);
481 }
482 source_lines_to_display.lines.push_back(previous_line);
483 }
484
485 source_lines_to_display.lines.push_back(this_line);
486 source_lines_to_display.current_source_line =
487 source_lines_to_display.lines.size() - 1;
488
489 for (uint32_t i = 0; i < num_mixed_context_lines; i++) {
490 SourceLine next_line;
491 next_line.file = this_line.file;
492 next_line.line = this_line.line + i + 1;
493 auto pos = source_lines_seen.find(next_line.file);
494 if (pos != source_lines_seen.end()) {
495 if (pos->second.count(next_line.line) == 1)
496 break;
497 pos->second.insert(next_line.line);
498 }
499 source_lines_to_display.lines.push_back(next_line);
500 }
501 }
502 previous_line = this_line;
503 }
504 }
505 }
506 }
507 } else {
508 sc.Clear(true);
509 }
510 }
511
512 if (source_lines_to_display.lines.size() > 0) {
513 strm.EOL();
514 for (size_t idx = 0; idx < source_lines_to_display.lines.size();
515 idx++) {
516 SourceLine ln = source_lines_to_display.lines[idx];
517 const char *line_highlight = "";
518 if (inst_is_at_pc && (options & eOptionMarkPCSourceLine)) {
519 line_highlight = "->";
520 } else if (idx == source_lines_to_display.current_source_line) {
521 line_highlight = "**";
522 }
523 source_manager.DisplaySourceLinesWithLineNumbers(
524 ln.file, ln.line, ln.column, 0, 0, line_highlight, &strm);
525 }
526 if (source_lines_to_display.print_source_context_end_eol)
527 strm.EOL();
528 }
529
530 const bool show_bytes = (options & eOptionShowBytes) != 0;
531 inst->Dump(&strm, max_opcode_byte_size, true, show_bytes, &exe_ctx, &sc,
532 &prev_sc, nullptr, address_text_size);
533 strm.EOL();
534 } else {
535 break;
536 }
537 }
538}
539
540bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch,
541 StackFrame &frame, Stream &strm) {
542 AddressRange range;
543 SymbolContext sc(
544 frame.GetSymbolContext(eSymbolContextFunction | eSymbolContextSymbol));
545 if (sc.function) {
546 range = sc.function->GetAddressRange();
547 } else if (sc.symbol && sc.symbol->ValueIsAddress()) {
548 range.GetBaseAddress() = sc.symbol->GetAddressRef();
549 range.SetByteSize(sc.symbol->GetByteSize());
550 } else {
551 range.GetBaseAddress() = frame.GetFrameCodeAddress();
552 }
553
554 if (range.GetBaseAddress().IsValid() && range.GetByteSize() == 0)
555 range.SetByteSize(DEFAULT_DISASM_BYTE_SIZE);
556
557 Disassembler::Limit limit = {Disassembler::Limit::Bytes,
558 range.GetByteSize()};
559 if (limit.value == 0)
560 limit.value = DEFAULT_DISASM_BYTE_SIZE;
561
562 return Disassemble(debugger, arch, nullptr, nullptr, frame,
563 range.GetBaseAddress(), limit, false, 0, 0, strm);
564}
565
566Instruction::Instruction(const Address &address, AddressClass addr_class)
567 : m_address(address), m_address_class(addr_class), m_opcode(),
568 m_calculated_strings(false) {}
569
570Instruction::~Instruction() = default;
571
572AddressClass Instruction::GetAddressClass() {
573 if (m_address_class == AddressClass::eInvalid)
574 m_address_class = m_address.GetAddressClass();
575 return m_address_class;
576}
577
578void Instruction::Dump(lldb_private::Stream *s, uint32_t max_opcode_byte_size,
579 bool show_address, bool show_bytes,
580 const ExecutionContext *exe_ctx,
581 const SymbolContext *sym_ctx,
582 const SymbolContext *prev_sym_ctx,
583 const FormatEntity::Entry *disassembly_addr_format,
584 size_t max_address_text_size) {
585 size_t opcode_column_width = 7;
586 const size_t operand_column_width = 25;
587
588 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
589
590 StreamString ss;
591
592 if (show_address) {
593 Debugger::FormatDisassemblerAddress(disassembly_addr_format, sym_ctx,
594 prev_sym_ctx, exe_ctx, &m_address, ss);
595 ss.FillLastLineToColumn(max_address_text_size, ' ');
596 }
597
598 if (show_bytes) {
599 if (m_opcode.GetType() == Opcode::eTypeBytes) {
600 // x86_64 and i386 are the only ones that use bytes right now so pad out
601 // the byte dump to be able to always show 15 bytes (3 chars each) plus a
602 // space
603 if (max_opcode_byte_size > 0)
604 m_opcode.Dump(&ss, max_opcode_byte_size * 3 + 1);
605 else
606 m_opcode.Dump(&ss, 15 * 3 + 1);
607 } else {
608 // Else, we have ARM or MIPS which can show up to a uint32_t 0x00000000
609 // (10 spaces) plus two for padding...
610 if (max_opcode_byte_size > 0)
611 m_opcode.Dump(&ss, max_opcode_byte_size * 3 + 1);
612 else
613 m_opcode.Dump(&ss, 12);
614 }
615 }
616
617 const size_t opcode_pos = ss.GetSizeOfLastLine();
618
619 // The default opcode size of 7 characters is plenty for most architectures
620 // but some like arm can pull out the occasional vqrshrun.s16. We won't get
621 // consistent column spacing in these cases, unfortunately.
622 if (m_opcode_name.length() >= opcode_column_width) {
623 opcode_column_width = m_opcode_name.length() + 1;
624 }
625
626 ss.PutCString(m_opcode_name);
627 ss.FillLastLineToColumn(opcode_pos + opcode_column_width, ' ');
628 ss.PutCString(m_mnemonics);
629
630 if (!m_comment.empty()) {
631 ss.FillLastLineToColumn(
632 opcode_pos + opcode_column_width + operand_column_width, ' ');
633 ss.PutCString(" ; ");
634 ss.PutCString(m_comment);
635 }
636 s->PutCString(ss.GetString());
637}
638
639bool Instruction::DumpEmulation(const ArchSpec &arch) {
640 std::unique_ptr<EmulateInstruction> insn_emulator_up(
641 EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr));
642 if (insn_emulator_up) {
643 insn_emulator_up->SetInstruction(GetOpcode(), GetAddress(), nullptr);
644 return insn_emulator_up->EvaluateInstruction(0);
645 }
646
647 return false;
648}
649
650bool Instruction::CanSetBreakpoint () {
651 return !HasDelaySlot();
652}
653
654bool Instruction::HasDelaySlot() {
655 // Default is false.
656 return false;
657}
658
659OptionValueSP Instruction::ReadArray(FILE *in_file, Stream *out_stream,
660 OptionValue::Type data_type) {
661 bool done = false;
662 char buffer[1024];
663
664 auto option_value_sp = std::make_shared<OptionValueArray>(1u << data_type);
665
666 int idx = 0;
667 while (!done) {
668 if (!fgets(buffer, 1023, in_file)) {
669 out_stream->Printf(
670 "Instruction::ReadArray: Error reading file (fgets).\n");
671 option_value_sp.reset();
672 return option_value_sp;
673 }
674
675 std::string line(buffer);
676
677 size_t len = line.size();
678 if (line[len - 1] == '\n') {
679 line[len - 1] = '\0';
680 line.resize(len - 1);
681 }
682
683 if ((line.size() == 1) && line[0] == ']') {
684 done = true;
685 line.clear();
686 }
687
688 if (!line.empty()) {
689 std::string value;
690 static RegularExpression g_reg_exp(
691 llvm::StringRef("^[ \t]*([^ \t]+)[ \t]*$"));
692 llvm::SmallVector<llvm::StringRef, 2> matches;
693 if (g_reg_exp.Execute(line, &matches))
694 value = matches[1].str();
695 else
696 value = line;
697
698 OptionValueSP data_value_sp;
699 switch (data_type) {
700 case OptionValue::eTypeUInt64:
701 data_value_sp = std::make_shared<OptionValueUInt64>(0, 0);
702 data_value_sp->SetValueFromString(value);
703 break;
704 // Other types can be added later as needed.
705 default:
706 data_value_sp = std::make_shared<OptionValueString>(value.c_str(), "");
707 break;
708 }
709
710 option_value_sp->GetAsArray()->InsertValue(idx, data_value_sp);
711 ++idx;
712 }
713 }
714
715 return option_value_sp;
716}
717
718OptionValueSP Instruction::ReadDictionary(FILE *in_file, Stream *out_stream) {
719 bool done = false;
720 char buffer[1024];
721
722 auto option_value_sp = std::make_shared<OptionValueDictionary>();
723 static ConstString encoding_key("data_encoding");
724 OptionValue::Type data_type = OptionValue::eTypeInvalid;
725
726 while (!done) {
727 // Read the next line in the file
728 if (!fgets(buffer, 1023, in_file)) {
729 out_stream->Printf(
730 "Instruction::ReadDictionary: Error reading file (fgets).\n");
731 option_value_sp.reset();
732 return option_value_sp;
733 }
734
735 // Check to see if the line contains the end-of-dictionary marker ("}")
736 std::string line(buffer);
737
738 size_t len = line.size();
739 if (line[len - 1] == '\n') {
740 line[len - 1] = '\0';
741 line.resize(len - 1);
742 }
743
744 if ((line.size() == 1) && (line[0] == '}')) {
745 done = true;
746 line.clear();
747 }
748
749 // Try to find a key-value pair in the current line and add it to the
750 // dictionary.
751 if (!line.empty()) {
752 static RegularExpression g_reg_exp(llvm::StringRef(
753 "^[ \t]*([a-zA-Z_][a-zA-Z0-9_]*)[ \t]*=[ \t]*(.*)[ \t]*$"));
754
755 llvm::SmallVector<llvm::StringRef, 3> matches;
756
757 bool reg_exp_success = g_reg_exp.Execute(line, &matches);
758 std::string key;
759 std::string value;
760 if (reg_exp_success) {
761 key = matches[1].str();
762 value = matches[2].str();
763 } else {
764 out_stream->Printf("Instruction::ReadDictionary: Failure executing "
765 "regular expression.\n");
766 option_value_sp.reset();
767 return option_value_sp;
768 }
769
770 ConstString const_key(key.c_str());
771 // Check value to see if it's the start of an array or dictionary.
772
773 lldb::OptionValueSP value_sp;
774 assert(value.empty() == false);
775 assert(key.empty() == false);
776
777 if (value[0] == '{') {
778 assert(value.size() == 1);
779 // value is a dictionary
780 value_sp = ReadDictionary(in_file, out_stream);
781 if (!value_sp) {
782 option_value_sp.reset();
783 return option_value_sp;
784 }
785 } else if (value[0] == '[') {
786 assert(value.size() == 1);
787 // value is an array
788 value_sp = ReadArray(in_file, out_stream, data_type);
789 if (!value_sp) {
790 option_value_sp.reset();
791 return option_value_sp;
792 }
793 // We've used the data_type to read an array; re-set the type to
794 // Invalid
795 data_type = OptionValue::eTypeInvalid;
796 } else if ((value[0] == '0') && (value[1] == 'x')) {
797 value_sp = std::make_shared<OptionValueUInt64>(0, 0);
798 value_sp->SetValueFromString(value);
799 } else {
800 size_t len = value.size();
801 if ((value[0] == '"') && (value[len - 1] == '"'))
802 value = value.substr(1, len - 2);
803 value_sp = std::make_shared<OptionValueString>(value.c_str(), "");
804 }
805
806 if (const_key == encoding_key) {
807 // A 'data_encoding=..." is NOT a normal key-value pair; it is meta-data
808 // indicating the
809 // data type of an upcoming array (usually the next bit of data to be
810 // read in).
811 if (strcmp(value.c_str(), "uint32_t") == 0)
812 data_type = OptionValue::eTypeUInt64;
813 } else
814 option_value_sp->GetAsDictionary()->SetValueForKey(const_key, value_sp,
815 false);
816 }
817 }
818
819 return option_value_sp;
820}
821
822bool Instruction::TestEmulation(Stream *out_stream, const char *file_name) {
823 if (!out_stream)
824 return false;
825
826 if (!file_name) {
827 out_stream->Printf("Instruction::TestEmulation: Missing file_name.");
828 return false;
829 }
830 FILE *test_file = FileSystem::Instance().Fopen(file_name, "r");
831 if (!test_file) {
832 out_stream->Printf(
833 "Instruction::TestEmulation: Attempt to open test file failed.");
834 return false;
835 }
836
837 char buffer[256];
838 if (!fgets(buffer, 255, test_file)) {
839 out_stream->Printf(
840 "Instruction::TestEmulation: Error reading first line of test file.\n");
841 fclose(test_file);
842 return false;
843 }
844
845 if (strncmp(buffer, "InstructionEmulationState={", 27) != 0) {
846 out_stream->Printf("Instructin::TestEmulation: Test file does not contain "
847 "emulation state dictionary\n");
848 fclose(test_file);
849 return false;
850 }
851
852 // Read all the test information from the test file into an
853 // OptionValueDictionary.
854
855 OptionValueSP data_dictionary_sp(ReadDictionary(test_file, out_stream));
856 if (!data_dictionary_sp) {
857 out_stream->Printf(
858 "Instruction::TestEmulation: Error reading Dictionary Object.\n");
859 fclose(test_file);
860 return false;
861 }
862
863 fclose(test_file);
864
865 OptionValueDictionary *data_dictionary =
866 data_dictionary_sp->GetAsDictionary();
867 static ConstString description_key("assembly_string");
868 static ConstString triple_key("triple");
869
870 OptionValueSP value_sp = data_dictionary->GetValueForKey(description_key);
871
872 if (!value_sp) {
873 out_stream->Printf("Instruction::TestEmulation: Test file does not "
874 "contain description string.\n");
875 return false;
876 }
877
878 SetDescription(value_sp->GetStringValue());
879
880 value_sp = data_dictionary->GetValueForKey(triple_key);
881 if (!value_sp) {
882 out_stream->Printf(
883 "Instruction::TestEmulation: Test file does not contain triple.\n");
884 return false;
885 }
886
887 ArchSpec arch;
888 arch.SetTriple(llvm::Triple(value_sp->GetStringValue()));
889
890 bool success = false;
891 std::unique_ptr<EmulateInstruction> insn_emulator_up(
892 EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr));
893 if (insn_emulator_up)
894 success =
895 insn_emulator_up->TestEmulation(out_stream, arch, data_dictionary);
896
897 if (success)
898 out_stream->Printf("Emulation test succeeded.");
899 else
900 out_stream->Printf("Emulation test failed.");
901
902 return success;
903}
904
905bool Instruction::Emulate(
906 const ArchSpec &arch, uint32_t evaluate_options, void *baton,
907 EmulateInstruction::ReadMemoryCallback read_mem_callback,
908 EmulateInstruction::WriteMemoryCallback write_mem_callback,
909 EmulateInstruction::ReadRegisterCallback read_reg_callback,
910 EmulateInstruction::WriteRegisterCallback write_reg_callback) {
911 std::unique_ptr<EmulateInstruction> insn_emulator_up(
912 EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr));
913 if (insn_emulator_up) {
914 insn_emulator_up->SetBaton(baton);
915 insn_emulator_up->SetCallbacks(read_mem_callback, write_mem_callback,
916 read_reg_callback, write_reg_callback);
917 insn_emulator_up->SetInstruction(GetOpcode(), GetAddress(), nullptr);
918 return insn_emulator_up->EvaluateInstruction(evaluate_options);
919 }
920
921 return false;
922}
923
924uint32_t Instruction::GetData(DataExtractor &data) {
925 return m_opcode.GetData(data);
926}
927
928InstructionList::InstructionList() : m_instructions() {}
929
930InstructionList::~InstructionList() = default;
931
932size_t InstructionList::GetSize() const { return m_instructions.size(); }
933
934uint32_t InstructionList::GetMaxOpcocdeByteSize() const {
935 uint32_t max_inst_size = 0;
936 collection::const_iterator pos, end;
937 for (pos = m_instructions.begin(), end = m_instructions.end(); pos != end;
938 ++pos) {
939 uint32_t inst_size = (*pos)->GetOpcode().GetByteSize();
940 if (max_inst_size < inst_size)
941 max_inst_size = inst_size;
942 }
943 return max_inst_size;
944}
945
946InstructionSP InstructionList::GetInstructionAtIndex(size_t idx) const {
947 InstructionSP inst_sp;
948 if (idx < m_instructions.size())
949 inst_sp = m_instructions[idx];
950 return inst_sp;
951}
952
953InstructionSP InstructionList::GetInstructionAtAddress(const Address &address) {
954 uint32_t index = GetIndexOfInstructionAtAddress(address);
955 if (index != UINT32_MAX)
956 return GetInstructionAtIndex(index);
957 return nullptr;
958}
959
960void InstructionList::Dump(Stream *s, bool show_address, bool show_bytes,
961 const ExecutionContext *exe_ctx) {
962 const uint32_t max_opcode_byte_size = GetMaxOpcocdeByteSize();
963 collection::const_iterator pos, begin, end;
964
965 const FormatEntity::Entry *disassembly_format = nullptr;
966 FormatEntity::Entry format;
967 if (exe_ctx && exe_ctx->HasTargetScope()) {
968 disassembly_format =
969 exe_ctx->GetTargetRef().GetDebugger().GetDisassemblyFormat();
970 } else {
971 FormatEntity::Parse("${addr}: ", format);
972 disassembly_format = &format;
973 }
974
975 for (begin = m_instructions.begin(), end = m_instructions.end(), pos = begin;
976 pos != end; ++pos) {
977 if (pos != begin)
978 s->EOL();
979 (*pos)->Dump(s, max_opcode_byte_size, show_address, show_bytes, exe_ctx,
980 nullptr, nullptr, disassembly_format, 0);
981 }
982}
983
984void InstructionList::Clear() { m_instructions.clear(); }
985
986void InstructionList::Append(lldb::InstructionSP &inst_sp) {
987 if (inst_sp)
988 m_instructions.push_back(inst_sp);
989}
990
991uint32_t
992InstructionList::GetIndexOfNextBranchInstruction(uint32_t start,
993 bool ignore_calls,
994 bool *found_calls) const {
995 size_t num_instructions = m_instructions.size();
996
997 uint32_t next_branch = UINT32_MAX;
998
999 if (found_calls)
1000 *found_calls = false;
1001 for (size_t i = start; i < num_instructions; i++) {
1002 if (m_instructions[i]->DoesBranch()) {
1003 if (ignore_calls && m_instructions[i]->IsCall()) {
1004 if (found_calls)
1005 *found_calls = true;
1006 continue;
1007 }
1008 next_branch = i;
1009 break;
1010 }
1011 }
1012
1013 return next_branch;
1014}
1015
1016uint32_t
1017InstructionList::GetIndexOfInstructionAtAddress(const Address &address) {
1018 size_t num_instructions = m_instructions.size();
1019 uint32_t index = UINT32_MAX;
1020 for (size_t i = 0; i < num_instructions; i++) {
1021 if (m_instructions[i]->GetAddress() == address) {
1022 index = i;
1023 break;
1024 }
1025 }
1026 return index;
1027}
1028
1029uint32_t
1030InstructionList::GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr,
1031 Target &target) {
1032 Address address;
1033 address.SetLoadAddress(load_addr, &target);
1034 return GetIndexOfInstructionAtAddress(address);
1035}
1036
1037size_t Disassembler::ParseInstructions(Target &target, Address start,
1038 Limit limit, Stream *error_strm_ptr,
1039 bool force_live_memory) {
1040 m_instruction_list.Clear();
1041
1042 if (!start.IsValid())
1043 return 0;
1044
1045 start = ResolveAddress(target, start);
1046
1047 addr_t byte_size = limit.value;
1048 if (limit.kind == Limit::Instructions)
1049 byte_size *= m_arch.GetMaximumOpcodeByteSize();
1050 auto data_sp = std::make_shared<DataBufferHeap>(byte_size, '\0');
1051
1052 Status error;
1053 lldb::addr_t load_addr = LLDB_INVALID_ADDRESS;
1054 const size_t bytes_read =
1055 target.ReadMemory(start, data_sp->GetBytes(), data_sp->GetByteSize(),
1056 error, force_live_memory, &load_addr);
1057 const bool data_from_file = load_addr == LLDB_INVALID_ADDRESS;
1058
1059 if (bytes_read == 0) {
1060 if (error_strm_ptr) {
1061 if (const char *error_cstr = error.AsCString())
1062 error_strm_ptr->Printf("error: %s\n", error_cstr);
1063 }
1064 return 0;
1065 }
1066
1067 if (bytes_read != data_sp->GetByteSize())
1068 data_sp->SetByteSize(bytes_read);
1069 DataExtractor data(data_sp, m_arch.GetByteOrder(),
1070 m_arch.GetAddressByteSize());
1071 return DecodeInstructions(start, data, 0,
1072 limit.kind == Limit::Instructions ? limit.value
1073 : UINT32_MAX,
1074 false, data_from_file);
1075}
1076
1077// Disassembler copy constructor
1078Disassembler::Disassembler(const ArchSpec &arch, const char *flavor)
1079 : m_arch(arch), m_instruction_list(), m_base_addr(LLDB_INVALID_ADDRESS),
1080 m_flavor() {
1081 if (flavor == nullptr)
1082 m_flavor.assign("default");
1083 else
1084 m_flavor.assign(flavor);
1085
1086 // If this is an arm variant that can only include thumb (T16, T32)
1087 // instructions, force the arch triple to be "thumbv.." instead of "armv..."
1088 if (arch.IsAlwaysThumbInstructions()) {
1089 std::string thumb_arch_name(arch.GetTriple().getArchName().str());
1090 // Replace "arm" with "thumb" so we get all thumb variants correct
1091 if (thumb_arch_name.size() > 3) {
1092 thumb_arch_name.erase(0, 3);
1093 thumb_arch_name.insert(0, "thumb");
1094 }
1095 m_arch.SetTriple(thumb_arch_name.c_str());
1096 }
1097}
1098
1099Disassembler::~Disassembler() = default;
1100
1101InstructionList &Disassembler::GetInstructionList() {
1102 return m_instruction_list;
1103}
1104
1105const InstructionList &Disassembler::GetInstructionList() const {
1106 return m_instruction_list;
1107}
1108
1109// Class PseudoInstruction
1110
1111PseudoInstruction::PseudoInstruction()
1112 : Instruction(Address(), AddressClass::eUnknown), m_description() {}
1113
1114PseudoInstruction::~PseudoInstruction() = default;
1115
1116bool PseudoInstruction::DoesBranch() {
1117 // This is NOT a valid question for a pseudo instruction.
1118 return false;
1119}
1120
1121bool PseudoInstruction::HasDelaySlot() {
1122 // This is NOT a valid question for a pseudo instruction.
1123 return false;
1124}
1125
1126size_t PseudoInstruction::Decode(const lldb_private::Disassembler &disassembler,
1127 const lldb_private::DataExtractor &data,
1128 lldb::offset_t data_offset) {
1129 return m_opcode.GetByteSize();
1130}
1131
1132void PseudoInstruction::SetOpcode(size_t opcode_size, void *opcode_data) {
1133 if (!opcode_data)
1134 return;
1135
1136 switch (opcode_size) {
1137 case 8: {
1138 uint8_t value8 = *((uint8_t *)opcode_data);
1139 m_opcode.SetOpcode8(value8, eByteOrderInvalid);
1140 break;
1141 }
1142 case 16: {
1143 uint16_t value16 = *((uint16_t *)opcode_data);
1144 m_opcode.SetOpcode16(value16, eByteOrderInvalid);
1145 break;
1146 }
1147 case 32: {
1148 uint32_t value32 = *((uint32_t *)opcode_data);
1149 m_opcode.SetOpcode32(value32, eByteOrderInvalid);
1150 break;
1151 }
1152 case 64: {
1153 uint64_t value64 = *((uint64_t *)opcode_data);
1154 m_opcode.SetOpcode64(value64, eByteOrderInvalid);
1155 break;
1156 }
1157 default:
1158 break;
1159 }
1160}
1161
1162void PseudoInstruction::SetDescription(llvm::StringRef description) {
1163 m_description = std::string(description);
1164}
1165
1166Instruction::Operand Instruction::Operand::BuildRegister(ConstString &r) {
1167 Operand ret;
1168 ret.m_type = Type::Register;
1169 ret.m_register = r;
1170 return ret;
1171}
1172
1173Instruction::Operand Instruction::Operand::BuildImmediate(lldb::addr_t imm,
1174 bool neg) {
1175 Operand ret;
1176 ret.m_type = Type::Immediate;
1177 ret.m_immediate = imm;
1178 ret.m_negative = neg;
1179 return ret;
1180}
1181
1182Instruction::Operand Instruction::Operand::BuildImmediate(int64_t imm) {
1183 Operand ret;
1184 ret.m_type = Type::Immediate;
1185 if (imm < 0) {
1186 ret.m_immediate = -imm;
1187 ret.m_negative = true;
1188 } else {
1189 ret.m_immediate = imm;
1190 ret.m_negative = false;
1191 }
1192 return ret;
1193}
1194
1195Instruction::Operand
1196Instruction::Operand::BuildDereference(const Operand &ref) {
1197 Operand ret;
1198 ret.m_type = Type::Dereference;
1199 ret.m_children = {ref};
1200 return ret;
1201}
1202
1203Instruction::Operand Instruction::Operand::BuildSum(const Operand &lhs,
1204 const Operand &rhs) {
1205 Operand ret;
1206 ret.m_type = Type::Sum;
1207 ret.m_children = {lhs, rhs};
1208 return ret;
1209}
1210
1211Instruction::Operand Instruction::Operand::BuildProduct(const Operand &lhs,
1212 const Operand &rhs) {
1213 Operand ret;
1214 ret.m_type = Type::Product;
1215 ret.m_children = {lhs, rhs};
1216 return ret;
1217}
1218
1219std::function<bool(const Instruction::Operand &)>
1220lldb_private::OperandMatchers::MatchBinaryOp(
1221 std::function<bool(const Instruction::Operand &)> base,
1222 std::function<bool(const Instruction::Operand &)> left,
1223 std::function<bool(const Instruction::Operand &)> right) {
1224 return [base, left, right](const Instruction::Operand &op) -> bool {
1225 return (base(op) && op.m_children.size() == 2 &&
1226 ((left(op.m_children[0]) && right(op.m_children[1])) ||
1227 (left(op.m_children[1]) && right(op.m_children[0]))));
1228 };
1229}
1230
1231std::function<bool(const Instruction::Operand &)>
1232lldb_private::OperandMatchers::MatchUnaryOp(
1233 std::function<bool(const Instruction::Operand &)> base,
1234 std::function<bool(const Instruction::Operand &)> child) {
1235 return [base, child](const Instruction::Operand &op) -> bool {
1236 return (base(op) && op.m_children.size() == 1 && child(op.m_children[0]));
1237 };
1238}
1239
1240std::function<bool(const Instruction::Operand &)>
1241lldb_private::OperandMatchers::MatchRegOp(const RegisterInfo &info) {
1242 return [&info](const Instruction::Operand &op) {
1243 return (op.m_type == Instruction::Operand::Type::Register &&
1244 (op.m_register == ConstString(info.name) ||
1245 op.m_register == ConstString(info.alt_name)));
1246 };
1247}
1248
1249std::function<bool(const Instruction::Operand &)>
1250lldb_private::OperandMatchers::FetchRegOp(ConstString &reg) {
1251 return [&reg](const Instruction::Operand &op) {
1252 if (op.m_type != Instruction::Operand::Type::Register) {
1253 return false;
1254 }
1255 reg = op.m_register;
1256 return true;
1257 };
1258}
1259
1260std::function<bool(const Instruction::Operand &)>
1261lldb_private::OperandMatchers::MatchImmOp(int64_t imm) {
1262 return [imm](const Instruction::Operand &op) {
1263 return (op.m_type == Instruction::Operand::Type::Immediate &&
1264 ((op.m_negative && op.m_immediate == (uint64_t)-imm) ||
1265 (!op.m_negative && op.m_immediate == (uint64_t)imm)));
1266 };
1267}
1268
1269std::function<bool(const Instruction::Operand &)>
1270lldb_private::OperandMatchers::FetchImmOp(int64_t &imm) {
1271 return [&imm](const Instruction::Operand &op) {
1272 if (op.m_type != Instruction::Operand::Type::Immediate) {
1273 return false;
1274 }
1275 if (op.m_negative) {
1276 imm = -((int64_t)op.m_immediate);
1277 } else {
1278 imm = ((int64_t)op.m_immediate);
1279 }
1280 return true;
1281 };
1282}
1283
1284std::function<bool(const Instruction::Operand &)>
1285lldb_private::OperandMatchers::MatchOpType(Instruction::Operand::Type type) {
1286 return [type](const Instruction::Operand &op) { return op.m_type == type; };
1287}
1288