1//===- llvm/MC/MCDisassembler.h - Disassembler interface --------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
10#define LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
11
12#include "llvm/ADT/StringRef.h"
13#include "llvm/BinaryFormat/XCOFF.h"
14#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
15#include <cstdint>
16#include <memory>
17#include <vector>
18
19namespace llvm {
20
21struct XCOFFSymbolInfoTy {
22 std::optional<XCOFF::StorageMappingClass> StorageMappingClass;
23 std::optional<uint32_t> Index;
24 bool IsLabel = false;
25 bool operator<(const XCOFFSymbolInfoTy &SymInfo) const;
26};
27
28struct SymbolInfoTy {
29 uint64_t Addr;
30 StringRef Name;
31 // XCOFF uses XCOFFSymInfo. Other targets use Type.
32 XCOFFSymbolInfoTy XCOFFSymInfo;
33 uint8_t Type;
34 // Used by ELF to describe a mapping symbol that is usually not displayed.
35 bool IsMappingSymbol;
36
37private:
38 bool IsXCOFF;
39 bool HasType;
40
41public:
42 SymbolInfoTy(std::optional<XCOFF::StorageMappingClass> Smc, uint64_t Addr,
43 StringRef Name, std::optional<uint32_t> Idx, bool Label)
44 : Addr(Addr), Name(Name), XCOFFSymInfo{.StorageMappingClass: Smc, .Index: Idx, .IsLabel: Label}, Type(0),
45 IsMappingSymbol(false), IsXCOFF(true), HasType(false) {}
46 SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type,
47 bool IsMappingSymbol = false, bool IsXCOFF = false)
48 : Addr(Addr), Name(Name), Type(Type), IsMappingSymbol(IsMappingSymbol),
49 IsXCOFF(IsXCOFF), HasType(true) {}
50 bool isXCOFF() const { return IsXCOFF; }
51
52private:
53 friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2) {
54 assert((P1.IsXCOFF == P2.IsXCOFF && P1.HasType == P2.HasType) &&
55 "The value of IsXCOFF and HasType in P1 and P2 should be the same "
56 "respectively.");
57
58 if (P1.IsXCOFF && P1.HasType)
59 return std::tie(args: P1.Addr, args: P1.Type, args: P1.Name) <
60 std::tie(args: P2.Addr, args: P2.Type, args: P2.Name);
61
62 if (P1.IsXCOFF)
63 return std::tie(args: P1.Addr, args: P1.XCOFFSymInfo, args: P1.Name) <
64 std::tie(args: P2.Addr, args: P2.XCOFFSymInfo, args: P2.Name);
65
66 // With the same address, place mapping symbols first.
67 bool MS1 = !P1.IsMappingSymbol, MS2 = !P2.IsMappingSymbol;
68 return std::tie(args: P1.Addr, args&: MS1, args: P1.Name, args: P1.Type) <
69 std::tie(args: P2.Addr, args&: MS2, args: P2.Name, args: P2.Type);
70 }
71};
72
73using SectionSymbolsTy = std::vector<SymbolInfoTy>;
74
75template <typename T> class ArrayRef;
76class MCContext;
77class MCInst;
78class MCSubtargetInfo;
79class raw_ostream;
80
81/// Superclass for all disassemblers. Consumes a memory region and provides an
82/// array of assembly instructions.
83class MCDisassembler {
84public:
85 /// Ternary decode status. Most backends will just use Fail and
86 /// Success, however some have a concept of an instruction with
87 /// understandable semantics but which is architecturally
88 /// incorrect. An example of this is ARM UNPREDICTABLE instructions
89 /// which are disassemblable but cause undefined behaviour.
90 ///
91 /// Because it makes sense to disassemble these instructions, there
92 /// is a "soft fail" failure mode that indicates the MCInst& is
93 /// valid but architecturally incorrect.
94 ///
95 /// The enum numbers are deliberately chosen such that reduction
96 /// from Success->SoftFail ->Fail can be done with a simple
97 /// bitwise-AND:
98 ///
99 /// LEFT & TOP = | Success Unpredictable Fail
100 /// --------------+-----------------------------------
101 /// Success | Success Unpredictable Fail
102 /// Unpredictable | Unpredictable Unpredictable Fail
103 /// Fail | Fail Fail Fail
104 ///
105 /// An easy way of encoding this is as 0b11, 0b01, 0b00 for
106 /// Success, SoftFail, Fail respectively.
107 enum DecodeStatus {
108 Fail = 0,
109 SoftFail = 1,
110 Success = 3
111 };
112
113 MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
114 : Ctx(Ctx), STI(STI) {}
115
116 virtual ~MCDisassembler();
117
118 /// Returns the disassembly of a single instruction.
119 ///
120 /// \param Instr - An MCInst to populate with the contents of the
121 /// instruction.
122 /// \param Size - A value to populate with the size of the instruction, or
123 /// the number of bytes consumed while attempting to decode
124 /// an invalid instruction.
125 /// \param Address - The address, in the memory space of region, of the first
126 /// byte of the instruction.
127 /// \param Bytes - A reference to the actual bytes of the instruction.
128 /// \param CStream - The stream to print comments and annotations on.
129 /// \return - MCDisassembler::Success if the instruction is valid,
130 /// MCDisassembler::SoftFail if the instruction was
131 /// disassemblable but invalid,
132 /// MCDisassembler::Fail if the instruction was invalid.
133 virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
134 ArrayRef<uint8_t> Bytes, uint64_t Address,
135 raw_ostream &CStream) const = 0;
136
137 /// Used to perform separate target specific disassembly for a particular
138 /// symbol. May parse any prelude that precedes instructions after the
139 /// start of a symbol, or the entire symbol.
140 /// This is used for example by WebAssembly to decode preludes.
141 ///
142 /// Base implementation returns std::nullopt. So all targets by default ignore
143 /// to treat symbols separately.
144 ///
145 /// \param Symbol - The symbol.
146 /// \param Size - The number of bytes consumed.
147 /// \param Address - The address, in the memory space of region, of the first
148 /// byte of the symbol.
149 /// \param Bytes - A reference to the actual bytes at the symbol location.
150 /// \param CStream - The stream to print comments and annotations on.
151 /// \return - MCDisassembler::Success if bytes are decoded
152 /// successfully. Size must hold the number of bytes that
153 /// were decoded.
154 /// - MCDisassembler::Fail if the bytes are invalid. Size
155 /// must hold the number of bytes that were decoded before
156 /// failing. The target must print nothing. This can be
157 /// done by buffering the output if needed.
158 /// - std::nullopt if the target doesn't want to handle the
159 /// symbol separately. Value of Size is ignored in this
160 /// case.
161 virtual std::optional<DecodeStatus>
162 onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes,
163 uint64_t Address, raw_ostream &CStream) const;
164 // TODO:
165 // Implement similar hooks that can be used at other points during
166 // disassembly. Something along the following lines:
167 // - onBeforeInstructionDecode()
168 // - onAfterInstructionDecode()
169 // - onSymbolEnd()
170 // It should help move much of the target specific code from llvm-objdump to
171 // respective target disassemblers.
172
173 /// Suggest a distance to skip in a buffer of data to find the next
174 /// place to look for the start of an instruction. For example, if
175 /// all instructions have a fixed alignment, this might advance to
176 /// the next multiple of that alignment.
177 ///
178 /// If not overridden, the default is 1.
179 ///
180 /// \param Address - The address, in the memory space of region, of the
181 /// starting point (typically the first byte of something
182 /// that did not decode as a valid instruction at all).
183 /// \param Bytes - A reference to the actual bytes at Address. May be
184 /// needed in order to determine the width of an
185 /// unrecognized instruction (e.g. in Thumb this is a simple
186 /// consistent criterion that doesn't require knowing the
187 /// specific instruction). The caller can pass as much data
188 /// as they have available, and the function is required to
189 /// make a reasonable default choice if not enough data is
190 /// available to make a better one.
191 /// \return - A number of bytes to skip. Must always be greater than
192 /// zero. May be greater than the size of Bytes.
193 virtual uint64_t suggestBytesToSkip(ArrayRef<uint8_t> Bytes,
194 uint64_t Address) const;
195
196private:
197 MCContext &Ctx;
198
199protected:
200 // Subtarget information, for instruction decoding predicates if required.
201 const MCSubtargetInfo &STI;
202 std::unique_ptr<MCSymbolizer> Symbolizer;
203
204public:
205 // Helpers around MCSymbolizer
206 bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address,
207 bool IsBranch, uint64_t Offset, uint64_t OpSize,
208 uint64_t InstSize) const;
209
210 void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const;
211
212 /// Set \p Symzer as the current symbolizer.
213 /// This takes ownership of \p Symzer, and deletes the previously set one.
214 void setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer);
215
216 MCContext& getContext() const { return Ctx; }
217
218 const MCSubtargetInfo& getSubtargetInfo() const { return STI; }
219
220 /// ELF-specific, set the ABI version from the object header.
221 virtual void setABIVersion(unsigned Version) {}
222
223 // Marked mutable because we cache it inside the disassembler, rather than
224 // having to pass it around as an argument through all the autogenerated code.
225 mutable raw_ostream *CommentStream = nullptr;
226};
227
228} // end namespace llvm
229
230#endif // LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
231

// Source: llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h