1 | //===- llvm/MC/MCDisassembler.h - Disassembler interface --------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H |
10 | #define LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H |
11 | |
12 | #include "llvm/ADT/StringRef.h" |
13 | #include "llvm/BinaryFormat/XCOFF.h" |
14 | #include "llvm/MC/MCDisassembler/MCSymbolizer.h" |
15 | #include <cstdint> |
16 | #include <memory> |
17 | #include <vector> |
18 | |
19 | namespace llvm { |
20 | |
21 | struct XCOFFSymbolInfoTy { |
22 | std::optional<XCOFF::StorageMappingClass> StorageMappingClass; |
23 | std::optional<uint32_t> Index; |
24 | bool IsLabel = false; |
25 | bool operator<(const XCOFFSymbolInfoTy &SymInfo) const; |
26 | }; |
27 | |
28 | struct SymbolInfoTy { |
29 | uint64_t Addr; |
30 | StringRef Name; |
31 | // XCOFF uses XCOFFSymInfo. Other targets use Type. |
32 | XCOFFSymbolInfoTy XCOFFSymInfo; |
33 | uint8_t Type; |
34 | // Used by ELF to describe a mapping symbol that is usually not displayed. |
35 | bool IsMappingSymbol; |
36 | |
37 | private: |
38 | bool IsXCOFF; |
39 | bool HasType; |
40 | |
41 | public: |
42 | SymbolInfoTy(std::optional<XCOFF::StorageMappingClass> Smc, uint64_t Addr, |
43 | StringRef Name, std::optional<uint32_t> Idx, bool Label) |
44 | : Addr(Addr), Name(Name), XCOFFSymInfo{.StorageMappingClass: Smc, .Index: Idx, .IsLabel: Label}, Type(0), |
45 | IsMappingSymbol(false), IsXCOFF(true), HasType(false) {} |
46 | SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type, |
47 | bool IsMappingSymbol = false, bool IsXCOFF = false) |
48 | : Addr(Addr), Name(Name), Type(Type), IsMappingSymbol(IsMappingSymbol), |
49 | IsXCOFF(IsXCOFF), HasType(true) {} |
50 | bool isXCOFF() const { return IsXCOFF; } |
51 | |
52 | private: |
53 | friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2) { |
54 | assert((P1.IsXCOFF == P2.IsXCOFF && P1.HasType == P2.HasType) && |
55 | "The value of IsXCOFF and HasType in P1 and P2 should be the same " |
56 | "respectively." ); |
57 | |
58 | if (P1.IsXCOFF && P1.HasType) |
59 | return std::tie(args: P1.Addr, args: P1.Type, args: P1.Name) < |
60 | std::tie(args: P2.Addr, args: P2.Type, args: P2.Name); |
61 | |
62 | if (P1.IsXCOFF) |
63 | return std::tie(args: P1.Addr, args: P1.XCOFFSymInfo, args: P1.Name) < |
64 | std::tie(args: P2.Addr, args: P2.XCOFFSymInfo, args: P2.Name); |
65 | |
66 | // With the same address, place mapping symbols first. |
67 | bool MS1 = !P1.IsMappingSymbol, MS2 = !P2.IsMappingSymbol; |
68 | return std::tie(args: P1.Addr, args&: MS1, args: P1.Name, args: P1.Type) < |
69 | std::tie(args: P2.Addr, args&: MS2, args: P2.Name, args: P2.Type); |
70 | } |
71 | }; |
72 | |
73 | using SectionSymbolsTy = std::vector<SymbolInfoTy>; |
74 | |
75 | template <typename T> class ArrayRef; |
76 | class MCContext; |
77 | class MCInst; |
78 | class MCSubtargetInfo; |
79 | class raw_ostream; |
80 | |
81 | /// Superclass for all disassemblers. Consumes a memory region and provides an |
82 | /// array of assembly instructions. |
83 | class MCDisassembler { |
84 | public: |
85 | /// Ternary decode status. Most backends will just use Fail and |
86 | /// Success, however some have a concept of an instruction with |
87 | /// understandable semantics but which is architecturally |
88 | /// incorrect. An example of this is ARM UNPREDICTABLE instructions |
89 | /// which are disassemblable but cause undefined behaviour. |
90 | /// |
91 | /// Because it makes sense to disassemble these instructions, there |
92 | /// is a "soft fail" failure mode that indicates the MCInst& is |
93 | /// valid but architecturally incorrect. |
94 | /// |
95 | /// The enum numbers are deliberately chosen such that reduction |
96 | /// from Success->SoftFail ->Fail can be done with a simple |
97 | /// bitwise-AND: |
98 | /// |
99 | /// LEFT & TOP = | Success Unpredictable Fail |
100 | /// --------------+----------------------------------- |
101 | /// Success | Success Unpredictable Fail |
102 | /// Unpredictable | Unpredictable Unpredictable Fail |
103 | /// Fail | Fail Fail Fail |
104 | /// |
105 | /// An easy way of encoding this is as 0b11, 0b01, 0b00 for |
106 | /// Success, SoftFail, Fail respectively. |
107 | enum DecodeStatus { |
108 | Fail = 0, |
109 | SoftFail = 1, |
110 | Success = 3 |
111 | }; |
112 | |
113 | MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) |
114 | : Ctx(Ctx), STI(STI) {} |
115 | |
116 | virtual ~MCDisassembler(); |
117 | |
118 | /// Returns the disassembly of a single instruction. |
119 | /// |
120 | /// \param Instr - An MCInst to populate with the contents of the |
121 | /// instruction. |
122 | /// \param Size - A value to populate with the size of the instruction, or |
123 | /// the number of bytes consumed while attempting to decode |
124 | /// an invalid instruction. |
125 | /// \param Address - The address, in the memory space of region, of the first |
126 | /// byte of the instruction. |
127 | /// \param Bytes - A reference to the actual bytes of the instruction. |
128 | /// \param CStream - The stream to print comments and annotations on. |
129 | /// \return - MCDisassembler::Success if the instruction is valid, |
130 | /// MCDisassembler::SoftFail if the instruction was |
131 | /// disassemblable but invalid, |
132 | /// MCDisassembler::Fail if the instruction was invalid. |
133 | virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, |
134 | ArrayRef<uint8_t> Bytes, uint64_t Address, |
135 | raw_ostream &CStream) const = 0; |
136 | |
137 | /// Used to perform separate target specific disassembly for a particular |
138 | /// symbol. May parse any prelude that precedes instructions after the |
139 | /// start of a symbol, or the entire symbol. |
140 | /// This is used for example by WebAssembly to decode preludes. |
141 | /// |
142 | /// Base implementation returns std::nullopt. So all targets by default ignore |
143 | /// to treat symbols separately. |
144 | /// |
145 | /// \param Symbol - The symbol. |
146 | /// \param Size - The number of bytes consumed. |
147 | /// \param Address - The address, in the memory space of region, of the first |
148 | /// byte of the symbol. |
149 | /// \param Bytes - A reference to the actual bytes at the symbol location. |
150 | /// \param CStream - The stream to print comments and annotations on. |
151 | /// \return - MCDisassembler::Success if bytes are decoded |
152 | /// successfully. Size must hold the number of bytes that |
153 | /// were decoded. |
154 | /// - MCDisassembler::Fail if the bytes are invalid. Size |
155 | /// must hold the number of bytes that were decoded before |
156 | /// failing. The target must print nothing. This can be |
157 | /// done by buffering the output if needed. |
158 | /// - std::nullopt if the target doesn't want to handle the |
159 | /// symbol separately. Value of Size is ignored in this |
160 | /// case. |
161 | virtual std::optional<DecodeStatus> |
162 | onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes, |
163 | uint64_t Address, raw_ostream &CStream) const; |
164 | // TODO: |
165 | // Implement similar hooks that can be used at other points during |
166 | // disassembly. Something along the following lines: |
167 | // - onBeforeInstructionDecode() |
168 | // - onAfterInstructionDecode() |
169 | // - onSymbolEnd() |
170 | // It should help move much of the target specific code from llvm-objdump to |
171 | // respective target disassemblers. |
172 | |
173 | /// Suggest a distance to skip in a buffer of data to find the next |
174 | /// place to look for the start of an instruction. For example, if |
175 | /// all instructions have a fixed alignment, this might advance to |
176 | /// the next multiple of that alignment. |
177 | /// |
178 | /// If not overridden, the default is 1. |
179 | /// |
180 | /// \param Address - The address, in the memory space of region, of the |
181 | /// starting point (typically the first byte of something |
182 | /// that did not decode as a valid instruction at all). |
183 | /// \param Bytes - A reference to the actual bytes at Address. May be |
184 | /// needed in order to determine the width of an |
185 | /// unrecognized instruction (e.g. in Thumb this is a simple |
186 | /// consistent criterion that doesn't require knowing the |
187 | /// specific instruction). The caller can pass as much data |
188 | /// as they have available, and the function is required to |
189 | /// make a reasonable default choice if not enough data is |
190 | /// available to make a better one. |
191 | /// \return - A number of bytes to skip. Must always be greater than |
192 | /// zero. May be greater than the size of Bytes. |
193 | virtual uint64_t suggestBytesToSkip(ArrayRef<uint8_t> Bytes, |
194 | uint64_t Address) const; |
195 | |
196 | private: |
197 | MCContext &Ctx; |
198 | |
199 | protected: |
200 | // Subtarget information, for instruction decoding predicates if required. |
201 | const MCSubtargetInfo &STI; |
202 | std::unique_ptr<MCSymbolizer> Symbolizer; |
203 | |
204 | public: |
205 | // Helpers around MCSymbolizer |
206 | bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, |
207 | bool IsBranch, uint64_t Offset, uint64_t OpSize, |
208 | uint64_t InstSize) const; |
209 | |
210 | void (int64_t Value, uint64_t Address) const; |
211 | |
212 | /// Set \p Symzer as the current symbolizer. |
213 | /// This takes ownership of \p Symzer, and deletes the previously set one. |
214 | void setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer); |
215 | |
216 | MCContext& getContext() const { return Ctx; } |
217 | |
218 | const MCSubtargetInfo& getSubtargetInfo() const { return STI; } |
219 | |
220 | /// ELF-specific, set the ABI version from the object header. |
221 | virtual void setABIVersion(unsigned Version) {} |
222 | |
223 | // Marked mutable because we cache it inside the disassembler, rather than |
224 | // having to pass it around as an argument through all the autogenerated code. |
225 | mutable raw_ostream * = nullptr; |
226 | }; |
227 | |
228 | } // end namespace llvm |
229 | |
230 | #endif // LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H |
231 | |