//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This file contains the definition of the AMDGPU ISA disassembler.
//
//===----------------------------------------------------------------------===//

// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?

#include "Disassembler/AMDGPUDisassembler.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm-c/DisassemblerTypes.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-disassembler"

#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)

using DecodeStatus = llvm::MCDisassembler::DecodeStatus;

AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                       MCContext &Ctx, MCInstrInfo const *MCII)
    : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
      MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
    report_fatal_error("Disassembly not yet supported for subtarget");
}

void AMDGPUDisassembler::setABIVersion(unsigned Version) {
  CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(Version);
}

inline static MCDisassembler::DecodeStatus
addOperand(MCInst &Inst, const MCOperand& Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ?
    MCDisassembler::Success :
    MCDisassembler::Fail;
}

static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                uint16_t NameIdx) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
  if (OpIdx != -1) {
    auto I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
  }
  return OpIdx;
}

static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);

  // Our branches take a simm16, but we need two extra bits to account for the
  // factor of 4.
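  // E.g. Imm = 0xFFFF (simm16 == -1) gives SignedOffset = -4, so the target
  // is Addr + 4 - 4 = Addr, matching the hardware's PC+4-relative branches.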
  APInt SignedOffset(18, Imm * 4, true);
  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}

static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
                                     const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  int64_t Offset;
  if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
    Offset = SignExtend64<24>(Imm);
  } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
    Offset = Imm & 0xFFFFF;
  } else { // GFX9+ supports 21-bit signed offsets.
    Offset = SignExtend64<21>(Imm);
  }
  return addOperand(Inst, MCOperand::createImm(Offset));
}

static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
                                  const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeBoolReg(Val));
}

static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
}

#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }

// Decoder for registers, decoded directly using RegClassID. The 8-bit Imm is
// the register number. Used by VGPR-only and AGPR-only operands.
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }

#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral,         \
                     ImmWidth)                                                 \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst,                                                    \
                      DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm,   \
                                        MandatoryLiteral, ImmWidth));          \
  }

static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
                                AMDGPUDisassembler::OpWidthTy OpWidth,
                                unsigned Imm, unsigned EncImm,
                                bool MandatoryLiteral, unsigned ImmWidth,
                                const MCDisassembler *Decoder) {
  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(
      Inst, DAsm->decodeSrcOp(OpWidth, EncImm, MandatoryLiteral, ImmWidth));
}

// Decoder for registers. The 7-bit Imm is the register number; decodeSrcOp
// supplies the register class. Used by SGPR-only operands.
#define DECODE_OPERAND_REG_7(RegClass, OpWidth)                                \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)

// Decoder for registers. Imm is 10 bits: Imm{7-0} is the register number,
// Imm{9} is acc (AGPR or VGPR), and Imm{8} should be 0 (see VOP3Pe_SMFMAC).
// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
// Used by AV_ register classes (AGPR-or-VGPR-only register operands).
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                               const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
                     false, 0, Decoder);
}

// Decoder for Src (9-bit encoding) registers only.
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, 0, Decoder);
}

// Decoder for Src (9-bit encoding) AGPRs, registers only. The register number
// is encoded in 9 bits; set Imm{9} to 1 (the acc bit) and decode using
// 'enum10' from decodeSrcOp.
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                                const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, 0, Decoder);
}

// Decoder for 'enum10' from decodeSrcOp: Imm{8-0} is the 9-bit Src encoding
// and Imm{9} is acc. Registers only.
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, false, 0, Decoder);
}

// Decoder for RegisterOperands using the 9-bit Src encoding. The operand can
// be a register from RegClass or an immediate. Registers that don't belong to
// RegClass are still decoded, and the InstPrinter will report a warning. An
// immediate is decoded into a constant of size ImmWidth, which should match
// the width of the immediate used by the OperandType (important for
// floating-point types).
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth>
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
                                       uint64_t /* Addr */,
                                       const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, ImmWidth, Decoder);
}

// Decoder for Src (9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (the
// acc bit) and decode using 'enum10' from decodeSrcOp.
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth>
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
                                        uint64_t /* Addr */,
                                        const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, ImmWidth,
                     Decoder);
}

template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth>
static DecodeStatus decodeSrcRegOrImmDeferred9(MCInst &Inst, unsigned Imm,
                                               uint64_t /* Addr */,
                                               const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, true, ImmWidth, Decoder);
}

// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
// when RegisterClass is used as an operand. Most often used for destination
// operands.

DECODE_OPERAND_REG_8(VGPR_32)
DECODE_OPERAND_REG_8(VGPR_32_Lo128)
DECODE_OPERAND_REG_8(VReg_64)
DECODE_OPERAND_REG_8(VReg_96)
DECODE_OPERAND_REG_8(VReg_128)
DECODE_OPERAND_REG_8(VReg_256)
DECODE_OPERAND_REG_8(VReg_288)
DECODE_OPERAND_REG_8(VReg_352)
DECODE_OPERAND_REG_8(VReg_384)
DECODE_OPERAND_REG_8(VReg_512)
DECODE_OPERAND_REG_8(VReg_1024)

DECODE_OPERAND_REG_7(SReg_32, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XEXEC, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
DECODE_OPERAND_REG_7(SReg_64, OPW64)
DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
DECODE_OPERAND_REG_7(SReg_96, OPW96)
DECODE_OPERAND_REG_7(SReg_128, OPW128)
DECODE_OPERAND_REG_7(SReg_256, OPW256)
DECODE_OPERAND_REG_7(SReg_512, OPW512)

DECODE_OPERAND_REG_8(AGPR_32)
DECODE_OPERAND_REG_8(AReg_64)
DECODE_OPERAND_REG_8(AReg_128)
DECODE_OPERAND_REG_8(AReg_256)
DECODE_OPERAND_REG_8(AReg_512)
DECODE_OPERAND_REG_8(AReg_1024)

static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
                                               uint64_t /*Addr*/,
                                               const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

static DecodeStatus
DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                 const MCDisassembler *Decoder) {
  assert(isUInt<8>(Imm) && "8-bit encoding expected");

  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
                                                uint64_t /*Addr*/,
                                                const MCDisassembler *Decoder) {
  assert(isUInt<9>(Imm) && "9-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  bool IsVGPR = Imm & (1 << 8);
  if (IsVGPR) {
    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
                                                   Imm & 0xFF, false, 16));
}

static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  bool IsVGPR = Imm & (1 << 8);
  if (IsVGPR) {
    bool IsHi = Imm & (1 << 9);
    unsigned RegIdx = Imm & 0xff;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
                                                   Imm & 0xFF, false, 16));
}

static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
                                         uint64_t Addr,
                                         const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}

static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
                                          uint64_t Addr, const void *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
}

static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
                          const MCRegisterInfo *MRI) {
  if (OpIdx < 0)
    return false;

  const MCOperand &Op = Inst.getOperand(OpIdx);
  if (!Op.isReg())
    return false;

  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
  return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
}

static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
                                 AMDGPUDisassembler::OpWidthTy Opw,
                                 const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  if (!DAsm->isGFX90A()) {
    Imm &= 511;
  } else {
    // If an atomic has both vdata and vdst, their register classes are tied.
    // The bit is decoded along with the vdst, the first operand. We need to
    // change the register class to AGPR if vdst was an AGPR.
    // If a DS instruction has both data0 and data1, their register classes
    // are also tied.
    unsigned Opc = Inst.getOpcode();
    uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
    uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
                                                        : AMDGPU::OpName::vdata;
    const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo();
    int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataNameIdx);
    if ((int)Inst.getNumOperands() == DataIdx) {
      int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (IsAGPROperand(Inst, DstIdx, MRI))
        Imm |= 512;
    }

    if (TSFlags & SIInstrFlags::DS) {
      int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
      if ((int)Inst.getNumOperands() == Data2Idx &&
          IsAGPROperand(Inst, DataIdx, MRI))
        Imm |= 512;
    }
  }
  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
}

template <AMDGPUDisassembler::OpWidthTy Opw>
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
                                 uint64_t /* Addr */,
                                 const MCDisassembler *Decoder) {
  return decodeAVLdSt(Inst, Imm, Opw, Decoder);
}

static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  assert(Imm < (1 << 9) && "9-bit encoding");
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(
      Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64, true));
}

#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)

#include "AMDGPUGenDisassemblerTables.inc"

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res =
      support::endian::read<T, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}

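// Read a 96-bit instruction word: the low 64 bits come first in memory,
// followed by the high 32 bits, both little-endian.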
static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
  assert(Bytes.size() >= 12);
  uint64_t Lo =
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
  uint64_t Hi =
      support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(4);
  return DecoderUInt128(Lo, Hi);
}

// The disassembler is greedy, so we need to check the FI operand value to
// avoid parsing a dpp8 instruction when the correct literal is not set. For
// dpp16, the autogenerated decoder checks the dpp literal.
static bool isValidDPP8(const MCInst &MI) {
  using namespace llvm::AMDGPU::DPP;
  int FiIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::fi);
  assert(FiIdx != -1);
  if ((unsigned)FiIdx >= MI.getNumOperands())
    return false;
  unsigned Fi = MI.getOperand(FiIdx).getImm();
  return Fi == DPP8_FI_0 || Fi == DPP8_FI_1;
}

DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &CS) const {
  bool IsSDWA = false;

  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  DecodeStatus Res = MCDisassembler::Fail;
  do {
    // ToDo: better to switch encoding length using some bit predicate
    // but it is unknown yet, so try all we can

    // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
    // encodings
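    // Within each width, the more specific tables (DPP8, DPP, SDWA) are tried
    // before the generic ones, so an overlapping generic match cannot shadow
    // a more specific encoding.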
    if (isGFX11Plus() && Bytes.size() >= 12) {
      DecoderUInt128 DecW = eat12Bytes(Bytes);
      Res =
          tryDecodeInst(DecoderTableDPP8GFX1196, DecoderTableDPP8GFX11_FAKE1696,
                        MI, DecW, Address, CS);
      if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
        break;
      MI = MCInst(); // clear
      Res =
          tryDecodeInst(DecoderTableDPP8GFX1296, DecoderTableDPP8GFX12_FAKE1696,
                        MI, DecW, Address, CS);
      if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
        break;
      MI = MCInst(); // clear

      const auto convertVOPDPP = [&]() {
        if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P) {
          convertVOP3PDPPInst(MI);
        } else if (AMDGPU::isVOPC64DPP(MI.getOpcode())) {
          convertVOPCDPPInst(MI); // Special VOP3 case
        } else {
          assert(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3);
          convertVOP3DPPInst(MI); // Regular VOP3 case
        }
      };
      Res = tryDecodeInst(DecoderTableDPPGFX1196, DecoderTableDPPGFX11_FAKE1696,
                          MI, DecW, Address, CS);
      if (Res) {
        convertVOPDPP();
        break;
      }
      Res = tryDecodeInst(DecoderTableDPPGFX1296, DecoderTableDPPGFX12_FAKE1696,
                          MI, DecW, Address, CS);
      if (Res) {
        convertVOPDPP();
        break;
      }
      Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address, CS);
      if (Res)
        break;

      Res = tryDecodeInst(DecoderTableGFX1296, MI, DecW, Address, CS);
      if (Res)
        break;

      Res = tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS);
      if (Res)
        break;
    }
    // Reinitialize Bytes
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding)) {
        Res = tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS);
        if (Res) {
          if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8)
              == -1)
            break;
          if (convertDPP8Inst(MI) == MCDisassembler::Success)
            break;
          MI = MCInst(); // clear
        }
      }

      Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address, CS);
      if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
        break;
      MI = MCInst(); // clear

      Res = tryDecodeInst(DecoderTableDPP8GFX1164,
                          DecoderTableDPP8GFX11_FAKE1664, MI, QW, Address, CS);
      if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
        break;
      MI = MCInst(); // clear

      Res = tryDecodeInst(DecoderTableDPP8GFX1264,
                          DecoderTableDPP8GFX12_FAKE1664, MI, QW, Address, CS);
      if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
        break;
      MI = MCInst(); // clear

      Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address, CS);
      if (Res) break;

      Res = tryDecodeInst(DecoderTableDPPGFX1164, DecoderTableDPPGFX11_FAKE1664,
                          MI, QW, Address, CS);
      if (Res) {
        if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
          convertVOPCDPPInst(MI);
        break;
      }

      Res = tryDecodeInst(DecoderTableDPPGFX1264, DecoderTableDPPGFX12_FAKE1664,
                          MI, QW, Address, CS);
      if (Res) {
        if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
          convertVOPCDPPInst(MI);
        break;
      }

      Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address, CS);
      if (Res) { IsSDWA = true; break; }

      Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address, CS);
      if (Res) { IsSDWA = true; break; }

      Res = tryDecodeInst(DecoderTableSDWA1064, MI, QW, Address, CS);
      if (Res) { IsSDWA = true; break; }

      if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem)) {
        Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS);
        if (Res)
          break;
      }

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
      // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
      // table first so we print the correct name.
      if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts)) {
        Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS);
        if (Res)
          break;
      }
    }

    // Reinitialize Bytes as DPP64 could have eaten too much
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    // Try to decode a 32-bit instruction
    if (Bytes.size() < 4) break;
    const uint32_t DW = eatBytes<uint32_t>(Bytes);
    Res = tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS);
    if (Res) break;

    if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts)) {
      Res = tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS);
      if (Res)
        break;
    }

    if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding)) {
      Res = tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS);
      if (Res) break;
    }

    Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
                        Address, CS);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
                        Address, CS);
    if (Res)
      break;

    if (Bytes.size() < 4) break;
    const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;

    if (STI.hasFeature(AMDGPU::FeatureGFX940Insts)) {
      Res = tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS);
      if (Res)
        break;
    }

    if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts)) {
      Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS);
      if (Res)
        break;
    }

    Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address, CS);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
                        Address, CS);
    if (Res)
      break;

    Res = tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
                        Address, CS);
    if (Res)
      break;

    Res = tryDecodeInst(DecoderTableWMMAGFX1164, MI, QW, Address, CS);
    if (Res)
      break;

    Res = tryDecodeInst(DecoderTableWMMAGFX1264, MI, QW, Address, CS);
  } while (false);

  if (Res && AMDGPU::isMAC(MI.getOpcode())) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (Res && (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
              MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp)) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
      !AMDGPU::hasGDS(STI)) {
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags &
              (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD))) {
    int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::cpol);
    if (CPolPos != -1) {
      unsigned CPol =
          (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
              AMDGPU::CPol::GLC : 0;
      if (MI.getNumOperands() <= (unsigned)CPolPos) {
        insertNamedMCOperand(MI, MCOperand::createImm(CPol),
                             AMDGPU::OpName::cpol);
      } else if (CPol) {
        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
      }
    }
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags &
              (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
      (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
    // GFX90A lost TFE, its place is occupied by ACC.
    int TFEOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
    if (TFEOpIdx != -1) {
      auto TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);
      MI.insert(TFEIter, MCOperand::createImm(0));
    }
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags &
              (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))) {
    int SWZOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
    if (SWZOpIdx != -1) {
      auto SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
      MI.insert(SWZIter, MCOperand::createImm(0));
    }
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
    int RsrcIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords) {
        Res = MCDisassembler::Fail;
      } else {
        for (unsigned i = 0; i < NSAArgs; ++i) {
          const unsigned VAddrIdx = VAddr0Idx + 1 + i;
          auto VAddrRCID =
              MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
          MI.insert(MI.begin() + VAddrIdx,
                    createRegOperand(VAddrRCID, Bytes[i]));
        }
        Bytes = Bytes.slice(4 * NSAWords);
      }
    }

    if (Res)
      Res = convertMIMGInst(MI);
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags &
              (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE)))
    Res = convertMIMGInst(MI);

  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP))
    Res = convertEXPInst(MI);

  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP))
    Res = convertVINTERPInst(MI);

  if (Res && IsSDWA)
    Res = convertSDWAInst(MI);

  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                              AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
                                           MCOI::OperandConstraint::TIED_TO);
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
                       !MI.getOperand(VDstIn_Idx).isReg() ||
                       MI.getOperand(VDstIn_Idx).getReg() !=
                           MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));
      insertNamedMCOperand(MI,
                           MCOperand::createReg(MI.getOperand(Tied).getReg()),
                           AMDGPU::OpName::vdst_in);
    }
  }

  int ImmLitIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
  bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
  if (Res && ImmLitIdx != -1 && !IsSOPK)
    Res = convertFMAanyK(MI, ImmLitIdx);

  // If the opcode was not recognized, we'll assume a Size of 4 bytes
  // (unless there are fewer bytes left).
  Size = Res ? (MaxInstBytesNum - Bytes.size())
             : std::min((size_t)4, Bytes_.size());
  return Res;
}

DecodeStatus AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
    // The MCInst still has these fields even though they are no longer encoded
    // in the GFX11 instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
  }
  return MCDisassembler::Success;
}

DecodeStatus AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx12) {
    // The MCInst has this field that is not directly encoded in the
    // instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
  }
  return MCDisassembler::Success;
}

DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
  return MCDisassembler::Success;
}

struct VOPModifiers {
  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;
};

// Reconstruct values of VOP3/VOP3P operands such as op_sel.
// Note that these values do not affect disassembler output,
// so this is only necessary for consistency with src_modifiers.
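// E.g. for a VOP3P inst with OP_SEL_0 set in src0_modifiers and
// src2_modifiers, the reconstructed op_sel operand value is 0b101.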
static VOPModifiers collectVOPModifiers(const MCInst &MI,
                                        bool IsVOP3P = false) {
  VOPModifiers Modifiers;
  unsigned Opc = MI.getOpcode();
  const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
                        AMDGPU::OpName::src1_modifiers,
                        AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    if (OpIdx == -1)
      continue;

    unsigned Val = MI.getOperand(OpIdx).getImm();

    Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
    if (IsVOP3P) {
      Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
      Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
      Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
    } else if (J == 0) {
      Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
    }
  }

  return Modifiers;
}

// Instructions decode the op_sel/suffix bits into the src_modifier
// operands. Copy those bits into the src operands for true16 VGPRs.
void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MCRegisterClass &ConversionRC =
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
  constexpr std::array<std::tuple<int, int, unsigned>, 4> OpAndOpMods = {
      {{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
        SISrcMods::DST_OP_SEL}}};
  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
    int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
    if (OpIdx == -1 || OpModsIdx == -1)
      continue;
    MCOperand &Op = MI.getOperand(OpIdx);
    if (!Op.isReg())
      continue;
    if (!ConversionRC.contains(Op.getReg()))
      continue;
    unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) { // isHi
      unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
      Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
    }
  }
}

// MAC opcodes have special old and src2 operands.
// src2 is tied to dst, while old is not tied (but assumed to be).
bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);
  auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);

  if (OldIdx != -1 && Desc.getOperandConstraint(
                          OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
    assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
    assert(Desc.getOperandConstraint(
               AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
               MCOI::OperandConstraint::TIED_TO) == DST_IDX);
    (void)DST_IDX;
    return true;
  }

  return false;
}

// Create a dummy old operand and insert dummy unused src2_modifiers.
void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
  insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
  insertNamedMCOperand(MI, MCOperand::createImm(0),
                       AMDGPU::OpName::src2_modifiers);
}

// We must check FI == literal to reject non-genuine dpp8 insts, and we must
// first add the optional MI operands to check FI.
DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();

  if (MCII->get(Opc).TSFlags & SIInstrFlags::VOP3P) {
    convertVOP3PDPPInst(MI);
  } else if ((MCII->get(Opc).TSFlags & SIInstrFlags::VOPC) ||
             AMDGPU::isVOPC64DPP(Opc)) {
    convertVOPCDPPInst(MI);
  } else {
    if (isMacDPP(MI))
      convertMacDPPInst(MI);

    int VDstInIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
    if (VDstInIdx != -1)
      insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

    if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 ||
        MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12)
      insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2);

    unsigned DescNumOps = MCII->get(Opc).getNumOperands();
    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
      convertTrue16OpSel(MI);
      auto Mods = collectVOPModifiers(MI);
      insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                           AMDGPU::OpName::op_sel);
    } else {
      // Insert dummy unused src modifiers.
      if (MI.getNumOperands() < DescNumOps &&
          AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
        insertNamedMCOperand(MI, MCOperand::createImm(0),
                             AMDGPU::OpName::src0_modifiers);

      if (MI.getNumOperands() < DescNumOps &&
          AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
        insertNamedMCOperand(MI, MCOperand::createImm(0),
                             AMDGPU::OpName::src1_modifiers);
    }
  }
  return isValidDPP8(MI) ? MCDisassembler::Success : MCDisassembler::SoftFail;
}

DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
  if (isMacDPP(MI))
    convertMacDPPInst(MI);

  convertTrue16OpSel(MI);

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2);

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
  return MCDisassembler::Success;
}

// Note that before gfx10 the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show the address as
// if it had 1 dword, which may not actually be the case.
DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
  auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);
  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  int RsrcOpName = TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc
                                                : AMDGPU::OpName::rsrc;
  int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::d16);

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);

  assert(VDataIdx != -1);
  if (BaseOpcode->BVH) {
    // Add A16 operand for intersect_ray instructions
    addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
    return MCDisassembler::Success;
  }

  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
  bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
  bool IsNSA = false;
  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  if (isGFX10Plus()) {
    unsigned DimIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
    int A16Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
    const AMDGPU::MIMGDimInfo *Dim =
        AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    AddrSize =
        AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));

    // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
    // VIMAGE insts other than BVH never use vaddr4.
    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
    if (!IsNSA) {
      if (!IsVSample && AddrSize > 12)
        AddrSize = 16;
    } else {
      if (AddrSize > Info->VAddrDwords) {
        if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
          // The NSA encoding does not contain enough operands for the
          // combination of base opcode / dimension. Should this be an error?
          return MCDisassembler::Success;
        }
        IsPartialNSA = true;
      }
    }
  }

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
    DstSize += 1;

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return MCDisassembler::Success;

  int NewOpcode =
      AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize,
                            AddrSize);
  if (NewOpcode == -1)
    return MCDisassembler::Success;

  // Widen the register to the correct number of enabled channels.
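  // E.g. with DMask = 0b0111, three channels are enabled, so a 32-bit vdata
  // operand is rewritten as the 96-bit tuple starting at the same register.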
  unsigned NewVdata = AMDGPU::NoRegister;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;

    // Get first subregister of VData
    unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
    unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;

    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
                                       &MRI.getRegClass(DataRCID));
    if (NewVdata == AMDGPU::NoRegister) {
      // It's possible to encode this such that the low register + enabled
      // components exceeds the register count.
      return MCDisassembler::Success;
    }
  }

  // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
  // If using partial NSA on GFX11+, widen the last address register.
  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  unsigned NewVAddrSA = AMDGPU::NoRegister;
  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    unsigned VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
    unsigned VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
    NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0,
                                         &MRI.getRegClass(AddrRCID));
    if (!NewVAddrSA)
      return MCDisassembler::Success;
  }

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

    if (IsAtomic) {
      // Atomic operations have an additional operand (a copy of data)
      MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
    }
  }

  if (NewVAddrSA) {
    MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
  } else if (IsNSA) {
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  }

  return MCDisassembler::Success;
}

// Opsel and neg bits are used in src_modifiers and standalone operands. The
// autogenerated decoder only adds them to src_modifiers, so manually add the
// bits to the other operands.
DecodeStatus AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  auto Mods = collectVOPModifiers(MI, true);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
                         AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
                         AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
                         AMDGPU::OpName::neg_hi);

  return MCDisassembler::Success;
}

// Create a dummy old operand and insert optional operands.
DecodeStatus AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
    insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src1_modifiers);
  return MCDisassembler::Success;
}

DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI,
                                                int ImmLitIdx) const {
  assert(HasLiteral && "Should have decoded a literal");
  const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
  unsigned DescNumOps = Desc.getNumOperands();
  insertNamedMCOperand(MI, MCOperand::createImm(Literal),
                       AMDGPU::OpName::immDeferred);
  assert(DescNumOps == MI.getNumOperands());
  for (unsigned I = 0; I < DescNumOps; ++I) {
    auto &Op = MI.getOperand(I);
    auto OpType = Desc.operands()[I].OperandType;
    bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED ||
                         OpType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED);
    if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST &&
        IsDeferredOp)
      Op.setImm(Literal);
  }
  return MCDisassembler::Success;
}

const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
    getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
}

inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                           ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}

inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accept as much as we can, let the assembler sort it out
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

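  // E.g. for SGPR_64 the encoding counts 32-bit registers, so s[2:3] is
  // encoded as Val = 2 and shifts down to index 1 in the 64-bit class.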
  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}

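// The VGPR_16 register class enumerates low and high halves interleaved
// (v0.l, v0.h, v1.l, ...), hence index = RegIdx * 2 + IsHi below.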
MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
                                                  bool IsHi) const {
  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
  return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
}

// Decode Literals for insts which always have a literal in the encoding
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
  if (HasLiteral) {
    assert(
        AMDGPU::hasVOPD(STI) &&
        "Should only decode multiple kimm with VOPD, check VSrc operand types");
    if (Literal != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Val;
  return MCOperand::createImm(Literal);
}

MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
  // For now all literal constants are supposed to be unsigned integers.
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                           Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = Literal64 = eatBytes<uint32_t>(Bytes);
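    // A 64-bit FP literal is encoded as the high 32 bits of the double, so
    // shift the 32-bit encoding up to reconstruct the full value.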
    if (ExtendFP64)
      Literal64 <<= 32;
  }
  return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal);
}

MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

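  // Encodings 128..192 map to the inline integers 0..64, and 193..208 map to
  // -1..-16.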
  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
  // Cast prevents negative overflow.
}

static int64_t getInlineImmVal32(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint32_t>(0.5f);
  case 241:
    return llvm::bit_cast<uint32_t>(-0.5f);
  case 242:
    return llvm::bit_cast<uint32_t>(1.0f);
  case 243:
    return llvm::bit_cast<uint32_t>(-1.0f);
  case 244:
    return llvm::bit_cast<uint32_t>(2.0f);
  case 245:
    return llvm::bit_cast<uint32_t>(-2.0f);
  case 246:
    return llvm::bit_cast<uint32_t>(4.0f);
  case 247:
    return llvm::bit_cast<uint32_t>(-4.0f);
  case 248: // 1 / (2 * PI)
    return 0x3e22f983;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal64(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint64_t>(0.5);
  case 241:
    return llvm::bit_cast<uint64_t>(-0.5);
  case 242:
    return llvm::bit_cast<uint64_t>(1.0);
  case 243:
    return llvm::bit_cast<uint64_t>(-1.0);
  case 244:
    return llvm::bit_cast<uint64_t>(2.0);
  case 245:
    return llvm::bit_cast<uint64_t>(-2.0);
  case 246:
    return llvm::bit_cast<uint64_t>(4.0);
  case 247:
    return llvm::bit_cast<uint64_t>(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

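// The 16-bit variant returns the IEEE half-precision bit patterns for the
// same set of values, e.g. 0x3800 is 0.5 and 0x3118 is 1/(2*pi).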
1420static int64_t getInlineImmVal16(unsigned Imm) {
1421 switch (Imm) {
1422 case 240:
1423 return 0x3800;
1424 case 241:
1425 return 0xB800;
1426 case 242:
1427 return 0x3C00;
1428 case 243:
1429 return 0xBC00;
1430 case 244:
1431 return 0x4000;
1432 case 245:
1433 return 0xC000;
1434 case 246:
1435 return 0x4400;
1436 case 247:
1437 return 0xC400;
1438 case 248: // 1 / (2 * PI)
1439 return 0x3118;
1440 default:
1441 llvm_unreachable("invalid fp inline imm");
1442 }
1443}
1444
1445MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm) {
1446 assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN
1447 && Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);
1448
1449 // ToDo: case 248: 1/(2*PI) - is allowed only on VI
1450 // ImmWidth 0 is a default case where operand should not allow immediates.
1451 // Imm value is still decoded into 32 bit immediate operand, inst printer will
1452 // use it to print verbose error message.
1453 switch (ImmWidth) {
1454 case 0:
1455 case 32:
1456 return MCOperand::createImm(Val: getInlineImmVal32(Imm));
1457 case 64:
1458 return MCOperand::createImm(Val: getInlineImmVal64(Imm));
1459 case 16:
1460 return MCOperand::createImm(Val: getInlineImmVal16(Imm));
1461 default:
1462 llvm_unreachable("implement me");
1463 }
1464}

unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall through
  case OPW32:
  case OPW16:
  case OPWV216:
    return VGPR_32RegClassID;
  case OPW64:
  case OPWV232: return VReg_64RegClassID;
  case OPW96: return VReg_96RegClassID;
  case OPW128: return VReg_128RegClassID;
  case OPW160: return VReg_160RegClassID;
  case OPW256: return VReg_256RegClassID;
  case OPW288: return VReg_288RegClassID;
  case OPW320: return VReg_320RegClassID;
  case OPW352: return VReg_352RegClassID;
  case OPW384: return VReg_384RegClassID;
  case OPW512: return VReg_512RegClassID;
  case OPW1024: return VReg_1024RegClassID;
  }
}

unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall through
  case OPW32:
  case OPW16:
  case OPWV216:
    return AGPR_32RegClassID;
  case OPW64:
  case OPWV232: return AReg_64RegClassID;
  case OPW96: return AReg_96RegClassID;
  case OPW128: return AReg_128RegClassID;
  case OPW160: return AReg_160RegClassID;
  case OPW256: return AReg_256RegClassID;
  case OPW288: return AReg_288RegClassID;
  case OPW320: return AReg_320RegClassID;
  case OPW352: return AReg_352RegClassID;
  case OPW384: return AReg_384RegClassID;
  case OPW512: return AReg_512RegClassID;
  case OPW1024: return AReg_1024RegClassID;
  }
}

unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall through
  case OPW32:
  case OPW16:
  case OPWV216:
    return SGPR_32RegClassID;
  case OPW64:
  case OPWV232: return SGPR_64RegClassID;
  case OPW96: return SGPR_96RegClassID;
  case OPW128: return SGPR_128RegClassID;
  case OPW160: return SGPR_160RegClassID;
  case OPW256: return SGPR_256RegClassID;
  case OPW288: return SGPR_288RegClassID;
  case OPW320: return SGPR_320RegClassID;
  case OPW352: return SGPR_352RegClassID;
  case OPW384: return SGPR_384RegClassID;
  case OPW512: return SGPR_512RegClassID;
  }
}

unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall through
  case OPW32:
  case OPW16:
  case OPWV216:
    return TTMP_32RegClassID;
  case OPW64:
  case OPWV232: return TTMP_64RegClassID;
  case OPW128: return TTMP_128RegClassID;
  case OPW256: return TTMP_256RegClassID;
  case OPW288: return TTMP_288RegClassID;
  case OPW320: return TTMP_320RegClassID;
  case OPW352: return TTMP_352RegClassID;
  case OPW384: return TTMP_384RegClassID;
  case OPW512: return TTMP_512RegClassID;
  }
}

int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax) ? Val - TTmpMin : -1;
}
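
// For example, on GFX9+ the trap temporaries occupy the contiguous encoding
// range [TTMP_GFX9PLUS_MIN, TTMP_GFX9PLUS_MAX], so an operand encoded as
// TTMP_GFX9PLUS_MIN + 3 maps to index 3 (ttmp3); anything outside the range
// yields -1, meaning "not a TTMP".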

MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
                                          bool MandatoryLiteral,
                                          unsigned ImmWidth, bool IsFP) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 1024); // enum10

  bool IsAGPR = Val & 512;
  Val &= 511;

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
    return createRegOperand(IsAGPR ? getAgprClassId(Width)
                                   : getVgprClassId(Width),
                            Val - VGPR_MIN);
  }
  return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth,
                            IsFP);
}
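
// In this 10-bit ("enum10") source encoding, bit 9 selects the accumulator
// file and bits [8:0] hold the ordinary 9-bit source value: an encoding of
// VGPR_MIN + 5 decodes to v5, and the same value with bit 9 set decodes to
// a5.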

MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width,
                                                 unsigned Val,
                                                 bool MandatoryLiteral,
                                                 unsigned ImmWidth,
                                                 bool IsFP) const {
  // Cases where Val{8} is 1 (VGPR, AGPR, or True16 VGPR) should have been
  // decoded earlier.
  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
  using namespace AMDGPU::EncValues;

  if (Val <= SGPR_MAX) {
    // "SGPR_MIN <= Val" is always true and causes a compilation warning.
    static_assert(SGPR_MIN == 0);
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
    return decodeIntImmed(Val);

  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
    return decodeFPImmed(ImmWidth, Val);

  if (Val == LITERAL_CONST) {
    if (MandatoryLiteral)
      // Keep a sentinel value for deferred setting.
      return MCOperand::createImm(LITERAL_CONST);
    return decodeLiteralConstant(IsFP && ImmWidth == 64);
  }

  switch (Width) {
  case OPW32:
  case OPW16:
  case OPWV216:
    return decodeSpecialReg32(Val);
  case OPW64:
  case OPWV232:
    return decodeSpecialReg64(Val);
  default:
    llvm_unreachable("unexpected immediate type");
  }
}
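
// The remaining 8-bit space is thus checked in priority order: SGPRs first,
// then TTMPs, then inline integer constants, then inline floating-point
// constants, then the literal-constant marker, and finally the special
// registers handled by decodeSpecialReg32/64.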

// Bit 0 of DstY isn't stored in the instruction, because it's always the
// opposite of bit 0 of DstX.
MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
                                               unsigned Val) const {
  int VDstXInd =
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
  assert(VDstXInd != -1);
  assert(Inst.getOperand(VDstXInd).isReg());
  unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
  Val |= ~XDstReg & 1;
  auto Width = llvm::AMDGPUDisassembler::OPW32;
  return createRegOperand(getVgprClassId(Width), Val);
}
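
// Since bit 0 of DstY is not encoded, the OR above reconstructs it as the
// complement of DstX's bit 0: a VOPD pair with DstX = v2 gets an odd DstY
// encoding, while DstX = v3 leaves DstY even, restoring the bit the encoding
// omits.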

MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  // clang-format off
  case 102: return createRegOperand(FLAT_SCR_LO);
  case 103: return createRegOperand(FLAT_SCR_HI);
  case 104: return createRegOperand(XNACK_MASK_LO);
  case 105: return createRegOperand(XNACK_MASK_HI);
  case 106: return createRegOperand(VCC_LO);
  case 107: return createRegOperand(VCC_HI);
  case 108: return createRegOperand(TBA_LO);
  case 109: return createRegOperand(TBA_HI);
  case 110: return createRegOperand(TMA_LO);
  case 111: return createRegOperand(TMA_HI);
  case 124:
    return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
  case 125:
    return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
  case 126: return createRegOperand(EXEC_LO);
  case 127: return createRegOperand(EXEC_HI);
  case 235: return createRegOperand(SRC_SHARED_BASE_LO);
  case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
  case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(SRC_VCCZ);
  case 252: return createRegOperand(SRC_EXECZ);
  case 253: return createRegOperand(SRC_SCC);
  case 254: return createRegOperand(LDS_DIRECT);
  default: break;
  // clang-format on
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}
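
// Note the 124/125 swap above: before GFX11 encoding 124 is m0 and 125 is
// the null register, while GFX11+ exchanges them, so both this function and
// the 64-bit variant below key the choice on isGFX11Plus().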

MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR);
  case 104: return createRegOperand(XNACK_MASK);
  case 106: return createRegOperand(VCC);
  case 108: return createRegOperand(TBA);
  case 110: return createRegOperand(TMA);
  case 124:
    if (isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  case 125:
    if (!isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  case 126: return createRegOperand(EXEC);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(SRC_VCCZ);
  case 252: return createRegOperand(SRC_EXECZ);
  case 253: return createRegOperand(SRC_SCC);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
                                            const unsigned Val,
                                            unsigned ImmWidth) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    // The cast to int silences a "comparison with unsigned is always true"
    // warning when SRC_VGPR_MIN is zero.
    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                              : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
      return decodeIntImmed(SVal);

    if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
      return decodeFPImmed(ImmWidth, SVal);

    return decodeSpecialReg32(SVal);
  }
  if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
    return createRegOperand(getVgprClassId(Width), Val);
  llvm_unreachable("unsupported target");
}
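
// In the SDWA9 encoding, VGPRs, SGPRs, and TTMPs occupy disjoint ranges of
// the src field; anything beyond them is rebased against SRC_SGPR_MIN (SVal
// above) so that inline constants and special registers can reuse the
// ordinary scalar decoding paths.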

MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
  return decodeSDWASrc(OPW16, Val, 16);
}

MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
  return decodeSDWASrc(OPW32, Val, 32);
}

MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
  using namespace AMDGPU::SDWA;

  assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
          STI.hasFeature(AMDGPU::FeatureGFX10)) &&
         "SDWAVopcDst should be present only on GFX9+");

  bool IsWave64 = STI.hasFeature(AMDGPU::FeatureWavefrontSize64);

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;

    int TTmpIdx = getTTmpIdx(Val);
    if (TTmpIdx >= 0) {
      auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32);
      return createSRegOperand(TTmpClsId, TTmpIdx);
    }
    if (Val > SGPR_MAX)
      return IsWave64 ? decodeSpecialReg64(Val) : decodeSpecialReg32(Val);
    return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val);
  }
  return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO);
}

MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
  return STI.hasFeature(AMDGPU::FeatureWavefrontSize64)
             ? decodeSrcOp(OPW64, Val)
             : decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

bool AMDGPUDisassembler::isVI() const {
  return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
}

bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }

bool AMDGPUDisassembler::isGFX90A() const {
  return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
}

bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }

bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }

bool AMDGPUDisassembler::isGFX10Plus() const {
  return AMDGPU::isGFX10Plus(STI);
}

bool AMDGPUDisassembler::isGFX11() const {
  return STI.hasFeature(AMDGPU::FeatureGFX11);
}

bool AMDGPUDisassembler::isGFX11Plus() const {
  return AMDGPU::isGFX11Plus(STI);
}

bool AMDGPUDisassembler::isGFX12Plus() const {
  return AMDGPU::isGFX12Plus(STI);
}

bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
  return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
}

bool AMDGPUDisassembler::hasKernargPreload() const {
  return AMDGPU::hasKernargPreload(STI);
}

//===----------------------------------------------------------------------===//
// AMDGPU specific symbol handling
//===----------------------------------------------------------------------===//

#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n';            \
  } while (0)
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)                        \
  do {                                                                         \
    KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " "       \
             << GET_FIELD(MASK) << '\n';                                       \
  } while (0)
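
// As an illustration, PRINT_DIRECTIVE(".amdhsa_tg_split",
// COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT) expands to roughly
//   KdStream << "\t" << ".amdhsa_tg_split "
//            << AMDHSA_BITS_GET(FourByteBuffer,
//                               COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT) << '\n';
// i.e. each macro prints one assembler directive whose value is a bit-field
// extracted from the word currently being decoded.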

// NOLINTNEXTLINE(readability-identifier-naming)
MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
  using namespace amdhsa;
  StringRef Indent = "\t";

  // We cannot accurately compute the number of VGPRs used backward from
  // GRANULATED_WORKITEM_VGPR_COUNT; all that matters is that the reassembled
  // binary ends up with the same GRANULATED_WORKITEM_VGPR_COUNT, so we simply
  // compute the inverse of what the assembler does.

  uint32_t GranulatedWorkitemVGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);

  uint32_t NextFreeVGPR =
      (GranulatedWorkitemVGPRCount + 1) *
      AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);

  KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
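
  // For example, with a VGPR encoding granule of 4, a stored granulated count
  // of 3 is printed back as .amdhsa_next_free_vgpr (3 + 1) * 4 = 16, which
  // the assembler re-encodes to the same granulated count of 3.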

  // We cannot compute backward the values used to derive
  // GRANULATED_WAVEFRONT_SGPR_COUNT, so the original values for the following
  // directives can't be recovered:
  //   .amdhsa_reserve_vcc
  //   .amdhsa_reserve_flat_scratch
  //   .amdhsa_reserve_xnack_mask
  // They take their respective default values if not specified in the
  // assembly.
  //
  //   GRANULATED_WAVEFRONT_SGPR_COUNT
  //     = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
  //
  // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
  // were set to 0, so while disassembling we treat it as:
  //
  //   GRANULATED_WAVEFRONT_SGPR_COUNT
  //     = f(NEXT_FREE_SGPR + 0 + 0 + 0)

  uint32_t GranulatedWavefrontSGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);

  if (isGFX10Plus() && GranulatedWavefrontSGPRCount)
    return MCDisassembler::Fail;

  uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
                          AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);

  KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
  if (!hasArchitectedFlatScratch())
    KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
  KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
  KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << '\n';

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIORITY)
    return MCDisassembler::Fail;

  PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV)
    return MCDisassembler::Fail;

  if (!isGFX12Plus())
    PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE)
    return MCDisassembler::Fail;

  if (!isGFX12Plus())
    PRINT_DIRECTIVE(".amdhsa_ieee_mode",
                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY)
    return MCDisassembler::Fail;

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER)
    return MCDisassembler::Fail;

  if (isGFX9Plus())
    PRINT_DIRECTIVE(".amdhsa_fp16_overflow",
                    COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);

  if (!isGFX9Plus())
    if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0)
      return MCDisassembler::Fail;
  if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED1)
    return MCDisassembler::Fail;
  if (!isGFX10Plus())
    if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2)
      return MCDisassembler::Fail;

  if (isGFX10Plus()) {
    PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
    PRINT_DIRECTIVE(".amdhsa_memory_ordered",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
    PRINT_DIRECTIVE(".amdhsa_forward_progress",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
  }

  if (isGFX12Plus())
    PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
                    COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);

  return MCDisassembler::Success;
}

// NOLINTNEXTLINE(readability-identifier-naming)
MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
  using namespace amdhsa;
  StringRef Indent = "\t";
  if (hasArchitectedFlatScratch())
    PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  else
    PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
  PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
                  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);

  if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH)
    return MCDisassembler::Fail;

  if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY)
    return MCDisassembler::Fail;

  if (FourByteBuffer & COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE)
    return MCDisassembler::Fail;

  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_invalid_op",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_div_zero",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
  PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);

  if (FourByteBuffer & COMPUTE_PGM_RSRC2_RESERVED0)
    return MCDisassembler::Fail;

  return MCDisassembler::Success;
}

// NOLINTNEXTLINE(readability-identifier-naming)
MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
  using namespace amdhsa;
  StringRef Indent = "\t";
  if (isGFX90A()) {
    KdStream << Indent << ".amdhsa_accum_offset "
             << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
             << '\n';
    if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED0)
      return MCDisassembler::Fail;
    PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
    if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED1)
      return MCDisassembler::Fail;
  } else if (isGFX10Plus()) {
    // Bits [0-3].
    if (!isGFX12Plus()) {
      if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
        PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
                        COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
      } else {
        PRINT_PSEUDO_DIRECTIVE_COMMENT(
            "SHARED_VGPR_COUNT",
            COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
      }
    } else {
      if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0)
        return MCDisassembler::Fail;
    }

    // Bits [4-11].
    if (isGFX11()) {
      PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
                                     COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
      PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
                                     COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
      PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
                                     COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
    } else if (isGFX12Plus()) {
      PRINT_PSEUDO_DIRECTIVE_COMMENT(
          "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
    } else {
      if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED1)
        return MCDisassembler::Fail;
    }

    // Bit [12].
    if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2)
      return MCDisassembler::Fail;

    // Bit [13].
    if (isGFX12Plus()) {
      PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
                                     COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
    } else {
      if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3)
        return MCDisassembler::Fail;
    }

    // Bits [14-30].
    if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4)
      return MCDisassembler::Fail;

    // Bit [31].
    if (isGFX11Plus()) {
      PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
                                     COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
    } else {
      if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED5)
        return MCDisassembler::Fail;
    }
  } else if (FourByteBuffer) {
    return MCDisassembler::Fail;
  }
  return MCDisassembler::Success;
}
#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
#undef PRINT_DIRECTIVE
#undef GET_FIELD

MCDisassembler::DecodeStatus
AMDGPUDisassembler::decodeKernelDescriptorDirective(
    DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
    raw_string_ostream &KdStream) const {
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " "                                        \
             << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';            \
  } while (0)
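
  // This local PRINT_DIRECTIVE differs from the one used above: it extracts
  // the field by hand from the two-byte buffer, relying on each MASK macro
  // having a matching MASK_SHIFT companion that the ## token-paste selects.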

  uint16_t TwoByteBuffer = 0;
  uint32_t FourByteBuffer = 0;

  StringRef ReservedBytes;
  StringRef Indent = "\t";

  assert(Bytes.size() == 64);
  DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);

  switch (Cursor.tell()) {
  case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
             << '\n';
    return MCDisassembler::Success;

  case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_private_segment_fixed_size "
             << FourByteBuffer << '\n';
    return MCDisassembler::Success;

  case amdhsa::KERNARG_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_kernarg_size " << FourByteBuffer << '\n';
    return MCDisassembler::Success;

  case amdhsa::RESERVED0_OFFSET:
    // 4 reserved bytes, must be 0.
    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)
        return MCDisassembler::Fail;
    }
    return MCDisassembler::Success;

  case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
    // KERNEL_CODE_ENTRY_BYTE_OFFSET. So far no directive controls this for
    // Code Object V3, so simply skip it during disassembly.
    DE.skip(Cursor, 8);
    return MCDisassembler::Success;

  case amdhsa::RESERVED1_OFFSET:
    // 20 reserved bytes, must be 0.
    ReservedBytes = DE.getBytes(Cursor, 20);
    for (int I = 0; I < 20; ++I) {
      if (ReservedBytes[I] != 0)
        return MCDisassembler::Fail;
    }
    return MCDisassembler::Success;

  case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);

  case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);

  case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);

  case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
    using namespace amdhsa;
    TwoByteBuffer = DE.getU16(Cursor);

    if (!hasArchitectedFlatScratch())
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
                      KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
    if (!hasArchitectedFlatScratch())
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
                      KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
      return MCDisassembler::Fail;

    // Reserved for GFX9.
    if (isGFX9() &&
        (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
      return MCDisassembler::Fail;
    } else if (isGFX10Plus()) {
      PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
                      KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
    }

    if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
      PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
                      KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1)
      return MCDisassembler::Fail;

    return MCDisassembler::Success;

  case amdhsa::KERNARG_PRELOAD_OFFSET:
    using namespace amdhsa;
    TwoByteBuffer = DE.getU16(Cursor);
    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
                      KERNARG_PRELOAD_SPEC_LENGTH);
    }

    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
                      KERNARG_PRELOAD_SPEC_OFFSET);
    }
    return MCDisassembler::Success;

  case amdhsa::RESERVED3_OFFSET:
    // 4 bytes from here are reserved, must be 0.
    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)
        return MCDisassembler::Fail;
    }
    return MCDisassembler::Success;

  default:
    llvm_unreachable("Unhandled index. Case statements cover everything.");
    return MCDisassembler::Fail;
  }
#undef PRINT_DIRECTIVE
}

MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor(
    StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
  // CP microcode requires the kernel descriptor to be 64-byte aligned.
  if (Bytes.size() != 64 || KdAddress % 64 != 0)
    return MCDisassembler::Fail;

  // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
  // requires us to know the setting of .amdhsa_wavefront_size32 in order to
  // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
  // order. Work around this by looking up .amdhsa_wavefront_size32 here first
  // when required.
  if (isGFX10Plus()) {
    uint16_t KernelCodeProperties =
        support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
                                llvm::endianness::little);
    EnableWavefrontSize32 =
        AMDHSA_BITS_GET(KernelCodeProperties,
                        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
  }

  std::string Kd;
  raw_string_ostream KdStream(Kd);
  KdStream << ".amdhsa_kernel " << KdName << '\n';

  DataExtractor::Cursor C(0);
  while (C && C.tell() < Bytes.size()) {
    MCDisassembler::DecodeStatus Status =
        decodeKernelDescriptorDirective(C, Bytes, KdStream);

    cantFail(C.takeError());

    if (Status == MCDisassembler::Fail)
      return MCDisassembler::Fail;
  }
  KdStream << ".end_amdhsa_kernel\n";
  outs() << KdStream.str();
  return MCDisassembler::Success;
}
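
// On success the stream holds a directive block of the shape
//   .amdhsa_kernel <name>
//       .amdhsa_group_segment_fixed_size <n>
//       ...
//   .end_amdhsa_kernel
// which is printed to stdout; per the comments in decodeCOMPUTE_PGM_RSRC1,
// it is meant to assemble back to the same 64-byte descriptor.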

std::optional<MCDisassembler::DecodeStatus>
AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
                                  ArrayRef<uint8_t> Bytes, uint64_t Address,
                                  raw_ostream &CStream) const {
  // Right now only kernel descriptors need to be handled; all other symbols
  // are ignored for target-specific handling.
  // TODO: Fix the spurious symbol issue for AMDGPU kernels. It exists for
  // both Code Object V2 and V3 when symbols are marked protected.

  // amd_kernel_code_t for Code Object V2.
  if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
    Size = 256;
    return MCDisassembler::Fail;
  }

  // Code Object V3 kernel descriptors.
  StringRef Name = Symbol.Name;
  if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
    Size = 64; // Size = 64 regardless of success or failure.
    return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
  }
  return std::nullopt;
}
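
// For example, an STT_OBJECT symbol named "my_kernel.kd" is decoded as the
// 64-byte kernel descriptor of "my_kernel" (the ".kd" suffix is dropped),
// while other symbols return std::nullopt and take the generic path.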

//===----------------------------------------------------------------------===//
// AMDGPUSymbolizer
//===----------------------------------------------------------------------===//

// Try to find a symbol name for the specified label.
bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
    MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
    uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
    uint64_t /*OpSize*/, uint64_t /*InstSize*/) {

  if (!IsBranch)
    return false;

  auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
  if (!Symbols)
    return false;

  auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
    return Val.Addr == static_cast<uint64_t>(Value) &&
           Val.Type == ELF::STT_NOTYPE;
  });
  if (Result != Symbols->end()) {
    auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
    const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
    Inst.addOperand(MCOperand::createExpr(Add));
    return true;
  }
  // Add to the list of referenced addresses, so the caller can synthesize a
  // label.
  ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
  return false;
}

void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
                                                       int64_t Value,
                                                       uint64_t Address) {
  llvm_unreachable("unimplemented");
}

//===----------------------------------------------------------------------===//
// Initialization
//===----------------------------------------------------------------------===//

static MCSymbolizer *
createAMDGPUSymbolizer(const Triple & /*TT*/, LLVMOpInfoCallback /*GetOpInfo*/,
                       LLVMSymbolLookupCallback /*SymbolLookUp*/,
                       void *DisInfo, MCContext *Ctx,
                       std::unique_ptr<MCRelocationInfo> &&RelInfo) {
  return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
}

static MCDisassembler *createAMDGPUDisassembler(const Target &T,
                                                const MCSubtargetInfo &STI,
                                                MCContext &Ctx) {
  return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
}

extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler() {
  TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
                                         createAMDGPUDisassembler);
  TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
                                       createAMDGPUSymbolizer);
}