1//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11
12#include "SIDefines.h"
13#include "llvm/IR/CallingConv.h"
14#include "llvm/IR/InstrTypes.h"
15#include "llvm/IR/Module.h"
16#include "llvm/Support/Alignment.h"
17#include <array>
18#include <functional>
19#include <utility>
20
21struct amd_kernel_code_t;
22
23namespace llvm {
24
25struct Align;
26class Argument;
27class Function;
28class GlobalValue;
29class MCInstrInfo;
30class MCRegisterClass;
31class MCRegisterInfo;
32class MCSubtargetInfo;
33class StringRef;
34class Triple;
35class raw_ostream;
36
37namespace AMDGPU {
38
39struct IsaVersion;
40
/// Versions of the generic targets produced by this release of LLVM.
///
/// A value is bumped each time a codegen-breaking change lands inside the
/// corresponding generic family.
namespace GenericVersion {
// `constexpr` at namespace scope already implies internal linkage, so no
// explicit `static` is needed.
constexpr unsigned GFX9 = 1;
constexpr unsigned GFX10_1 = 1;
constexpr unsigned GFX10_3 = 1;
constexpr unsigned GFX11 = 1;
} // namespace GenericVersion
51
// Named constants for AMDHSA code object versions 4, 5 and 6; each
// enumerator's value equals the version number in its name.
// NOTE(review): "COV" presumably abbreviates "code object version", matching
// the COV / CodeObjectVersion parameters of the functions below — confirm.
enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };
53
/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI);

/// \returns Code object version from the IR module flag of \p M.
unsigned getAMDHSACodeObjectVersion(const Module &M);

/// \returns Code object version from ELF's e_ident[EI_ABIVERSION], passed in
/// as \p ABIVersion.
unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion);

/// \returns The default HSA code object version. This should only be used when
/// we lack a more accurate CodeObjectVersion value (e.g. from the IR module
/// flag or a .amdhsa_code_object_version directive).
unsigned getDefaultAMDHSACodeObjectVersion();

/// \returns ABIVersion suitable for use in ELF's e_ident[EI_ABIVERSION].
///
/// \param OS Target triple. NOTE(review): how the triple influences the result
/// is not visible in this header — confirm against the definition.
/// \param CodeObjectVersion A value returned by getAMDHSACodeObjectVersion().
uint8_t getELFABIVersion(const Triple &OS, unsigned CodeObjectVersion);
71
// NOTE(review): in the four functions below \p COV is presumably the AMDHSA
// code object version (see AMDHSA_COV4..6) — confirm against the definitions.

/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr.
unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV);

/// \returns The offset of the hostcall pointer argument from implicitarg_ptr.
unsigned getHostcallImplicitArgPosition(unsigned COV);

// NOTE(review): the next two functions are undocumented in this header. By
// analogy with the two above they presumably return the offset of the
// default-queue and completion-action arguments from implicitarg_ptr —
// confirm against the definitions before relying on this.
unsigned getDefaultQueueImplicitArgPosition(unsigned COV);
unsigned getCompletionActionImplicitArgPosition(unsigned COV);
80
// Record type returned (by pointer) from the getGcnBufferFormatInfo()
// overloads declared later in this header.
// NOTE(review): the field meanings below are inferred from the field names
// and from the lookup parameters (BitsPerComp, NumComponents, NumFormat,
// Format) — confirm against the table definition.
struct GcnBufferFormatInfo {
  unsigned Format;        // presumably the combined buffer format value
  unsigned BitsPerComp;   // presumably bits per component
  unsigned NumComponents; // presumably number of components
  unsigned NumFormat;     // presumably the numeric-format part
  unsigned DataFormat;    // presumably the data-format part
};
88
// Per-opcode flags for MAI instructions.
// NOTE(review): presumably this is the row type of the table requested via
// GET_MAIInstInfoTable_DECL and the source of getMAIIsDGEMM() /
// getMAIIsGFX940XDL() — confirm against the definitions.
struct MAIInstInfo {
  uint16_t Opcode;    // opcode this entry describes
  bool is_dgemm;      // cf. getMAIIsDGEMM(): double precision GEMM
  bool is_gfx940_xdl; // cf. getMAIIsGFX940XDL()
};
94
// Pull in the generated searchable-table declarations.
// NOTE(review): presumably each GET_*_DECL macro enables the matching
// declaration section of the included file — confirm against the generated
// output. The "BiAS" casing in GET_MIMGBiASMapping_DECL differs from
// MIMGBiasMappingInfo declared below; verify that the generated file really
// tests this spelling (if not, the macro is a no-op).
#define GET_MIMGBaseOpcode_DECL
#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
#define GET_MIMGLZMapping_DECL
#define GET_MIMGMIPMapping_DECL
#define GET_MIMGBiASMapping_DECL
#define GET_MAIInstInfoTable_DECL
#include "AMDGPUGenSearchableTables.inc"
103
104namespace IsaInfo {
105
// SGPR-count constants.
enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  // NOTE(review): presumably the number of SGPRs reserved for the trap
  // handler — not documented here, confirm at the use sites.
  TRAP_NUM_SGPRS = 16
};
112
// State of one target-ID feature; AMDGPUTargetID keeps one of these each for
// xnack and sramecc.
enum class TargetIDSetting {
  Unsupported, // The "is...Supported()" queries return false for this value.
  Any,
  Off,
  On
};
119
120class AMDGPUTargetID {
121private:
122 const MCSubtargetInfo &STI;
123 TargetIDSetting XnackSetting;
124 TargetIDSetting SramEccSetting;
125
126public:
127 explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
128 ~AMDGPUTargetID() = default;
129
130 /// \return True if the current xnack setting is not "Unsupported".
131 bool isXnackSupported() const {
132 return XnackSetting != TargetIDSetting::Unsupported;
133 }
134
135 /// \returns True if the current xnack setting is "On" or "Any".
136 bool isXnackOnOrAny() const {
137 return XnackSetting == TargetIDSetting::On ||
138 XnackSetting == TargetIDSetting::Any;
139 }
140
141 /// \returns True if current xnack setting is "On" or "Off",
142 /// false otherwise.
143 bool isXnackOnOrOff() const {
144 return getXnackSetting() == TargetIDSetting::On ||
145 getXnackSetting() == TargetIDSetting::Off;
146 }
147
148 /// \returns The current xnack TargetIDSetting, possible options are
149 /// "Unsupported", "Any", "Off", and "On".
150 TargetIDSetting getXnackSetting() const {
151 return XnackSetting;
152 }
153
154 /// Sets xnack setting to \p NewXnackSetting.
155 void setXnackSetting(TargetIDSetting NewXnackSetting) {
156 XnackSetting = NewXnackSetting;
157 }
158
159 /// \return True if the current sramecc setting is not "Unsupported".
160 bool isSramEccSupported() const {
161 return SramEccSetting != TargetIDSetting::Unsupported;
162 }
163
164 /// \returns True if the current sramecc setting is "On" or "Any".
165 bool isSramEccOnOrAny() const {
166 return SramEccSetting == TargetIDSetting::On ||
167 SramEccSetting == TargetIDSetting::Any;
168 }
169
170 /// \returns True if current sramecc setting is "On" or "Off",
171 /// false otherwise.
172 bool isSramEccOnOrOff() const {
173 return getSramEccSetting() == TargetIDSetting::On ||
174 getSramEccSetting() == TargetIDSetting::Off;
175 }
176
177 /// \returns The current sramecc TargetIDSetting, possible options are
178 /// "Unsupported", "Any", "Off", and "On".
179 TargetIDSetting getSramEccSetting() const {
180 return SramEccSetting;
181 }
182
183 /// Sets sramecc setting to \p NewSramEccSetting.
184 void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
185 SramEccSetting = NewSramEccSetting;
186 }
187
188 void setTargetIDFromFeaturesString(StringRef FS);
189 void setTargetIDFromTargetIDStream(StringRef TargetID);
190
191 /// \returns String representation of an object.
192 std::string toString() const;
193};
194
/// \returns Wavefront size for the given subtarget \p STI.
unsigned getWavefrontSize(const MCSubtargetInfo *STI);

/// \returns Local memory size in bytes for the given subtarget \p STI.
unsigned getLocalMemorySize(const MCSubtargetInfo *STI);

/// \returns Maximum addressable local memory size in bytes for the given
/// subtarget \p STI.
unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI);

/// \returns Number of execution units (EUs) per compute unit (CU) for the
/// given subtarget \p STI.
unsigned getEUsPerCU(const MCSubtargetInfo *STI);
208
/// \returns Maximum number of work groups per compute unit for the given
/// subtarget \p STI, limited by the given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize);

/// \returns Minimum number of waves per execution unit for the given subtarget
/// \p STI.
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per execution unit for the given subtarget
/// \p STI without any kind of limitation.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Number of waves per execution unit required to support the given
/// \p FlatWorkGroupSize on subtarget \p STI.
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize);

/// \returns Minimum flat work group size for the given subtarget \p STI.
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Maximum flat work group size for the given subtarget \p STI.
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Number of waves per work group for the given subtarget \p STI and
/// \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize);
237
/// \returns SGPR allocation granularity for the given subtarget \p STI.
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);

/// \returns SGPR encoding granularity for the given subtarget \p STI.
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);

/// \returns Total number of SGPRs for the given subtarget \p STI.
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of SGPRs for the given subtarget \p STI.
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement for the given subtarget \p STI.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement for the given subtarget \p STI.
///
/// NOTE(review): the effect of \p Addressable is not documented here;
/// presumably it restricts the result to the addressable SGPR count (see
/// getAddressableNumSGPRs()) — confirm against the definition.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable);

/// \returns Number of extra SGPRs implicitly required by the given subtarget
/// \p STI when the given special registers are used (\p VCCUsed,
/// \p FlatScrUsed, \p XNACKUsed).
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed);

/// \returns Number of extra SGPRs implicitly required by the given subtarget
/// \p STI when the given special registers are used. Unlike the four-argument
/// overload, XNACK usage is inferred from \p STI.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed);

/// \returns Number of SGPR blocks needed for the given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
274
/// \returns VGPR allocation granularity for the given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned
getVGPRAllocGranule(const MCSubtargetInfo *STI,
                    std::optional<bool> EnableWavefrontSize32 = std::nullopt);

/// \returns VGPR encoding granularity for the given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPREncodingGranule(
    const MCSubtargetInfo *STI,
    std::optional<bool> EnableWavefrontSize32 = std::nullopt);

/// \returns Total number of VGPRs for the given subtarget \p STI.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of architectural VGPRs for the given subtarget
/// \p STI.
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of VGPRs for the given subtarget \p STI.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of VGPRs that meets the given number of waves per
/// execution unit requirement for the given subtarget \p STI.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of VGPRs that meets the given number of waves per
/// execution unit requirement for the given subtarget \p STI.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Number of waves reachable for a given \p NumVGPRs usage for the
/// given subtarget \p STI.
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
                                      unsigned NumVGPRs);

/// \returns Number of VGPR blocks needed for the given subtarget \p STI when
/// \p NumVGPRs are used. We actually return the number of blocks -1, since
/// that's what we encode.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match the
/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getEncodedNumVGPRBlocks(
    const MCSubtargetInfo *STI, unsigned NumVGPRs,
    std::optional<bool> EnableWavefrontSize32 = std::nullopt);

/// \returns Number of VGPR blocks that need to be allocated for the given
/// subtarget \p STI when \p NumVGPRs are used.
///
/// NOTE(review): \p EnableWavefrontSize32 is undocumented for this function;
/// presumably it has the same meaning as for getEncodedNumVGPRBlocks() —
/// confirm against the definition.
unsigned getAllocatedNumVGPRBlocks(
    const MCSubtargetInfo *STI, unsigned NumVGPRs,
    std::optional<bool> EnableWavefrontSize32 = std::nullopt);
329
330} // end namespace IsaInfo
331
// Represents a field in an encoded value.
//
// The field occupies bits [HighBit:LowBit] of the encoded word; \p D is the
// field's default value. Offset and Width are derived from the bit range so
// that a helper such as EncodingFields can shift and mask generically.
template <unsigned HighBit, unsigned LowBit, unsigned D = 0>
struct EncodingField {
  static_assert(HighBit >= LowBit, "Invalid bit range!");
  static constexpr unsigned Offset = LowBit;
  static constexpr unsigned Width = HighBit - LowBit + 1;

  using ValueType = unsigned;
  static constexpr ValueType Default = D;

  ValueType Value;
  // Intentionally non-explicit: plain integers convert to a field implicitly.
  constexpr EncodingField(ValueType Value) : Value(Value) {}

  // Returns the raw field value widened to 64 bits. No shifting or masking is
  // applied here; positioning is left to the caller.
  constexpr uint64_t encode() const { return Value; }

  // Converts an already shifted-and-masked field value back to ValueType.
  // constexpr (like encode()) so that it is usable in constant expressions;
  // the narrowing from uint64_t is spelled out explicitly.
  static constexpr ValueType decode(uint64_t Encoded) {
    return static_cast<ValueType>(Encoded);
  }
};
348
// Packs and unpacks a whole set of fields at once.
//
// encode() shifts every field's encoded value to its Offset and ORs the
// results together; decode() extracts each field by shifting down by its
// Offset and masking with its Width, yielding one tuple element per field.
template <typename... Fields> struct EncodingFields {
  static constexpr uint64_t encode(Fields... Values) {
    return (... | (Values.encode() << Values.Offset));
  }

  static std::tuple<typename Fields::ValueType...> decode(uint64_t Encoded) {
    return std::tuple<typename Fields::ValueType...>{Fields::decode(
        (Encoded >> Fields::Offset) & maxUIntN(Fields::Width))...};
  }
};
360
361LLVM_READONLY
362int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
363
364LLVM_READONLY
365inline bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx) {
366 return getNamedOperandIdx(Opcode, NamedIdx) != -1;
367}
368
369LLVM_READONLY
370int getSOPPWithRelaxation(uint16_t Opcode);
371
// Properties of one MIMG base opcode, as returned by getMIMGBaseOpcode() and
// getMIMGBaseOpcodeInfo().
// NOTE(review): the individual flags are not documented in this header; their
// meaning is only what the field names suggest — confirm against the table
// definition before relying on them.
struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;
  bool G16;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
  bool MSAA;
  bool BVH;
  bool A16;
};

// NOTE(review): presumably looks up the base-opcode info for the instruction
// opcode \p Opc — confirm against the definition.
LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);

// NOTE(review): presumably looks up the info by MIMGBaseOpcode value
// \p BaseOpcode — confirm against the definition.
LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
396
// Properties of one MIMG dimension. Entries can be looked up by enum value,
// by encoding, or by assembler suffix using the three functions below.
// NOTE(review): field meanings are inferred from their names — confirm
// against the table definition.
struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;
  uint8_t NumGradients;
  bool MSAA;
  bool DA;
  uint8_t Encoding;      // cf. getMIMGDimInfoByEncoding()
  const char *AsmSuffix; // cf. getMIMGDimInfoByAsmSuffix()
};

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
415
// The structs below each pair two MIMG base opcodes.
// NOTE(review): presumably each pair relates an opcode variant to its
// counterpart without the named feature (e.g. L vs. LZ, MIP vs. NONMIP) —
// confirm against the table definitions.

struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};

struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};

struct MIMGBiasMappingInfo {
  MIMGBaseOpcode Bias;
  MIMGBaseOpcode NoBias;
};

struct MIMGOffsetMappingInfo {
  MIMGBaseOpcode Offset;
  MIMGBaseOpcode NoOffset;
};

struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;
  MIMGBaseOpcode G16;
};
440
// Lookup functions for the MIMG mapping structs. Each takes the value named
// by its parameter (L, MIP, Bias, Offset, G) and returns a pointer to the
// matching entry.
// NOTE(review): the result for an unmapped opcode (presumably nullptr) is not
// visible here — confirm against the definitions.

LLVM_READONLY
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);

// Pairs two opcodes of a WMMA instruction.
// NOTE(review): presumably the 2-address and 3-address forms, cf.
// mapWMMA2AddrTo3AddrOpcode() / mapWMMA3AddrTo2AddrOpcode() — confirm.
struct WMMAOpcodeMappingInfo {
  unsigned Opcode2Addr;
  unsigned Opcode3Addr;
};

LLVM_READONLY
const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);

LLVM_READONLY
const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);

LLVM_READONLY
const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);

LLVM_READONLY
const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
460
// NOTE(review): presumably returns the MIMG opcode matching the given base
// opcode, encoding and dword counts (the same four properties MIMGInfo
// records). The result for "no match" is not visible here — confirm.
LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords);

// NOTE(review): presumably returns the variant of \p Opc with \p NewChannels
// data channels — confirm against the definition.
LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);

// NOTE(review): presumably computes the address size for the given base
// opcode and dimension, taking A16/G16 into account — units and exact rules
// are not visible here, confirm against the definition.
LLVM_READONLY
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported);

// Per-opcode MIMG record, returned by getMIMGInfo().
struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
  uint8_t VAddrOperands;
};

LLVM_READONLY
const MIMGInfo *getMIMGInfo(unsigned Opc);
484
// MTBUF opcode queries.
// NOTE(review): none of these are documented in this header. From the names,
// the int-returning functions presumably map between an opcode and its base
// opcode / element count, and the bool-returning ones report whether the
// opcode has the named operand — confirm against the definitions, including
// what is returned for an unknown opcode.

LLVM_READONLY
int getMTBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMTBUFElements(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSoffset(unsigned Opc);
502
// MUBUF opcode queries; the set mirrors the MTBUF queries above, plus
// getMUBUFIsBufferInv() and getMUBUFTfe().
// NOTE(review): none of these are documented in this header; semantics are
// inferred from the names only — confirm against the definitions, including
// what is returned for an unknown opcode.

LLVM_READONLY
int getMUBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMUBUFElements(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSoffset(unsigned Opc);

LLVM_READONLY
bool getMUBUFIsBufferInv(unsigned Opc);

LLVM_READONLY
bool getMUBUFTfe(unsigned Opc);
526
// Per-opcode property queries for SMEM, VOP1/2/3, VOPC and MAI instructions.
// NOTE(review): apart from getMAIIsDGEMM() these are undocumented here; what
// "IsSingle", "64DPP" and "AsmOnly" mean exactly must be confirmed against
// the definitions.

LLVM_READONLY
bool getSMEMIsBuffer(unsigned Opc);

LLVM_READONLY
bool getVOP1IsSingle(unsigned Opc);

LLVM_READONLY
bool getVOP2IsSingle(unsigned Opc);

LLVM_READONLY
bool getVOP3IsSingle(unsigned Opc);

LLVM_READONLY
bool isVOPC64DPP(unsigned Opc);

LLVM_READONLY
bool isVOPCAsmOnly(unsigned Opc);

/// \returns True if the MAI operation \p Opc is a double precision GEMM.
LLVM_READONLY
bool getMAIIsDGEMM(unsigned Opc);

// NOTE(review): presumably reports the is_gfx940_xdl flag of MAIInstInfo for
// \p Opc — confirm against the definition.
LLVM_READONLY
bool getMAIIsGFX940XDL(unsigned Opc);
551
// Result of getCanBeVOPD(): one flag per VOPD component.
// NOTE(review): presumably X / Y say whether the opcode may be used as the X /
// Y component of a VOPD instruction — confirm against the definition.
struct CanBeVOPD {
  bool X;
  bool Y;
};

/// \returns SIEncodingFamily used for VOPD encoding on a \p ST.
LLVM_READONLY
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST);

LLVM_READONLY
CanBeVOPD getCanBeVOPD(unsigned Opc);

// Looks up a buffer format entry by its component layout and numeric format
// for subtarget \p STI.
// NOTE(review): the result when nothing matches is not visible here — confirm.
LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI);
// Looks up a buffer format entry by its \p Format value for subtarget \p STI.
LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI);

// NOTE(review): presumably maps the pseudo \p Opcode to the MC opcode for
// generation \p Gen — confirm against the definition.
LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);

// NOTE(review): the three VOPD functions below are undocumented here;
// semantics are inferred from the names only — confirm against the
// definitions.
LLVM_READONLY
unsigned getVOPDOpcode(unsigned Opc);

LLVM_READONLY
int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily);

LLVM_READONLY
bool isVOPD(unsigned Opc);
584
// Opcode classification predicates. Unlike the lookups above these are
// declared LLVM_READNONE rather than LLVM_READONLY.
// NOTE(review): which opcodes each predicate accepts is not visible in this
// header — confirm against the definitions.

LLVM_READNONE
bool isMAC(unsigned Opc);

LLVM_READNONE
bool isPermlane16(unsigned Opc);

LLVM_READNONE
bool isGenericAtomic(unsigned Opc);

LLVM_READNONE
bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc);
596
597namespace VOPD {
598
// Operand indices within a single VOPD component (DST, SRC0..SRC2), followed
// by operand-count constants that share the same enum.
enum Component : unsigned {
  DST = 0,
  SRC0,
  SRC1,
  SRC2,

  DST_NUM = 1,                        // one dst operand per component
  MAX_SRC_NUM = 3,                    // at most three src operands
  MAX_OPR_NUM = DST_NUM + MAX_SRC_NUM // dst + src operands
};

// LSB mask for VGPR banks per VOPD component operand.
// 4 banks result in a mask 3, setting 2 lower bits.
// NOTE(review): presumably indexed by Component (DST, SRC0, SRC1, SRC2) —
// confirm at the use site.
constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1};

// Index of a component within a VOPD instruction, plus the list and count of
// all component indices.
enum ComponentIndex : unsigned { X = 0, Y = 1 };
constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y};
constexpr unsigned COMPONENTS_NUM = 2;
617
618// Properties of VOPD components.
619class ComponentProps {
620private:
621 unsigned SrcOperandsNum = 0;
622 unsigned MandatoryLiteralIdx = ~0u;
623 bool HasSrc2Acc = false;
624
625public:
626 ComponentProps() = default;
627 ComponentProps(const MCInstrDesc &OpDesc);
628
629 // Return the total number of src operands this component has.
630 unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; }
631
632 // Return the number of src operands of this component visible to the parser.
633 unsigned getCompParsedSrcOperandsNum() const {
634 return SrcOperandsNum - HasSrc2Acc;
635 }
636
637 // Return true iif this component has a mandatory literal.
638 bool hasMandatoryLiteral() const { return MandatoryLiteralIdx != ~0u; }
639
640 // If this component has a mandatory literal, return component operand
641 // index of this literal (i.e. either Component::SRC1 or Component::SRC2).
642 unsigned getMandatoryLiteralCompOperandIndex() const {
643 assert(hasMandatoryLiteral());
644 return MandatoryLiteralIdx;
645 }
646
647 // Return true iif this component has operand
648 // with component index CompSrcIdx and this operand may be a register.
649 bool hasRegSrcOperand(unsigned CompSrcIdx) const {
650 assert(CompSrcIdx < Component::MAX_SRC_NUM);
651 return SrcOperandsNum > CompSrcIdx && !hasMandatoryLiteralAt(CompSrcIdx);
652 }
653
654 // Return true iif this component has tied src2.
655 bool hasSrc2Acc() const { return HasSrc2Acc; }
656
657private:
658 bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const {
659 assert(CompSrcIdx < Component::MAX_SRC_NUM);
660 return MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx;
661 }
662};
663
// Kind of a VOPD component. The enumerator values are used as indices into
// the per-kind tables of ComponentLayout, so their order matters.
enum ComponentKind : unsigned {
  SINGLE = 0,  // A single VOP1 or VOP2 instruction which may be used in VOPD.
  COMPONENT_X, // A VOPD instruction, X component.
  COMPONENT_Y, // A VOPD instruction, Y component.
  MAX = COMPONENT_Y
};
670
671// Interface functions of this class map VOPD component operand indices
672// to indices of operands in MachineInstr/MCInst or parsed operands array.
673//
674// Note that this class operates with 3 kinds of indices:
675// - VOPD component operand indices (Component::DST, Component::SRC0, etc.);
676// - MC operand indices (they refer operands in a MachineInstr/MCInst);
677// - parsed operand indices (they refer operands in parsed operands array).
678//
679// For SINGLE components mapping between these indices is trivial.
680// But things get more complicated for COMPONENT_X and
681// COMPONENT_Y because these components share the same
682// MachineInstr/MCInst and the same parsed operands array.
683// Below is an example of component operand to parsed operand
684// mapping for the following instruction:
685//
686// v_dual_add_f32 v255, v4, v5 :: v_dual_mov_b32 v6, v1
687//
688// PARSED COMPONENT PARSED
689// COMPONENT OPERANDS OPERAND INDEX OPERAND INDEX
690// -------------------------------------------------------------------
691// "v_dual_add_f32" 0
692// v_dual_add_f32 v255 0 (DST) --> 1
693// v4 1 (SRC0) --> 2
694// v5 2 (SRC1) --> 3
695// "::" 4
696// "v_dual_mov_b32" 5
697// v_dual_mov_b32 v6 0 (DST) --> 6
698// v1 1 (SRC0) --> 7
699// -------------------------------------------------------------------
700//
701class ComponentLayout {
702private:
703 // Regular MachineInstr/MCInst operands are ordered as follows:
704 // dst, src0 [, other src operands]
705 // VOPD MachineInstr/MCInst operands are ordered as follows:
706 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
707 // Each ComponentKind has operand indices defined below.
708 static constexpr unsigned MC_DST_IDX[] = {0, 0, 1};
709 static constexpr unsigned FIRST_MC_SRC_IDX[] = {1, 2, 2 /* + OpX.MCSrcNum */};
710
711 // Parsed operands of regular instructions are ordered as follows:
712 // Mnemo dst src0 [vsrc1 ...]
713 // Parsed VOPD operands are ordered as follows:
714 // OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
715 // OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
716 // Each ComponentKind has operand indices defined below.
717 static constexpr unsigned PARSED_DST_IDX[] = {1, 1,
718 4 /* + OpX.ParsedSrcNum */};
719 static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {
720 2, 2, 5 /* + OpX.ParsedSrcNum */};
721
722private:
723 const ComponentKind Kind;
724 const ComponentProps PrevComp;
725
726public:
727 // Create layout for COMPONENT_X or SINGLE component.
728 ComponentLayout(ComponentKind Kind) : Kind(Kind) {
729 assert(Kind == ComponentKind::SINGLE || Kind == ComponentKind::COMPONENT_X);
730 }
731
732 // Create layout for COMPONENT_Y which depends on COMPONENT_X layout.
733 ComponentLayout(const ComponentProps &OpXProps)
734 : Kind(ComponentKind::COMPONENT_Y), PrevComp(OpXProps) {}
735
736public:
737 // Return the index of dst operand in MCInst operands.
738 unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; }
739
740 // Return the index of the specified src operand in MCInst operands.
741 unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx) const {
742 assert(CompSrcIdx < Component::MAX_SRC_NUM);
743 return FIRST_MC_SRC_IDX[Kind] + getPrevCompSrcNum() + CompSrcIdx;
744 }
745
746 // Return the index of dst operand in the parsed operands array.
747 unsigned getIndexOfDstInParsedOperands() const {
748 return PARSED_DST_IDX[Kind] + getPrevCompParsedSrcNum();
749 }
750
751 // Return the index of the specified src operand in the parsed operands array.
752 unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const {
753 assert(CompSrcIdx < Component::MAX_SRC_NUM);
754 return FIRST_PARSED_SRC_IDX[Kind] + getPrevCompParsedSrcNum() + CompSrcIdx;
755 }
756
757private:
758 unsigned getPrevCompSrcNum() const {
759 return PrevComp.getCompSrcOperandsNum();
760 }
761 unsigned getPrevCompParsedSrcNum() const {
762 return PrevComp.getCompParsedSrcOperandsNum();
763 }
764};
765
// Layout and properties of VOPD components.
//
// Combines ComponentLayout (operand index mapping) and ComponentProps
// (operand properties derived from an MCInstrDesc) for one component.
class ComponentInfo : public ComponentLayout, public ComponentProps {
public:
  // Create ComponentInfo for COMPONENT_X or SINGLE component.
  // Not explicit: an MCInstrDesc converts implicitly to a SINGLE component,
  // which InstInfo's MCInstrDesc constructor relies on.
  ComponentInfo(const MCInstrDesc &OpDesc,
                ComponentKind Kind = ComponentKind::SINGLE)
      : ComponentLayout(Kind), ComponentProps(OpDesc) {}

  // Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout.
  // OpXProps are the properties of the X component of the same instruction.
  ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps)
      : ComponentLayout(OpXProps), ComponentProps(OpDesc) {}

  // Map component operand index to parsed operand index.
  // Return 0 if the specified operand does not exist.
  unsigned getIndexInParsedOperands(unsigned CompOprIdx) const;
};
782
783// Properties of VOPD instructions.
784class InstInfo {
785private:
786 const ComponentInfo CompInfo[COMPONENTS_NUM];
787
788public:
789 using RegIndices = std::array<unsigned, Component::MAX_OPR_NUM>;
790
791 InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
792 : CompInfo{OpX, OpY} {}
793
794 InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY)
795 : CompInfo{OprInfoX, OprInfoY} {}
796
797 const ComponentInfo &operator[](size_t ComponentIdx) const {
798 assert(ComponentIdx < COMPONENTS_NUM);
799 return CompInfo[ComponentIdx];
800 }
801
802 // Check VOPD operands constraints.
803 // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
804 // for the specified component and MC operand. The callback must return 0
805 // if the operand is not a register or not a VGPR.
806 // If \p SkipSrc is set to true then constraints for source operands are not
807 // checked.
808 bool hasInvalidOperand(std::function<unsigned(unsigned, unsigned)> GetRegIdx,
809 bool SkipSrc = false) const {
810 return getInvalidCompOperandIndex(GetRegIdx: GetRegIdx, SkipSrc).has_value();
811 }
812
813 // Check VOPD operands constraints.
814 // Return the index of an invalid component operand, if any.
815 // If \p SkipSrc is set to true then constraints for source operands are not
816 // checked.
817 std::optional<unsigned> getInvalidCompOperandIndex(
818 std::function<unsigned(unsigned, unsigned)> GetRegIdx,
819 bool SkipSrc = false) const;
820
821private:
822 RegIndices
823 getRegIndices(unsigned ComponentIdx,
824 std::function<unsigned(unsigned, unsigned)> GetRegIdx) const;
825};
826
827} // namespace VOPD
828
// NOTE(review): presumably returns the opcodes of the X and Y components of
// \p VOPDOpcode as (first, second) — confirm against the definition.
LLVM_READONLY
std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode);

LLVM_READONLY
// Get properties of 2 single VOP1/VOP2 instructions
// used as components to create a VOPD instruction.
// \p OpX and \p OpY describe the would-be X and Y components.
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY);

LLVM_READONLY
// Get properties of VOPD X and Y components.
// \p VOPDOpcode is the opcode of an existing VOPD instruction; \p InstrInfo
// is presumably used to look up the component descriptions — confirm.
VOPD::InstInfo
getVOPDInstInfo(unsigned VOPDOpcode, const MCInstrInfo *InstrInfo);

// NOTE(review): undocumented here; presumably tells whether \p Opc is a
// True16 instruction — confirm against the definition.
LLVM_READONLY
bool isTrue16Inst(unsigned Opc);

// NOTE(review): the two WMMA mappers are undocumented here; presumably they
// translate between the Opcode2Addr and Opcode3Addr members of
// WMMAOpcodeMappingInfo. The result for an unmapped opcode is not visible
// here — confirm against the definitions.
LLVM_READONLY
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);

LLVM_READONLY
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);
850
// Writes into \p Header (taken by non-const reference).
// NOTE(review): presumably fills it with default values for subtarget \p STI;
// which fields are set is not visible here — confirm against the definition.
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI);

// Segment classification of a global value.
// NOTE(review): undocumented here; presumably decided from the address space
// of \p GV — confirm against the definitions.
bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);

/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.
bool shouldEmitConstantsToTextSection(const Triple &TT);
861
/// \returns Integer value requested using \p F's \p Name attribute.
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if requested value cannot be converted
/// to integer.
int getIntegerAttribute(const Function &F, StringRef Name, int Default);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional only when
/// \p OnlyFirstRequired is true).
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if one of the requested values cannot be
/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
/// not present.
std::pair<unsigned, unsigned>
getIntegerPairAttribute(const Function &F, StringRef Name,
                        std::pair<unsigned, unsigned> Default,
                        bool OnlyFirstRequired = false);

/// \returns A vector of integer values requested using \p F's \p Name
/// attribute. \p Size is the number of integers expected in the attribute.
///
/// NOTE(review): the previous wording described a true/false result, which
/// does not match the SmallVector return type. What is returned when the
/// attribute is missing, malformed, or does not hold exactly \p Size values
/// is not visible in this header — confirm against the definition.
SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
                                             unsigned Size);
893
/// Counter values to wait for in an s_waitcnt instruction.
///
/// A large value (up to and including the maximum unsigned integer) stands
/// for a "don't care" wait; every counter defaults to ~0u.
struct Waitcnt {
  unsigned LoadCnt = ~0u;   // Corresponds to Vmcnt prior to gfx12.
  unsigned ExpCnt = ~0u;
  unsigned DsCnt = ~0u;     // Corresponds to LGKMcnt prior to gfx12.
  unsigned StoreCnt = ~0u;  // Corresponds to VScnt on gfx10/gfx11.
  unsigned SampleCnt = ~0u; // gfx12+ only.
  unsigned BvhCnt = ~0u;    // gfx12+ only.
  unsigned KmCnt = ~0u;     // gfx12+ only.

  Waitcnt() = default;

  // Pre-gfx12 constructor. The gfx12+-only counters keep their ~0u default
  // member initializers.
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt) {}

  // gfx12+ constructor.
  Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
          unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt)
      : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt),
        SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt) {}

  // True if any counter differs from ~0u.
  bool hasWait() const { return hasWaitStoreCnt() || hasWaitExceptStoreCnt(); }

  // True if any counter other than StoreCnt differs from ~0u.
  bool hasWaitExceptStoreCnt() const {
    const bool AllDontCare = LoadCnt == ~0u && ExpCnt == ~0u &&
                             DsCnt == ~0u && SampleCnt == ~0u &&
                             BvhCnt == ~0u && KmCnt == ~0u;
    return !AllDontCare;
  }

  // True if StoreCnt differs from ~0u.
  bool hasWaitStoreCnt() const { return !(StoreCnt == ~0u); }

  // Counter-wise minimum of this wait and Other.
  // Does the right thing provided self and Other are either both pre-gfx12
  // or both gfx12+.
  Waitcnt combined(const Waitcnt &Other) const {
    Waitcnt Merged;
    Merged.LoadCnt = std::min(LoadCnt, Other.LoadCnt);
    Merged.ExpCnt = std::min(ExpCnt, Other.ExpCnt);
    Merged.DsCnt = std::min(DsCnt, Other.DsCnt);
    Merged.StoreCnt = std::min(StoreCnt, Other.StoreCnt);
    Merged.SampleCnt = std::min(SampleCnt, Other.SampleCnt);
    Merged.BvhCnt = std::min(BvhCnt, Other.BvhCnt);
    Merged.KmCnt = std::min(KmCnt, Other.KmCnt);
    return Merged;
  }
};
938
// The following methods are only meaningful on targets that support
// S_WAITCNT.

/// \returns Vmcnt bit mask for the given isa \p Version.
unsigned getVmcntBitMask(const IsaVersion &Version);

/// \returns Expcnt bit mask for the given isa \p Version.
unsigned getExpcntBitMask(const IsaVersion &Version);

/// \returns Lgkmcnt bit mask for the given isa \p Version.
unsigned getLgkmcntBitMask(const IsaVersion &Version);

/// \returns Waitcnt bit mask for the given isa \p Version.
unsigned getWaitcntBitMask(const IsaVersion &Version);

/// \returns Decoded Vmcnt from the given encoded \p Waitcnt for the given isa
/// \p Version.
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Expcnt from the given encoded \p Waitcnt for the given isa
/// \p Version.
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Lgkmcnt from the given encoded \p Waitcnt for the given
/// isa \p Version.
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
962
963/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
964/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
965/// \p Lgkmcnt respectively. Should not be used on gfx12+, where the
966/// instruction which needs it is deprecated.
967///
968/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
969/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9)
970/// \p Vmcnt = \p Waitcnt[15:14,3:0] (gfx9,10)
971/// \p Vmcnt = \p Waitcnt[15:10] (gfx11)
972/// \p Expcnt = \p Waitcnt[6:4] (pre-gfx11)
973/// \p Expcnt = \p Waitcnt[2:0] (gfx11)
974/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10)
975/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10)
976/// \p Lgkmcnt = \p Waitcnt[9:4] (gfx11)
977///
978void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
979 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
980
981Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
982
983/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
984unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
985 unsigned Vmcnt);
986
987/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
988unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
989 unsigned Expcnt);
990
991/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
992unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
993 unsigned Lgkmcnt);
994
995/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
996/// \p Version. Should not be used on gfx12+, the instruction which needs
997/// it is deprecated there.
998///
999/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
1000/// Waitcnt[2:0] = \p Expcnt (gfx11+)
1001/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9)
1002/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9,10)
1003/// Waitcnt[6:4] = \p Expcnt (pre-gfx11)
1004/// Waitcnt[9:4] = \p Lgkmcnt (gfx11)
1005/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10)
1006/// Waitcnt[13:8] = \p Lgkmcnt (gfx10)
1007/// Waitcnt[15:10] = \p Vmcnt (gfx11)
1008/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9,10)
1009///
1010/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
1011/// isa \p Version.
1012///
1013unsigned encodeWaitcnt(const IsaVersion &Version,
1014 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
1015
1016unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
1017
1018// The following methods are only meaningful on targets that support
1019// S_WAIT_*CNT, introduced with gfx12.
1020
1021/// \returns Loadcnt bit mask for given isa \p Version.
1022/// Returns 0 for versions that do not support LOADcnt
1023unsigned getLoadcntBitMask(const IsaVersion &Version);
1024
1025/// \returns Samplecnt bit mask for given isa \p Version.
1026/// Returns 0 for versions that do not support SAMPLEcnt
1027unsigned getSamplecntBitMask(const IsaVersion &Version);
1028
1029/// \returns Bvhcnt bit mask for given isa \p Version.
1030/// Returns 0 for versions that do not support BVHcnt
1031unsigned getBvhcntBitMask(const IsaVersion &Version);
1032
1033/// \returns Dscnt bit mask for given isa \p Version.
1034/// Returns 0 for versions that do not support DScnt
1035unsigned getDscntBitMask(const IsaVersion &Version);
1036
1037/// \returns Kmcnt bit mask for given isa \p Version.
1038/// Returns 0 for versions that do not support KMcnt
1039unsigned getKmcntBitMask(const IsaVersion &Version);
1040
1041/// \returns STOREcnt or VScnt bit mask for given isa \p Version.
1042/// Returns 0 for versions that do not support STOREcnt or VScnt.
1043/// STOREcnt and VScnt are the same counter, the name used
1044/// depends on the ISA version.
1045unsigned getStorecntBitMask(const IsaVersion &Version);
1046
1047// The following are only meaningful on targets that support
1048// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.
1049
1050/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
1051/// isa \p Version.
1052Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt);
1053
1054/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
1055/// isa \p Version.
1056Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt);
1057
1058/// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an
1059/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
1060/// \p Version.
1061unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1062
1063/// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an
1064/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
1065/// \p Version.
1066unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1067
1068namespace Hwreg {
1069
// Fields of an encoded hwreg operand.
// NOTE(review): the two leading template arguments look like the high and low
// bit positions of each field - confirm against the EncodingField definition.
1070using HwregId = EncodingField<5, 0>;
1071using HwregOffset = EncodingField<10, 6>;
1072
// Size field. encode() yields Value - 1 and decode() adds the 1 back, so the
// encoded bits hold the size minus one.
// NOTE(review): the third template argument (32) is presumably the field's
// default value - confirm against the EncodingField definition.
1073struct HwregSize : EncodingField<15, 11, 32> {
1074 using EncodingField::EncodingField;
1075 constexpr uint64_t encode() const { return Value - 1; }
1076 static ValueType decode(uint64_t Encoded) { return Encoded + 1; }
1077};
1078
// The id, offset and size fields taken together.
1079using HwregEncoding = EncodingFields<HwregId, HwregOffset, HwregSize>;
1080
/// NOTE(review): presumably maps the symbolic register name \p Name to its
/// hwreg id for \p STI; the result for an unknown name is not visible here.
1081LLVM_READONLY
1082int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI);
1083
/// NOTE(review): presumably the inverse lookup, from hwreg id \p Id to its
/// symbolic name for \p STI - confirm against the definition.
1084LLVM_READNONE
1085StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);
1086
1087} // namespace Hwreg
1088
1089namespace DepCtr {
1090
// NOTE(review): the next four helpers carry no documentation in this header.
// Judging only by their names and signatures they produce, build, classify
// and parse the immediate of a depctr operand for \p STI; confirm the exact
// contracts (return codes, meaning of the out-parameters) against the
// definitions before relying on them.
1091int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI);
1092int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
1093 const MCSubtargetInfo &STI);
1094bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
1095 const MCSubtargetInfo &STI);
1096bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
1097 bool &IsDefault, const MCSubtargetInfo &STI);
1098
// Per-field accessors. Each encodeField* helper comes in two overloads: one
// taking only the field value, and one that additionally takes an existing
// immediate \p Encoded to combine the field into.
1099/// \returns Decoded VaVdst from given immediate \p Encoded.
1100unsigned decodeFieldVaVdst(unsigned Encoded);
1101
1102/// \returns Decoded VmVsrc from given immediate \p Encoded.
1103unsigned decodeFieldVmVsrc(unsigned Encoded);
1104
1105/// \returns Decoded SaSdst from given immediate \p Encoded.
1106unsigned decodeFieldSaSdst(unsigned Encoded);
1107
1108/// \returns \p VmVsrc as an encoded Depctr immediate.
1109unsigned encodeFieldVmVsrc(unsigned VmVsrc);
1110
1111/// \returns \p Encoded combined with encoded \p VmVsrc.
1112unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc);
1113
1114/// \returns \p VaVdst as an encoded Depctr immediate.
1115unsigned encodeFieldVaVdst(unsigned VaVdst);
1116
1117/// \returns \p Encoded combined with encoded \p VaVdst.
1118unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst);
1119
1120/// \returns \p SaSdst as an encoded Depctr immediate.
1121unsigned encodeFieldSaSdst(unsigned SaSdst);
1122
1123/// \returns \p Encoded combined with encoded \p SaSdst.
1124unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst);
1125
1126} // namespace DepCtr
1127
1128namespace Exp {
1129
/// NOTE(review): presumably looks up export target \p Id and reports its name
/// and index through \p Name and \p Index, returning whether the lookup
/// succeeded - confirm against the definition.
1130bool getTgtName(unsigned Id, StringRef &Name, int &Index);
1131
/// NOTE(review): presumably the inverse lookup, from target name to target
/// id; the result for an unrecognised \p Name is not visible here.
1132LLVM_READONLY
1133unsigned getTgtId(const StringRef Name);
1134
/// NOTE(review): presumably \returns true if export target \p Id is available
/// on \p STI - confirm against the definition.
1135LLVM_READNONE
1136bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);
1137
1138} // namespace Exp
1139
1140namespace MTBUFFormat {
1141
// NOTE(review): nothing in this namespace is documented in the header. From
// the names alone, "Dfmt" and "Nfmt" appear to be two separate format fields
// and the "unified" format ("Ufmt") a single combined one; which subtargets
// use which form is decided by the definitions, not visible here.

// Pack / unpack a (Dfmt, Nfmt) pair into / out of one format value.
1142LLVM_READNONE
1143int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);
1144
1145void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);
1146
// Name <-> id lookups. The Dfmt lookups take no subtarget; the Nfmt and
// unified-format lookups below do.
1147int64_t getDfmt(const StringRef Name);
1148
1149StringRef getDfmtName(unsigned Id);
1150
1151int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);
1152
1153StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);
1154
1155bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);
1156
1157bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);
1158
1159int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI);
1160
1161StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI);
1162
1163bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI);
1164
// NOTE(review): presumably maps a (Dfmt, Nfmt) pair to the equivalent unified
// format for \p STI; the result for a pair with no equivalent is not visible
// here.
1165int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
1166 const MCSubtargetInfo &STI);
1167
1168bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);
1169
1170unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);
1171
1172} // namespace MTBUFFormat
1173
1174namespace SendMsg {
1175
// NOTE(review): nothing in this namespace is documented in the header. The
// signatures show that a message is described by three numbers - a message
// id, an operation id and a stream id - which encodeMsg()/decodeMsg() pack
// into and unpack from a single value. Everything else below is inferred
// from names; confirm against the definitions.

// Name <-> id lookups for messages and for the operations of a message.
1176LLVM_READONLY
1177int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI);
1178
1179LLVM_READONLY
1180int64_t getMsgOpId(int64_t MsgId, const StringRef Name);
1181
1182LLVM_READNONE
1183StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI);
1184
1185LLVM_READNONE
1186StringRef getMsgOpName(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);
1187
// Validity checks. \p Strict defaults to true on the two checks that take it.
1188LLVM_READNONE
1189bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI);
1190
1191LLVM_READNONE
1192bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
1193 bool Strict = true);
1194
1195LLVM_READNONE
1196bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
1197 const MCSubtargetInfo &STI, bool Strict = true);
1198
1199LLVM_READNONE
1200bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI);
1201
1202LLVM_READNONE
1203bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);
1204
// Splits \p Val into its three components, written through the reference
// parameters.
1205void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
1206 uint16_t &StreamId, const MCSubtargetInfo &STI);
1207
// Combines the three components into one value. Unlike decodeMsg() this takes
// no subtarget.
1208LLVM_READNONE
1209uint64_t encodeMsg(uint64_t MsgId,
1210 uint64_t OpId,
1211 uint64_t StreamId);
1212
1213} // namespace SendMsg
1214
1215
1216unsigned getInitialPSInputAddr(const Function &F);
1217
1218bool getHasColorExport(const Function &F);
1219
1220bool getHasDepthExport(const Function &F);
1221
1222LLVM_READNONE
1223bool isShader(CallingConv::ID CC);
1224
1225LLVM_READNONE
1226bool isGraphics(CallingConv::ID CC);
1227
1228LLVM_READNONE
1229bool isCompute(CallingConv::ID CC);
1230
1231LLVM_READNONE
1232bool isEntryFunctionCC(CallingConv::ID CC);
1233
1234// These functions are considered entrypoints into the current module, i.e. they
1235// are allowed to be called from outside the current module. This is different
1236// from isEntryFunctionCC, which is only true for functions that are entered by
1237// the hardware. Module entry points include all entry functions but also
1238// include functions that can be called from other functions inside or outside
1239// the current module. Module entry functions are allowed to allocate LDS.
1240LLVM_READNONE
1241bool isModuleEntryFunctionCC(CallingConv::ID CC);
1242
1243LLVM_READNONE
1244bool isChainCC(CallingConv::ID CC);
1245
1246bool isKernelCC(const Function *Func);
1247
1248// FIXME: Remove this when calling conventions cleaned up
1249LLVM_READNONE
1250inline bool isKernel(CallingConv::ID CC) {
1251 switch (CC) {
1252 case CallingConv::AMDGPU_KERNEL:
1253 case CallingConv::SPIR_KERNEL:
1254 return true;
1255 default:
1256 return false;
1257 }
1258}
1259
1260bool hasXNACK(const MCSubtargetInfo &STI);
1261bool hasSRAMECC(const MCSubtargetInfo &STI);
1262bool hasMIMG_R128(const MCSubtargetInfo &STI);
1263bool hasA16(const MCSubtargetInfo &STI);
1264bool hasG16(const MCSubtargetInfo &STI);
1265bool hasPackedD16(const MCSubtargetInfo &STI);
1266bool hasGDS(const MCSubtargetInfo &STI);
1267unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler = false);
1268unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI);
1269
1270bool isSI(const MCSubtargetInfo &STI);
1271bool isCI(const MCSubtargetInfo &STI);
1272bool isVI(const MCSubtargetInfo &STI);
1273bool isGFX9(const MCSubtargetInfo &STI);
1274bool isGFX9_GFX10(const MCSubtargetInfo &STI);
1275bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI);
1276bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI);
1277bool isGFX8Plus(const MCSubtargetInfo &STI);
1278bool isGFX9Plus(const MCSubtargetInfo &STI);
1279bool isGFX10(const MCSubtargetInfo &STI);
1280bool isGFX10_GFX11(const MCSubtargetInfo &STI);
1281bool isGFX10Plus(const MCSubtargetInfo &STI);
1282bool isNotGFX10Plus(const MCSubtargetInfo &STI);
1283bool isGFX10Before1030(const MCSubtargetInfo &STI);
1284bool isGFX11(const MCSubtargetInfo &STI);
1285bool isGFX11Plus(const MCSubtargetInfo &STI);
1286bool isGFX12(const MCSubtargetInfo &STI);
1287bool isGFX12Plus(const MCSubtargetInfo &STI);
1288bool isNotGFX12Plus(const MCSubtargetInfo &STI);
1289bool isNotGFX11Plus(const MCSubtargetInfo &STI);
1290bool isGCN3Encoding(const MCSubtargetInfo &STI);
1291bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
1292bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
1293bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
1294bool isGFX10_3_GFX11(const MCSubtargetInfo &STI);
1295bool isGFX90A(const MCSubtargetInfo &STI);
1296bool isGFX940(const MCSubtargetInfo &STI);
1297bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
1298bool hasMAIInsts(const MCSubtargetInfo &STI);
1299bool hasVOPD(const MCSubtargetInfo &STI);
1300bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI);
1301int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
1302unsigned hasKernargPreload(const MCSubtargetInfo &STI);
1303
1304/// \returns true if \p Reg is a scalar register.
1305bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
1306
1307/// \returns true if \p Reg occupies the high 16-bits of a 32-bit register.
1308/// The bit indicating isHi is the LSB of the encoding.
1309bool isHi(unsigned Reg, const MCRegisterInfo &MRI);
1310
1311/// If \p Reg is a pseudo reg, return the correct hardware register given
1312/// \p STI otherwise return \p Reg.
1313unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
1314
1315/// Convert hardware register \p Reg to a pseudo register
1316LLVM_READNONE
1317unsigned mc2PseudoReg(unsigned Reg);
1318
1319LLVM_READNONE
1320bool isInlineValue(unsigned Reg);
1321
1322/// Is this an AMDGPU specific source operand? These include registers,
1323/// inline constants, literals and mandatory literals (KImm).
1324bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
1325
1326/// Is this a KImm operand?
1327bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo);
1328
1329/// Is this a floating-point operand?
1330bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
1331
1332/// Does this operand support only inlinable literals?
1333bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
1334
1335/// Get the size in bits of a register from the register class with ID \p RCID.
1336unsigned getRegBitWidth(unsigned RCID);
1337
1338/// Get the size in bits of a register from the register class \p RC.
1339unsigned getRegBitWidth(const MCRegisterClass &RC);
1340
1341/// Get size of register operand
1342unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
1343 unsigned OpNo);
1344
1345LLVM_READNONE
1346inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
1347 switch (OpInfo.OperandType) {
1348 case AMDGPU::OPERAND_REG_IMM_INT32:
1349 case AMDGPU::OPERAND_REG_IMM_FP32:
1350 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1351 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1352 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1353 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1354 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1355 case AMDGPU::OPERAND_REG_IMM_V2INT32:
1356 case AMDGPU::OPERAND_REG_IMM_V2FP32:
1357 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1358 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1359 case AMDGPU::OPERAND_KIMM32:
1360 case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
1361 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
1362 return 4;
1363
1364 case AMDGPU::OPERAND_REG_IMM_INT64:
1365 case AMDGPU::OPERAND_REG_IMM_FP64:
1366 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1367 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1368 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1369 return 8;
1370
1371 case AMDGPU::OPERAND_REG_IMM_INT16:
1372 case AMDGPU::OPERAND_REG_IMM_BF16:
1373 case AMDGPU::OPERAND_REG_IMM_FP16:
1374 case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
1375 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1376 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1377 case AMDGPU::OPERAND_REG_INLINE_C_BF16:
1378 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1379 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1380 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
1381 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1382 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1383 case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
1384 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1385 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1386 case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
1387 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1388 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1389 case AMDGPU::OPERAND_REG_IMM_V2BF16:
1390 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1391 return 2;
1392
1393 default:
1394 llvm_unreachable("unhandled operand type");
1395 }
1396}
1397
1398LLVM_READNONE
1399inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
1400 return getOperandSize(Desc.operands()[OpNo]);
1401}
1402
1403/// Is this literal inlinable, and not one of the values intended for floating
1404/// point values.
1405LLVM_READNONE
inline constexpr bool isInlinableIntLiteral(int64_t Literal) {
  // Integer inline constants cover the closed range [-16, 64]. The check is a
  // pure function of its argument, so it is constexpr and can also be used in
  // static_asserts and other constant expressions.
  return Literal >= -16 && Literal <= 64;
}
1409
1410/// Is this literal inlinable
1411LLVM_READNONE
1412bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
1413
1414LLVM_READNONE
1415bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
1416
1417LLVM_READNONE
1418bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
1419
1420LLVM_READNONE
// NOTE(review): presumably true when the 16-bit pattern \p Literal is an
// inline constant for FP16 operands, with \p HasInv2Pi enabling one extra
// value - confirm against the definition.
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);
1425
1426LLVM_READNONE
1427bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi);
1428
1429LLVM_READNONE
1430std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);
1431
1432LLVM_READNONE
1433std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal);
1434
1435LLVM_READNONE
1436std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal);
1437
1438LLVM_READNONE
1439bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType);
1440
1441LLVM_READNONE
1442bool isInlinableLiteralV2I16(uint32_t Literal);
1443
1444LLVM_READNONE
1445bool isInlinableLiteralV2BF16(uint32_t Literal);
1446
1447LLVM_READNONE
1448bool isInlinableLiteralV2F16(uint32_t Literal);
1449
1450LLVM_READNONE
1451bool isValid32BitLiteral(uint64_t Val, bool IsFP64);
1452
1453bool isArgPassedInSGPR(const Argument *Arg);
1454
1455bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo);
1456
1457LLVM_READONLY
1458bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
1459 int64_t EncodedOffset);
1460
1461LLVM_READONLY
1462bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
1463 int64_t EncodedOffset,
1464 bool IsBuffer);
1465
1466/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
1467/// offsets.
1468uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);
1469
1470/// \returns The encoding that will be used for \p ByteOffset in the
1471/// SMRD offset field, or std::nullopt if it won't fit. On GFX9 and GFX10
1472/// S_LOAD instructions have a signed offset, on other subtargets it is
1473/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
1474std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
1475 int64_t ByteOffset, bool IsBuffer);
1476
1477/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
1478/// instruction. This is only useful on CI.
1479std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
1480 int64_t ByteOffset);
1481
1482/// For pre-GFX12 FLAT instructions the offset must be positive;
1483/// MSB is ignored and forced to zero.
1484///
1485/// \return The number of bits available for the signed offset field in flat
1486/// instructions. Note that some forms of the instruction disallow negative
1487/// offsets.
1488unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);
1489
1490/// \returns true if this offset is small enough to fit in the SMRD
1491/// offset field. \p ByteOffset should be the offset in bytes and
1492/// not the encoded offset.
1493bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
1494
1495LLVM_READNONE
1496inline bool isLegalDPALU_DPPControl(unsigned DC) {
1497 return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
1498}
1499
1500/// \returns true if an instruction may have a 64-bit VGPR operand.
1501bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc);
1502
1503/// \returns true if an instruction is a DP ALU DPP.
1504bool isDPALU_DPP(const MCInstrDesc &OpDesc);
1505
1506/// \returns true if the intrinsic is divergent
1507bool isIntrinsicSourceOfDivergence(unsigned IntrID);
1508
1509/// \returns true if the intrinsic is uniform
1510bool isIntrinsicAlwaysUniform(unsigned IntrID);
1511
1512/// \returns lds block size in terms of dwords.
1513/// This is used to calculate the lds size encoded for PAL metadata 3.0+ which
1514/// must be defined in terms of bytes.
1515unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);
1516
1517} // end namespace AMDGPU
1518
1519raw_ostream &operator<<(raw_ostream &OS,
1520 const AMDGPU::IsaInfo::TargetIDSetting S);
1521
1522} // end namespace llvm
1523
1524#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
1525

source code of llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h