AMDGPUBaseInfo.h source code [llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h]

1	//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10	#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11
12	#include "SIDefines.h"
13	#include "llvm/IR/CallingConv.h"
14	#include "llvm/IR/InstrTypes.h"
15	#include "llvm/IR/Module.h"
16	#include "llvm/Support/Alignment.h"
17	#include <array>
18	#include <functional>
19	#include <utility>
20
21	struct amd_kernel_code_t;
22
23	namespace llvm {
24
25	struct Align;
26	class Argument;
27	class Function;
28	class GlobalValue;
29	class MCInstrInfo;
30	class MCRegisterClass;
31	class MCRegisterInfo;
32	class MCSubtargetInfo;
33	class StringRef;
34	class Triple;
35	class raw_ostream;
36
37	namespace AMDGPU {
38
39	struct IsaVersion;
40
41	/// Generic target versions emitted by this version of LLVM.
42	///
43	/// These numbers are incremented every time a codegen breaking change occurs
44	/// within a generic family.
namespace GenericVersion {
// Version number per generic family; all families are at version 1 here.
static constexpr unsigned GFX9 = 1;
static constexpr unsigned GFX10_1 = 1;
static constexpr unsigned GFX10_3 = 1;
static constexpr unsigned GFX11 = 1;
} // namespace GenericVersion
51
// AMDHSA code object version identifiers; each enumerator equals its version
// number.
enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };
53
54	/// \returns True if \p STI is AMDHSA.
55	bool isHsaAbi(const MCSubtargetInfo &STI);
56
57	/// \returns Code object version from the IR module flag.
58	unsigned getAMDHSACodeObjectVersion(const Module &M);
59
60	/// \returns Code object version from ELF's e_ident[EI_ABIVERSION].
61	unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion);
62
63	/// \returns The default HSA code object version. This should only be used when
64	/// we lack a more accurate CodeObjectVersion value (e.g. from the IR module
65	/// flag or a .amdhsa_code_object_version directive)
66	unsigned getDefaultAMDHSACodeObjectVersion();
67
68	/// \returns ABIVersion suitable for use in ELF's e_ident[EI_ABIVERSION]. \param
69	/// CodeObjectVersion is a value returned by getAMDHSACodeObjectVersion().
70	uint8_t getELFABIVersion(const Triple &OS, unsigned CodeObjectVersion);
71
72	/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
73	unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV);
74
75	/// \returns The offset of the hostcall pointer argument from implicitarg_ptr
76	unsigned getHostcallImplicitArgPosition(unsigned COV);
77
78	unsigned getDefaultQueueImplicitArgPosition(unsigned COV);
79	unsigned getCompletionActionImplicitArgPosition(unsigned COV);
80
// Plain aggregate describing one buffer format. The member order is part of
// the contract: aggregate initialization fills the fields in this order.
struct GcnBufferFormatInfo {
  unsigned Format;
  unsigned BitsPerComp;
  unsigned NumComponents;
  unsigned NumFormat;
  unsigned DataFormat;
};
88
// Per-opcode flags for MAI instructions; see getMAIIsDGEMM() and
// getMAIIsGFX940XDL() for the accessors declared in this header.
struct MAIInstInfo {
  uint16_t Opcode;
  bool is_dgemm;
  bool is_gfx940_xdl;
};
94
95	#define GET_MIMGBaseOpcode_DECL
96	#define GET_MIMGDim_DECL
97	#define GET_MIMGEncoding_DECL
98	#define GET_MIMGLZMapping_DECL
99	#define GET_MIMGMIPMapping_DECL
100	#define GET_MIMGBiASMapping_DECL
101	#define GET_MAIInstInfoTable_DECL
102	#include "AMDGPUGenSearchableTables.inc"
103
104	namespace IsaInfo {
105
enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  TRAP_NUM_SGPRS = 16
};
112
// State of a target-ID feature. Enumerator order (and hence the underlying
// values 0..3) is kept as declared.
enum class TargetIDSetting { Unsupported, Any, Off, On };
119
120	class AMDGPUTargetID {
121	private:
122	const MCSubtargetInfo &STI;
123	TargetIDSetting XnackSetting;
124	TargetIDSetting SramEccSetting;
125
126	public:
127	explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
128	~AMDGPUTargetID() = default;
129
130	/// \return True if the current xnack setting is not "Unsupported".
131	bool isXnackSupported() const {
132	return XnackSetting != TargetIDSetting::Unsupported;
133	}
134
135	/// \returns True if the current xnack setting is "On" or "Any".
136	bool isXnackOnOrAny() const {
137	return XnackSetting == TargetIDSetting::On \|\|
138	XnackSetting == TargetIDSetting::Any;
139	}
140
141	/// \returns True if current xnack setting is "On" or "Off",
142	/// false otherwise.
143	bool isXnackOnOrOff() const {
144	return getXnackSetting() == TargetIDSetting::On \|\|
145	getXnackSetting() == TargetIDSetting::Off;
146	}
147
148	/// \returns The current xnack TargetIDSetting, possible options are
149	/// "Unsupported", "Any", "Off", and "On".
150	TargetIDSetting getXnackSetting() const {
151	return XnackSetting;
152	}
153
154	/// Sets xnack setting to \p NewXnackSetting.
155	void setXnackSetting(TargetIDSetting NewXnackSetting) {
156	XnackSetting = NewXnackSetting;
157	}
158
159	/// \return True if the current sramecc setting is not "Unsupported".
160	bool isSramEccSupported() const {
161	return SramEccSetting != TargetIDSetting::Unsupported;
162	}
163
164	/// \returns True if the current sramecc setting is "On" or "Any".
165	bool isSramEccOnOrAny() const {
166	return SramEccSetting == TargetIDSetting::On \|\|
167	SramEccSetting == TargetIDSetting::Any;
168	}
169
170	/// \returns True if current sramecc setting is "On" or "Off",
171	/// false otherwise.
172	bool isSramEccOnOrOff() const {
173	return getSramEccSetting() == TargetIDSetting::On \|\|
174	getSramEccSetting() == TargetIDSetting::Off;
175	}
176
177	/// \returns The current sramecc TargetIDSetting, possible options are
178	/// "Unsupported", "Any", "Off", and "On".
179	TargetIDSetting getSramEccSetting() const {
180	return SramEccSetting;
181	}
182
183	/// Sets sramecc setting to \p NewSramEccSetting.
184	void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
185	SramEccSetting = NewSramEccSetting;
186	}
187
188	void setTargetIDFromFeaturesString(StringRef FS);
189	void setTargetIDFromTargetIDStream(StringRef TargetID);
190
191	/// \returns String representation of an object.
192	std::string toString() const;
193	};
194
195	/// \returns Wavefront size for given subtarget \p STI.
196	unsigned getWavefrontSize(const MCSubtargetInfo *STI);
197
198	/// \returns Local memory size in bytes for given subtarget \p STI.
199	unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
200
201	/// \returns Maximum addressable local memory size in bytes for given subtarget
202	/// \p STI.
203	unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI);
204
205	/// \returns Number of execution units per compute unit for given subtarget \p
206	/// STI.
207	unsigned getEUsPerCU(const MCSubtargetInfo *STI);
208
209	/// \returns Maximum number of work groups per compute unit for given subtarget
210	/// \p STI and limited by given \p FlatWorkGroupSize.
211	unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
212	unsigned FlatWorkGroupSize);
213
214	/// \returns Minimum number of waves per execution unit for given subtarget \p
215	/// STI.
216	unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
217
218	/// \returns Maximum number of waves per execution unit for given subtarget \p
219	/// STI without any kind of limitation.
220	unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
221
222	/// \returns Number of waves per execution unit required to support the given \p
223	/// FlatWorkGroupSize.
224	unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
225	unsigned FlatWorkGroupSize);
226
227	/// \returns Minimum flat work group size for given subtarget \p STI.
228	unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
229
230	/// \returns Maximum flat work group size for given subtarget \p STI.
231	unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
232
233	/// \returns Number of waves per work group for given subtarget \p STI and
234	/// \p FlatWorkGroupSize.
235	unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
236	unsigned FlatWorkGroupSize);
237
238	/// \returns SGPR allocation granularity for given subtarget \p STI.
239	unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
240
241	/// \returns SGPR encoding granularity for given subtarget \p STI.
242	unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
243
244	/// \returns Total number of SGPRs for given subtarget \p STI.
245	unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
246
247	/// \returns Addressable number of SGPRs for given subtarget \p STI.
248	unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
249
250	/// \returns Minimum number of SGPRs that meets the given number of waves per
251	/// execution unit requirement for given subtarget \p STI.
252	unsigned getMinNumSGPRs(const MCSubtargetInfo STI, unsigned* WavesPerEU);
253
254	/// \returns Maximum number of SGPRs that meets the given number of waves per
255	/// execution unit requirement for given subtarget \p STI.
256	unsigned getMaxNumSGPRs(const MCSubtargetInfo STI, unsigned* WavesPerEU,
257	bool Addressable);
258
259	/// \returns Number of extra SGPRs implicitly required by given subtarget \p
260	/// STI when the given special registers are used.
261	unsigned getNumExtraSGPRs(const MCSubtargetInfo STI, bool* VCCUsed,
262	bool FlatScrUsed, bool XNACKUsed);
263
264	/// \returns Number of extra SGPRs implicitly required by given subtarget \p
265	/// STI when the given special registers are used. XNACK is inferred from
266	/// \p STI.
267	unsigned getNumExtraSGPRs(const MCSubtargetInfo STI, bool* VCCUsed,
268	bool FlatScrUsed);
269
270	/// \returns Number of SGPR blocks needed for given subtarget \p STI when
271	/// \p NumSGPRs are used. \p NumSGPRs should already include any special
272	/// register counts.
273	unsigned getNumSGPRBlocks(const MCSubtargetInfo STI, unsigned* NumSGPRs);
274
275	/// \returns VGPR allocation granularity for given subtarget \p STI.
276	///
277	/// For subtargets which support it, \p EnableWavefrontSize32 should match
278	/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
279	unsigned
280	getVGPRAllocGranule(const MCSubtargetInfo *STI,
281	std::optional<bool> EnableWavefrontSize32 = std::nullopt);
282
283	/// \returns VGPR encoding granularity for given subtarget \p STI.
284	///
285	/// For subtargets which support it, \p EnableWavefrontSize32 should match
286	/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
287	unsigned getVGPREncodingGranule(
288	const MCSubtargetInfo *STI,
289	std::optional<bool> EnableWavefrontSize32 = std::nullopt);
290
291	/// \returns Total number of VGPRs for given subtarget \p STI.
292	unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
293
294	/// \returns Addressable number of architectural VGPRs for a given subtarget \p
295	/// STI.
296	unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI);
297
298	/// \returns Addressable number of VGPRs for given subtarget \p STI.
299	unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
300
301	/// \returns Minimum number of VGPRs that meets given number of waves per
302	/// execution unit requirement for given subtarget \p STI.
303	unsigned getMinNumVGPRs(const MCSubtargetInfo STI, unsigned* WavesPerEU);
304
305	/// \returns Maximum number of VGPRs that meets given number of waves per
306	/// execution unit requirement for given subtarget \p STI.
307	unsigned getMaxNumVGPRs(const MCSubtargetInfo STI, unsigned* WavesPerEU);
308
309	/// \returns Number of waves reachable for a given \p NumVGPRs usage for given
310	/// subtarget \p STI.
311	unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
312	unsigned NumVGPRs);
313
314	/// \returns Number of VGPR blocks needed for given subtarget \p STI when
315	/// \p NumVGPRs are used. We actually return the number of blocks -1, since
316	/// that's what we encode.
317	///
318	/// For subtargets which support it, \p EnableWavefrontSize32 should match the
319	/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
320	unsigned getEncodedNumVGPRBlocks(
321	const MCSubtargetInfo STI, unsigned* NumVGPRs,
322	std::optional<bool> EnableWavefrontSize32 = std::nullopt);
323
324	/// \returns Number of VGPR blocks that need to be allocated for the given
325	/// subtarget \p STI when \p NumVGPRs are used.
326	unsigned getAllocatedNumVGPRBlocks(
327	const MCSubtargetInfo STI, unsigned* NumVGPRs,
328	std::optional<bool> EnableWavefrontSize32 = std::nullopt);
329
330	} // end namespace IsaInfo
331
// Represents a field in an encoded value.
//
// HighBit/LowBit give the inclusive bit range occupied by the field and D is
// the field's default value. The stored value is NOT shifted here; callers
// (see EncodingFields) apply Offset and mask to Width bits.
template <unsigned HighBit, unsigned LowBit, unsigned D = 0>
struct EncodingField {
  static_assert(HighBit >= LowBit, "Invalid bit range!");
  static constexpr unsigned Offset = LowBit;
  static constexpr unsigned Width = HighBit - LowBit + 1;

  using ValueType = unsigned;
  static constexpr ValueType Default = D;

  ValueType Value;
  // Intentionally implicit so a raw value can be passed where a field is
  // expected.
  constexpr EncodingField(ValueType Value) : Value(Value) {}

  // Returns the unshifted field value widened to 64 bits.
  constexpr uint64_t encode() const { return Value; }
  // Converts already shifted-and-masked bits back to the field's value type.
  static ValueType decode(uint64_t Encoded) { return Encoded; }
};
348
// A helper for encoding and decoding multiple fields.
//
// encode() ORs together every field's value shifted to its Offset; decode()
// extracts each field by shifting right by Offset and masking to Width bits.
template <typename... Fields> struct EncodingFields {
  static constexpr uint64_t encode(Fields... Values) {
    return ((Values.encode() << Values.Offset) | ...);
  }

  static std::tuple<typename Fields::ValueType...> decode(uint64_t Encoded) {
    return {Fields::decode((Encoded >> Fields::Offset) &
                           maxUIntN(Fields::Width))...};
  }
};
360
361	LLVM_READONLY
362	int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
363
364	LLVM_READONLY
365	inline bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx) {
366	return getNamedOperandIdx(Opcode, NamedIdx) != -`1`;
367	}
368
369	LLVM_READONLY
370	int getSOPPWithRelaxation(uint16_t Opcode);
371
372	struct MIMGBaseOpcodeInfo {
373	MIMGBaseOpcode BaseOpcode;
374	bool Store;
375	bool Atomic;
376	bool AtomicX2;
377	bool Sampler;
378	bool Gather4;
379
380	uint8_t NumExtraArgs;
381	bool Gradients;
382	bool G16;
383	bool Coordinates;
384	bool LodOrClampOrMip;
385	bool HasD16;
386	bool MSAA;
387	bool BVH;
388	bool A16;
389	};
390
391	LLVM_READONLY
392	const MIMGBaseOpcodeInfo getMIMGBaseOpcode(unsigned* Opc);
393
394	LLVM_READONLY
395	const MIMGBaseOpcodeInfo getMIMGBaseOpcodeInfo(unsigned* BaseOpcode);
396
397	struct MIMGDimInfo {
398	MIMGDim Dim;
399	uint8_t NumCoords;
400	uint8_t NumGradients;
401	bool MSAA;
402	bool DA;
403	uint8_t Encoding;
404	const char *AsmSuffix;
405	};
406
407	LLVM_READONLY
408	const MIMGDimInfo getMIMGDimInfo(unsigned* DimEnum);
409
410	LLVM_READONLY
411	const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
412
413	LLVM_READONLY
414	const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
415
416	struct MIMGLZMappingInfo {
417	MIMGBaseOpcode L;
418	MIMGBaseOpcode LZ;
419	};
420
421	struct MIMGMIPMappingInfo {
422	MIMGBaseOpcode MIP;
423	MIMGBaseOpcode NONMIP;
424	};
425
426	struct MIMGBiasMappingInfo {
427	MIMGBaseOpcode Bias;
428	MIMGBaseOpcode NoBias;
429	};
430
431	struct MIMGOffsetMappingInfo {
432	MIMGBaseOpcode Offset;
433	MIMGBaseOpcode NoOffset;
434	};
435
436	struct MIMGG16MappingInfo {
437	MIMGBaseOpcode G;
438	MIMGBaseOpcode G16;
439	};
440
441	LLVM_READONLY
442	const MIMGLZMappingInfo getMIMGLZMappingInfo(unsigned* L);
443
// Pairs the 2-address and 3-address opcodes of a WMMA instruction; see
// mapWMMA2AddrTo3AddrOpcode() / mapWMMA3AddrTo2AddrOpcode().
struct WMMAOpcodeMappingInfo {
  unsigned Opcode2Addr;
  unsigned Opcode3Addr;
};
448
449	LLVM_READONLY
450	const MIMGMIPMappingInfo getMIMGMIPMappingInfo(unsigned* MIP);
451
452	LLVM_READONLY
453	const MIMGBiasMappingInfo getMIMGBiasMappingInfo(unsigned* Bias);
454
455	LLVM_READONLY
456	const MIMGOffsetMappingInfo getMIMGOffsetMappingInfo(unsigned* Offset);
457
458	LLVM_READONLY
459	const MIMGG16MappingInfo getMIMGG16MappingInfo(unsigned* G);
460
461	LLVM_READONLY
462	int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
463	unsigned VDataDwords, unsigned VAddrDwords);
464
465	LLVM_READONLY
466	int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
467
468	LLVM_READONLY
469	unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
470	const MIMGDimInfo Dim, bool* IsA16,
471	bool IsG16Supported);
472
// Record describing one concrete MIMG opcode: its base opcode, encoding and
// operand sizes. Member order is significant for aggregate initialization.
struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
  uint8_t VAddrOperands;
};
481
482	LLVM_READONLY
483	const MIMGInfo getMIMGInfo(unsigned* Opc);
484
485	LLVM_READONLY
486	int getMTBUFBaseOpcode(unsigned Opc);
487
488	LLVM_READONLY
489	int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
490
491	LLVM_READONLY
492	int getMTBUFElements(unsigned Opc);
493
494	LLVM_READONLY
495	bool getMTBUFHasVAddr(unsigned Opc);
496
497	LLVM_READONLY
498	bool getMTBUFHasSrsrc(unsigned Opc);
499
500	LLVM_READONLY
501	bool getMTBUFHasSoffset(unsigned Opc);
502
503	LLVM_READONLY
504	int getMUBUFBaseOpcode(unsigned Opc);
505
506	LLVM_READONLY
507	int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
508
509	LLVM_READONLY
510	int getMUBUFElements(unsigned Opc);
511
512	LLVM_READONLY
513	bool getMUBUFHasVAddr(unsigned Opc);
514
515	LLVM_READONLY
516	bool getMUBUFHasSrsrc(unsigned Opc);
517
518	LLVM_READONLY
519	bool getMUBUFHasSoffset(unsigned Opc);
520
521	LLVM_READONLY
522	bool getMUBUFIsBufferInv(unsigned Opc);
523
524	LLVM_READONLY
525	bool getMUBUFTfe(unsigned Opc);
526
527	LLVM_READONLY
528	bool getSMEMIsBuffer(unsigned Opc);
529
530	LLVM_READONLY
531	bool getVOP1IsSingle(unsigned Opc);
532
533	LLVM_READONLY
534	bool getVOP2IsSingle(unsigned Opc);
535
536	LLVM_READONLY
537	bool getVOP3IsSingle(unsigned Opc);
538
539	LLVM_READONLY
540	bool isVOPC64DPP(unsigned Opc);
541
542	LLVM_READONLY
543	bool isVOPCAsmOnly(unsigned Opc);
544
545	/// Returns true if MAI operation is a double precision GEMM.
546	LLVM_READONLY
547	bool getMAIIsDGEMM(unsigned Opc);
548
549	LLVM_READONLY
550	bool getMAIIsGFX940XDL(unsigned Opc);
551
// Result of getCanBeVOPD(): one flag per VOPD component (X and Y).
struct CanBeVOPD {
  bool X;
  bool Y;
};
556
557	/// \returns SIEncodingFamily used for VOPD encoding on a \p ST.
558	LLVM_READONLY
559	unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST);
560
561	LLVM_READONLY
562	CanBeVOPD getCanBeVOPD(unsigned Opc);
563
564	LLVM_READONLY
565	const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
566	uint8_t NumComponents,
567	uint8_t NumFormat,
568	const MCSubtargetInfo &STI);
569	LLVM_READONLY
570	const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
571	const MCSubtargetInfo &STI);
572
573	LLVM_READONLY
574	int getMCOpcode(uint16_t Opcode, unsigned Gen);
575
576	LLVM_READONLY
577	unsigned getVOPDOpcode(unsigned Opc);
578
579	LLVM_READONLY
580	int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily);
581
582	LLVM_READONLY
583	bool isVOPD(unsigned Opc);
584
585	LLVM_READNONE
586	bool isMAC(unsigned Opc);
587
588	LLVM_READNONE
589	bool isPermlane16(unsigned Opc);
590
591	LLVM_READNONE
592	bool isGenericAtomic(unsigned Opc);
593
594	LLVM_READNONE
595	bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc);
596
597	namespace VOPD {
598
// VOPD component operand indices (DST, SRC0..SRC2) followed by operand-count
// constants that share the same enum.
enum Component : unsigned {
  DST = 0,
  SRC0,
  SRC1,
  SRC2,

  DST_NUM = 1,
  MAX_SRC_NUM = 3,
  MAX_OPR_NUM = DST_NUM + MAX_SRC_NUM
};
609
// LSB mask for VGPR banks per VOPD component operand.
// 4 banks result in a mask 3, setting 2 lower bits.
constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1};
613
// Index of a component within a VOPD instruction, plus an iterable list of
// those indices and its length.
enum ComponentIndex : unsigned { X = 0, Y = 1 };
constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y};
constexpr unsigned COMPONENTS_NUM = 2;
617
618	// Properties of VOPD components.
619	class ComponentProps {
620	private:
621	unsigned SrcOperandsNum = `0`;
622	unsigned MandatoryLiteralIdx = ~`0u`;
623	bool HasSrc2Acc = false;
624
625	public:
626	ComponentProps() = default;
627	ComponentProps(const MCInstrDesc &OpDesc);
628
629	// Return the total number of src operands this component has.
630	unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; }
631
632	// Return the number of src operands of this component visible to the parser.
633	unsigned getCompParsedSrcOperandsNum() const {
634	return SrcOperandsNum - HasSrc2Acc;
635	}
636
637	// Return true iif this component has a mandatory literal.
638	bool hasMandatoryLiteral() const { return MandatoryLiteralIdx != ~`0u`; }
639
640	// If this component has a mandatory literal, return component operand
641	// index of this literal (i.e. either Component::SRC1 or Component::SRC2).
642	unsigned getMandatoryLiteralCompOperandIndex() const {
643	assert(hasMandatoryLiteral());
644	return MandatoryLiteralIdx;
645	}
646
647	// Return true iif this component has operand
648	// with component index CompSrcIdx and this operand may be a register.
649	bool hasRegSrcOperand(unsigned CompSrcIdx) const {
650	assert(CompSrcIdx < Component::MAX_SRC_NUM);
651	return SrcOperandsNum > CompSrcIdx && !hasMandatoryLiteralAt(CompSrcIdx);
652	}
653
654	// Return true iif this component has tied src2.
655	bool hasSrc2Acc() const { return HasSrc2Acc; }
656
657	private:
658	bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const {
659	assert(CompSrcIdx < Component::MAX_SRC_NUM);
660	return MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx;
661	}
662	};
663
// Kind of a VOPD component; the values are used as indices into the
// per-kind tables in ComponentLayout.
enum ComponentKind : unsigned {
  SINGLE = 0,  // A single VOP1 or VOP2 instruction which may be used in VOPD.
  COMPONENT_X, // A VOPD instruction, X component.
  COMPONENT_Y, // A VOPD instruction, Y component.
  MAX = COMPONENT_Y
};
670
671	// Interface functions of this class map VOPD component operand indices
672	// to indices of operands in MachineInstr/MCInst or parsed operands array.
673	//
674	// Note that this class operates with 3 kinds of indices:
675	// - VOPD component operand indices (Component::DST, Component::SRC0, etc.);
676	// - MC operand indices (they refer operands in a MachineInstr/MCInst);
677	// - parsed operand indices (they refer operands in parsed operands array).
678	//
679	// For SINGLE components mapping between these indices is trivial.
680	// But things get more complicated for COMPONENT_X and
681	// COMPONENT_Y because these components share the same
682	// MachineInstr/MCInst and the same parsed operands array.
683	// Below is an example of component operand to parsed operand
684	// mapping for the following instruction:
685	//
686	// v_dual_add_f32 v255, v4, v5 :: v_dual_mov_b32 v6, v1
687	//
688	// PARSED COMPONENT PARSED
689	// COMPONENT OPERANDS OPERAND INDEX OPERAND INDEX
690	// -------------------------------------------------------------------
691	// "v_dual_add_f32" 0
692	// v_dual_add_f32 v255 0 (DST) --> 1
693	// v4 1 (SRC0) --> 2
694	// v5 2 (SRC1) --> 3
695	// "::" 4
696	// "v_dual_mov_b32" 5
697	// v_dual_mov_b32 v6 0 (DST) --> 6
698	// v1 1 (SRC0) --> 7
699	// -------------------------------------------------------------------
700	//
701	class ComponentLayout {
702	private:
703	// Regular MachineInstr/MCInst operands are ordered as follows:
704	// dst, src0 [, other src operands]
705	// VOPD MachineInstr/MCInst operands are ordered as follows:
706	// dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
707	// Each ComponentKind has operand indices defined below.
708	static constexpr unsigned MC_DST_IDX[] = {`0`, `0`, `1`};
709	static constexpr unsigned FIRST_MC_SRC_IDX[] = {`1`, `2`, `2` / + OpX.MCSrcNum /};
710
711	// Parsed operands of regular instructions are ordered as follows:
712	// Mnemo dst src0 [vsrc1 ...]
713	// Parsed VOPD operands are ordered as follows:
714	// OpXMnemo dstX src0X [vsrc1X\|imm vsrc1X\|vsrc1X imm] '::'
715	// OpYMnemo dstY src0Y [vsrc1Y\|imm vsrc1Y\|vsrc1Y imm]
716	// Each ComponentKind has operand indices defined below.
717	static constexpr unsigned PARSED_DST_IDX[] = {`1`, `1`,
718	`4` / + OpX.ParsedSrcNum /};
719	static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {
720	`2`, `2`, `5` / + OpX.ParsedSrcNum /};
721
722	private:
723	const ComponentKind Kind;
724	const ComponentProps PrevComp;
725
726	public:
727	// Create layout for COMPONENT_X or SINGLE component.
728	ComponentLayout(ComponentKind Kind) : Kind(Kind) {
729	assert(Kind == ComponentKind::SINGLE \|\| Kind == ComponentKind::COMPONENT_X);
730	}
731
732	// Create layout for COMPONENT_Y which depends on COMPONENT_X layout.
733	ComponentLayout(const ComponentProps &OpXProps)
734	: Kind(ComponentKind::COMPONENT_Y), PrevComp (OpXProps) {}
735
736	public:
737	// Return the index of dst operand in MCInst operands.
738	unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; }
739
740	// Return the index of the specified src operand in MCInst operands.
741	unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx) const {
742	assert(CompSrcIdx < Component::MAX_SRC_NUM);
743	return FIRST_MC_SRC_IDX[Kind] + getPrevCompSrcNum() + CompSrcIdx;
744	}
745
746	// Return the index of dst operand in the parsed operands array.
747	unsigned getIndexOfDstInParsedOperands() const {
748	return PARSED_DST_IDX[Kind] + getPrevCompParsedSrcNum();
749	}
750
751	// Return the index of the specified src operand in the parsed operands array.
752	unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const {
753	assert(CompSrcIdx < Component::MAX_SRC_NUM);
754	return FIRST_PARSED_SRC_IDX[Kind] + getPrevCompParsedSrcNum() + CompSrcIdx;
755	}
756
757	private:
758	unsigned getPrevCompSrcNum() const {
759	return PrevComp.getCompSrcOperandsNum();
760	}
761	unsigned getPrevCompParsedSrcNum() const {
762	return PrevComp.getCompParsedSrcOperandsNum();
763	}
764	};
765
766	// Layout and properties of VOPD components.
767	class ComponentInfo : public ComponentLayout, public ComponentProps {
768	public:
769	// Create ComponentInfo for COMPONENT_X or SINGLE component.
770	ComponentInfo(const MCInstrDesc &OpDesc,
771	ComponentKind Kind = ComponentKind::SINGLE)
772	: ComponentLayout (Kind), ComponentProps (OpDesc) {}
773
774	// Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout.
775	ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps)
776	: ComponentLayout (OpXProps), ComponentProps (OpDesc) {}
777
778	// Map component operand index to parsed operand index.
779	// Return 0 if the specified operand does not exist.
780	unsigned getIndexInParsedOperands(unsigned CompOprIdx) const;
781	};
782
783	// Properties of VOPD instructions.
784	class InstInfo {
785	private:
786	const ComponentInfo CompInfo[COMPONENTS_NUM];
787
788	public:
789	using RegIndices = std::array<unsigned, Component::MAX_OPR_NUM>;
790
791	InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
792	: CompInfo{OpX, OpY} {}
793
794	InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY)
795	: CompInfo{OprInfoX, OprInfoY} {}
796
797	const ComponentInfo &operator[](size_t ComponentIdx) const {
798	assert(ComponentIdx < COMPONENTS_NUM);
799	return CompInfo[ComponentIdx];
800	}
801
802	// Check VOPD operands constraints.
803	// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
804	// for the specified component and MC operand. The callback must return 0
805	// if the operand is not a register or not a VGPR.
806	// If \p SkipSrc is set to true then constraints for source operands are not
807	// checked.
808	bool hasInvalidOperand(std::function<unsigned(unsigned, unsigned)> GetRegIdx,
809	bool SkipSrc = false) const {
810	return getInvalidCompOperandIndex(GetRegIdx: GetRegIdx, SkipSrc).has_value();
811	}
812
813	// Check VOPD operands constraints.
814	// Return the index of an invalid component operand, if any.
815	// If \p SkipSrc is set to true then constraints for source operands are not
816	// checked.
817	std::optional<unsigned> getInvalidCompOperandIndex(
818	std::function<unsigned(unsigned, unsigned)> GetRegIdx,
819	bool SkipSrc = false) const;
820
821	private:
822	RegIndices
823	getRegIndices(unsigned ComponentIdx,
824	std::function<unsigned(unsigned, unsigned)> GetRegIdx) const;
825	};
826
827	} // namespace VOPD
828
829	LLVM_READONLY
830	std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode);
831
832	LLVM_READONLY
833	// Get properties of 2 single VOP1/VOP2 instructions
834	// used as components to create a VOPD instruction.
835	VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY);
836
837	LLVM_READONLY
838	// Get properties of VOPD X and Y components.
839	VOPD::InstInfo
840	getVOPDInstInfo(unsigned VOPDOpcode, const MCInstrInfo *InstrInfo);
841
842	LLVM_READONLY
843	bool isTrue16Inst(unsigned Opc);
844
845	LLVM_READONLY
846	unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);
847
848	LLVM_READONLY
849	unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);
850
851	void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
852	const MCSubtargetInfo *STI);
853
854	bool isGroupSegment(const GlobalValue *GV);
855	bool isGlobalSegment(const GlobalValue *GV);
856	bool isReadOnlySegment(const GlobalValue *GV);
857
858	/// \returns True if constants should be emitted to .text section for given
859	/// target triple \p TT, false otherwise.
860	bool shouldEmitConstantsToTextSection(const Triple &TT);
861
862	/// \returns Integer value requested using \p F's \p Name attribute.
863	///
864	/// \returns \p Default if attribute is not present.
865	///
866	/// \returns \p Default and emits error if requested value cannot be converted
867	/// to integer.
868	int getIntegerAttribute(const Function &F, StringRef Name, int Default);
869
870	/// \returns A pair of integer values requested using \p F's \p Name attribute
871	/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
872	/// is false).
873	///
874	/// \returns \p Default if attribute is not present.
875	///
876	/// \returns \p Default and emits error if one of the requested values cannot be
877	/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
878	/// not present.
879	std::pair<unsigned, unsigned>
880	getIntegerPairAttribute(const Function &F, StringRef Name,
881	std::pair<unsigned, unsigned> Default,
882	bool OnlyFirstRequired = false);
883
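/// \returns A vector of the integer values requested using \p F's \p Name
/// attribute; \p Size (>2) is the number of integers the attribute is expected
/// to hold.
///
/// NOTE(review): this comment previously described a true/false result
/// ("true if exactly Size integers are found", "false if any error occurs"),
/// which does not match the SmallVector return type. What is returned on
/// error is not visible in this header - confirm against the definition.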
891	SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
892	unsigned Size);
893
894	/// Represents the counter values to wait for in an s_waitcnt instruction.
895	///
896	/// Large values (including the maximum possible integer) can be used to
897	/// represent "don't care" waits.
struct Waitcnt {
  // ~0u in any counter means "no wait required" for that counter.
  unsigned LoadCnt = ~0u;   // Corresponds to Vmcnt prior to gfx12.
  unsigned ExpCnt = ~0u;
  unsigned DsCnt = ~0u;     // Corresponds to LGKMcnt prior to gfx12.
  unsigned StoreCnt = ~0u;  // Corresponds to VScnt on gfx10/gfx11.
  unsigned SampleCnt = ~0u; // gfx12+ only.
  unsigned BvhCnt = ~0u;    // gfx12+ only.
  unsigned KmCnt = ~0u;     // gfx12+ only.

  Waitcnt() = default;
  // Pre-gfx12 constructor. The gfx12-only counters are left at ~0u.
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt),
        SampleCnt(~0u), BvhCnt(~0u), KmCnt(~0u) {}

  // gfx12+ constructor.
  Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
          unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt)
      : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt),
        SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt) {}

  // True if any counter differs from ~0u.
  bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); }

  // True if any counter other than StoreCnt differs from ~0u.
  bool hasWaitExceptStoreCnt() const {
    return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u ||
           SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u;
  }

  // True if StoreCnt differs from ~0u.
  bool hasWaitStoreCnt() const { return StoreCnt != ~0u; }

  // Returns the counter-wise minimum of this and Other, i.e. the stricter
  // wait for every counter.
  Waitcnt combined(const Waitcnt &Other) const {
    // Does the right thing provided self and Other are either both pre-gfx12
    // or both gfx12+.
    return Waitcnt(
        std::min(LoadCnt, Other.LoadCnt), std::min(ExpCnt, Other.ExpCnt),
        std::min(DsCnt, Other.DsCnt), std::min(StoreCnt, Other.StoreCnt),
        std::min(SampleCnt, Other.SampleCnt), std::min(BvhCnt, Other.BvhCnt),
        std::min(KmCnt, Other.KmCnt));
  }
};
938
// The following methods are only meaningful on targets that support
// S_WAITCNT.
//
// Note: in the declarations below a parameter named Waitcnt is the encoded
// unsigned immediate, not the Waitcnt struct.

/// \returns Vmcnt bit mask for given isa \p Version.
unsigned getVmcntBitMask(const IsaVersion &Version);

/// \returns Expcnt bit mask for given isa \p Version.
unsigned getExpcntBitMask(const IsaVersion &Version);

/// \returns Lgkmcnt bit mask for given isa \p Version.
unsigned getLgkmcntBitMask(const IsaVersion &Version);

/// \returns Waitcnt bit mask for given isa \p Version.
unsigned getWaitcntBitMask(const IsaVersion &Version);

/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively. Should not be used on gfx12+, the instruction
/// which needs it is deprecated.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
///     \p Vmcnt = \p Waitcnt[3:0]        (pre-gfx9)
///     \p Vmcnt = \p Waitcnt[15:14,3:0]  (gfx9,10)
///     \p Vmcnt = \p Waitcnt[15:10]      (gfx11)
///     \p Expcnt = \p Waitcnt[6:4]       (pre-gfx11)
///     \p Expcnt = \p Waitcnt[2:0]       (gfx11)
///     \p Lgkmcnt = \p Waitcnt[11:8]     (pre-gfx10)
///     \p Lgkmcnt = \p Waitcnt[13:8]     (gfx10)
///     \p Lgkmcnt = \p Waitcnt[9:4]      (gfx11)
///
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);

/// \returns A Waitcnt structure decoded from the immediate \p Encoded for
/// given isa \p Version.
/// NOTE(review): which structure fields are populated is not visible from
/// this declaration -- confirm against the implementation.
Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);

/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt);

/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt);

/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt);

/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version. Should not be used on gfx12+, the instruction which needs
/// it is deprecated.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
///     Waitcnt[2:0]   = \p Expcnt      (gfx11+)
///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9)
///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9,10)
///     Waitcnt[6:4]   = \p Expcnt      (pre-gfx11)
///     Waitcnt[9:4]   = \p Lgkmcnt     (gfx11)
///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10)
///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10)
///     Waitcnt[15:10] = \p Vmcnt       (gfx11)
///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9,10)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
///
unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);

/// \returns The immediate encoding of the counters held in \p Decoded for
/// given isa \p Version.
/// NOTE(review): which structure fields take part in the encoding is not
/// visible from this declaration -- confirm against the implementation.
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
1017
// The following methods are only meaningful on targets that support
// S_WAIT_*CNT, introduced with gfx12.

/// \returns Loadcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support LOADcnt.
unsigned getLoadcntBitMask(const IsaVersion &Version);

/// \returns Samplecnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support SAMPLEcnt.
unsigned getSamplecntBitMask(const IsaVersion &Version);

/// \returns Bvhcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support BVHcnt.
unsigned getBvhcntBitMask(const IsaVersion &Version);

/// \returns Dscnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support DScnt.
unsigned getDscntBitMask(const IsaVersion &Version);

/// \returns Kmcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support KMcnt.
unsigned getKmcntBitMask(const IsaVersion &Version);

/// \returns STOREcnt or VScnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support STOREcnt or VScnt.
/// STOREcnt and VScnt are the same counter, the name used
/// depends on the ISA version.
unsigned getStorecntBitMask(const IsaVersion &Version);
1046
// The following are only meaningful on targets that support
// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.

/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
/// isa \p Version.
/// NOTE(review): presumably only the LoadCnt and DsCnt fields carry decoded
/// values -- confirm against the implementation.
Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt);

/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
/// isa \p Version.
/// NOTE(review): presumably only the StoreCnt and DsCnt fields carry decoded
/// values -- confirm against the implementation.
Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt);

/// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an
/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
/// \p Version.
unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);

/// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an
/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
/// \p Version.
unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1067
1068	namespace Hwreg {
1069
1070	using HwregId = EncodingField<`5`, `0`>;
1071	using HwregOffset = EncodingField<`10`, `6`>;
1072
1073	struct HwregSize : EncodingField<`15`, `11`, `32`> {
1074	using EncodingField::EncodingField;
1075	constexpr uint64_t encode() const { return Value - `1`; }
1076	static ValueType decode(uint64_t Encoded) { return Encoded + `1`; }
1077	};
1078
1079	using HwregEncoding = EncodingFields<HwregId, HwregOffset, HwregSize>;
1080
1081	LLVM_READONLY
1082	int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI);
1083
1084	LLVM_READNONE
1085	StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);
1086
1087	} // namespace Hwreg
1088
namespace DepCtr {

// NOTE(review): the first four declarations carry no documentation in this
// header; the one-line summaries are inferred from names and signatures only
// and should be confirmed against the implementation.

/// Presumably the default depctr immediate for \p STI.
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI);
/// Presumably encodes value \p Val for the counter called \p Name;
/// \p UsedOprMask is passed by non-const reference and may be updated.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
                 const MCSubtargetInfo &STI);
/// Presumably tells whether \p Code can be expressed symbolically;
/// \p HasNonDefaultVal is an output flag.
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
                              const MCSubtargetInfo &STI);
/// Presumably decodes one counter out of \p Code; \p Id, \p Name, \p Val and
/// \p IsDefault are passed by non-const reference.
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
                  bool &IsDefault, const MCSubtargetInfo &STI);

/// \returns Decoded VaVdst from given immediate \p Encoded.
unsigned decodeFieldVaVdst(unsigned Encoded);

/// \returns Decoded VmVsrc from given immediate \p Encoded.
unsigned decodeFieldVmVsrc(unsigned Encoded);

/// \returns Decoded SaSdst from given immediate \p Encoded.
unsigned decodeFieldSaSdst(unsigned Encoded);

/// \returns \p VmVsrc as an encoded Depctr immediate.
unsigned encodeFieldVmVsrc(unsigned VmVsrc);

/// \returns \p Encoded combined with encoded \p VmVsrc.
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc);

/// \returns \p VaVdst as an encoded Depctr immediate.
unsigned encodeFieldVaVdst(unsigned VaVdst);

/// \returns \p Encoded combined with encoded \p VaVdst.
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst);

/// \returns \p SaSdst as an encoded Depctr immediate.
unsigned encodeFieldSaSdst(unsigned SaSdst);

/// \returns \p Encoded combined with encoded \p SaSdst.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst);

} // namespace DepCtr
1127
namespace Exp {

// NOTE(review): these declarations are undocumented in this header; the
// summaries below are inferred from names and signatures -- confirm against
// the implementation.

/// Presumably looks up export target \p Id, writing its \p Name and \p Index
/// through the reference parameters; the bool result signals success.
bool getTgtName(unsigned Id, StringRef &Name, int &Index);

/// Presumably the reverse lookup: the target id for the symbolic \p Name.
LLVM_READONLY
unsigned getTgtId(const StringRef Name);

/// Presumably tells whether export target \p Id is available on \p STI.
LLVM_READNONE
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);

} // namespace Exp
1139
namespace MTBUFFormat {

// NOTE(review): this namespace is undocumented in the header. Judging by the
// names, it covers two format representations: a split data format (Dfmt) /
// numeric format (Nfmt) pair and a unified format (Ufmt). Confirm details
// against the implementation.

// Split Dfmt/Nfmt representation.

/// Presumably packs \p Dfmt and \p Nfmt into one format value.
LLVM_READNONE
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);

/// Presumably splits \p Format into \p Dfmt and \p Nfmt (reference outputs).
void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);

// Name <-> id lookups for the individual components.
int64_t getDfmt(const StringRef Name);

StringRef getDfmtName(unsigned Id);

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);

// Validity checks for the split representation.
bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);

bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);

// Unified format representation.
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI);

StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI);

bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI);

/// Presumably converts a \p Dfmt / \p Nfmt pair to the unified format for
/// \p STI; the value returned when no conversion exists is not visible here.
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
                             const MCSubtargetInfo &STI);

// Queries on the encoded format value.
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);

unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);

} // namespace MTBUFFormat
1173
namespace SendMsg {

// NOTE(review): this namespace is undocumented in the header. The signatures
// show a message operand made of three parts -- a message id, an operation id
// and a stream id. The summaries below are inferred from names only; confirm
// against the implementation.

/// Presumably the message id for the symbolic \p Name on \p STI.
LLVM_READONLY
int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI);

/// Presumably the operation id called \p Name within message \p MsgId.
LLVM_READONLY
int64_t getMsgOpId(int64_t MsgId, const StringRef Name);

/// Presumably the symbolic name of message \p MsgId on \p STI.
LLVM_READNONE
StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI);

/// Presumably the symbolic name of operation \p OpId of message \p MsgId.
LLVM_READNONE
StringRef getMsgOpName(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);

// Validity checks. \p Strict defaults to true where present.
LLVM_READNONE
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI);

LLVM_READNONE
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
                  bool Strict = true);

LLVM_READNONE
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      const MCSubtargetInfo &STI, bool Strict = true);

// Capability queries for a given message.
LLVM_READNONE
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI);

LLVM_READNONE
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);

/// Presumably splits the immediate \p Val into \p MsgId, \p OpId and
/// \p StreamId, all of which are passed by non-const reference.
void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
               uint16_t &StreamId, const MCSubtargetInfo &STI);

/// Presumably the inverse of decodeMsg: combines the three parts into one
/// immediate.
LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId);

} // namespace SendMsg
1214
1215
// Per-function queries.
// NOTE(review): the three getters below are undocumented in this header;
// where their values come from (e.g. function attributes) is not visible
// here -- confirm against the implementation.
unsigned getInitialPSInputAddr(const Function &F);

bool getHasColorExport(const Function &F);

bool getHasDepthExport(const Function &F);

// Calling-convention classification. The exact set of conventions accepted by
// each predicate is defined by the implementation.
LLVM_READNONE
bool isShader(CallingConv::ID CC);

LLVM_READNONE
bool isGraphics(CallingConv::ID CC);

LLVM_READNONE
bool isCompute(CallingConv::ID CC);

LLVM_READNONE
bool isEntryFunctionCC(CallingConv::ID CC);

// These functions are considered entrypoints into the current module, i.e. they
// are allowed to be called from outside the current module. This is different
// from isEntryFunctionCC, which is only true for functions that are entered by
// the hardware. Module entry points include all entry functions but also
// include functions that can be called from other functions inside or outside
// the current module. Module entry functions are allowed to allocate LDS.
LLVM_READNONE
bool isModuleEntryFunctionCC(CallingConv::ID CC);

LLVM_READNONE
bool isChainCC(CallingConv::ID CC);

// Takes the function itself rather than a calling-convention id.
// NOTE(review): behavior for a null \p Func is not visible here.
bool isKernelCC(const Function *Func);
1247
1248	// FIXME: Remove this when calling conventions cleaned up
1249	LLVM_READNONE
1250	inline bool isKernel(CallingConv::ID CC) {
1251	switch (CC) {
1252	case CallingConv::AMDGPU_KERNEL:
1253	case CallingConv::SPIR_KERNEL:
1254	return true;
1255	default:
1256	return false;
1257	}
1258	}
1259
// Subtarget feature and limit queries. Each takes the subtarget description
// \p STI; the features tested are defined by the implementation.
bool hasXNACK(const MCSubtargetInfo &STI);
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasA16(const MCSubtargetInfo &STI);
bool hasG16(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);
bool hasGDS(const MCSubtargetInfo &STI);
// \p HasSampler defaults to false.
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler = false);
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI);

// Hardware generation predicates. Names list the generations covered, with a
// "Plus" suffix presumably meaning "this generation or newer" and "Not" the
// negation -- confirm against the implementation.
bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
bool isGFX9_GFX10(const MCSubtargetInfo &STI);
bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI);
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI);
bool isGFX8Plus(const MCSubtargetInfo &STI);
bool isGFX9Plus(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);
bool isGFX10_GFX11(const MCSubtargetInfo &STI);
bool isGFX10Plus(const MCSubtargetInfo &STI);
bool isNotGFX10Plus(const MCSubtargetInfo &STI);
bool isGFX10Before1030(const MCSubtargetInfo &STI);
bool isGFX11(const MCSubtargetInfo &STI);
bool isGFX11Plus(const MCSubtargetInfo &STI);
bool isGFX12(const MCSubtargetInfo &STI);
bool isGFX12Plus(const MCSubtargetInfo &STI);
bool isNotGFX12Plus(const MCSubtargetInfo &STI);
bool isNotGFX11Plus(const MCSubtargetInfo &STI);
bool isGCN3Encoding(const MCSubtargetInfo &STI);
bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
bool isGFX10_3_GFX11(const MCSubtargetInfo &STI);
bool isGFX90A(const MCSubtargetInfo &STI);
bool isGFX940(const MCSubtargetInfo &STI);
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
bool hasMAIInsts(const MCSubtargetInfo &STI);
bool hasVOPD(const MCSubtargetInfo &STI);
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI);
// NOTE(review): how the AGPR and VGPR counts are combined (and how
// \p has90AInsts changes that) is not visible from this declaration.
int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
// NOTE(review): named like a predicate but declared to return unsigned --
// confirm whether callers rely on anything beyond zero / non-zero.
unsigned hasKernargPreload(const MCSubtargetInfo &STI);
1303
/// Is Reg - scalar register
/// NOTE(review): \p TRI is taken by pointer; whether null is tolerated is not
/// visible from this declaration.
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);

/// \returns if \p Reg occupies the high 16-bits of a 32-bit register.
/// The bit indicating isHi is the LSB of the encoding.
bool isHi(unsigned Reg, const MCRegisterInfo &MRI);

/// If \p Reg is a pseudo reg, return the correct hardware register given
/// \p STI otherwise return \p Reg.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);

/// Convert hardware register \p Reg to a pseudo register
LLVM_READNONE
unsigned mc2PseudoReg(unsigned Reg);

/// NOTE(review): undocumented in this header; presumably tells whether \p Reg
/// denotes an inline value rather than an ordinary register -- confirm.
LLVM_READNONE
bool isInlineValue(unsigned Reg);

/// Is this an AMDGPU specific source operand? These include registers,
/// inline constants, literals and mandatory literals (KImm).
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Is this a KImm operand?
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Is this floating-point operand?
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Does this operand support only inlinable literals?
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Get the size in bits of a register from the register class identified by
/// \p RCID.
unsigned getRegBitWidth(unsigned RCID);

/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC);
1340
1341	/// Get size of register operand
1342	unsigned getRegOperandSize(const MCRegisterInfo MRI, const* MCInstrDesc &Desc,
1343	unsigned OpNo);
1344
1345	LLVM_READNONE
1346	inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
1347	switch (OpInfo.OperandType) {
1348	case AMDGPU::OPERAND_REG_IMM_INT32:
1349	case AMDGPU::OPERAND_REG_IMM_FP32:
1350	case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1351	case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1352	case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1353	case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1354	case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1355	case AMDGPU::OPERAND_REG_IMM_V2INT32:
1356	case AMDGPU::OPERAND_REG_IMM_V2FP32:
1357	case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1358	case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1359	case AMDGPU::OPERAND_KIMM32:
1360	case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
1361	case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
1362	return `4`;
1363
1364	case AMDGPU::OPERAND_REG_IMM_INT64:
1365	case AMDGPU::OPERAND_REG_IMM_FP64:
1366	case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1367	case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1368	case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1369	return `8`;
1370
1371	case AMDGPU::OPERAND_REG_IMM_INT16:
1372	case AMDGPU::OPERAND_REG_IMM_BF16:
1373	case AMDGPU::OPERAND_REG_IMM_FP16:
1374	case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
1375	case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1376	case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1377	case AMDGPU::OPERAND_REG_INLINE_C_BF16:
1378	case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1379	case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1380	case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
1381	case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1382	case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1383	case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
1384	case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1385	case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1386	case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
1387	case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1388	case AMDGPU::OPERAND_REG_IMM_V2INT16:
1389	case AMDGPU::OPERAND_REG_IMM_V2BF16:
1390	case AMDGPU::OPERAND_REG_IMM_V2FP16:
1391	return `2`;
1392
1393	default:
1394	llvm_unreachable("unhandled operand type");
1395	}
1396	}
1397
1398	LLVM_READNONE
1399	inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
1400	return getOperandSize(Desc.operands()[OpNo]);
1401	}
1402
1403	/// Is this literal inlinable, and not one of the values intended for floating
1404	/// point values.
1405	LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  // Accepts exactly the closed integer range [-16, 64].
  return Literal >= -16 && Literal <= 64;
}
1409
1410	/// Is this literal inlinable
1411	LLVM_READNONE
1412	bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
1413
1414	LLVM_READNONE
1415	bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
1416
1417	LLVM_READNONE
1418	bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
1419
1420	LLVM_READNONE
1421	bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);
1422
1423	LLVM_READNONE
1424	bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
1425
1426	LLVM_READNONE
1427	bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi);
1428
// Packed (two 16-bit element) literals, passed as one 32-bit value.
// NOTE(review): these declarations are undocumented in this header; the
// getInlineEncoding* functions presumably return the inline-constant
// encoding of \p Literal, or std::nullopt when it cannot be inlined --
// confirm against the implementation.

LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);

LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal);

LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal);

// Variant that selects the element type through the operand type \p OpType.
LLVM_READNONE
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType);

LLVM_READNONE
bool isInlinableLiteralV2I16(uint32_t Literal);

LLVM_READNONE
bool isInlinableLiteralV2BF16(uint32_t Literal);

LLVM_READNONE
bool isInlinableLiteralV2F16(uint32_t Literal);

// NOTE(review): presumably tells whether \p Val can be expressed as a 32-bit
// literal, with \p IsFP64 selecting the rule for 64-bit floating-point
// operands -- confirm against the implementation.
LLVM_READNONE
bool isValid32BitLiteral(uint64_t Val, bool IsFP64);
1452
1453	bool isArgPassedInSGPR(const Argument *Arg);
1454
1455	bool isArgPassedInSGPR(const CallBase CB, unsigned* ArgNo);
1456
// NOTE(review): the two predicates below are undocumented in this header; by
// their names they check an already-encoded SMRD offset against the unsigned
// and signed offset fields respectively.
LLVM_READONLY
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset);

LLVM_READONLY
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset,
                                    bool IsBuffer);

/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
/// offsets.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);

/// \returns The encoding that will be used for \p ByteOffset in the
/// SMRD offset field, or std::nullopt if it won't fit. On GFX9 and GFX10
/// S_LOAD instructions have a signed offset, on other subtargets it is
/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                            int64_t ByteOffset, bool IsBuffer);

/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
/// instruction. This is only useful on CI.
std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                     int64_t ByteOffset);

/// For pre-GFX12 FLAT instructions the offset must be positive;
/// MSB is ignored and forced to zero.
///
/// \return The number of bits available for the signed offset field in flat
/// instructions. Note that some forms of the instruction disallow negative
/// offsets.
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);

/// \returns true if this offset is small enough to fit in the SMRD
/// offset field.  \p ByteOffset should be the offset in bytes and
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
1494
1495	LLVM_READNONE
1496	inline bool isLegalDPALU_DPPControl(unsigned DC) {
1497	return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
1498	}
1499
/// \returns true if an instruction may have a 64-bit VGPR operand.
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc);

/// \returns true if an instruction is a DP ALU DPP.
bool isDPALU_DPP(const MCInstrDesc &OpDesc);

/// \returns true if the intrinsic with ID \p IntrID is divergent.
bool isIntrinsicSourceOfDivergence(unsigned IntrID);

/// \returns true if the intrinsic with ID \p IntrID is uniform.
bool isIntrinsicAlwaysUniform(unsigned IntrID);

/// \returns lds block size in terms of dwords for subtarget \p ST.
/// This is used to calculate the lds size encoded for PAL metadata 3.0+ which
/// must be defined in terms of bytes.
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);
1516
1517	} // end namespace AMDGPU
1518
/// Stream insertion operator writing the target ID setting \p S to \p OS.
/// NOTE(review): the textual form produced for each setting is defined by the
/// implementation.
raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S);
1521
1522	} // end namespace llvm
1523
1524	#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
1525

source code of llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h