1 | //===--- AMDHSAKernelDescriptor.h -----------------------------*- C++ -*---===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// AMDHSA kernel descriptor definitions. For more information, visit |
11 | /// https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor |
12 | /// |
13 | /// \warning |
14 | /// Any changes to this file should also be audited for corresponding changes |
15 | /// needed in both the assembler and disassembler, namely: |
16 | /// * AMDGPUAsmPrinter.{cpp,h} |
17 | /// * AMDGPUTargetStreamer.{cpp,h} |
18 | /// * AMDGPUDisassembler.{cpp,h} |
19 | // |
20 | //===----------------------------------------------------------------------===// |
21 | |
22 | #ifndef LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H |
23 | #define LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H |
24 | |
25 | #include <cstddef> |
26 | #include <cstdint> |
27 | |
// Fallback for environments whose headers did not already supply offsetof:
// yields the byte offset of MEMBER inside TYPE as a size_t. The definition is
// skipped entirely whenever offsetof is already defined as a macro.
#if !defined(offsetof)
#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
#endif // !defined(offsetof)
32 | |
// Emits the three enumerators that describe one bit field packed into an
// integer. For a field called FIELD the expansion produces:
//   FIELD_SHIFT - index of the field's least significant bit (LSB),
//   FIELD_WIDTH - number of bits the field occupies (NBITS),
//   FIELD       - mask with exactly those NBITS bits set, starting at LSB.
// The expansion is a comma-separated list without a trailing comma, so it is
// meant to be used inside an enum body.
#if !defined(AMDHSA_BITS_ENUM_ENTRY)
#define AMDHSA_BITS_ENUM_ENTRY(FIELD, LSB, NBITS) \
  FIELD ## _SHIFT = (LSB), \
  FIELD ## _WIDTH = (NBITS), \
  FIELD = (((1 << (NBITS)) - 1) << (LSB))
#endif // !defined(AMDHSA_BITS_ENUM_ENTRY)
41 | |
// Extracts the bit field selected by MSK from SRC and yields it right-aligned
// (shifted down by MSK_SHIFT).
//
// MSK must be the name of a mask that has a companion <MSK>_SHIFT constant,
// such as the enumerators produced by AMDHSA_BITS_ENUM_ENTRY: the name is
// token-pasted with _SHIFT, so it cannot be an arbitrary expression.
// SRC may be any integer expression; it is parenthesized in the expansion so
// that an argument such as `A | B` is masked as a whole instead of having the
// mask bind only to its last operand.
#ifndef AMDHSA_BITS_GET
#define AMDHSA_BITS_GET(SRC, MSK) (((SRC) & (MSK)) >> MSK ## _SHIFT)
#endif // AMDHSA_BITS_GET
46 | |
// Overwrites the bit field selected by MSK inside DST with VAL: the field is
// cleared first, then VAL is shifted up by MSK_SHIFT, truncated to the mask and
// OR-ed in. Bits of DST outside the mask are left untouched.
//
// MSK must be the name of a mask that has a companion <MSK>_SHIFT constant,
// such as the enumerators produced by AMDHSA_BITS_ENUM_ENTRY: the name is
// token-pasted with _SHIFT. DST must be a modifiable lvalue; it is evaluated
// twice, so it must not have side effects.
//
// The two updates are wrapped in do { } while (0) so the macro behaves as one
// statement (e.g. as the unbraced body of an `if`), and every argument is
// parenthesized so that a VAL such as `A | B` is shifted as a whole.
#ifndef AMDHSA_BITS_SET
#define AMDHSA_BITS_SET(DST, MSK, VAL) \
  do { \
    (DST) &= ~(MSK); \
    (DST) |= (((VAL) << MSK ## _SHIFT) & (MSK)); \
  } while (0)
#endif // AMDHSA_BITS_SET
53 | |
54 | namespace llvm { |
55 | namespace amdhsa { |
56 | |
// Encodings of the floating point rounding mode. The numeric values are fixed
// by the hardware definition and must not be renumbered.
enum : uint8_t {
  FLOAT_ROUND_MODE_NEAR_EVEN      = 0x0,
  FLOAT_ROUND_MODE_PLUS_INFINITY  = 0x1,
  FLOAT_ROUND_MODE_MINUS_INFINITY = 0x2,
  FLOAT_ROUND_MODE_ZERO           = 0x3
};
64 | |
// Encodings of the floating point denormal mode. The numeric values are fixed
// by the hardware definition and must not be renumbered.
enum : uint8_t {
  FLOAT_DENORM_MODE_FLUSH_SRC_DST = 0x0,
  FLOAT_DENORM_MODE_FLUSH_DST     = 0x1,
  FLOAT_DENORM_MODE_FLUSH_SRC     = 0x2,
  FLOAT_DENORM_MODE_FLUSH_NONE    = 0x3
};
72 | |
// Encodings of the system VGPR work-item ID selection. The numeric values are
// fixed by the hardware definition and must not be renumbered.
enum : uint8_t {
  SYSTEM_VGPR_WORKITEM_ID_X         = 0x0,
  SYSTEM_VGPR_WORKITEM_ID_X_Y       = 0x1,
  SYSTEM_VGPR_WORKITEM_ID_X_Y_Z     = 0x2,
  SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 0x3
};
80 | |
81 | // Compute program resource register 1. Must match hardware definition. |
82 | // GFX6+. |
83 | #define COMPUTE_PGM_RSRC1(NAME, SHIFT, WIDTH) \ |
84 | AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_ ## NAME, SHIFT, WIDTH) |
85 | // [GFX6-GFX8]. |
86 | #define COMPUTE_PGM_RSRC1_GFX6_GFX8(NAME, SHIFT, WIDTH) \ |
87 | AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX8_ ## NAME, SHIFT, WIDTH) |
88 | // [GFX6-GFX9]. |
89 | #define COMPUTE_PGM_RSRC1_GFX6_GFX9(NAME, SHIFT, WIDTH) \ |
90 | AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX9_ ## NAME, SHIFT, WIDTH) |
91 | // [GFX6-GFX11]. |
92 | #define COMPUTE_PGM_RSRC1_GFX6_GFX11(NAME, SHIFT, WIDTH) \ |
93 | AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX11_##NAME, SHIFT, WIDTH) |
94 | // GFX9+. |
95 | #define COMPUTE_PGM_RSRC1_GFX9_PLUS(NAME, SHIFT, WIDTH) \ |
96 | AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX9_PLUS_ ## NAME, SHIFT, WIDTH) |
97 | // GFX10+. |
98 | #define COMPUTE_PGM_RSRC1_GFX10_PLUS(NAME, SHIFT, WIDTH) \ |
99 | AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX10_PLUS_ ## NAME, SHIFT, WIDTH) |
100 | // GFX12+. |
101 | #define COMPUTE_PGM_RSRC1_GFX12_PLUS(NAME, SHIFT, WIDTH) \ |
102 | AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX12_PLUS_##NAME, SHIFT, WIDTH) |
103 | enum : int32_t { |
104 | COMPUTE_PGM_RSRC1(GRANULATED_WORKITEM_VGPR_COUNT, 0, 6), |
105 | COMPUTE_PGM_RSRC1(GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4), |
106 | COMPUTE_PGM_RSRC1(PRIORITY, 10, 2), |
107 | COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_32, 12, 2), |
108 | COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_16_64, 14, 2), |
109 | COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_32, 16, 2), |
110 | COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_16_64, 18, 2), |
111 | COMPUTE_PGM_RSRC1(PRIV, 20, 1), |
112 | COMPUTE_PGM_RSRC1_GFX6_GFX11(ENABLE_DX10_CLAMP, 21, 1), |
113 | COMPUTE_PGM_RSRC1_GFX12_PLUS(ENABLE_WG_RR_EN, 21, 1), |
114 | COMPUTE_PGM_RSRC1(DEBUG_MODE, 22, 1), |
115 | COMPUTE_PGM_RSRC1_GFX6_GFX11(ENABLE_IEEE_MODE, 23, 1), |
116 | COMPUTE_PGM_RSRC1_GFX12_PLUS(DISABLE_PERF, 23, 1), |
117 | COMPUTE_PGM_RSRC1(BULKY, 24, 1), |
118 | COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1), |
119 | COMPUTE_PGM_RSRC1_GFX6_GFX8(RESERVED0, 26, 1), |
120 | COMPUTE_PGM_RSRC1_GFX9_PLUS(FP16_OVFL, 26, 1), |
121 | COMPUTE_PGM_RSRC1(RESERVED1, 27, 2), |
122 | COMPUTE_PGM_RSRC1_GFX6_GFX9(RESERVED2, 29, 3), |
123 | COMPUTE_PGM_RSRC1_GFX10_PLUS(WGP_MODE, 29, 1), |
124 | COMPUTE_PGM_RSRC1_GFX10_PLUS(MEM_ORDERED, 30, 1), |
125 | COMPUTE_PGM_RSRC1_GFX10_PLUS(FWD_PROGRESS, 31, 1), |
126 | }; |
127 | #undef COMPUTE_PGM_RSRC1 |
128 | |
129 | // Compute program resource register 2. Must match hardware definition. |
130 | // GFX6+. |
131 | #define COMPUTE_PGM_RSRC2(NAME, SHIFT, WIDTH) \ |
132 | AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_ ## NAME, SHIFT, WIDTH) |
133 | // [GFX6-GFX11]. |
134 | #define COMPUTE_PGM_RSRC2_GFX6_GFX11(NAME, SHIFT, WIDTH) \ |
135 | AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_GFX6_GFX11_##NAME, SHIFT, WIDTH) |
136 | // GFX12+. |
137 | #define COMPUTE_PGM_RSRC2_GFX12_PLUS(NAME, SHIFT, WIDTH) \ |
138 | AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_GFX12_PLUS_##NAME, SHIFT, WIDTH) |
139 | enum : int32_t { |
140 | COMPUTE_PGM_RSRC2(ENABLE_PRIVATE_SEGMENT, 0, 1), |
141 | COMPUTE_PGM_RSRC2(USER_SGPR_COUNT, 1, 5), |
142 | COMPUTE_PGM_RSRC2_GFX6_GFX11(ENABLE_TRAP_HANDLER, 6, 1), |
143 | COMPUTE_PGM_RSRC2_GFX12_PLUS(RESERVED1, 6, 1), |
144 | COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_X, 7, 1), |
145 | COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1), |
146 | COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1), |
147 | COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_INFO, 10, 1), |
148 | COMPUTE_PGM_RSRC2(ENABLE_VGPR_WORKITEM_ID, 11, 2), |
149 | COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1), |
150 | COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_MEMORY, 14, 1), |
151 | COMPUTE_PGM_RSRC2(GRANULATED_LDS_SIZE, 15, 9), |
152 | COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1), |
153 | COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1), |
154 | COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1), |
155 | COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1), |
156 | COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1), |
157 | COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1), |
158 | COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 30, 1), |
159 | COMPUTE_PGM_RSRC2(RESERVED0, 31, 1), |
160 | }; |
161 | #undef COMPUTE_PGM_RSRC2 |
162 | |
163 | // Compute program resource register 3 for GFX90A+. Must match hardware |
164 | // definition. |
165 | #define COMPUTE_PGM_RSRC3_GFX90A(NAME, SHIFT, WIDTH) \ |
166 | AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX90A_ ## NAME, SHIFT, WIDTH) |
167 | enum : int32_t { |
168 | COMPUTE_PGM_RSRC3_GFX90A(ACCUM_OFFSET, 0, 6), |
169 | COMPUTE_PGM_RSRC3_GFX90A(RESERVED0, 6, 10), |
170 | COMPUTE_PGM_RSRC3_GFX90A(TG_SPLIT, 16, 1), |
171 | COMPUTE_PGM_RSRC3_GFX90A(RESERVED1, 17, 15), |
172 | }; |
173 | #undef COMPUTE_PGM_RSRC3_GFX90A |
174 | |
175 | // Compute program resource register 3 for GFX10+. Must match hardware |
176 | // definition. |
177 | // GFX10+. |
178 | #define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \ |
179 | AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_ ## NAME, SHIFT, WIDTH) |
180 | // [GFX10]. |
181 | #define COMPUTE_PGM_RSRC3_GFX10(NAME, SHIFT, WIDTH) \ |
182 | AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_##NAME, SHIFT, WIDTH) |
183 | // [GFX10-GFX11]. |
184 | #define COMPUTE_PGM_RSRC3_GFX10_GFX11(NAME, SHIFT, WIDTH) \ |
185 | AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_GFX11_##NAME, SHIFT, WIDTH) |
186 | // GFX11+. |
187 | #define COMPUTE_PGM_RSRC3_GFX11_PLUS(NAME, SHIFT, WIDTH) \ |
188 | AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX11_PLUS_ ## NAME, SHIFT, WIDTH) |
189 | // [GFX11]. |
190 | #define COMPUTE_PGM_RSRC3_GFX11(NAME, SHIFT, WIDTH) \ |
191 | AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX11_##NAME, SHIFT, WIDTH) |
192 | // GFX12+. |
193 | #define COMPUTE_PGM_RSRC3_GFX12_PLUS(NAME, SHIFT, WIDTH) \ |
194 | AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX12_PLUS_##NAME, SHIFT, WIDTH) |
195 | enum : int32_t { |
196 | COMPUTE_PGM_RSRC3_GFX10_GFX11(SHARED_VGPR_COUNT, 0, 4), |
197 | COMPUTE_PGM_RSRC3_GFX12_PLUS(RESERVED0, 0, 4), |
198 | COMPUTE_PGM_RSRC3_GFX10(RESERVED1, 4, 8), |
199 | COMPUTE_PGM_RSRC3_GFX11(INST_PREF_SIZE, 4, 6), |
200 | COMPUTE_PGM_RSRC3_GFX11(TRAP_ON_START, 10, 1), |
201 | COMPUTE_PGM_RSRC3_GFX11(TRAP_ON_END, 11, 1), |
202 | COMPUTE_PGM_RSRC3_GFX12_PLUS(INST_PREF_SIZE, 4, 8), |
203 | COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED2, 12, 1), |
204 | COMPUTE_PGM_RSRC3_GFX10_GFX11(RESERVED3, 13, 1), |
205 | COMPUTE_PGM_RSRC3_GFX12_PLUS(GLG_EN, 13, 1), |
206 | COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED4, 14, 17), |
207 | COMPUTE_PGM_RSRC3_GFX10(RESERVED5, 31, 1), |
208 | COMPUTE_PGM_RSRC3_GFX11_PLUS(IMAGE_OP, 31, 1), |
209 | }; |
210 | #undef COMPUTE_PGM_RSRC3_GFX10_PLUS |
211 | |
212 | // Kernel code properties. Must be kept backwards compatible. |
213 | #define KERNEL_CODE_PROPERTY(NAME, SHIFT, WIDTH) \ |
214 | AMDHSA_BITS_ENUM_ENTRY(KERNEL_CODE_PROPERTY_ ## NAME, SHIFT, WIDTH) |
215 | enum : int32_t { |
216 | KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 0, 1), |
217 | KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_PTR, 1, 1), |
218 | KERNEL_CODE_PROPERTY(ENABLE_SGPR_QUEUE_PTR, 2, 1), |
219 | KERNEL_CODE_PROPERTY(ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3, 1), |
220 | KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_ID, 4, 1), |
221 | KERNEL_CODE_PROPERTY(ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1), |
222 | KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1), |
223 | KERNEL_CODE_PROPERTY(RESERVED0, 7, 3), |
224 | KERNEL_CODE_PROPERTY(ENABLE_WAVEFRONT_SIZE32, 10, 1), // GFX10+ |
225 | KERNEL_CODE_PROPERTY(USES_DYNAMIC_STACK, 11, 1), |
226 | KERNEL_CODE_PROPERTY(RESERVED1, 12, 4), |
227 | }; |
228 | #undef KERNEL_CODE_PROPERTY |
229 | |
230 | // Kernarg preload specification. |
231 | #define KERNARG_PRELOAD_SPEC(NAME, SHIFT, WIDTH) \ |
232 | AMDHSA_BITS_ENUM_ENTRY(KERNARG_PRELOAD_SPEC_##NAME, SHIFT, WIDTH) |
233 | enum : int32_t { |
234 | KERNARG_PRELOAD_SPEC(LENGTH, 0, 7), |
235 | KERNARG_PRELOAD_SPEC(OFFSET, 7, 9), |
236 | }; |
237 | #undef KERNARG_PRELOAD_SPEC |
238 | |
// Kernel descriptor. Must be kept backwards compatible.
//
// Fixed-layout record: the member order, types and the reserved padding
// arrays determine the byte offset of every member, and this header
// static_asserts both the total size (64 bytes) and each member's offset.
// Do not reorder, resize or insert members.
struct kernel_descriptor_t {
  // Bytes 0-3.
  uint32_t group_segment_fixed_size;
  // Bytes 4-7.
  uint32_t private_segment_fixed_size;
  // Bytes 8-11.
  uint32_t kernarg_size;
  // Bytes 12-15, reserved.
  uint8_t reserved0[4];
  // Bytes 16-23; a signed 64-bit value.
  int64_t kernel_code_entry_byte_offset;
  // Bytes 24-43, reserved.
  uint8_t reserved1[20];
  // Bytes 44-47.
  uint32_t compute_pgm_rsrc3; // GFX10+ and GFX90A+
  // Bytes 48-51.
  uint32_t compute_pgm_rsrc1;
  // Bytes 52-55.
  uint32_t compute_pgm_rsrc2;
  // Bytes 56-57.
  uint16_t kernel_code_properties;
  // Bytes 58-59.
  uint16_t kernarg_preload;
  // Bytes 60-63, reserved.
  uint8_t reserved3[4];
};
254 | |
// Byte offset of each kernel_descriptor_t member from the start of the
// descriptor, one constant per member and in member order. This header
// static_asserts every constant against offsetof() of the matching member.
enum : uint32_t {
  GROUP_SEGMENT_FIXED_SIZE_OFFSET = 0,
  PRIVATE_SEGMENT_FIXED_SIZE_OFFSET = 4,
  KERNARG_SIZE_OFFSET = 8,
  RESERVED0_OFFSET = 12,
  KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = 16,
  RESERVED1_OFFSET = 24,
  COMPUTE_PGM_RSRC3_OFFSET = 44,
  COMPUTE_PGM_RSRC1_OFFSET = 48,
  COMPUTE_PGM_RSRC2_OFFSET = 52,
  KERNEL_CODE_PROPERTIES_OFFSET = 56,
  KERNARG_PRELOAD_OFFSET = 58,
  RESERVED3_OFFSET = 60
};
269 | |
270 | static_assert( |
271 | sizeof(kernel_descriptor_t) == 64, |
272 | "invalid size for kernel_descriptor_t" ); |
273 | static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) == |
274 | GROUP_SEGMENT_FIXED_SIZE_OFFSET, |
275 | "invalid offset for group_segment_fixed_size" ); |
276 | static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) == |
277 | PRIVATE_SEGMENT_FIXED_SIZE_OFFSET, |
278 | "invalid offset for private_segment_fixed_size" ); |
279 | static_assert(offsetof(kernel_descriptor_t, kernarg_size) == |
280 | KERNARG_SIZE_OFFSET, |
281 | "invalid offset for kernarg_size" ); |
282 | static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET, |
283 | "invalid offset for reserved0" ); |
284 | static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) == |
285 | KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET, |
286 | "invalid offset for kernel_code_entry_byte_offset" ); |
287 | static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET, |
288 | "invalid offset for reserved1" ); |
289 | static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == |
290 | COMPUTE_PGM_RSRC3_OFFSET, |
291 | "invalid offset for compute_pgm_rsrc3" ); |
292 | static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == |
293 | COMPUTE_PGM_RSRC1_OFFSET, |
294 | "invalid offset for compute_pgm_rsrc1" ); |
295 | static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) == |
296 | COMPUTE_PGM_RSRC2_OFFSET, |
297 | "invalid offset for compute_pgm_rsrc2" ); |
298 | static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) == |
299 | KERNEL_CODE_PROPERTIES_OFFSET, |
300 | "invalid offset for kernel_code_properties" ); |
301 | static_assert(offsetof(kernel_descriptor_t, kernarg_preload) == |
302 | KERNARG_PRELOAD_OFFSET, |
303 | "invalid offset for kernarg_preload" ); |
304 | static_assert(offsetof(kernel_descriptor_t, reserved3) == RESERVED3_OFFSET, |
305 | "invalid offset for reserved3" ); |
306 | |
307 | } // end namespace amdhsa |
308 | } // end namespace llvm |
309 | |
310 | #endif // LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H |
311 | |