1 | /* SPDX-License-Identifier: GPL-2.0 OR MIT */ |
2 | /* |
3 | * Copyright 2014-2022 Advanced Micro Devices, Inc. |
4 | * |
5 | * Permission is hereby granted, free of charge, to any person obtaining a |
6 | * copy of this software and associated documentation files (the "Software"), |
7 | * to deal in the Software without restriction, including without limitation |
8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
9 | * and/or sell copies of the Software, and to permit persons to whom the |
10 | * Software is furnished to do so, subject to the following conditions: |
11 | * |
12 | * The above copyright notice and this permission notice shall be included in |
13 | * all copies or substantial portions of the Software. |
14 | * |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
18 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
19 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
20 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
21 | * OTHER DEALINGS IN THE SOFTWARE. |
22 | */ |
23 | |
24 | #ifndef KFD_CRAT_H_INCLUDED |
25 | #define KFD_CRAT_H_INCLUDED |
26 | |
27 | #include <linux/types.h> |
28 | |
29 | #pragma pack(1) |
30 | |
/*
 * 4CC signature value for the CRAT ACPI table
 */
#define CRAT_SIGNATURE	"CRAT"

/*
 * Component Resource Association Table (CRAT)
 */

/* Sizes, in bytes, of the fixed-length byte arrays in struct crat_header. */
#define CRAT_OEMID_LENGTH	6
#define CRAT_OEMTABLEID_LENGTH	8
#define CRAT_RESERVED_LENGTH	6

/*
 * Low CRAT_OEMID_LENGTH * 8 (= 48) bits set.
 * NOTE(review): presumably used to isolate an OEM ID that was loaded into a
 * 64-bit value - confirm against the users of this mask.
 */
#define CRAT_OEMID_64BIT_MASK ((1ULL << (CRAT_OEMID_LENGTH * 8)) - 1)

/* Compute Unit flags */
#define COMPUTE_UNIT_CPU	(1 << 0)  /* Create Virtual CRAT for CPU */
#define COMPUTE_UNIT_GPU	(1 << 1)  /* Create Virtual CRAT for GPU */

/*
 * CRAT table header.
 *
 * The members from signature through creator_revision are laid out in the
 * order of the standard ACPI system description table header; the members
 * after that are specific to the CRAT.
 *
 * The structure is declared inside this file's #pragma pack(1) region, so
 * no padding is inserted between members and the header occupies 48 bytes.
 */
struct crat_header {
	uint32_t	signature;	/* 4CC table signature, see CRAT_SIGNATURE */
	uint32_t	length;		/* presumably the table length in bytes - TODO confirm */
	uint8_t		revision;
	uint8_t		checksum;
	uint8_t		oem_id[CRAT_OEMID_LENGTH];
	uint8_t		oem_table_id[CRAT_OEMTABLEID_LENGTH];
	uint32_t	oem_revision;
	uint32_t	creator_id;
	uint32_t	creator_revision;
	uint32_t	total_entries;	/* number of entries that follow the header */
	uint16_t	num_domains;
	uint8_t		reserved[CRAT_RESERVED_LENGTH];
};
65 | |
/*
 * The header structure is immediately followed by total_entries entries,
 * each laid out according to one of the data definitions below.
 */
70 | |
/*
 * Sub-type identifiers currently defined for CRAT entries.
 * CRAT_SUBTYPE_MAX is one past the highest defined identifier.
 */
#define CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY	0
#define CRAT_SUBTYPE_MEMORY_AFFINITY		1
#define CRAT_SUBTYPE_CACHE_AFFINITY		2
#define CRAT_SUBTYPE_TLB_AFFINITY		3
#define CRAT_SUBTYPE_CCOMPUTE_AFFINITY		4
#define CRAT_SUBTYPE_IOLINK_AFFINITY		5
#define CRAT_SUBTYPE_MAX			6

/*
 * Size, in bytes, of the sibling_map arrays embedded in the cache, TLB and
 * CCompute entries.  The value 32 is part of the ABI: changing it alters
 * the layout of those entries, so it must stay as it is.
 */
#define CRAT_SIBLINGMAP_SIZE	32
87 | |
/*
 * ComputeUnit Affinity entry: flag bits and on-table layout
 */
#define CRAT_CU_FLAGS_ENABLED		(1 << 0)
#define CRAT_CU_FLAGS_HOT_PLUGGABLE	(1 << 1)
#define CRAT_CU_FLAGS_CPU_PRESENT	(1 << 2)
#define CRAT_CU_FLAGS_GPU_PRESENT	(1 << 3)
#define CRAT_CU_FLAGS_IOMMU_PRESENT	(1 << 4)
#define CRAT_CU_FLAGS_RESERVED		0xffffffe0	/* bits 5..31 */

/* Number of trailing reserved bytes in struct crat_subtype_computeunit. */
#define CRAT_COMPUTEUNIT_RESERVED_LENGTH	4

struct crat_subtype_computeunit {
	/* Leading members match struct crat_subtype_generic. */
	uint8_t		type;
	uint8_t		length;
	uint16_t	reserved;
	uint32_t	flags;		/* CRAT_CU_FLAGS_* */

	uint32_t	proximity_domain;
	uint32_t	processor_id_low;
	uint16_t	num_cpu_cores;
	uint16_t	num_simd_cores;
	uint16_t	max_waves_simd;
	uint16_t	io_count;
	uint16_t	hsa_capability;
	uint16_t	lds_size_in_kb;
	uint8_t		wave_front_size;
	uint8_t		num_banks;
	uint16_t	micro_engine_id;
	uint8_t		array_count;
	uint8_t		num_cu_per_array;
	uint8_t		num_simd_per_cu;
	/* "scatch" (sic): the spelling is part of the existing interface. */
	uint8_t		max_slots_scatch_cu;
	uint8_t		reserved2[CRAT_COMPUTEUNIT_RESERVED_LENGTH];
};
122 | |
/*
 * HSA Memory Affinity entry: flag bits and on-table layout
 */
#define CRAT_MEM_FLAGS_ENABLED		(1 << 0)
#define CRAT_MEM_FLAGS_HOT_PLUGGABLE	(1 << 1)
#define CRAT_MEM_FLAGS_NON_VOLATILE	(1 << 2)
#define CRAT_MEM_FLAGS_RESERVED		0xfffffff8	/* bits 3..31 */

/*
 * Size, in bytes, of the trailing reserved area.  Its first byte is used
 * for visibility_type, so reserved2 covers the remaining bytes.
 */
#define CRAT_MEMORY_RESERVED_LENGTH	8

struct crat_subtype_memory {
	/* Leading members match struct crat_subtype_generic. */
	uint8_t		type;
	uint8_t		length;
	uint16_t	reserved;
	uint32_t	flags;		/* CRAT_MEM_FLAGS_* */

	uint32_t	proximity_domain;
	/* Base address and length are each split into 32-bit low/high halves. */
	uint32_t	base_addr_low;
	uint32_t	base_addr_high;
	uint32_t	length_low;
	uint32_t	length_high;
	uint32_t	width;
	uint8_t		visibility_type;	/* for virtual (dGPU) CRAT */
	uint8_t		reserved2[CRAT_MEMORY_RESERVED_LENGTH - 1];
};
147 | |
148 | /* |
149 | * HSA Cache Affinity structure and definitions |
150 | */ |
151 | #define CRAT_CACHE_FLAGS_ENABLED 0x00000001 |
152 | #define CRAT_CACHE_FLAGS_DATA_CACHE 0x00000002 |
153 | #define CRAT_CACHE_FLAGS_INST_CACHE 0x00000004 |
154 | #define CRAT_CACHE_FLAGS_CPU_CACHE 0x00000008 |
155 | #define CRAT_CACHE_FLAGS_SIMD_CACHE 0x00000010 |
156 | #define CRAT_CACHE_FLAGS_RESERVED 0xffffffe0 |
157 | |
158 | #define CRAT_CACHE_RESERVED_LENGTH 8 |
159 | |
160 | struct crat_subtype_cache { |
161 | uint8_t type; |
162 | uint8_t length; |
163 | uint16_t reserved; |
164 | uint32_t flags; |
165 | uint32_t processor_id_low; |
166 | uint8_t sibling_map[CRAT_SIBLINGMAP_SIZE]; |
167 | uint32_t cache_size; |
168 | uint8_t cache_level; |
169 | uint8_t lines_per_tag; |
170 | uint16_t cache_line_size; |
171 | uint8_t associativity; |
172 | uint8_t cache_properties; |
173 | uint16_t cache_latency; |
174 | uint8_t reserved2[CRAT_CACHE_RESERVED_LENGTH]; |
175 | }; |
176 | |
177 | /* |
178 | * HSA TLB Affinity structure and definitions |
179 | */ |
180 | #define CRAT_TLB_FLAGS_ENABLED 0x00000001 |
181 | #define CRAT_TLB_FLAGS_DATA_TLB 0x00000002 |
182 | #define CRAT_TLB_FLAGS_INST_TLB 0x00000004 |
183 | #define CRAT_TLB_FLAGS_CPU_TLB 0x00000008 |
184 | #define CRAT_TLB_FLAGS_SIMD_TLB 0x00000010 |
185 | #define CRAT_TLB_FLAGS_RESERVED 0xffffffe0 |
186 | |
187 | #define CRAT_TLB_RESERVED_LENGTH 4 |
188 | |
189 | struct crat_subtype_tlb { |
190 | uint8_t type; |
191 | uint8_t length; |
192 | uint16_t reserved; |
193 | uint32_t flags; |
194 | uint32_t processor_id_low; |
195 | uint8_t sibling_map[CRAT_SIBLINGMAP_SIZE]; |
196 | uint32_t tlb_level; |
197 | uint8_t data_tlb_associativity_2mb; |
198 | uint8_t data_tlb_size_2mb; |
199 | uint8_t instruction_tlb_associativity_2mb; |
200 | uint8_t instruction_tlb_size_2mb; |
201 | uint8_t data_tlb_associativity_4k; |
202 | uint8_t data_tlb_size_4k; |
203 | uint8_t instruction_tlb_associativity_4k; |
204 | uint8_t instruction_tlb_size_4k; |
205 | uint8_t data_tlb_associativity_1gb; |
206 | uint8_t data_tlb_size_1gb; |
207 | uint8_t instruction_tlb_associativity_1gb; |
208 | uint8_t instruction_tlb_size_1gb; |
209 | uint8_t reserved2[CRAT_TLB_RESERVED_LENGTH]; |
210 | }; |
211 | |
212 | /* |
213 | * HSA CCompute/APU Affinity structure and definitions |
214 | */ |
215 | #define CRAT_CCOMPUTE_FLAGS_ENABLED 0x00000001 |
216 | #define CRAT_CCOMPUTE_FLAGS_RESERVED 0xfffffffe |
217 | |
218 | #define CRAT_CCOMPUTE_RESERVED_LENGTH 16 |
219 | |
220 | struct crat_subtype_ccompute { |
221 | uint8_t type; |
222 | uint8_t length; |
223 | uint16_t reserved; |
224 | uint32_t flags; |
225 | uint32_t processor_id_low; |
226 | uint8_t sibling_map[CRAT_SIBLINGMAP_SIZE]; |
227 | uint32_t apu_size; |
228 | uint8_t reserved2[CRAT_CCOMPUTE_RESERVED_LENGTH]; |
229 | }; |
230 | |
/*
 * HSA IO Link Affinity structure and definitions
 */
#define CRAT_IOLINK_FLAGS_ENABLED		(1 << 0)
#define CRAT_IOLINK_FLAGS_NON_COHERENT		(1 << 1)
#define CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT	(1 << 2)
#define CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT	(1 << 3)
#define CRAT_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA	(1 << 4)
/*
 * Bit 31 is shifted as unsigned on purpose: "1 << 31" overflows a 32-bit
 * signed int, which is undefined behaviour, and where it happens to yield
 * INT_MIN the value sign-extends when widened to a 64-bit type.
 */
#define CRAT_IOLINK_FLAGS_BI_DIRECTIONAL	(1U << 31)
#define CRAT_IOLINK_FLAGS_RESERVED_MASK		0x7fffffe0	/* bits 5..30 */

/*
 * IO interface types (values stored in io_interface_type)
 */
#define CRAT_IOLINK_TYPE_UNDEFINED	0
#define CRAT_IOLINK_TYPE_HYPERTRANSPORT	1
#define CRAT_IOLINK_TYPE_PCIEXPRESS	2
#define CRAT_IOLINK_TYPE_AMBA		3
#define CRAT_IOLINK_TYPE_MIPI		4
#define CRAT_IOLINK_TYPE_QPI_1_1	5
#define CRAT_IOLINK_TYPE_RESERVED1	6
#define CRAT_IOLINK_TYPE_RESERVED2	7
#define CRAT_IOLINK_TYPE_RAPID_IO	8
#define CRAT_IOLINK_TYPE_INFINIBAND	9
#define CRAT_IOLINK_TYPE_RESERVED3	10
#define CRAT_IOLINK_TYPE_XGMI		11
#define CRAT_IOLINK_TYPE_XGOP		12
#define CRAT_IOLINK_TYPE_GZ		13
#define CRAT_IOLINK_TYPE_ETHERNET_RDMA	14
#define CRAT_IOLINK_TYPE_RDMA_OTHER	15
#define CRAT_IOLINK_TYPE_OTHER		16
#define CRAT_IOLINK_TYPE_MAX		255	/* largest value a uint8_t type field can hold */

/*
 * Size, in bytes, of the trailing reserved area.  Its last byte is used
 * for weight_xgmi, so reserved2 covers the bytes before it.
 */
#define CRAT_IOLINK_RESERVED_LENGTH	24

struct crat_subtype_iolink {
	/* Leading members match struct crat_subtype_generic. */
	uint8_t		type;
	uint8_t		length;
	uint16_t	reserved;
	uint32_t	flags;		/* CRAT_IOLINK_FLAGS_* */

	uint32_t	proximity_domain_from;
	uint32_t	proximity_domain_to;
	uint8_t		io_interface_type;	/* CRAT_IOLINK_TYPE_* */
	uint8_t		version_major;
	uint16_t	version_minor;
	uint32_t	minimum_latency;
	uint32_t	maximum_latency;
	uint32_t	minimum_bandwidth_mbs;
	uint32_t	maximum_bandwidth_mbs;
	uint32_t	recommended_transfer_size;
	uint8_t		reserved2[CRAT_IOLINK_RESERVED_LENGTH - 1];
	uint8_t		weight_xgmi;
};
284 | |
/*
 * HSA generic sub-type header: the four leading members that every
 * crat_subtype_* structure in this file begins with.
 */
#define CRAT_SUBTYPE_FLAGS_ENABLED	(1 << 0)

struct crat_subtype_generic {
	uint8_t		type;
	uint8_t		length;
	uint16_t	reserved;
	uint32_t	flags;
};
297 | |
298 | #pragma pack() |
299 | |
/* Only pointers to struct kfd_node are used by the declarations below. */
struct kfd_node;

/* Entry of a static table describing GPU cache information */
struct kfd_gpu_cache_info {
	uint32_t	cache_size;
	uint32_t	cache_level;
	uint32_t	flags;
	/*
	 * Number of Compute Units that share this cache within a SA.
	 * A value of 1 means the cache is not shared.
	 */
	uint32_t	num_cu_shared;
};

/*
 * NOTE(review): pcache_info looks like an output parameter that receives
 * the cache table for kdev; the meaning of the int result is not visible
 * in this header - confirm against the implementation.
 */
int kfd_get_gpu_cache_info(struct kfd_node *kdev,
			   struct kfd_gpu_cache_info **pcache_info);

/* CRAT image handling; the image is passed around as an untyped pointer. */
void kfd_destroy_crat_image(void *crat_image);

int kfd_parse_crat_table(void *crat_image,
			 struct list_head *device_list,
			 uint32_t proximity_domain);

/*
 * NOTE(review): flags presumably takes COMPUTE_UNIT_CPU or COMPUTE_UNIT_GPU
 * (both are commented "Create Virtual CRAT for ...") - confirm.
 */
int kfd_create_crat_image_virtual(void **crat_image,
				  size_t *size,
				  int flags,
				  struct kfd_node *kdev,
				  uint32_t proximity_domain);
320 | |
321 | #endif /* KFD_CRAT_H_INCLUDED */ |
322 | |