1 | /* |
2 | * Copyright 2014 Advanced Micro Devices, Inc. |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice shall be included in |
12 | * all copies or substantial portions of the Software. |
13 | * |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
20 | * OTHER DEALINGS IN THE SOFTWARE. |
21 | */ |
22 | |
23 | #include <linux/types.h> |
24 | #include <linux/kernel.h> |
25 | #include <linux/pci.h> |
26 | #include <linux/errno.h> |
27 | #include <linux/acpi.h> |
28 | #include <linux/hash.h> |
29 | #include <linux/cpufreq.h> |
30 | #include <linux/log2.h> |
31 | #include <linux/dmi.h> |
32 | #include <linux/atomic.h> |
33 | |
34 | #include "kfd_priv.h" |
35 | #include "kfd_crat.h" |
36 | #include "kfd_topology.h" |
37 | #include "kfd_device_queue_manager.h" |
38 | #include "kfd_iommu.h" |
39 | #include "amdgpu_amdkfd.h" |
40 | |
41 | /* topology_device_list - Master list of all topology devices */ |
42 | static struct list_head topology_device_list; |
43 | static struct kfd_system_properties sys_props; |
44 | |
45 | static DECLARE_RWSEM(topology_lock); |
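/* Highest proximity domain assigned so far; GPUs added at runtime take
* the next value via atomic_inc_return() in kfd_topology_add_device()
*/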
46 | static atomic_t topology_crat_proximity_domain; |
47 | |
48 | struct kfd_topology_device *kfd_topology_device_by_proximity_domain( |
49 | uint32_t proximity_domain) |
50 | { |
51 | struct kfd_topology_device *top_dev; |
52 | struct kfd_topology_device *device = NULL; |
53 | |
54 | down_read(&topology_lock); |
55 | |
56 | list_for_each_entry(top_dev, &topology_device_list, list) |
57 | if (top_dev->proximity_domain == proximity_domain) { |
58 | device = top_dev; |
59 | break; |
60 | } |
61 | |
62 | up_read(&topology_lock); |
63 | |
64 | return device; |
65 | } |
66 | |
67 | struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id) |
68 | { |
69 | struct kfd_topology_device *top_dev = NULL; |
70 | struct kfd_topology_device *ret = NULL; |
71 | |
72 | down_read(&topology_lock); |
73 | |
74 | list_for_each_entry(top_dev, &topology_device_list, list) |
75 | if (top_dev->gpu_id == gpu_id) { |
76 | ret = top_dev; |
77 | break; |
78 | } |
79 | |
80 | up_read(&topology_lock); |
81 | |
82 | return ret; |
83 | } |
84 | |
85 | struct kfd_dev *kfd_device_by_id(uint32_t gpu_id) |
86 | { |
87 | struct kfd_topology_device *top_dev; |
88 | |
89 | top_dev = kfd_topology_device_by_id(gpu_id); |
90 | if (!top_dev) |
91 | return NULL; |
92 | |
93 | return top_dev->gpu; |
94 | } |
95 | |
96 | struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev) |
97 | { |
98 | struct kfd_topology_device *top_dev; |
99 | struct kfd_dev *device = NULL; |
100 | |
101 | down_read(&topology_lock); |
102 | |
103 | list_for_each_entry(top_dev, &topology_device_list, list) |
104 | if (top_dev->gpu && top_dev->gpu->pdev == pdev) { |
105 | device = top_dev->gpu; |
106 | break; |
107 | } |
108 | |
109 | up_read(&topology_lock); |
110 | |
111 | return device; |
112 | } |
113 | |
114 | struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd) |
115 | { |
116 | struct kfd_topology_device *top_dev; |
117 | struct kfd_dev *device = NULL; |
118 | |
119 | down_read(&topology_lock); |
120 | |
121 | list_for_each_entry(top_dev, &topology_device_list, list) |
122 | if (top_dev->gpu && top_dev->gpu->kgd == kgd) { |
123 | device = top_dev->gpu; |
124 | break; |
125 | } |
126 | |
127 | up_read(&topology_lock); |
128 | |
129 | return device; |
130 | } |
131 | |
132 | /* Called with write topology_lock acquired */ |
133 | static void kfd_release_topology_device(struct kfd_topology_device *dev) |
134 | { |
135 | struct kfd_mem_properties *mem; |
136 | struct kfd_cache_properties *cache; |
137 | struct kfd_iolink_properties *iolink; |
138 | struct kfd_perf_properties *perf; |
139 | |
140 | list_del(&dev->list); |
141 | |
142 | while (dev->mem_props.next != &dev->mem_props) { |
143 | mem = container_of(dev->mem_props.next, |
144 | struct kfd_mem_properties, list); |
145 | list_del(&mem->list); |
146 | kfree(mem); |
147 | } |
148 | |
149 | while (dev->cache_props.next != &dev->cache_props) { |
150 | cache = container_of(dev->cache_props.next, |
151 | struct kfd_cache_properties, list); |
152 | list_del(&cache->list); |
153 | kfree(cache); |
154 | } |
155 | |
156 | while (dev->io_link_props.next != &dev->io_link_props) { |
157 | iolink = container_of(dev->io_link_props.next, |
158 | struct kfd_iolink_properties, list); |
159 | list_del(&iolink->list); |
160 | kfree(iolink); |
161 | } |
162 | |
163 | while (dev->perf_props.next != &dev->perf_props) { |
164 | perf = container_of(dev->perf_props.next, |
165 | struct kfd_perf_properties, list); |
166 | list_del(&perf->list); |
167 | kfree(perf); |
168 | } |
169 | |
170 | kfree(dev); |
171 | } |
172 | |
173 | void kfd_release_topology_device_list(struct list_head *device_list) |
174 | { |
175 | struct kfd_topology_device *dev; |
176 | |
177 | while (!list_empty(device_list)) { |
178 | dev = list_first_entry(device_list, |
179 | struct kfd_topology_device, list); |
180 | kfd_release_topology_device(dev); |
181 | } |
182 | } |
183 | |
184 | static void kfd_release_live_view(void) |
185 | { |
186 | kfd_release_topology_device_list(&topology_device_list); |
187 | memset(&sys_props, 0, sizeof(sys_props)); |
188 | } |
189 | |
190 | struct kfd_topology_device *kfd_create_topology_device( |
191 | struct list_head *device_list) |
192 | { |
193 | struct kfd_topology_device *dev; |
194 | |
195 | dev = kfd_alloc_struct(dev); |
196 | if (!dev) { |
197 | pr_err("No memory to allocate a topology device" ); |
198 | return NULL; |
199 | } |
200 | |
201 | INIT_LIST_HEAD(&dev->mem_props); |
202 | INIT_LIST_HEAD(&dev->cache_props); |
203 | INIT_LIST_HEAD(&dev->io_link_props); |
204 | INIT_LIST_HEAD(&dev->perf_props); |
205 | |
206 | list_add_tail(&dev->list, device_list); |
207 | |
208 | return dev; |
209 | } |
210 | |
211 | |
212 | #define sysfs_show_gen_prop(buffer, fmt, ...) \ |
213 | snprintf(buffer, PAGE_SIZE, "%s"fmt, buffer, __VA_ARGS__) |
214 | #define sysfs_show_32bit_prop(buffer, name, value) \ |
215 | sysfs_show_gen_prop(buffer, "%s %u\n", name, value) |
216 | #define sysfs_show_64bit_prop(buffer, name, value) \ |
217 | sysfs_show_gen_prop(buffer, "%s %llu\n", name, value) |
218 | #define sysfs_show_32bit_val(buffer, value) \ |
219 | sysfs_show_gen_prop(buffer, "%u\n", value) |
220 | #define sysfs_show_str_val(buffer, value) \ |
221 | sysfs_show_gen_prop(buffer, "%s\n", value) |
222 | |
223 | static ssize_t sysprops_show(struct kobject *kobj, struct attribute *attr, |
224 | char *buffer) |
225 | { |
226 | ssize_t ret; |
227 | |
228 | /* Making sure that the buffer is an empty string */ |
229 | buffer[0] = 0; |
230 | |
231 | if (attr == &sys_props.attr_genid) { |
232 | ret = sysfs_show_32bit_val(buffer, sys_props.generation_count); |
233 | } else if (attr == &sys_props.attr_props) { |
234 | sysfs_show_64bit_prop(buffer, "platform_oem" , |
235 | sys_props.platform_oem); |
236 | sysfs_show_64bit_prop(buffer, "platform_id" , |
237 | sys_props.platform_id); |
238 | ret = sysfs_show_64bit_prop(buffer, "platform_rev" , |
239 | sys_props.platform_rev); |
240 | } else { |
241 | ret = -EINVAL; |
242 | } |
243 | |
244 | return ret; |
245 | } |
246 | |
247 | static void kfd_topology_kobj_release(struct kobject *kobj) |
248 | { |
249 | kfree(kobj); |
250 | } |
251 | |
252 | static const struct sysfs_ops sysprops_ops = { |
253 | .show = sysprops_show, |
254 | }; |
255 | |
256 | static struct kobj_type sysprops_type = { |
257 | .release = kfd_topology_kobj_release, |
258 | .sysfs_ops = &sysprops_ops, |
259 | }; |
260 | |
261 | static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr, |
262 | char *buffer) |
263 | { |
264 | ssize_t ret; |
265 | struct kfd_iolink_properties *iolink; |
266 | |
267 | /* Making sure that the buffer is an empty string */ |
268 | buffer[0] = 0; |
269 | |
270 | iolink = container_of(attr, struct kfd_iolink_properties, attr); |
271 | sysfs_show_32bit_prop(buffer, "type" , iolink->iolink_type); |
272 | sysfs_show_32bit_prop(buffer, "version_major" , iolink->ver_maj); |
273 | sysfs_show_32bit_prop(buffer, "version_minor" , iolink->ver_min); |
274 | sysfs_show_32bit_prop(buffer, "node_from" , iolink->node_from); |
275 | sysfs_show_32bit_prop(buffer, "node_to" , iolink->node_to); |
276 | sysfs_show_32bit_prop(buffer, "weight" , iolink->weight); |
277 | sysfs_show_32bit_prop(buffer, "min_latency" , iolink->min_latency); |
278 | sysfs_show_32bit_prop(buffer, "max_latency" , iolink->max_latency); |
279 | sysfs_show_32bit_prop(buffer, "min_bandwidth" , iolink->min_bandwidth); |
280 | sysfs_show_32bit_prop(buffer, "max_bandwidth" , iolink->max_bandwidth); |
281 | sysfs_show_32bit_prop(buffer, "recommended_transfer_size" , |
282 | iolink->rec_transfer_size); |
283 | ret = sysfs_show_32bit_prop(buffer, "flags" , iolink->flags); |
284 | |
285 | return ret; |
286 | } |
287 | |
288 | static const struct sysfs_ops iolink_ops = { |
289 | .show = iolink_show, |
290 | }; |
291 | |
292 | static struct kobj_type iolink_type = { |
293 | .release = kfd_topology_kobj_release, |
294 | .sysfs_ops = &iolink_ops, |
295 | }; |
296 | |
297 | static ssize_t mem_show(struct kobject *kobj, struct attribute *attr, |
298 | char *buffer) |
299 | { |
300 | ssize_t ret; |
301 | struct kfd_mem_properties *mem; |
302 | |
303 | /* Making sure that the buffer is an empty string */ |
304 | buffer[0] = 0; |
305 | |
306 | mem = container_of(attr, struct kfd_mem_properties, attr); |
307 | sysfs_show_32bit_prop(buffer, "heap_type" , mem->heap_type); |
308 | sysfs_show_64bit_prop(buffer, "size_in_bytes" , mem->size_in_bytes); |
309 | sysfs_show_32bit_prop(buffer, "flags" , mem->flags); |
310 | sysfs_show_32bit_prop(buffer, "width" , mem->width); |
311 | ret = sysfs_show_32bit_prop(buffer, "mem_clk_max" , mem->mem_clk_max); |
312 | |
313 | return ret; |
314 | } |
315 | |
316 | static const struct sysfs_ops mem_ops = { |
317 | .show = mem_show, |
318 | }; |
319 | |
320 | static struct kobj_type mem_type = { |
321 | .release = kfd_topology_kobj_release, |
322 | .sysfs_ops = &mem_ops, |
323 | }; |
324 | |
325 | static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr, |
326 | char *buffer) |
327 | { |
328 | ssize_t ret; |
329 | uint32_t i, j; |
330 | struct kfd_cache_properties *cache; |
331 | |
332 | /* Making sure that the buffer is an empty string */ |
333 | buffer[0] = 0; |
334 | |
335 | cache = container_of(attr, struct kfd_cache_properties, attr); |
336 | sysfs_show_32bit_prop(buffer, "processor_id_low" , |
337 | cache->processor_id_low); |
338 | sysfs_show_32bit_prop(buffer, "level" , cache->cache_level); |
339 | sysfs_show_32bit_prop(buffer, "size" , cache->cache_size); |
340 | sysfs_show_32bit_prop(buffer, "cache_line_size" , cache->cacheline_size); |
341 | sysfs_show_32bit_prop(buffer, "cache_lines_per_tag" , |
342 | cache->cachelines_per_tag); |
343 | sysfs_show_32bit_prop(buffer, "association" , cache->cache_assoc); |
344 | sysfs_show_32bit_prop(buffer, "latency" , cache->cache_latency); |
345 | sysfs_show_32bit_prop(buffer, "type" , cache->cache_type); |
346 | snprintf(buffer, PAGE_SIZE, "%ssibling_map " , buffer); |
347 | for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++) |
348 | for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) { |
349 | /* Check each bit */ |
350 | if (cache->sibling_map[i] & (1 << j)) |
ret = snprintf(buffer, PAGE_SIZE,
"%s%d%s", buffer, 1, ",");
else
ret = snprintf(buffer, PAGE_SIZE,
"%s%d%s", buffer, 0, ",");
356 | } |
357 | /* Replace the last "," with end of line */ |
*(buffer + strlen(buffer) - 1) = '\n';
359 | return ret; |
360 | } |
361 | |
362 | static const struct sysfs_ops cache_ops = { |
363 | .show = kfd_cache_show, |
364 | }; |
365 | |
366 | static struct kobj_type cache_type = { |
367 | .release = kfd_topology_kobj_release, |
368 | .sysfs_ops = &cache_ops, |
369 | }; |
370 | |
371 | /****** Sysfs of Performance Counters ******/ |
372 | |
373 | struct kfd_perf_attr { |
374 | struct kobj_attribute attr; |
375 | uint32_t data; |
376 | }; |
377 | |
378 | static ssize_t perf_show(struct kobject *kobj, struct kobj_attribute *attrs, |
379 | char *buf) |
380 | { |
381 | struct kfd_perf_attr *attr; |
382 | |
383 | buf[0] = 0; |
384 | attr = container_of(attrs, struct kfd_perf_attr, attr); |
385 | if (!attr->data) /* invalid data for PMC */ |
386 | return 0; |
387 | else |
388 | return sysfs_show_32bit_val(buf, attr->data); |
389 | } |
390 | |
391 | #define KFD_PERF_DESC(_name, _data) \ |
392 | { \ |
393 | .attr = __ATTR(_name, 0444, perf_show, NULL), \ |
394 | .data = _data, \ |
395 | } |
396 | |
397 | static struct kfd_perf_attr perf_attr_iommu[] = { |
398 | KFD_PERF_DESC(max_concurrent, 0), |
399 | KFD_PERF_DESC(num_counters, 0), |
400 | KFD_PERF_DESC(counter_ids, 0), |
401 | }; |
402 | /****************************************/ |
403 | |
404 | static ssize_t node_show(struct kobject *kobj, struct attribute *attr, |
405 | char *buffer) |
406 | { |
407 | struct kfd_topology_device *dev; |
408 | char public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE]; |
409 | uint32_t i; |
410 | uint32_t log_max_watch_addr; |
411 | |
412 | /* Making sure that the buffer is an empty string */ |
413 | buffer[0] = 0; |
414 | |
415 | if (strcmp(attr->name, "gpu_id" ) == 0) { |
416 | dev = container_of(attr, struct kfd_topology_device, |
417 | attr_gpuid); |
418 | return sysfs_show_32bit_val(buffer, dev->gpu_id); |
419 | } |
420 | |
421 | if (strcmp(attr->name, "name" ) == 0) { |
422 | dev = container_of(attr, struct kfd_topology_device, |
423 | attr_name); |
424 | for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE; i++) { |
425 | public_name[i] = |
426 | (char)dev->node_props.marketing_name[i]; |
427 | if (dev->node_props.marketing_name[i] == 0) |
428 | break; |
429 | } |
430 | public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1] = 0x0; |
431 | return sysfs_show_str_val(buffer, public_name); |
432 | } |
433 | |
434 | dev = container_of(attr, struct kfd_topology_device, |
435 | attr_props); |
436 | sysfs_show_32bit_prop(buffer, "cpu_cores_count" , |
437 | dev->node_props.cpu_cores_count); |
438 | sysfs_show_32bit_prop(buffer, "simd_count" , |
439 | dev->node_props.simd_count); |
440 | sysfs_show_32bit_prop(buffer, "mem_banks_count" , |
441 | dev->node_props.mem_banks_count); |
442 | sysfs_show_32bit_prop(buffer, "caches_count" , |
443 | dev->node_props.caches_count); |
444 | sysfs_show_32bit_prop(buffer, "io_links_count" , |
445 | dev->node_props.io_links_count); |
446 | sysfs_show_32bit_prop(buffer, "cpu_core_id_base" , |
447 | dev->node_props.cpu_core_id_base); |
448 | sysfs_show_32bit_prop(buffer, "simd_id_base" , |
449 | dev->node_props.simd_id_base); |
450 | sysfs_show_32bit_prop(buffer, "max_waves_per_simd" , |
451 | dev->node_props.max_waves_per_simd); |
452 | sysfs_show_32bit_prop(buffer, "lds_size_in_kb" , |
453 | dev->node_props.lds_size_in_kb); |
454 | sysfs_show_32bit_prop(buffer, "gds_size_in_kb" , |
455 | dev->node_props.gds_size_in_kb); |
456 | sysfs_show_32bit_prop(buffer, "wave_front_size" , |
457 | dev->node_props.wave_front_size); |
458 | sysfs_show_32bit_prop(buffer, "array_count" , |
459 | dev->node_props.array_count); |
460 | sysfs_show_32bit_prop(buffer, "simd_arrays_per_engine" , |
461 | dev->node_props.simd_arrays_per_engine); |
462 | sysfs_show_32bit_prop(buffer, "cu_per_simd_array" , |
463 | dev->node_props.cu_per_simd_array); |
464 | sysfs_show_32bit_prop(buffer, "simd_per_cu" , |
465 | dev->node_props.simd_per_cu); |
466 | sysfs_show_32bit_prop(buffer, "max_slots_scratch_cu" , |
467 | dev->node_props.max_slots_scratch_cu); |
468 | sysfs_show_32bit_prop(buffer, "vendor_id" , |
469 | dev->node_props.vendor_id); |
470 | sysfs_show_32bit_prop(buffer, "device_id" , |
471 | dev->node_props.device_id); |
472 | sysfs_show_32bit_prop(buffer, "location_id" , |
473 | dev->node_props.location_id); |
474 | sysfs_show_32bit_prop(buffer, "drm_render_minor" , |
475 | dev->node_props.drm_render_minor); |
476 | sysfs_show_64bit_prop(buffer, "hive_id" , |
477 | dev->node_props.hive_id); |
478 | |
479 | if (dev->gpu) { |
480 | log_max_watch_addr = |
481 | __ilog2_u32(dev->gpu->device_info->num_of_watch_points); |
482 | |
483 | if (log_max_watch_addr) { |
484 | dev->node_props.capability |= |
485 | HSA_CAP_WATCH_POINTS_SUPPORTED; |
486 | |
487 | dev->node_props.capability |= |
488 | ((log_max_watch_addr << |
489 | HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT) & |
490 | HSA_CAP_WATCH_POINTS_TOTALBITS_MASK); |
491 | } |
492 | |
493 | if (dev->gpu->device_info->asic_family == CHIP_TONGA) |
494 | dev->node_props.capability |= |
495 | HSA_CAP_AQL_QUEUE_DOUBLE_MAP; |
496 | |
497 | sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute" , |
498 | dev->node_props.max_engine_clk_fcompute); |
499 | |
500 | sysfs_show_64bit_prop(buffer, "local_mem_size" , |
501 | (unsigned long long int) 0); |
502 | |
503 | sysfs_show_32bit_prop(buffer, "fw_version" , |
504 | dev->gpu->mec_fw_version); |
505 | sysfs_show_32bit_prop(buffer, "capability" , |
506 | dev->node_props.capability); |
507 | sysfs_show_32bit_prop(buffer, "sdma_fw_version" , |
508 | dev->gpu->sdma_fw_version); |
509 | } |
510 | |
return sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute",
cpufreq_quick_get_max(0)/1000);
513 | } |
514 | |
515 | static const struct sysfs_ops node_ops = { |
516 | .show = node_show, |
517 | }; |
518 | |
519 | static struct kobj_type node_type = { |
520 | .release = kfd_topology_kobj_release, |
521 | .sysfs_ops = &node_ops, |
522 | }; |
523 | |
524 | static void kfd_remove_sysfs_file(struct kobject *kobj, struct attribute *attr) |
525 | { |
526 | sysfs_remove_file(kobj, attr); |
527 | kobject_del(kobj); |
528 | kobject_put(kobj); |
529 | } |
530 | |
531 | static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) |
532 | { |
533 | struct kfd_iolink_properties *iolink; |
534 | struct kfd_cache_properties *cache; |
535 | struct kfd_mem_properties *mem; |
536 | struct kfd_perf_properties *perf; |
537 | |
538 | if (dev->kobj_iolink) { |
539 | list_for_each_entry(iolink, &dev->io_link_props, list) |
540 | if (iolink->kobj) { |
541 | kfd_remove_sysfs_file(iolink->kobj, |
542 | &iolink->attr); |
543 | iolink->kobj = NULL; |
544 | } |
545 | kobject_del(dev->kobj_iolink); |
546 | kobject_put(dev->kobj_iolink); |
547 | dev->kobj_iolink = NULL; |
548 | } |
549 | |
550 | if (dev->kobj_cache) { |
551 | list_for_each_entry(cache, &dev->cache_props, list) |
552 | if (cache->kobj) { |
553 | kfd_remove_sysfs_file(cache->kobj, |
554 | &cache->attr); |
555 | cache->kobj = NULL; |
556 | } |
557 | kobject_del(dev->kobj_cache); |
558 | kobject_put(dev->kobj_cache); |
559 | dev->kobj_cache = NULL; |
560 | } |
561 | |
562 | if (dev->kobj_mem) { |
563 | list_for_each_entry(mem, &dev->mem_props, list) |
564 | if (mem->kobj) { |
565 | kfd_remove_sysfs_file(mem->kobj, &mem->attr); |
566 | mem->kobj = NULL; |
567 | } |
568 | kobject_del(dev->kobj_mem); |
569 | kobject_put(dev->kobj_mem); |
570 | dev->kobj_mem = NULL; |
571 | } |
572 | |
573 | if (dev->kobj_perf) { |
574 | list_for_each_entry(perf, &dev->perf_props, list) { |
575 | kfree(perf->attr_group); |
576 | perf->attr_group = NULL; |
577 | } |
578 | kobject_del(dev->kobj_perf); |
579 | kobject_put(dev->kobj_perf); |
580 | dev->kobj_perf = NULL; |
581 | } |
582 | |
583 | if (dev->kobj_node) { |
584 | sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid); |
585 | sysfs_remove_file(dev->kobj_node, &dev->attr_name); |
586 | sysfs_remove_file(dev->kobj_node, &dev->attr_props); |
587 | kobject_del(dev->kobj_node); |
588 | kobject_put(dev->kobj_node); |
589 | dev->kobj_node = NULL; |
590 | } |
591 | } |
592 | |
593 | static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, |
594 | uint32_t id) |
595 | { |
596 | struct kfd_iolink_properties *iolink; |
597 | struct kfd_cache_properties *cache; |
598 | struct kfd_mem_properties *mem; |
599 | struct kfd_perf_properties *perf; |
600 | int ret; |
601 | uint32_t i, num_attrs; |
602 | struct attribute **attrs; |
603 | |
604 | if (WARN_ON(dev->kobj_node)) |
605 | return -EEXIST; |
606 | |
607 | /* |
608 | * Creating the sysfs folders |
609 | */ |
610 | dev->kobj_node = kfd_alloc_struct(dev->kobj_node); |
611 | if (!dev->kobj_node) |
612 | return -ENOMEM; |
613 | |
614 | ret = kobject_init_and_add(dev->kobj_node, &node_type, |
615 | sys_props.kobj_nodes, "%d" , id); |
616 | if (ret < 0) |
617 | return ret; |
618 | |
dev->kobj_mem = kobject_create_and_add("mem_banks", dev->kobj_node);
if (!dev->kobj_mem)
return -ENOMEM;

dev->kobj_cache = kobject_create_and_add("caches", dev->kobj_node);
if (!dev->kobj_cache)
return -ENOMEM;

dev->kobj_iolink = kobject_create_and_add("io_links", dev->kobj_node);
if (!dev->kobj_iolink)
return -ENOMEM;

dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node);
632 | if (!dev->kobj_perf) |
633 | return -ENOMEM; |
634 | |
635 | /* |
636 | * Creating sysfs files for node properties |
637 | */ |
638 | dev->attr_gpuid.name = "gpu_id" ; |
639 | dev->attr_gpuid.mode = KFD_SYSFS_FILE_MODE; |
640 | sysfs_attr_init(&dev->attr_gpuid); |
641 | dev->attr_name.name = "name" ; |
642 | dev->attr_name.mode = KFD_SYSFS_FILE_MODE; |
643 | sysfs_attr_init(&dev->attr_name); |
644 | dev->attr_props.name = "properties" ; |
645 | dev->attr_props.mode = KFD_SYSFS_FILE_MODE; |
646 | sysfs_attr_init(&dev->attr_props); |
647 | ret = sysfs_create_file(dev->kobj_node, &dev->attr_gpuid); |
648 | if (ret < 0) |
649 | return ret; |
650 | ret = sysfs_create_file(dev->kobj_node, &dev->attr_name); |
651 | if (ret < 0) |
652 | return ret; |
653 | ret = sysfs_create_file(dev->kobj_node, &dev->attr_props); |
654 | if (ret < 0) |
655 | return ret; |
656 | |
657 | i = 0; |
658 | list_for_each_entry(mem, &dev->mem_props, list) { |
659 | mem->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); |
660 | if (!mem->kobj) |
661 | return -ENOMEM; |
662 | ret = kobject_init_and_add(mem->kobj, &mem_type, |
663 | dev->kobj_mem, "%d" , i); |
664 | if (ret < 0) |
665 | return ret; |
666 | |
667 | mem->attr.name = "properties" ; |
668 | mem->attr.mode = KFD_SYSFS_FILE_MODE; |
669 | sysfs_attr_init(&mem->attr); |
670 | ret = sysfs_create_file(mem->kobj, &mem->attr); |
671 | if (ret < 0) |
672 | return ret; |
673 | i++; |
674 | } |
675 | |
676 | i = 0; |
677 | list_for_each_entry(cache, &dev->cache_props, list) { |
678 | cache->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); |
679 | if (!cache->kobj) |
680 | return -ENOMEM; |
681 | ret = kobject_init_and_add(cache->kobj, &cache_type, |
682 | dev->kobj_cache, "%d" , i); |
683 | if (ret < 0) |
684 | return ret; |
685 | |
686 | cache->attr.name = "properties" ; |
687 | cache->attr.mode = KFD_SYSFS_FILE_MODE; |
688 | sysfs_attr_init(&cache->attr); |
689 | ret = sysfs_create_file(cache->kobj, &cache->attr); |
690 | if (ret < 0) |
691 | return ret; |
692 | i++; |
693 | } |
694 | |
695 | i = 0; |
696 | list_for_each_entry(iolink, &dev->io_link_props, list) { |
697 | iolink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); |
698 | if (!iolink->kobj) |
699 | return -ENOMEM; |
700 | ret = kobject_init_and_add(iolink->kobj, &iolink_type, |
701 | dev->kobj_iolink, "%d" , i); |
702 | if (ret < 0) |
703 | return ret; |
704 | |
705 | iolink->attr.name = "properties" ; |
706 | iolink->attr.mode = KFD_SYSFS_FILE_MODE; |
707 | sysfs_attr_init(&iolink->attr); |
708 | ret = sysfs_create_file(iolink->kobj, &iolink->attr); |
709 | if (ret < 0) |
710 | return ret; |
711 | i++; |
712 | } |
713 | |
714 | /* All hardware blocks have the same number of attributes. */ |
715 | num_attrs = ARRAY_SIZE(perf_attr_iommu); |
716 | list_for_each_entry(perf, &dev->perf_props, list) { |
717 | perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr) |
718 | * num_attrs + sizeof(struct attribute_group), |
719 | GFP_KERNEL); |
720 | if (!perf->attr_group) |
721 | return -ENOMEM; |
722 | |
723 | attrs = (struct attribute **)(perf->attr_group + 1); |
724 | if (!strcmp(perf->block_name, "iommu" )) { |
725 | /* Information of IOMMU's num_counters and counter_ids is shown |
726 | * under /sys/bus/event_source/devices/amd_iommu. We don't |
727 | * duplicate here. |
728 | */ |
729 | perf_attr_iommu[0].data = perf->max_concurrent; |
730 | for (i = 0; i < num_attrs; i++) |
731 | attrs[i] = &perf_attr_iommu[i].attr.attr; |
732 | } |
733 | perf->attr_group->name = perf->block_name; |
734 | perf->attr_group->attrs = attrs; |
735 | ret = sysfs_create_group(dev->kobj_perf, perf->attr_group); |
736 | if (ret < 0) |
737 | return ret; |
738 | } |
739 | |
740 | return 0; |
741 | } |
742 | |
743 | /* Called with write topology lock acquired */ |
744 | static int kfd_build_sysfs_node_tree(void) |
745 | { |
746 | struct kfd_topology_device *dev; |
747 | int ret; |
748 | uint32_t i = 0; |
749 | |
750 | list_for_each_entry(dev, &topology_device_list, list) { |
751 | ret = kfd_build_sysfs_node_entry(dev, i); |
752 | if (ret < 0) |
753 | return ret; |
754 | i++; |
755 | } |
756 | |
757 | return 0; |
758 | } |
759 | |
760 | /* Called with write topology lock acquired */ |
761 | static void kfd_remove_sysfs_node_tree(void) |
762 | { |
763 | struct kfd_topology_device *dev; |
764 | |
765 | list_for_each_entry(dev, &topology_device_list, list) |
766 | kfd_remove_sysfs_node_entry(dev); |
767 | } |
768 | |
769 | static int kfd_topology_update_sysfs(void) |
770 | { |
771 | int ret; |
772 | |
773 | pr_info("Creating topology SYSFS entries\n" ); |
774 | if (!sys_props.kobj_topology) { |
775 | sys_props.kobj_topology = |
776 | kfd_alloc_struct(sys_props.kobj_topology); |
777 | if (!sys_props.kobj_topology) |
778 | return -ENOMEM; |
779 | |
780 | ret = kobject_init_and_add(sys_props.kobj_topology, |
781 | &sysprops_type, &kfd_device->kobj, |
782 | "topology" ); |
783 | if (ret < 0) |
784 | return ret; |
785 | |
sys_props.kobj_nodes = kobject_create_and_add("nodes",
787 | sys_props.kobj_topology); |
788 | if (!sys_props.kobj_nodes) |
789 | return -ENOMEM; |
790 | |
791 | sys_props.attr_genid.name = "generation_id" ; |
792 | sys_props.attr_genid.mode = KFD_SYSFS_FILE_MODE; |
793 | sysfs_attr_init(&sys_props.attr_genid); |
794 | ret = sysfs_create_file(sys_props.kobj_topology, |
795 | &sys_props.attr_genid); |
796 | if (ret < 0) |
797 | return ret; |
798 | |
799 | sys_props.attr_props.name = "system_properties" ; |
800 | sys_props.attr_props.mode = KFD_SYSFS_FILE_MODE; |
801 | sysfs_attr_init(&sys_props.attr_props); |
802 | ret = sysfs_create_file(sys_props.kobj_topology, |
803 | &sys_props.attr_props); |
804 | if (ret < 0) |
805 | return ret; |
806 | } |
807 | |
808 | kfd_remove_sysfs_node_tree(); |
809 | |
810 | return kfd_build_sysfs_node_tree(); |
811 | } |
812 | |
813 | static void kfd_topology_release_sysfs(void) |
814 | { |
815 | kfd_remove_sysfs_node_tree(); |
816 | if (sys_props.kobj_topology) { |
817 | sysfs_remove_file(sys_props.kobj_topology, |
818 | &sys_props.attr_genid); |
819 | sysfs_remove_file(sys_props.kobj_topology, |
820 | &sys_props.attr_props); |
821 | if (sys_props.kobj_nodes) { |
822 | kobject_del(sys_props.kobj_nodes); |
823 | kobject_put(sys_props.kobj_nodes); |
824 | sys_props.kobj_nodes = NULL; |
825 | } |
826 | kobject_del(sys_props.kobj_topology); |
827 | kobject_put(sys_props.kobj_topology); |
828 | sys_props.kobj_topology = NULL; |
829 | } |
830 | } |
831 | |
832 | /* Called with write topology_lock acquired */ |
833 | static void kfd_topology_update_device_list(struct list_head *temp_list, |
834 | struct list_head *master_list) |
835 | { |
836 | while (!list_empty(temp_list)) { |
837 | list_move_tail(temp_list->next, master_list); |
838 | sys_props.num_devices++; |
839 | } |
840 | } |
841 | |
842 | static void kfd_debug_print_topology(void) |
843 | { |
844 | struct kfd_topology_device *dev; |
845 | |
846 | down_read(&topology_lock); |
847 | |
848 | dev = list_last_entry(&topology_device_list, |
849 | struct kfd_topology_device, list); |
850 | if (dev) { |
851 | if (dev->node_props.cpu_cores_count && |
852 | dev->node_props.simd_count) { |
853 | pr_info("Topology: Add APU node [0x%0x:0x%0x]\n" , |
854 | dev->node_props.device_id, |
855 | dev->node_props.vendor_id); |
856 | } else if (dev->node_props.cpu_cores_count) |
857 | pr_info("Topology: Add CPU node\n" ); |
858 | else if (dev->node_props.simd_count) |
859 | pr_info("Topology: Add dGPU node [0x%0x:0x%0x]\n" , |
860 | dev->node_props.device_id, |
861 | dev->node_props.vendor_id); |
862 | } |
863 | up_read(&topology_lock); |
864 | } |
865 | |
/* Helper function for initializing platform_xx members of
867 | * kfd_system_properties. Uses OEM info from the last CPU/APU node. |
868 | */ |
869 | static void kfd_update_system_properties(void) |
870 | { |
871 | struct kfd_topology_device *dev; |
872 | |
873 | down_read(&topology_lock); |
874 | dev = list_last_entry(&topology_device_list, |
875 | struct kfd_topology_device, list); |
876 | if (dev) { |
877 | sys_props.platform_id = |
878 | (*((uint64_t *)dev->oem_id)) & CRAT_OEMID_64BIT_MASK; |
879 | sys_props.platform_oem = *((uint64_t *)dev->oem_table_id); |
880 | sys_props.platform_rev = dev->oem_revision; |
881 | } |
882 | up_read(&topology_lock); |
883 | } |
884 | |
885 | static void find_system_memory(const struct dmi_header *dm, |
886 | void *private) |
887 | { |
888 | struct kfd_mem_properties *mem; |
889 | u16 mem_width, mem_clock; |
890 | struct kfd_topology_device *kdev = |
891 | (struct kfd_topology_device *)private; |
892 | const u8 *dmi_data = (const u8 *)(dm + 1); |
893 | |
/* SMBIOS Type 17 (Memory Device): dmi_data points past the 4-byte
* header, so +0x6 is the Data Width field and +0x11 the Configured
* Memory Clock Speed in MHz
*/
if (dm->type == DMI_ENTRY_MEM_DEVICE && dm->length >= 0x15) {
mem_width = (u16)(*(const u16 *)(dmi_data + 0x6));
mem_clock = (u16)(*(const u16 *)(dmi_data + 0x11));
897 | list_for_each_entry(mem, &kdev->mem_props, list) { |
898 | if (mem_width != 0xFFFF && mem_width != 0) |
899 | mem->width = mem_width; |
900 | if (mem_clock != 0) |
901 | mem->mem_clk_max = mem_clock; |
902 | } |
903 | } |
904 | } |
905 | |
906 | /* |
907 | * Performance counters information is not part of CRAT but we would like to |
908 | * put them in the sysfs under topology directory for Thunk to get the data. |
909 | * This function is called before updating the sysfs. |
910 | */ |
911 | static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev) |
912 | { |
913 | /* These are the only counters supported so far */ |
914 | return kfd_iommu_add_perf_counters(kdev); |
915 | } |
916 | |
917 | /* kfd_add_non_crat_information - Add information that is not currently |
918 | * defined in CRAT but is necessary for KFD topology |
* @kdev - topology device to which additional info is added
920 | */ |
921 | static void kfd_add_non_crat_information(struct kfd_topology_device *kdev) |
922 | { |
923 | /* Check if CPU only node. */ |
924 | if (!kdev->gpu) { |
925 | /* Add system memory information */ |
926 | dmi_walk(find_system_memory, kdev); |
927 | } |
928 | /* TODO: For GPU node, rearrange code from kfd_topology_add_device */ |
929 | } |
930 | |
931 | /* kfd_is_acpi_crat_invalid - CRAT from ACPI is valid only for AMD APU devices. |
932 | * Ignore CRAT for all other devices. AMD APU is identified if both CPU |
933 | * and GPU cores are present. |
934 | * @device_list - topology device list created by parsing ACPI CRAT table. |
* @return - TRUE if invalid, FALSE if valid.
936 | */ |
937 | static bool kfd_is_acpi_crat_invalid(struct list_head *device_list) |
938 | { |
939 | struct kfd_topology_device *dev; |
940 | |
941 | list_for_each_entry(dev, device_list, list) { |
942 | if (dev->node_props.cpu_cores_count && |
943 | dev->node_props.simd_count) |
944 | return false; |
945 | } |
946 | pr_info("Ignoring ACPI CRAT on non-APU system\n" ); |
947 | return true; |
948 | } |
949 | |
950 | int kfd_topology_init(void) |
951 | { |
952 | void *crat_image = NULL; |
953 | size_t image_size = 0; |
954 | int ret; |
955 | struct list_head temp_topology_device_list; |
956 | int cpu_only_node = 0; |
957 | struct kfd_topology_device *kdev; |
958 | int proximity_domain; |
959 | |
/* topology_device_list - Master list of all topology devices
* temp_topology_device_list - temporary list created while parsing CRAT
* or VCRAT. Once parsing is complete, the contents of this list are
* moved to topology_device_list
*/
965 | |
/* Initialize the heads of both lists */
967 | INIT_LIST_HEAD(&topology_device_list); |
968 | INIT_LIST_HEAD(&temp_topology_device_list); |
969 | init_rwsem(&topology_lock); |
970 | |
971 | memset(&sys_props, 0, sizeof(sys_props)); |
972 | |
973 | /* Proximity domains in ACPI CRAT tables start counting at |
974 | * 0. The same should be true for virtual CRAT tables created |
975 | * at this stage. GPUs added later in kfd_topology_add_device |
976 | * use a counter. |
977 | */ |
978 | proximity_domain = 0; |
979 | |
980 | /* |
981 | * Get the CRAT image from the ACPI. If ACPI doesn't have one |
982 | * or if ACPI CRAT is invalid create a virtual CRAT. |
* NOTE: The current implementation expects all AMD APUs to have
* CRAT. If no CRAT is available, the system is assumed to be CPU-only
*/
986 | ret = kfd_create_crat_image_acpi(&crat_image, &image_size); |
987 | if (!ret) { |
988 | ret = kfd_parse_crat_table(crat_image, |
989 | &temp_topology_device_list, |
990 | proximity_domain); |
991 | if (ret || |
992 | kfd_is_acpi_crat_invalid(&temp_topology_device_list)) { |
993 | kfd_release_topology_device_list( |
994 | &temp_topology_device_list); |
995 | kfd_destroy_crat_image(crat_image); |
996 | crat_image = NULL; |
997 | } |
998 | } |
999 | |
1000 | if (!crat_image) { |
1001 | ret = kfd_create_crat_image_virtual(&crat_image, &image_size, |
1002 | COMPUTE_UNIT_CPU, NULL, |
1003 | proximity_domain); |
1004 | cpu_only_node = 1; |
1005 | if (ret) { |
1006 | pr_err("Error creating VCRAT table for CPU\n" ); |
1007 | return ret; |
1008 | } |
1009 | |
1010 | ret = kfd_parse_crat_table(crat_image, |
1011 | &temp_topology_device_list, |
1012 | proximity_domain); |
1013 | if (ret) { |
1014 | pr_err("Error parsing VCRAT table for CPU\n" ); |
1015 | goto err; |
1016 | } |
1017 | } |
1018 | |
1019 | kdev = list_first_entry(&temp_topology_device_list, |
1020 | struct kfd_topology_device, list); |
1021 | kfd_add_perf_to_topology(kdev); |
1022 | |
1023 | down_write(&topology_lock); |
1024 | kfd_topology_update_device_list(&temp_topology_device_list, |
1025 | &topology_device_list); |
1026 | atomic_set(&topology_crat_proximity_domain, sys_props.num_devices-1); |
1027 | ret = kfd_topology_update_sysfs(); |
1028 | up_write(&topology_lock); |
1029 | |
1030 | if (!ret) { |
1031 | sys_props.generation_count++; |
1032 | kfd_update_system_properties(); |
1033 | kfd_debug_print_topology(); |
1034 | pr_info("Finished initializing topology\n" ); |
1035 | } else |
1036 | pr_err("Failed to update topology in sysfs ret=%d\n" , ret); |
1037 | |
1038 | /* For nodes with GPU, this information gets added |
1039 | * when GPU is detected (kfd_topology_add_device). |
1040 | */ |
1041 | if (cpu_only_node) { |
1042 | /* Add additional information to CPU only node created above */ |
1043 | down_write(&topology_lock); |
1044 | kdev = list_first_entry(&topology_device_list, |
1045 | struct kfd_topology_device, list); |
1046 | up_write(&topology_lock); |
1047 | kfd_add_non_crat_information(kdev); |
1048 | } |
1049 | |
1050 | err: |
1051 | kfd_destroy_crat_image(crat_image); |
1052 | return ret; |
1053 | } |
1054 | |
1055 | void kfd_topology_shutdown(void) |
1056 | { |
1057 | down_write(&topology_lock); |
1058 | kfd_topology_release_sysfs(); |
1059 | kfd_release_live_view(); |
1060 | up_write(&topology_lock); |
1061 | } |
1062 | |
/* kfd_generate_gpu_id - Derive a 32-bit ID for @gpu by XOR-folding
* hash_32() over its PCI identity (devfn, subsystem IDs, device ID,
* bus number) and local memory size, so the ID is stable for the same
* device in the same slot
*/
static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
1064 | { |
1065 | uint32_t hashout; |
1066 | uint32_t buf[7]; |
1067 | uint64_t local_mem_size; |
1068 | int i; |
1069 | struct kfd_local_mem_info local_mem_info; |
1070 | |
1071 | if (!gpu) |
1072 | return 0; |
1073 | |
1074 | amdgpu_amdkfd_get_local_mem_info(gpu->kgd, &local_mem_info); |
1075 | |
1076 | local_mem_size = local_mem_info.local_mem_size_private + |
1077 | local_mem_info.local_mem_size_public; |
1078 | |
1079 | buf[0] = gpu->pdev->devfn; |
1080 | buf[1] = gpu->pdev->subsystem_vendor; |
1081 | buf[2] = gpu->pdev->subsystem_device; |
1082 | buf[3] = gpu->pdev->device; |
1083 | buf[4] = gpu->pdev->bus->number; |
1084 | buf[5] = lower_32_bits(local_mem_size); |
1085 | buf[6] = upper_32_bits(local_mem_size); |
1086 | |
1087 | for (i = 0, hashout = 0; i < 7; i++) |
1088 | hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH); |
1089 | |
1090 | return hashout; |
1091 | } |

/* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If
1093 | * the GPU device is not already present in the topology device |
1094 | * list then return NULL. This means a new topology device has to |
1095 | * be created for this GPU. |
1096 | */ |
1097 | static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) |
1098 | { |
1099 | struct kfd_topology_device *dev; |
1100 | struct kfd_topology_device *out_dev = NULL; |
1101 | |
1102 | down_write(&topology_lock); |
1103 | list_for_each_entry(dev, &topology_device_list, list) { |
1104 | /* Discrete GPUs need their own topology device list |
1105 | * entries. Don't assign them to CPU/APU nodes. |
1106 | */ |
1107 | if (!gpu->device_info->needs_iommu_device && |
1108 | dev->node_props.cpu_cores_count) |
1109 | continue; |
1110 | |
1111 | if (!dev->gpu && (dev->node_props.simd_count > 0)) { |
1112 | dev->gpu = gpu; |
1113 | out_dev = dev; |
1114 | break; |
1115 | } |
1116 | } |
1117 | up_write(&topology_lock); |
1118 | return out_dev; |
1119 | } |
1120 | |
1121 | static void kfd_notify_gpu_change(uint32_t gpu_id, int arrival) |
1122 | { |
1123 | /* |
1124 | * TODO: Generate an event for thunk about the arrival/removal |
1125 | * of the GPU |
1126 | */ |
1127 | } |
1128 | |
1129 | /* kfd_fill_mem_clk_max_info - Since CRAT doesn't have memory clock info, |
1130 | * patch this after CRAT parsing. |
1131 | */ |
1132 | static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev) |
1133 | { |
1134 | struct kfd_mem_properties *mem; |
1135 | struct kfd_local_mem_info local_mem_info; |
1136 | |
1137 | if (!dev) |
1138 | return; |
1139 | |
/* Currently, the amdgpu driver (amdgpu_mc) deals only with GPUs that
* have a single bank of VRAM local memory.
* For dGPUs - VCRAT reports only one bank of Local Memory
* For APUs - If CRAT from ACPI reports more than one bank, then
* all the banks will report the same mem_clk_max information
*/
1146 | amdgpu_amdkfd_get_local_mem_info(dev->gpu->kgd, &local_mem_info); |
1147 | |
1148 | list_for_each_entry(mem, &dev->mem_props, list) |
1149 | mem->mem_clk_max = local_mem_info.mem_clk_max; |
1150 | } |
1151 | |
/* kfd_fill_iolink_non_crat_info - Fill in iolink flags that CRAT doesn't
* provide: links are marked non-atomic when the GPU was not granted PCIe
* atomics or does not advertise 32/64-bit atomic completion in DEVCAP2
*/
static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
1153 | { |
1154 | struct kfd_iolink_properties *link, *cpu_link; |
1155 | struct kfd_topology_device *cpu_dev; |
1156 | uint32_t cap; |
1157 | uint32_t cpu_flag = CRAT_IOLINK_FLAGS_ENABLED; |
1158 | uint32_t flag = CRAT_IOLINK_FLAGS_ENABLED; |
1159 | |
1160 | if (!dev || !dev->gpu) |
1161 | return; |
1162 | |
1163 | pcie_capability_read_dword(dev->gpu->pdev, |
1164 | PCI_EXP_DEVCAP2, &cap); |
1165 | |
1166 | if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | |
1167 | PCI_EXP_DEVCAP2_ATOMIC_COMP64))) |
1168 | cpu_flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | |
1169 | CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; |
1170 | |
1171 | if (!dev->gpu->pci_atomic_requested || |
1172 | dev->gpu->device_info->asic_family == CHIP_HAWAII) |
1173 | flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | |
1174 | CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; |
1175 | |
/* The GPU only creates direct links, so apply the flag settings to all of them */
1177 | list_for_each_entry(link, &dev->io_link_props, list) { |
1178 | link->flags = flag; |
1179 | cpu_dev = kfd_topology_device_by_proximity_domain( |
1180 | link->node_to); |
1181 | if (cpu_dev) { |
1182 | list_for_each_entry(cpu_link, |
1183 | &cpu_dev->io_link_props, list) |
1184 | if (cpu_link->node_to == link->node_from) |
1185 | cpu_link->flags = cpu_flag; |
1186 | } |
1187 | } |
1188 | } |
1189 | |
1190 | int kfd_topology_add_device(struct kfd_dev *gpu) |
1191 | { |
1192 | uint32_t gpu_id; |
1193 | struct kfd_topology_device *dev; |
1194 | struct kfd_cu_info cu_info; |
1195 | int res = 0; |
1196 | struct list_head temp_topology_device_list; |
1197 | void *crat_image = NULL; |
1198 | size_t image_size = 0; |
1199 | int proximity_domain; |
1200 | |
1201 | INIT_LIST_HEAD(&temp_topology_device_list); |
1202 | |
1203 | gpu_id = kfd_generate_gpu_id(gpu); |
1204 | |
1205 | pr_debug("Adding new GPU (ID: 0x%x) to topology\n" , gpu_id); |
1206 | |
1207 | proximity_domain = atomic_inc_return(&topology_crat_proximity_domain); |
1208 | |
/* Check to see if this gpu device exists in the topology_device_list.
* If so, assign the gpu to that device,
* else create a Virtual CRAT for this gpu device and then parse that
* CRAT to create a new topology device. Once created, assign the gpu
* to that topology device
*/
1215 | dev = kfd_assign_gpu(gpu); |
1216 | if (!dev) { |
1217 | res = kfd_create_crat_image_virtual(&crat_image, &image_size, |
1218 | COMPUTE_UNIT_GPU, gpu, |
1219 | proximity_domain); |
1220 | if (res) { |
1221 | pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n" , |
1222 | gpu_id); |
1223 | return res; |
1224 | } |
1225 | res = kfd_parse_crat_table(crat_image, |
1226 | &temp_topology_device_list, |
1227 | proximity_domain); |
1228 | if (res) { |
1229 | pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n" , |
1230 | gpu_id); |
1231 | goto err; |
1232 | } |
1233 | |
1234 | down_write(&topology_lock); |
1235 | kfd_topology_update_device_list(&temp_topology_device_list, |
1236 | &topology_device_list); |
1237 | |
1238 | /* Update the SYSFS tree, since we added another topology |
1239 | * device |
1240 | */ |
1241 | res = kfd_topology_update_sysfs(); |
1242 | up_write(&topology_lock); |
1243 | |
1244 | if (!res) |
1245 | sys_props.generation_count++; |
1246 | else |
1247 | pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n" , |
1248 | gpu_id, res); |
1249 | dev = kfd_assign_gpu(gpu); |
1250 | if (WARN_ON(!dev)) { |
1251 | res = -ENODEV; |
1252 | goto err; |
1253 | } |
1254 | } |
1255 | |
1256 | dev->gpu_id = gpu_id; |
1257 | gpu->id = gpu_id; |
1258 | |
1259 | /* TODO: Move the following lines to function |
1260 | * kfd_add_non_crat_information |
1261 | */ |
1262 | |
1263 | /* Fill-in additional information that is not available in CRAT but |
1264 | * needed for the topology |
1265 | */ |
1266 | |
1267 | amdgpu_amdkfd_get_cu_info(dev->gpu->kgd, &cu_info); |
1268 | dev->node_props.simd_arrays_per_engine = |
1269 | cu_info.num_shader_arrays_per_engine; |
1270 | |
1271 | dev->node_props.vendor_id = gpu->pdev->vendor; |
1272 | dev->node_props.device_id = gpu->pdev->device; |
1273 | dev->node_props.location_id = PCI_DEVID(gpu->pdev->bus->number, |
1274 | gpu->pdev->devfn); |
1275 | dev->node_props.max_engine_clk_fcompute = |
1276 | amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->kgd); |
1277 | dev->node_props.max_engine_clk_ccompute = |
1278 | cpufreq_quick_get_max(0) / 1000; |
1279 | dev->node_props.drm_render_minor = |
1280 | gpu->shared_resources.drm_render_minor; |
1281 | |
1282 | dev->node_props.hive_id = gpu->hive_id; |
1283 | |
1284 | kfd_fill_mem_clk_max_info(dev); |
1285 | kfd_fill_iolink_non_crat_info(dev); |
1286 | |
1287 | switch (dev->gpu->device_info->asic_family) { |
1288 | case CHIP_KAVERI: |
1289 | case CHIP_HAWAII: |
1290 | case CHIP_TONGA: |
1291 | dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_PRE_1_0 << |
1292 | HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & |
1293 | HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); |
1294 | break; |
1295 | case CHIP_CARRIZO: |
1296 | case CHIP_FIJI: |
1297 | case CHIP_POLARIS10: |
1298 | case CHIP_POLARIS11: |
1299 | case CHIP_POLARIS12: |
1300 | pr_debug("Adding doorbell packet type capability\n" ); |
1301 | dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 << |
1302 | HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & |
1303 | HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); |
1304 | break; |
1305 | case CHIP_VEGA10: |
1306 | case CHIP_VEGA12: |
1307 | case CHIP_VEGA20: |
1308 | case CHIP_RAVEN: |
1309 | dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << |
1310 | HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & |
1311 | HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); |
1312 | break; |
1313 | default: |
1314 | WARN(1, "Unexpected ASIC family %u" , |
1315 | dev->gpu->device_info->asic_family); |
1316 | } |
1317 | |
1318 | /* Fix errors in CZ CRAT. |
1319 | * simd_count: Carrizo CRAT reports wrong simd_count, probably |
1320 | * because it doesn't consider masked out CUs |
1321 | * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd |
1322 | * capability flag: Carrizo CRAT doesn't report IOMMU flags |
1323 | */ |
1324 | if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) { |
1325 | dev->node_props.simd_count = |
1326 | cu_info.simd_per_cu * cu_info.cu_active_number; |
1327 | dev->node_props.max_waves_per_simd = 10; |
1328 | dev->node_props.capability |= HSA_CAP_ATS_PRESENT; |
1329 | } |
1330 | |
1331 | kfd_debug_print_topology(); |
1332 | |
1333 | if (!res) |
1334 | kfd_notify_gpu_change(gpu_id, 1); |
1335 | err: |
1336 | kfd_destroy_crat_image(crat_image); |
1337 | return res; |
1338 | } |
1339 | |
1340 | int kfd_topology_remove_device(struct kfd_dev *gpu) |
1341 | { |
1342 | struct kfd_topology_device *dev, *tmp; |
1343 | uint32_t gpu_id; |
1344 | int res = -ENODEV; |
1345 | |
1346 | down_write(&topology_lock); |
1347 | |
1348 | list_for_each_entry_safe(dev, tmp, &topology_device_list, list) |
1349 | if (dev->gpu == gpu) { |
1350 | gpu_id = dev->gpu_id; |
1351 | kfd_remove_sysfs_node_entry(dev); |
1352 | kfd_release_topology_device(dev); |
1353 | sys_props.num_devices--; |
1354 | res = 0; |
1355 | if (kfd_topology_update_sysfs() < 0) |
1356 | kfd_topology_release_sysfs(); |
1357 | break; |
1358 | } |
1359 | |
1360 | up_write(&topology_lock); |
1361 | |
1362 | if (!res) |
1363 | kfd_notify_gpu_change(gpu_id, 0); |
1364 | |
1365 | return res; |
1366 | } |
1367 | |
/* kfd_topology_enum_kfd_devices - Enumerate through all devices in KFD
* topology. If a GPU device is found at index @idx, a valid kfd_dev
* pointer is returned through @kdev
* Return - 0: On success (@kdev will be NULL for non GPU nodes)
* -1: If end of list
*/
1374 | int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev) |
1375 | { |
1376 | |
1377 | struct kfd_topology_device *top_dev; |
1378 | uint8_t device_idx = 0; |
1379 | |
1380 | *kdev = NULL; |
1381 | down_read(&topology_lock); |
1382 | |
1383 | list_for_each_entry(top_dev, &topology_device_list, list) { |
1384 | if (device_idx == idx) { |
1385 | *kdev = top_dev->gpu; |
1386 | up_read(&topology_lock); |
1387 | return 0; |
1388 | } |
1389 | |
1390 | device_idx++; |
1391 | } |
1392 | |
1393 | up_read(&topology_lock); |
1394 | |
1395 | return -1; |
1396 | |
1397 | } |
1398 | |
1399 | static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask) |
1400 | { |
1401 | int first_cpu_of_numa_node; |
1402 | |
1403 | if (!cpumask || cpumask == cpu_none_mask) |
1404 | return -1; |
1405 | first_cpu_of_numa_node = cpumask_first(cpumask); |
1406 | if (first_cpu_of_numa_node >= nr_cpu_ids) |
1407 | return -1; |
1408 | #ifdef CONFIG_X86_64 |
1409 | return cpu_data(first_cpu_of_numa_node).apicid; |
1410 | #else |
1411 | return first_cpu_of_numa_node; |
1412 | #endif |
1413 | } |
1414 | |
1415 | /* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor |
1416 | * of the given NUMA node (numa_node_id) |
1417 | * Return -1 on failure |
1418 | */ |
1419 | int kfd_numa_node_to_apic_id(int numa_node_id) |
1420 | { |
1421 | if (numa_node_id == -1) { |
1422 | pr_warn("Invalid NUMA Node. Use online CPU mask\n" ); |
1423 | return kfd_cpumask_to_apic_id(cpu_online_mask); |
1424 | } |
1425 | return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id)); |
1426 | } |
1427 | |
1428 | #if defined(CONFIG_DEBUG_FS) |
1429 | |
1430 | int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data) |
1431 | { |
1432 | struct kfd_topology_device *dev; |
1433 | unsigned int i = 0; |
1434 | int r = 0; |
1435 | |
1436 | down_read(&topology_lock); |
1437 | |
1438 | list_for_each_entry(dev, &topology_device_list, list) { |
1439 | if (!dev->gpu) { |
1440 | i++; |
1441 | continue; |
1442 | } |
1443 | |
1444 | seq_printf(m, "Node %u, gpu_id %x:\n" , i++, dev->gpu->id); |
1445 | r = dqm_debugfs_hqds(m, dev->gpu->dqm); |
1446 | if (r) |
1447 | break; |
1448 | } |
1449 | |
1450 | up_read(&topology_lock); |
1451 | |
1452 | return r; |
1453 | } |
1454 | |
1455 | int kfd_debugfs_rls_by_device(struct seq_file *m, void *data) |
1456 | { |
1457 | struct kfd_topology_device *dev; |
1458 | unsigned int i = 0; |
1459 | int r = 0; |
1460 | |
1461 | down_read(&topology_lock); |
1462 | |
1463 | list_for_each_entry(dev, &topology_device_list, list) { |
1464 | if (!dev->gpu) { |
1465 | i++; |
1466 | continue; |
1467 | } |
1468 | |
1469 | seq_printf(m, "Node %u, gpu_id %x:\n" , i++, dev->gpu->id); |
1470 | r = pm_debugfs_runlist(m, &dev->gpu->dqm->packets); |
1471 | if (r) |
1472 | break; |
1473 | } |
1474 | |
1475 | up_read(&topology_lock); |
1476 | |
1477 | return r; |
1478 | } |
1479 | |
1480 | #endif |
1481 | |