// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2014-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>
#include <linux/list.h>
#include "kfd_device_queue_manager.h"
#include "kfd_priv.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"

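/*
 * Look up the process_queue_node that owns queue id @qid. Both user
 * queues (pqn->q) and kernel queues such as the DIQ (pqn->kq) are
 * matched. Returns NULL if no queue of this process uses @qid.
 */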
static inline struct process_queue_node *get_queue_by_qid(
			struct process_queue_manager *pqm, unsigned int qid)
{
	struct process_queue_node *pqn;

	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
		if ((pqn->q && pqn->q->properties.queue_id == qid) ||
		    (pqn->kq && pqn->kq->queue->properties.queue_id == qid))
			return pqn;
	}

	return NULL;
}

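/*
 * Reserve a specific queue slot, used when restoring a queue with a
 * predetermined qid from a CRIU checkpoint. Returns -EINVAL for an
 * out-of-range qid and -ENOSPC if the slot is already in use.
 */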
static int assign_queue_slot_by_qid(struct process_queue_manager *pqm,
				    unsigned int qid)
{
	if (qid >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
		return -EINVAL;

	if (__test_and_set_bit(qid, pqm->queue_slot_bitmap)) {
		pr_err("Cannot create new queue because requested qid(%u) is in use\n", qid);
		return -ENOSPC;
	}

	return 0;
}

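/*
 * Find and reserve the lowest unused queue slot in the per-process
 * bitmap. On success, the slot index is returned through @qid.
 */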
static int find_available_queue_slot(struct process_queue_manager *pqm,
					unsigned int *qid)
{
	unsigned long found;

	found = find_first_zero_bit(pqm->queue_slot_bitmap,
			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);

	pr_debug("The new slot id %lu\n", found);

	if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
		pr_info("Cannot open more queues for process with pasid 0x%x\n",
				pqm->process->pasid);
		return -ENOMEM;
	}

	set_bit(found, pqm->queue_slot_bitmap);
	*qid = found;

	return 0;
}

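/**
 * kfd_process_dequeue_from_device - Evict a process's queues from one device
 * @pdd: process device data of the device being torn down
 *
 * Asks the device queue manager to terminate all queues of the process
 * on this device. Subsequent calls are no-ops thanks to the
 * already_dequeued flag.
 */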
void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
{
	struct kfd_node *dev = pdd->dev;

	if (pdd->already_dequeued)
		return;

	dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
	pdd->already_dequeued = true;
}

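/**
 * pqm_set_gws - Assign or release the GWS allocation of a queue
 * @pqm: process queue manager of the calling process
 * @qid: id of the queue to modify
 * @gws: GWS buffer object to assign, or NULL to release
 *
 * Only one queue per process may have GWS assigned. On devices that do
 * not use GWS for global wave synchronization (GFX 9.4.3 and
 * MES-enabled devices), the queue's gws pointer is merely set to a
 * sentinel so that cooperative groups still see a GWS assignment.
 *
 * Return: 0 on success, negative errno on failure.
 */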
int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
			void *gws)
{
	struct kfd_node *dev = NULL;
	struct process_queue_node *pqn;
	struct kfd_process_device *pdd;
	struct kgd_mem *mem = NULL;
	int ret;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_err("Queue id does not match any known queue\n");
		return -EINVAL;
	}

	if (pqn->q)
		dev = pqn->q->device;
	if (WARN_ON(!dev))
		return -ENODEV;

	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return -EINVAL;
	}

	/* Only one queue per process may have GWS assigned */
	if (gws && pdd->qpd.num_gws)
		return -EBUSY;

	if (!gws && pdd->qpd.num_gws == 0)
		return -EINVAL;

	if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && !dev->kfd->shared_resources.enable_mes) {
		if (gws)
			ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
				gws, &mem);
		else
			ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info,
				pqn->q->gws);
		if (unlikely(ret))
			return ret;
		pqn->q->gws = mem;
	} else {
		/*
		 * Intentionally set GWS to a non-NULL value
		 * for devices that do not use GWS for global wave
		 * synchronization but require the formality
		 * of setting GWS for cooperative groups.
		 */
		pqn->q->gws = gws ? ERR_PTR(-ENOMEM) : NULL;
	}

	pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;

	return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
							pqn->q, NULL);
}

void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
{
	int i;

	for (i = 0; i < p->n_pdds; i++)
		kfd_process_dequeue_from_device(p->pdds[i]);
}

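/**
 * pqm_init - Initialize a process queue manager
 * @pqm: the structure to initialize
 * @p: the process that owns it
 *
 * Sets up the queue list and allocates the queue slot bitmap.
 *
 * Return: 0 on success, -ENOMEM if the bitmap allocation fails.
 */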
int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
{
	INIT_LIST_HEAD(&pqm->queues);
	pqm->queue_slot_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
					       GFP_KERNEL);
	if (!pqm->queue_slot_bitmap)
		return -ENOMEM;
	pqm->process = p;

	return 0;
}

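/**
 * pqm_uninit - Tear down a process queue manager
 * @pqm: the structure to tear down
 *
 * Drops any remaining GWS references, removes the procfs entries of
 * all remaining queues and frees the queue nodes and the slot bitmap.
 */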
void pqm_uninit(struct process_queue_manager *pqm)
{
	struct process_queue_node *pqn, *next;

	list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
		if (pqn->q && pqn->q->gws &&
		    KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
		    !pqn->q->device->kfd->shared_resources.enable_mes)
			amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
				pqn->q->gws);
		kfd_procfs_del_queue(pqn->q);
		uninit_queue(pqn->q);
		list_del(&pqn->process_queue_list);
		kfree(pqn);
	}

	bitmap_free(pqm->queue_slot_bitmap);
	pqm->queue_slot_bitmap = NULL;
}

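/*
 * Common initialization for user mode queues (compute and SDMA).
 * The doorbell is mapped and initialized in user space and the VMID is
 * assigned by the DQM, so neither is set up here. On MES-enabled
 * devices a gang context buffer is allocated as well.
 */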
static int init_user_queue(struct process_queue_manager *pqm,
				struct kfd_node *dev, struct queue **q,
				struct queue_properties *q_properties,
				struct file *f, struct amdgpu_bo *wptr_bo,
				unsigned int qid)
{
	int retval;

	/* Doorbell initialized in user space */
	q_properties->doorbell_ptr = NULL;
	q_properties->exception_status = KFD_EC_MASK(EC_QUEUE_NEW);

	/* let DQM handle it */
	q_properties->vmid = 0;
	q_properties->queue_id = qid;

	retval = init_queue(q, q_properties);
	if (retval != 0)
		return retval;

	(*q)->device = dev;
	(*q)->process = pqm->process;

	if (dev->kfd->shared_resources.enable_mes) {
		retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
						AMDGPU_MES_GANG_CTX_SIZE,
						&(*q)->gang_ctx_bo,
						&(*q)->gang_ctx_gpu_addr,
						&(*q)->gang_ctx_cpu_ptr,
						false);
		if (retval) {
			pr_err("failed to allocate gang context bo\n");
			goto cleanup;
		}
		memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
		(*q)->wptr_bo = wptr_bo;
	}

	pr_debug("PQM After init queue\n");
	return 0;

cleanup:
	uninit_queue(*q);
	*q = NULL;
	return retval;
}

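/**
 * pqm_create_queue - Create a queue and register it with the DQM
 * @pqm: process queue manager
 * @dev: device on which to create the queue
 * @f: associated file; NULL when restoring from a CRIU checkpoint
 * @properties: requested queue properties
 * @qid: receives the id of the new queue
 * @wptr_bo: write pointer BO, attached to the queue on MES-enabled devices
 * @q_data: CRIU private data when restoring, NULL otherwise
 * @restore_mqd: saved MQD contents when restoring, NULL otherwise
 * @restore_ctl_stack: saved control stack when restoring, NULL otherwise
 * @p_doorbell_offset_in_process: if non-NULL, receives the doorbell offset
 *	within the doorbell page, in bytes
 *
 * Handles compute, SDMA/SDMA-XGMI and DIQ queue types, enforcing the
 * per-process queue limit and, for HWS without oversubscription, the
 * device-wide process and queue limits.
 *
 * Return: 0 on success, a negative value on failure.
 */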
int pqm_create_queue(struct process_queue_manager *pqm,
			    struct kfd_node *dev,
			    struct file *f,
			    struct queue_properties *properties,
			    unsigned int *qid,
			    struct amdgpu_bo *wptr_bo,
			    const struct kfd_criu_queue_priv_data *q_data,
			    const void *restore_mqd,
			    const void *restore_ctl_stack,
			    uint32_t *p_doorbell_offset_in_process)
{
	int retval;
	struct kfd_process_device *pdd;
	struct queue *q;
	struct process_queue_node *pqn;
	struct kernel_queue *kq;
	enum kfd_queue_type type = properties->type;
	unsigned int max_queues = 127; /* HWS limit */

	/*
	 * On GFX 9.4.3, increase the number of queues that
	 * can be created to 255. No HWS limit on GFX 9.4.3.
	 */
	if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3))
		max_queues = 255;

	q = NULL;
	kq = NULL;

	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return -1;
	}

	/*
	 * For debug processes, verify that they are within the static queues
	 * limit, currently set to half of the total available HQD slots.
	 * If we are just about to create the DIQ, the is_debug flag is not
	 * set yet, hence we also check the queue type.
	 */
	if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ))
		max_queues = dev->kfd->device_info.max_no_of_hqd/2;

	if (pdd->qpd.queue_count >= max_queues)
		return -ENOSPC;

	if (q_data) {
		retval = assign_queue_slot_by_qid(pqm, q_data->q_id);
		*qid = q_data->q_id;
	} else
		retval = find_available_queue_slot(pqm, qid);

	if (retval != 0)
		return retval;

	if (list_empty(&pdd->qpd.queues_list) &&
	    list_empty(&pdd->qpd.priv_queue_list))
		dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);

	pqn = kzalloc(sizeof(*pqn), GFP_KERNEL);
	if (!pqn) {
		retval = -ENOMEM;
		goto err_allocate_pqn;
	}

	switch (type) {
	case KFD_QUEUE_TYPE_SDMA:
	case KFD_QUEUE_TYPE_SDMA_XGMI:
		/* SDMA queues are always allocated statically no matter
		 * which scheduler mode is used. We also do not need to
		 * check whether an SDMA queue can be allocated here, because
		 * allocate_sdma_queue() in create_queue() has the
		 * corresponding check logic.
		 */
		retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
		if (retval != 0)
			goto err_create_queue;
		pqn->q = q;
		pqn->kq = NULL;
		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
						    restore_mqd, restore_ctl_stack);
		print_queue(q);
		break;

	case KFD_QUEUE_TYPE_COMPUTE:
		/* check if there is oversubscription */
		if ((dev->dqm->sched_policy ==
		     KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
		((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) ||
		(dev->dqm->active_queue_count >= get_cp_queues_num(dev->dqm)))) {
			pr_debug("Over-subscription is not allowed when amdkfd.sched_policy == 1\n");
			retval = -EPERM;
			goto err_create_queue;
		}

		retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
		if (retval != 0)
			goto err_create_queue;
		pqn->q = q;
		pqn->kq = NULL;
		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
						    restore_mqd, restore_ctl_stack);
		print_queue(q);
		break;
	case KFD_QUEUE_TYPE_DIQ:
		kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ);
		if (!kq) {
			retval = -ENOMEM;
			goto err_create_queue;
		}
		kq->queue->properties.queue_id = *qid;
		pqn->kq = kq;
		pqn->q = NULL;
		retval = kfd_process_drain_interrupts(pdd);
		if (retval)
			break;

		retval = dev->dqm->ops.create_kernel_queue(dev->dqm,
							   kq, &pdd->qpd);
		break;
	default:
		WARN(1, "Invalid queue type %d", type);
		retval = -EINVAL;
	}

	if (retval != 0) {
		pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n",
		       pqm->process->pasid, type, retval);
		goto err_create_queue;
	}

	if (q && p_doorbell_offset_in_process) {
		/* Return the doorbell offset within the doorbell page
		 * to the caller so it can be passed up to user mode
		 * (in bytes).
		 * relative doorbell index = Absolute doorbell index -
		 * absolute index of first doorbell in the page.
		 */
		uint32_t first_db_index = amdgpu_doorbell_index_on_bar(pdd->dev->adev,
								       pdd->qpd.proc_doorbells,
								       0,
								       pdd->dev->kfd->device_info.doorbell_size);

		*p_doorbell_offset_in_process = (q->properties.doorbell_off
						- first_db_index) * sizeof(uint32_t);
	}

	pr_debug("PQM After DQM create queue\n");

	list_add(&pqn->process_queue_list, &pqm->queues);

	if (q) {
		pr_debug("PQM done creating queue\n");
		kfd_procfs_add_queue(q);
		print_queue_properties(&q->properties);
	}

	return retval;

err_create_queue:
	uninit_queue(q);
	if (kq)
		kernel_queue_uninit(kq, false);
	kfree(pqn);
err_allocate_pqn:
	/* if the queues list is empty, unregister the process from the device */
	clear_bit(*qid, pqm->queue_slot_bitmap);
	if (list_empty(&pdd->qpd.queues_list) &&
	    list_empty(&pdd->qpd.priv_queue_list))
		dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd);
	return retval;
}

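/**
 * pqm_destroy_queue - Destroy a queue and release its resources
 * @pqm: process queue manager
 * @qid: id of the queue to destroy
 *
 * Destroys either a kernel queue (DIQ) or a user queue, releasing GWS,
 * MES gang context and write pointer BOs as applicable, and frees the
 * queue slot. If this was the last queue of the process on the device,
 * the process is unregistered from the DQM. On -ETIME from the DQM the
 * tear-down still completes and the error is returned to the caller.
 */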
int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
{
	struct process_queue_node *pqn;
	struct kfd_process_device *pdd;
	struct device_queue_manager *dqm;
	struct kfd_node *dev;
	int retval;

	dqm = NULL;

	retval = 0;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_err("Queue id does not match any known queue\n");
		return -EINVAL;
	}

	dev = NULL;
	if (pqn->kq)
		dev = pqn->kq->dev;
	if (pqn->q)
		dev = pqn->q->device;
	if (WARN_ON(!dev))
		return -ENODEV;

	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return -1;
	}

	if (pqn->kq) {
		/* destroy kernel queue (DIQ) */
		dqm = pqn->kq->dev->dqm;
		dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
		kernel_queue_uninit(pqn->kq, false);
	}

	if (pqn->q) {
		kfd_procfs_del_queue(pqn->q);
		dqm = pqn->q->device->dqm;
		retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
		if (retval) {
			pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n",
			       pqm->process->pasid,
			       pqn->q->properties.queue_id, retval);
			if (retval != -ETIME)
				goto err_destroy_queue;
		}

		if (pqn->q->gws) {
			if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
			    !dev->kfd->shared_resources.enable_mes)
				amdgpu_amdkfd_remove_gws_from_process(
						pqm->process->kgd_process_info,
						pqn->q->gws);
			pdd->qpd.num_gws = 0;
		}

		if (dev->kfd->shared_resources.enable_mes) {
			amdgpu_amdkfd_free_gtt_mem(dev->adev,
						   pqn->q->gang_ctx_bo);
			if (pqn->q->wptr_bo)
				amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo);
		}
		uninit_queue(pqn->q);
	}

	list_del(&pqn->process_queue_list);
	kfree(pqn);
	clear_bit(qid, pqm->queue_slot_bitmap);

	if (list_empty(&pdd->qpd.queues_list) &&
	    list_empty(&pdd->qpd.priv_queue_list))
		dqm->ops.unregister_process(dqm, &pdd->qpd);

err_destroy_queue:
	return retval;
}

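/**
 * pqm_update_queue_properties - Update the ring properties of a queue
 * @pqm: process queue manager
 * @qid: id of the queue to update
 * @p: properties to apply (address, size, percent, priority, target XCC)
 *
 * Copies the updatable fields into the queue and asks the DQM to apply
 * them.
 *
 * Return: 0 on success, negative errno on failure.
 */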
int pqm_update_queue_properties(struct process_queue_manager *pqm,
				unsigned int qid, struct queue_properties *p)
{
	int retval;
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_debug("No queue %d exists for update operation\n", qid);
		return -EFAULT;
	}

	pqn->q->properties.queue_address = p->queue_address;
	pqn->q->properties.queue_size = p->queue_size;
	pqn->q->properties.queue_percent = p->queue_percent;
	pqn->q->properties.priority = p->priority;
	pqn->q->properties.pm4_target_xcc = p->pm4_target_xcc;

	retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
							pqn->q, NULL);
	if (retval != 0)
		return retval;

	return 0;
}

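/**
 * pqm_update_mqd - Update MQD-level state of a queue, such as the CU mask
 * @pqm: process queue manager
 * @qid: id of the queue to update
 * @minfo: MQD update info, may carry a user CU mask
 *
 * A user CU mask is rejected while the debugger workaround owns the
 * mask. On GFX10 and later, where CUs are grouped into WGPs, the mask
 * must enable CUs in adjacent pairs.
 *
 * Return: 0 on success, negative errno on failure.
 */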
int pqm_update_mqd(struct process_queue_manager *pqm,
				unsigned int qid, struct mqd_update_info *minfo)
{
	int retval;
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_debug("No queue %d exists for update operation\n", qid);
		return -EFAULT;
	}

	/* CUs are masked for debugger requirements so deny user mask */
	if (pqn->q->properties.is_dbg_wa && minfo && minfo->cu_mask.ptr)
		return -EBUSY;

	/* ASICs that have WGPs must enforce pairwise enabled mask checks. */
	if (minfo && minfo->cu_mask.ptr &&
			KFD_GC_VERSION(pqn->q->device) >= IP_VERSION(10, 0, 0)) {
		int i;

		for (i = 0; i < minfo->cu_mask.count; i += 2) {
			uint32_t cu_pair = (minfo->cu_mask.ptr[i / 32] >> (i % 32)) & 0x3;

			if (cu_pair && cu_pair != 0x3) {
				pr_debug("CUs must be adjacent pairwise enabled.\n");
				return -EINVAL;
			}
		}
	}

	retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
							pqn->q, minfo);
	if (retval != 0)
		return retval;

	if (minfo && minfo->cu_mask.ptr)
		pqn->q->properties.is_user_cu_masked = true;

	return 0;
}

struct kernel_queue *pqm_get_kernel_queue(
					struct process_queue_manager *pqm,
					unsigned int qid)
{
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (pqn && pqn->kq)
		return pqn->kq;

	return NULL;
}

struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
					unsigned int qid)
{
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	return pqn ? pqn->q : NULL;
}

int pqm_get_wave_state(struct process_queue_manager *pqm,
		       unsigned int qid,
		       void __user *ctl_stack,
		       u32 *ctl_stack_used_size,
		       u32 *save_area_used_size)
{
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_debug("amdkfd: No queue %d exists for operation\n",
			 qid);
		return -EFAULT;
	}

	return pqn->q->device->dqm->ops.get_wave_state(pqn->q->device->dqm,
						       pqn->q,
						       ctl_stack,
						       ctl_stack_used_size,
						       save_area_used_size);
}

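/**
 * pqm_get_queue_snapshot - Copy per-queue snapshot entries to user space
 * @pqm: process queue manager
 * @exception_clear_mask: exception status bits to clear while snapshotting
 * @buf: user buffer that receives the entries
 * @num_qss_entries: in: capacity of @buf in entries; out: number of user
 *	queues found
 * @entry_size: in: user-visible entry size; out: size actually copied
 *	per entry
 *
 * Walks all user queues under the process event_mutex. The returned
 * entry count may exceed the buffer capacity, which lets the caller
 * detect a truncated snapshot.
 */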
int pqm_get_queue_snapshot(struct process_queue_manager *pqm,
			   uint64_t exception_clear_mask,
			   void __user *buf,
			   int *num_qss_entries,
			   uint32_t *entry_size)
{
	struct process_queue_node *pqn;
	struct kfd_queue_snapshot_entry src;
	uint32_t tmp_entry_size = *entry_size, tmp_qss_entries = *num_qss_entries;
	int r = 0;

	*num_qss_entries = 0;
	if (!(*entry_size))
		return -EINVAL;

	*entry_size = min_t(size_t, *entry_size, sizeof(struct kfd_queue_snapshot_entry));
	mutex_lock(&pqm->process->event_mutex);

	memset(&src, 0, sizeof(src));

	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
		if (!pqn->q)
			continue;

		if (*num_qss_entries < tmp_qss_entries) {
			set_queue_snapshot_entry(pqn->q, exception_clear_mask, &src);

			if (copy_to_user(buf, &src, *entry_size)) {
				r = -EFAULT;
				break;
			}
			buf += tmp_entry_size;
		}
		*num_qss_entries += 1;
	}

	mutex_unlock(&pqm->process->event_mutex);
	return r;
}

static int get_queue_data_sizes(struct kfd_process_device *pdd,
				struct queue *q,
				uint32_t *mqd_size,
				uint32_t *ctl_stack_size)
{
	int ret;

	ret = pqm_get_queue_checkpoint_info(&pdd->process->pqm,
					    q->properties.queue_id,
					    mqd_size,
					    ctl_stack_size);
	if (ret)
		pr_err("Failed to get queue dump info (%d)\n", ret);

	return ret;
}

int kfd_process_get_queue_info(struct kfd_process *p,
			       uint32_t *num_queues,
			       uint64_t *priv_data_sizes)
{
	uint32_t extra_data_sizes = 0;
	struct queue *q;
	int i;
	int ret;

	*num_queues = 0;

	/* Run over all PDDs of the process */
	for (i = 0; i < p->n_pdds; i++) {
		struct kfd_process_device *pdd = p->pdds[i];

		list_for_each_entry(q, &pdd->qpd.queues_list, list) {
			if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
				q->properties.type == KFD_QUEUE_TYPE_SDMA ||
				q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
				uint32_t mqd_size, ctl_stack_size;

				*num_queues = *num_queues + 1;

				ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
				if (ret)
					return ret;

				extra_data_sizes += mqd_size + ctl_stack_size;
			} else {
				pr_err("Unsupported queue type (%d)\n", q->properties.type);
				return -EOPNOTSUPP;
			}
		}
	}
	*priv_data_sizes = extra_data_sizes +
				(*num_queues * sizeof(struct kfd_criu_queue_priv_data));

	return 0;
}

static int pqm_checkpoint_mqd(struct process_queue_manager *pqm,
			      unsigned int qid,
			      void *mqd,
			      void *ctl_stack)
{
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_debug("amdkfd: No queue %d exists for operation\n", qid);
		return -EFAULT;
	}

	if (!pqn->q->device->dqm->ops.checkpoint_mqd) {
		pr_err("amdkfd: queue dumping not supported on this device\n");
		return -EOPNOTSUPP;
	}

	return pqn->q->device->dqm->ops.checkpoint_mqd(pqn->q->device->dqm,
						       pqn->q, mqd, ctl_stack);
}

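/*
 * Fill the CRIU private data record of one queue. The record is laid
 * out as the priv_data header immediately followed by the MQD and the
 * control stack, both of which are checkpointed through the DQM.
 */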
static int criu_checkpoint_queue(struct kfd_process_device *pdd,
				 struct queue *q,
				 struct kfd_criu_queue_priv_data *q_data)
{
	uint8_t *mqd, *ctl_stack;
	int ret;

	mqd = (void *)(q_data + 1);
	ctl_stack = mqd + q_data->mqd_size;

	q_data->gpu_id = pdd->user_gpu_id;
	q_data->type = q->properties.type;
	q_data->format = q->properties.format;
	q_data->q_id = q->properties.queue_id;
	q_data->q_address = q->properties.queue_address;
	q_data->q_size = q->properties.queue_size;
	q_data->priority = q->properties.priority;
	q_data->q_percent = q->properties.queue_percent;
	q_data->read_ptr_addr = (uint64_t)q->properties.read_ptr;
	q_data->write_ptr_addr = (uint64_t)q->properties.write_ptr;
	q_data->doorbell_id = q->doorbell_id;

	q_data->sdma_id = q->sdma_id;

	q_data->eop_ring_buffer_address =
		q->properties.eop_ring_buffer_address;

	q_data->eop_ring_buffer_size = q->properties.eop_ring_buffer_size;

	q_data->ctx_save_restore_area_address =
		q->properties.ctx_save_restore_area_address;

	q_data->ctx_save_restore_area_size =
		q->properties.ctx_save_restore_area_size;

	q_data->gws = !!q->gws;

	ret = pqm_checkpoint_mqd(&pdd->process->pqm, q->properties.queue_id, mqd, ctl_stack);
	if (ret) {
		pr_err("Failed checkpoint queue_mqd (%d)\n", ret);
		return ret;
	}

	pr_debug("Dumping Queue: gpu_id:%x queue_id:%u\n", q_data->gpu_id, q_data->q_id);
	return ret;
}

static int criu_checkpoint_queues_device(struct kfd_process_device *pdd,
					 uint8_t __user *user_priv,
					 unsigned int *q_index,
					 uint64_t *queues_priv_data_offset)
{
	unsigned int q_private_data_size = 0;
	uint8_t *q_private_data = NULL; /* Local buffer to store individual queue private data */
	struct queue *q;
	int ret = 0;

	list_for_each_entry(q, &pdd->qpd.queues_list, list) {
		struct kfd_criu_queue_priv_data *q_data;
		uint64_t q_data_size;
		uint32_t mqd_size;
		uint32_t ctl_stack_size;

		if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE &&
			q->properties.type != KFD_QUEUE_TYPE_SDMA &&
			q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI) {

			pr_err("Unsupported queue type (%d)\n", q->properties.type);
			ret = -EOPNOTSUPP;
			break;
		}

		ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
		if (ret)
			break;

		q_data_size = sizeof(*q_data) + mqd_size + ctl_stack_size;

		/* Increase local buffer space if needed */
		if (q_private_data_size < q_data_size) {
			kfree(q_private_data);

			q_private_data = kzalloc(q_data_size, GFP_KERNEL);
			if (!q_private_data) {
				ret = -ENOMEM;
				break;
			}
			q_private_data_size = q_data_size;
		}

		q_data = (struct kfd_criu_queue_priv_data *)q_private_data;

		/* data stored in this order: priv_data, mqd, ctl_stack */
		q_data->mqd_size = mqd_size;
		q_data->ctl_stack_size = ctl_stack_size;

		ret = criu_checkpoint_queue(pdd, q, q_data);
		if (ret)
			break;

		q_data->object_type = KFD_CRIU_OBJECT_TYPE_QUEUE;

		ret = copy_to_user(user_priv + *queues_priv_data_offset,
				q_data, q_data_size);
		if (ret) {
			ret = -EFAULT;
			break;
		}
		*queues_priv_data_offset += q_data_size;
		*q_index = *q_index + 1;
	}

	kfree(q_private_data);

	return ret;
}

int kfd_criu_checkpoint_queues(struct kfd_process *p,
			 uint8_t __user *user_priv_data,
			 uint64_t *priv_data_offset)
{
	int ret = 0, pdd_index, q_index = 0;

	for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
		struct kfd_process_device *pdd = p->pdds[pdd_index];

		/*
		 * criu_checkpoint_queues_device will copy data to user and update q_index and
		 * queues_priv_data_offset
		 */
		ret = criu_checkpoint_queues_device(pdd, user_priv_data, &q_index,
						    priv_data_offset);

		if (ret)
			break;
	}

	return ret;
}

static void set_queue_properties_from_criu(struct queue_properties *qp,
					  struct kfd_criu_queue_priv_data *q_data)
{
	qp->is_interop = false;
	qp->queue_percent = q_data->q_percent;
	qp->priority = q_data->priority;
	qp->queue_address = q_data->q_address;
	qp->queue_size = q_data->q_size;
	qp->read_ptr = (uint32_t *) q_data->read_ptr_addr;
	qp->write_ptr = (uint32_t *) q_data->write_ptr_addr;
	qp->eop_ring_buffer_address = q_data->eop_ring_buffer_address;
	qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size;
	qp->ctx_save_restore_area_address = q_data->ctx_save_restore_area_address;
	qp->ctx_save_restore_area_size = q_data->ctx_save_restore_area_size;
	qp->ctl_stack_size = q_data->ctl_stack_size;
	qp->type = q_data->type;
	qp->format = q_data->format;
}

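/**
 * kfd_criu_restore_queue - Recreate one queue from CRIU checkpoint data
 * @p: the process being restored
 * @user_priv_ptr: user buffer holding the checkpointed private data
 * @priv_data_offset: offset into @user_priv_ptr, advanced as data is consumed
 * @max_priv_data_size: size of the user buffer, for bounds checking
 *
 * Reads the queue private data, MQD and control stack, validates their
 * sizes against @max_priv_data_size, then recreates the queue through
 * pqm_create_queue() and reattaches GWS if the queue had it.
 */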
int kfd_criu_restore_queue(struct kfd_process *p,
			   uint8_t __user *user_priv_ptr,
			   uint64_t *priv_data_offset,
			   uint64_t max_priv_data_size)
{
	uint8_t *mqd, *ctl_stack, *q_extra_data = NULL;
	struct kfd_criu_queue_priv_data *q_data;
	struct kfd_process_device *pdd;
	uint64_t q_extra_data_size;
	struct queue_properties qp;
	unsigned int queue_id;
	int ret = 0;

	if (*priv_data_offset + sizeof(*q_data) > max_priv_data_size)
		return -EINVAL;

	q_data = kmalloc(sizeof(*q_data), GFP_KERNEL);
	if (!q_data)
		return -ENOMEM;

	ret = copy_from_user(q_data, user_priv_ptr + *priv_data_offset, sizeof(*q_data));
	if (ret) {
		ret = -EFAULT;
		goto exit;
	}

	*priv_data_offset += sizeof(*q_data);
	q_extra_data_size = (uint64_t)q_data->ctl_stack_size + q_data->mqd_size;

	if (*priv_data_offset + q_extra_data_size > max_priv_data_size) {
		ret = -EINVAL;
		goto exit;
	}

	q_extra_data = kmalloc(q_extra_data_size, GFP_KERNEL);
	if (!q_extra_data) {
		ret = -ENOMEM;
		goto exit;
	}

	ret = copy_from_user(q_extra_data, user_priv_ptr + *priv_data_offset, q_extra_data_size);
	if (ret) {
		ret = -EFAULT;
		goto exit;
	}

	*priv_data_offset += q_extra_data_size;

	pdd = kfd_process_device_data_by_id(p, q_data->gpu_id);
	if (!pdd) {
		pr_err("Failed to get pdd\n");
		ret = -EINVAL;
		goto exit;
	}

	/* data stored in this order: mqd, ctl_stack */
	mqd = q_extra_data;
	ctl_stack = mqd + q_data->mqd_size;

	memset(&qp, 0, sizeof(qp));
	set_queue_properties_from_criu(&qp, q_data);

	print_queue_properties(&qp);

	ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, NULL, q_data, mqd, ctl_stack,
				NULL);
	if (ret) {
		pr_err("Failed to create new queue err:%d\n", ret);
		goto exit;
	}

	if (q_data->gws)
		ret = pqm_set_gws(&p->pqm, q_data->q_id, pdd->dev->gws);

exit:
	if (ret)
		pr_err("Failed to restore queue (%d)\n", ret);
	else
		pr_debug("Queue id %d was restored successfully\n", queue_id);

	kfree(q_data);
	kfree(q_extra_data);

	return ret;
}

int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
				  unsigned int qid,
				  uint32_t *mqd_size,
				  uint32_t *ctl_stack_size)
{
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_debug("amdkfd: No queue %d exists for operation\n", qid);
		return -EFAULT;
	}

	if (!pqn->q->device->dqm->ops.get_queue_checkpoint_info) {
		pr_err("amdkfd: queue dumping not supported on this device\n");
		return -EOPNOTSUPP;
	}

	pqn->q->device->dqm->ops.get_queue_checkpoint_info(pqn->q->device->dqm,
						       pqn->q, mqd_size,
						       ctl_stack_size);
	return 0;
}

#if defined(CONFIG_DEBUG_FS)

int pqm_debugfs_mqds(struct seq_file *m, void *data)
{
	struct process_queue_manager *pqm = data;
	struct process_queue_node *pqn;
	struct queue *q;
	enum KFD_MQD_TYPE mqd_type;
	struct mqd_manager *mqd_mgr;
	int r = 0, xcc, num_xccs = 1;
	void *mqd;
	uint64_t size = 0;

	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
		if (pqn->q) {
			q = pqn->q;
			switch (q->properties.type) {
			case KFD_QUEUE_TYPE_SDMA:
			case KFD_QUEUE_TYPE_SDMA_XGMI:
				seq_printf(m, "  SDMA queue on device %x\n",
					   q->device->id);
				mqd_type = KFD_MQD_TYPE_SDMA;
				break;
			case KFD_QUEUE_TYPE_COMPUTE:
				seq_printf(m, "  Compute queue on device %x\n",
					   q->device->id);
				mqd_type = KFD_MQD_TYPE_CP;
				num_xccs = NUM_XCC(q->device->xcc_mask);
				break;
			default:
				seq_printf(m,
				"  Bad user queue type %d on device %x\n",
					   q->properties.type, q->device->id);
				continue;
			}
			mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type];
			size = mqd_mgr->mqd_stride(mqd_mgr,
							&q->properties);
		} else if (pqn->kq) {
			q = pqn->kq->queue;
			mqd_mgr = pqn->kq->mqd_mgr;
			switch (q->properties.type) {
			case KFD_QUEUE_TYPE_DIQ:
				seq_printf(m, "  DIQ on device %x\n",
					   pqn->kq->dev->id);
				break;
			default:
				seq_printf(m,
				"  Bad kernel queue type %d on device %x\n",
					   q->properties.type,
					   pqn->kq->dev->id);
				continue;
			}
		} else {
			seq_printf(m,
				"  Weird: Queue node with neither kernel nor user queue\n");
			continue;
		}

		for (xcc = 0; xcc < num_xccs; xcc++) {
			mqd = q->mqd + size * xcc;
			r = mqd_mgr->debugfs_show_mqd(m, mqd);
			if (r != 0)
				break;
		}
	}

	return r;
}

#endif