// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2016-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include "kfd_priv.h"
#include "kfd_mqd_manager.h"
#include "v9_structs.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "sdma0/sdma0_4_0_sh_mask.h"
#include "amdgpu_amdkfd.h"
#include "kfd_device_queue_manager.h"

static void update_mqd(struct mqd_manager *mm, void *mqd,
		       struct queue_properties *q,
		       struct mqd_update_info *minfo);
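
/* Per-queue MQD stride. When CWSR is enabled, a compute queue's buffer
 * holds a page-aligned MQD followed by a page-aligned control stack, so
 * the stride spans both; otherwise it is just the MQD size.
 */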
static uint64_t mqd_stride_v9(struct mqd_manager *mm,
				struct queue_properties *q)
{
	if (mm->dev->kfd->cwsr_enabled &&
	    q->type == KFD_QUEUE_TYPE_COMPUTE)
		return ALIGN(q->ctl_stack_size, PAGE_SIZE) +
			ALIGN(sizeof(struct v9_mqd), PAGE_SIZE);

	return mm->mqd_size;
}

static inline struct v9_mqd *get_mqd(void *mqd)
{
	return (struct v9_mqd *)mqd;
}

static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v9_sdma_mqd *)mqd;
}
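
/* Apply the user-requested CU mask to the MQD. The mask is distributed
 * symmetrically across shader engines; all eight SE fields exist in the
 * MQD, but GC 9.4.3 programs only SE0-SE3 per XCC instance.
 */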
static void update_cu_mask(struct mqd_manager *mm, void *mqd,
			struct mqd_update_info *minfo, uint32_t inst)
{
	struct v9_mqd *m;
	uint32_t se_mask[KFD_MAX_NUM_SE] = {0};

	if (!minfo || !minfo->cu_mask.ptr)
		return;

	mqd_symmetrically_map_cu_mask(mm,
		minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, inst);

	m = get_mqd(mqd);

	m->compute_static_thread_mgmt_se0 = se_mask[0];
	m->compute_static_thread_mgmt_se1 = se_mask[1];
	m->compute_static_thread_mgmt_se2 = se_mask[2];
	m->compute_static_thread_mgmt_se3 = se_mask[3];
	if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3)) {
		m->compute_static_thread_mgmt_se4 = se_mask[4];
		m->compute_static_thread_mgmt_se5 = se_mask[5];
		m->compute_static_thread_mgmt_se6 = se_mask[6];
		m->compute_static_thread_mgmt_se7 = se_mask[7];

		pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
			m->compute_static_thread_mgmt_se0,
			m->compute_static_thread_mgmt_se1,
			m->compute_static_thread_mgmt_se2,
			m->compute_static_thread_mgmt_se3,
			m->compute_static_thread_mgmt_se4,
			m->compute_static_thread_mgmt_se5,
			m->compute_static_thread_mgmt_se6,
			m->compute_static_thread_mgmt_se7);
	} else {
		pr_debug("inst: %u, update cu mask to %#x %#x %#x %#x\n",
			inst, m->compute_static_thread_mgmt_se0,
			m->compute_static_thread_mgmt_se1,
			m->compute_static_thread_mgmt_se2,
			m->compute_static_thread_mgmt_se3);
	}
}
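
/* Translate the queue priority into the HQD pipe and queue priority fields. */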
static void set_priority(struct v9_mqd *m, struct queue_properties *q)
{
	m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
	m->cp_hqd_queue_priority = q->priority;
}

static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
		struct queue_properties *q)
{
	int retval;
	struct kfd_mem_obj *mqd_mem_obj = NULL;

	/* For V9 only, due to a HW bug, the control stack of a user mode
	 * compute queue needs to be allocated just behind the page boundary
	 * of its regular MQD buffer. So we allocate an enlarged MQD buffer:
	 * the first page of the buffer serves as the regular MQD buffer
	 * purpose and the remaining is for control stack. Although the two
	 * parts are in the same buffer object, they need different memory
	 * types: MQD part needs UC (uncached) as usual, while control stack
	 * needs NC (non-coherent), which is different from the UC type used
	 * when the control stack is allocated in user space.
	 *
	 * Because of all those, we use the gtt allocation function instead
	 * of the sub-allocation function for this enlarged MQD buffer.
	 * Moreover, in order to achieve two memory types in a single buffer
	 * object, we pass a special bo flag AMDGPU_GEM_CREATE_CP_MQD_GFX9
	 * to instruct amdgpu memory functions to do so.
	 */
	if (node->kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
		mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
		if (!mqd_mem_obj)
			return NULL;
		retval = amdgpu_amdkfd_alloc_gtt_mem(node->adev,
			(ALIGN(q->ctl_stack_size, PAGE_SIZE) +
			 ALIGN(sizeof(struct v9_mqd), PAGE_SIZE)) *
			NUM_XCC(node->xcc_mask),
			&(mqd_mem_obj->gtt_mem),
			&(mqd_mem_obj->gpu_addr),
			(void *)&(mqd_mem_obj->cpu_ptr), true);

		if (retval) {
			kfree(mqd_mem_obj);
			return NULL;
		}
	} else {
		retval = kfd_gtt_sa_allocate(node, sizeof(struct v9_mqd),
				&mqd_mem_obj);
		if (retval)
			return NULL;
	}

	return mqd_mem_obj;
}
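
/* Initialize a compute MQD: program the MQD header, enable all CUs by
 * default, set up quantum scheduling and, when CWSR is enabled, the
 * context save/restore area layout (workgroup state placed after the
 * control stack).
 */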
static void init_mqd(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *q)
{
	uint64_t addr;
	struct v9_mqd *m;

	m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr;
	addr = mqd_mem_obj->gpu_addr;

	memset(m, 0, sizeof(struct v9_mqd));

	m->header = 0xC0310800;
	m->compute_pipelinestat_enable = 1;
	m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se4 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se5 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se6 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se7 = 0xFFFFFFFF;

	m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
			0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;

	m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;

	m->cp_mqd_base_addr_lo = lower_32_bits(addr);
	m->cp_mqd_base_addr_hi = upper_32_bits(addr);

	m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
			1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
			1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;

	/* Set cp_hqd_hq_status0 bit 14 to 1 to have the CP set up the
	 * DISPATCH_PTR. This is required for the kfd debugger.
	 */
	m->cp_hqd_hq_status0 = 1 << 14;

	if (q->format == KFD_QUEUE_FORMAT_AQL)
		m->cp_hqd_aql_control =
			1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;

	if (q->tba_addr) {
		m->compute_pgm_rsrc2 |=
			(1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT);
	}

	if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address) {
		m->cp_hqd_persistent_state |=
			(1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
		m->cp_hqd_ctx_save_base_addr_lo =
			lower_32_bits(q->ctx_save_restore_area_address);
		m->cp_hqd_ctx_save_base_addr_hi =
			upper_32_bits(q->ctx_save_restore_area_address);
		m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
		m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
		m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
		m->cp_hqd_wg_state_offset = q->ctl_stack_size;
	}

	*mqd = m;
	if (gart_addr)
		*gart_addr = addr;
	update_mqd(mm, m, q, NULL);
}

static int load_mqd(struct mqd_manager *mm, void *mqd,
			uint32_t pipe_id, uint32_t queue_id,
			struct queue_properties *p, struct mm_struct *mms)
{
	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);

	return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
					  (uint32_t __user *)p->write_ptr,
					  wptr_shift, 0, mms, 0);
}
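
/* Program the per-queue ring buffer, doorbell and EOP registers from the
 * queue properties. Ring sizes are encoded as log2 of the size in dwords
 * minus one, e.g. a 4 KiB ring (1024 dwords) is encoded as 9.
 */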
static void update_mqd(struct mqd_manager *mm, void *mqd,
			struct queue_properties *q,
			struct mqd_update_info *minfo)
{
	struct v9_mqd *m;

	m = get_mqd(mqd);

	m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
	m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
	pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);

	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);

	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
	m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
	m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
	m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);

	m->cp_hqd_pq_doorbell_control =
		q->doorbell_off <<
			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
			m->cp_hqd_pq_doorbell_control);

	m->cp_hqd_ib_control =
		3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT |
		1 << CP_HQD_IB_CONTROL__IB_EXE_DISABLE__SHIFT;

	/*
	 * HW does not clamp this field correctly. Maximum EOP queue size
	 * is constrained by per-SE EOP done signal count, which is 8-bit.
	 * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
	 * more than (EOP entry count - 1) so a queue size of 0x800 dwords
	 * is safe, giving a maximum field value of 0xA.
	 *
	 * Also, do the calculation only if EOP is used (size > 0); otherwise
	 * the order_base_2 calculation gives an incorrect result.
	 */
	m->cp_hqd_eop_control = q->eop_ring_buffer_size ?
		min(0xA, order_base_2(q->eop_ring_buffer_size / 4) - 1) : 0;

	m->cp_hqd_eop_base_addr_lo =
			lower_32_bits(q->eop_ring_buffer_address >> 8);
	m->cp_hqd_eop_base_addr_hi =
			upper_32_bits(q->eop_ring_buffer_address >> 8);

	m->cp_hqd_iq_timer = 0;

	m->cp_hqd_vmid = q->vmid;

	if (q->format == KFD_QUEUE_FORMAT_AQL) {
		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
				2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
				1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT |
				1 << CP_HQD_PQ_CONTROL__WPP_CLAMP_EN__SHIFT;
		m->cp_hqd_pq_doorbell_control |= 1 <<
			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
	}
	if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address)
		m->cp_hqd_ctx_save_control = 0;

	if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3))
		update_cu_mask(mm, mqd, minfo, 0);
	set_priority(m, q);

	q->is_active = QUEUE_IS_ACTIVE(*q);
}

static uint32_t read_doorbell_id(void *mqd)
{
	struct v9_mqd *m = (struct v9_mqd *)mqd;

	return m->queue_doorbell_id0;
}
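
/* Copy the wave state header and the used portion of the control stack to
 * user space. Used sizes are derived from the MQD; the control stack
 * appears to be filled top-down, since the used size is the total size
 * minus the current offset.
 */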
static int get_wave_state(struct mqd_manager *mm, void *mqd,
			  struct queue_properties *q,
			  void __user *ctl_stack,
			  u32 *ctl_stack_used_size,
			  u32 *save_area_used_size)
{
	struct v9_mqd *m;
	struct kfd_context_save_area_header header;

	/* Control stack is located one page after MQD. */
	void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);

	m = get_mqd(mqd);

	*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
		m->cp_hqd_cntl_stack_offset;
	*save_area_used_size = m->cp_hqd_wg_state_offset -
		m->cp_hqd_cntl_stack_size;

	header.wave_state.control_stack_size = *ctl_stack_used_size;
	header.wave_state.wave_state_size = *save_area_used_size;

	header.wave_state.wave_state_offset = m->cp_hqd_wg_state_offset;
	header.wave_state.control_stack_offset = m->cp_hqd_cntl_stack_offset;

	if (copy_to_user(ctl_stack, &header, sizeof(header.wave_state)))
		return -EFAULT;

	if (copy_to_user(ctl_stack + m->cp_hqd_cntl_stack_offset,
				mqd_ctl_stack + m->cp_hqd_cntl_stack_offset,
				*ctl_stack_used_size))
		return -EFAULT;

	return 0;
}

static void get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 *ctl_stack_size)
{
	struct v9_mqd *m = get_mqd(mqd);

	*ctl_stack_size = m->cp_hqd_cntl_stack_size;
}
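
/* Snapshot the MQD and its control stack for checkpoint/restore. */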
static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
{
	struct v9_mqd *m;
	/* Control stack is located one page after MQD. */
	void *ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);

	m = get_mqd(mqd);

	memcpy(mqd_dst, m, sizeof(struct v9_mqd));
	memcpy(ctl_stack_dst, ctl_stack, m->cp_hqd_cntl_stack_size);
}
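
/* Recreate a queue's MQD and control stack from checkpointed images. The
 * doorbell offset is reprogrammed from the restored queue properties and
 * the queue is left inactive.
 */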
static void restore_mqd(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *qp,
			const void *mqd_src,
			const void *ctl_stack_src, u32 ctl_stack_size)
{
	uint64_t addr;
	struct v9_mqd *m;
	void *ctl_stack;

	m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr;
	addr = mqd_mem_obj->gpu_addr;

	memcpy(m, mqd_src, sizeof(*m));

	*mqd = m;
	if (gart_addr)
		*gart_addr = addr;

	/* Control stack is located one page after MQD. */
	ctl_stack = (void *)((uintptr_t)*mqd + PAGE_SIZE);
	memcpy(ctl_stack, ctl_stack_src, ctl_stack_size);

	m->cp_hqd_pq_doorbell_control =
		qp->doorbell_off <<
			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
			m->cp_hqd_pq_doorbell_control);

	qp->is_active = 0;
}

static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *q)
{
	struct v9_mqd *m;

	init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);

	m = get_mqd(*mqd);

	m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
			1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
}
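
/* Tear down the HIQ by asking the KIQ to unmap it, identified by the
 * doorbell offset programmed in the MQD, rather than by a direct HQD
 * destroy.
 */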
static int destroy_hiq_mqd(struct mqd_manager *mm, void *mqd,
			enum kfd_preempt_type type, unsigned int timeout,
			uint32_t pipe_id, uint32_t queue_id)
{
	int err;
	struct v9_mqd *m;
	u32 doorbell_off;

	m = get_mqd(mqd);

	doorbell_off = m->cp_hqd_pq_doorbell_control >>
		       CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
	err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, 0);
	if (err)
		pr_debug("Destroy HIQ MQD failed: %d\n", err);

	return err;
}
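
/* SDMA MQDs carry the sdmax_rlcx_* register image; all meaningful fields
 * are programmed in update_mqd_sdma(), so init only zeroes the structure.
 */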
static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
		struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
		struct queue_properties *q)
{
	struct v9_sdma_mqd *m;

	m = (struct v9_sdma_mqd *) mqd_mem_obj->cpu_ptr;

	memset(m, 0, sizeof(struct v9_sdma_mqd));

	*mqd = m;
	if (gart_addr)
		*gart_addr = mqd_mem_obj->gpu_addr;

	mm->update_mqd(mm, m, q, NULL);
}

#define SDMA_RLC_DUMMY_DEFAULT 0xf

static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
			struct queue_properties *q,
			struct mqd_update_info *minfo)
{
	struct v9_sdma_mqd *m;

	m = get_sdma_mqd(mqd);
	m->sdmax_rlcx_rb_cntl = order_base_2(q->queue_size / 4)
		<< SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
		q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
		1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
		6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;

	m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
	m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
	m->sdmax_rlcx_doorbell_offset =
		q->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;

	m->sdma_engine_id = q->sdma_engine_id;
	m->sdma_queue_id = q->sdma_queue_id;
	m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;

	q->is_active = QUEUE_IS_ACTIVE(*q);
}

static void checkpoint_mqd_sdma(struct mqd_manager *mm,
				void *mqd,
				void *mqd_dst,
				void *ctl_stack_dst)
{
	struct v9_sdma_mqd *m;

	m = get_sdma_mqd(mqd);

	memcpy(mqd_dst, m, sizeof(struct v9_sdma_mqd));
}

static void restore_mqd_sdma(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *qp,
			const void *mqd_src,
			const void *ctl_stack_src, const u32 ctl_stack_size)
{
	uint64_t addr;
	struct v9_sdma_mqd *m;

	m = (struct v9_sdma_mqd *) mqd_mem_obj->cpu_ptr;
	addr = mqd_mem_obj->gpu_addr;

	memcpy(m, mqd_src, sizeof(*m));

	m->sdmax_rlcx_doorbell_offset =
		qp->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;

	*mqd = m;
	if (gart_addr)
		*gart_addr = addr;

	qp->is_active = 0;
}
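
/* GC 9.4.3 instantiates one HIQ MQD per XCC. Each copy is KMD-owned and
 * privileged; only the master XCC (XCC0) updates the read pointer, and
 * the XCC0 MQD is what gets reported back to the caller.
 */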
static void init_mqd_hiq_v9_4_3(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *q)
{
	struct v9_mqd *m;
	int xcc = 0;
	struct kfd_mem_obj xcc_mqd_mem_obj;
	uint64_t xcc_gart_addr = 0;

	memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));

	for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
		kfd_get_hiq_xcc_mqd(mm->dev, &xcc_mqd_mem_obj, xcc);

		init_mqd(mm, (void **)&m, &xcc_mqd_mem_obj, &xcc_gart_addr, q);

		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
					1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
					1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
		m->cp_mqd_stride_size = kfd_hiq_mqd_stride(mm->dev);
		if (xcc == 0) {
			/* Set no_update_rptr = 0 in Master XCC */
			m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK;

			/* Set the MQD pointer and gart address to XCC0 MQD */
			*mqd = m;
			*gart_addr = xcc_gart_addr;
		}
	}
}
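
/* Load the per-XCC HIQ MQDs through the KIQ, one instance at a time,
 * stepping through the buffer by the HIQ MQD stride.
 */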
static int hiq_load_mqd_kiq_v9_4_3(struct mqd_manager *mm, void *mqd,
			uint32_t pipe_id, uint32_t queue_id,
			struct queue_properties *p, struct mm_struct *mms)
{
	uint32_t xcc_mask = mm->dev->xcc_mask;
	int xcc_id, err, inst = 0;
	void *xcc_mqd;
	uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);

	for_each_inst(xcc_id, xcc_mask) {
		xcc_mqd = mqd + hiq_mqd_size * inst;
		err = mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, xcc_mqd,
						     pipe_id, queue_id,
						     p->doorbell_off, xcc_id);
		if (err) {
			pr_debug("Failed to load HIQ MQD for XCC: %d\n", inst);
			break;
		}
		++inst;
	}

	return err;
}

static int destroy_hiq_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
			enum kfd_preempt_type type, unsigned int timeout,
			uint32_t pipe_id, uint32_t queue_id)
{
	uint32_t xcc_mask = mm->dev->xcc_mask;
	int xcc_id, err, inst = 0;
	uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);
	struct v9_mqd *m;
	u32 doorbell_off;

	for_each_inst(xcc_id, xcc_mask) {
		m = get_mqd(mqd + hiq_mqd_size * inst);

		doorbell_off = m->cp_hqd_pq_doorbell_control >>
			       CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;

		err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, xcc_id);
		if (err) {
			pr_debug("Destroy HIQ MQD failed for xcc: %d\n", inst);
			break;
		}
		++inst;
	}

	return err;
}
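
/* Build a kfd_mem_obj view of one XCC's slice within the shared MQD
 * allocation. Only the XCC0 view keeps the gtt_mem handle, presumably so
 * the underlying buffer is freed exactly once.
 */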
static void get_xcc_mqd(struct kfd_mem_obj *mqd_mem_obj,
			struct kfd_mem_obj *xcc_mqd_mem_obj,
			uint64_t offset)
{
	xcc_mqd_mem_obj->gtt_mem = (offset == 0) ?
					mqd_mem_obj->gtt_mem : NULL;
	xcc_mqd_mem_obj->gpu_addr = mqd_mem_obj->gpu_addr + offset;
	xcc_mqd_mem_obj->cpu_ptr = (uint32_t *)((uintptr_t)mqd_mem_obj->cpu_ptr
					+ offset);
}
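
/* Initialize one MQD per XCC. Each instance gets its own slice of the
 * CWSR save area, and AQL queues stagger compute_current_logic_xcc_id
 * from a round-robin starting XCC, presumably to spread dispatch across
 * partitions.
 */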
static void init_mqd_v9_4_3(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *q)
{
	struct v9_mqd *m;
	int xcc = 0;
	struct kfd_mem_obj xcc_mqd_mem_obj;
	uint64_t xcc_gart_addr = 0;
	uint64_t xcc_ctx_save_restore_area_address;
	uint64_t offset = mm->mqd_stride(mm, q);
	uint32_t local_xcc_start = mm->dev->dqm->current_logical_xcc_start++;

	memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));
	for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
		get_xcc_mqd(mqd_mem_obj, &xcc_mqd_mem_obj, offset * xcc);

		init_mqd(mm, (void **)&m, &xcc_mqd_mem_obj, &xcc_gart_addr, q);

		m->cp_mqd_stride_size = offset;

		/*
		 * Update the CWSR address for each XCC if CWSR is enabled
		 * and the CWSR area is allocated in thunk
		 */
		if (mm->dev->kfd->cwsr_enabled &&
		    q->ctx_save_restore_area_address) {
			xcc_ctx_save_restore_area_address =
				q->ctx_save_restore_area_address +
				(xcc * q->ctx_save_restore_area_size);

			m->cp_hqd_ctx_save_base_addr_lo =
				lower_32_bits(xcc_ctx_save_restore_area_address);
			m->cp_hqd_ctx_save_base_addr_hi =
				upper_32_bits(xcc_ctx_save_restore_area_address);
		}

		if (q->format == KFD_QUEUE_FORMAT_AQL) {
			m->compute_tg_chunk_size = 1;
			m->compute_current_logic_xcc_id =
					(local_xcc_start + xcc) %
					NUM_XCC(mm->dev->xcc_mask);

			switch (xcc) {
			case 0:
				/* Master XCC */
				m->cp_hqd_pq_control &=
					~CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK;
				break;
			default:
				break;
			}
		} else {
			/* PM4 Queue */
			m->compute_current_logic_xcc_id = 0;
			m->compute_tg_chunk_size = 0;
			m->pm4_target_xcc_in_xcp = q->pm4_target_xcc;
		}

		if (xcc == 0) {
			/* Set the MQD pointer and gart address to XCC0 MQD */
			*mqd = m;
			*gart_addr = xcc_gart_addr;
		}
	}
}

static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
		      struct queue_properties *q, struct mqd_update_info *minfo)
{
	struct v9_mqd *m;
	int xcc = 0;
	uint64_t size = mm->mqd_stride(mm, q);

	for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
		m = get_mqd(mqd + size * xcc);
		update_mqd(mm, m, q, minfo);

		update_cu_mask(mm, mqd, minfo, xcc);

		if (q->format == KFD_QUEUE_FORMAT_AQL) {
			switch (xcc) {
			case 0:
				/* Master XCC */
				m->cp_hqd_pq_control &=
					~CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK;
				break;
			default:
				break;
			}
			m->compute_tg_chunk_size = 1;
		} else {
			/* PM4 Queue */
			m->compute_current_logic_xcc_id = 0;
			m->compute_tg_chunk_size = 0;
			m->pm4_target_xcc_in_xcp = q->pm4_target_xcc;
		}
	}
}

static int destroy_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
		   enum kfd_preempt_type type, unsigned int timeout,
		   uint32_t pipe_id, uint32_t queue_id)
{
	uint32_t xcc_mask = mm->dev->xcc_mask;
	int xcc_id, err, inst = 0;
	void *xcc_mqd;
	struct v9_mqd *m;
	uint64_t mqd_offset;

	m = get_mqd(mqd);
	mqd_offset = m->cp_mqd_stride_size;

	for_each_inst(xcc_id, xcc_mask) {
		xcc_mqd = mqd + mqd_offset * inst;
		err = mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, xcc_mqd,
						    type, timeout, pipe_id,
						    queue_id, xcc_id);
		if (err) {
			pr_debug("Destroy MQD failed for xcc: %d\n", inst);
			break;
		}
		++inst;
	}

	return err;
}

static int load_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
			uint32_t pipe_id, uint32_t queue_id,
			struct queue_properties *p, struct mm_struct *mms)
{
	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
	uint32_t xcc_mask = mm->dev->xcc_mask;
	int xcc_id, err, inst = 0;
	void *xcc_mqd;
	uint64_t mqd_stride_size = mm->mqd_stride(mm, p);

	for_each_inst(xcc_id, xcc_mask) {
		xcc_mqd = mqd + mqd_stride_size * inst;
		err = mm->dev->kfd2kgd->hqd_load(
			mm->dev->adev, xcc_mqd, pipe_id, queue_id,
			(uint32_t __user *)p->write_ptr, wptr_shift, 0, mms,
			xcc_id);
		if (err) {
			pr_debug("Load MQD failed for xcc: %d\n", inst);
			break;
		}
		++inst;
	}

	return err;
}
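
/* Gather wave state for every XCC. The per-XCC control stacks land at
 * ctx_save_restore_area_size intervals in the user buffer; the sizes
 * returned to the caller are those of XCC 0 (see the comment below).
 */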
static int get_wave_state_v9_4_3(struct mqd_manager *mm, void *mqd,
				 struct queue_properties *q,
				 void __user *ctl_stack,
				 u32 *ctl_stack_used_size,
				 u32 *save_area_used_size)
{
	int xcc, err = 0;
	void *xcc_mqd;
	void __user *xcc_ctl_stack;
	uint64_t mqd_stride_size = mm->mqd_stride(mm, q);
	u32 tmp_ctl_stack_used_size = 0, tmp_save_area_used_size = 0;

	for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
		xcc_mqd = mqd + mqd_stride_size * xcc;
		xcc_ctl_stack = (void __user *)((uintptr_t)ctl_stack +
					q->ctx_save_restore_area_size * xcc);

		err = get_wave_state(mm, xcc_mqd, q, xcc_ctl_stack,
				     &tmp_ctl_stack_used_size,
				     &tmp_save_area_used_size);
		if (err)
			break;

		/*
		 * Report the ctl_stack_used_size and save_area_used_size of
		 * XCC 0 when passing the info to user space.
		 * For multiple XCCs, user space has to look at the header
		 * info of each control stack area to determine the control
		 * stack size and save area used.
		 */
		if (xcc == 0) {
			*ctl_stack_used_size = tmp_ctl_stack_used_size;
			*save_area_used_size = tmp_save_area_used_size;
		}
	}

	return err;
}

#if defined(CONFIG_DEBUG_FS)

static int debugfs_show_mqd(struct seq_file *m, void *data)
{
	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
		     data, sizeof(struct v9_mqd), false);
	return 0;
}

static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
{
	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
		     data, sizeof(struct v9_sdma_mqd), false);
	return 0;
}

#endif
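
/*
 * mqd_manager_init_v9() - Build the MQD manager vtable for a GFX9 node.
 *
 * Selects the per-queue-type callbacks, substituting the multi-XCC
 * variants on GC 9.4.3. Returns NULL for unknown types or on allocation
 * failure.
 */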
struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
		struct kfd_node *dev)
{
	struct mqd_manager *mqd;

	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
		return NULL;

	mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
	if (!mqd)
		return NULL;

	mqd->dev = dev;

	switch (type) {
	case KFD_MQD_TYPE_CP:
		mqd->allocate_mqd = allocate_mqd;
		mqd->free_mqd = kfd_free_mqd_cp;
		mqd->is_occupied = kfd_is_occupied_cp;
		mqd->get_checkpoint_info = get_checkpoint_info;
		mqd->checkpoint_mqd = checkpoint_mqd;
		mqd->restore_mqd = restore_mqd;
		mqd->mqd_size = sizeof(struct v9_mqd);
		mqd->mqd_stride = mqd_stride_v9;
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)) {
			mqd->init_mqd = init_mqd_v9_4_3;
			mqd->load_mqd = load_mqd_v9_4_3;
			mqd->update_mqd = update_mqd_v9_4_3;
			mqd->destroy_mqd = destroy_mqd_v9_4_3;
			mqd->get_wave_state = get_wave_state_v9_4_3;
		} else {
			mqd->init_mqd = init_mqd;
			mqd->load_mqd = load_mqd;
			mqd->update_mqd = update_mqd;
			mqd->destroy_mqd = kfd_destroy_mqd_cp;
			mqd->get_wave_state = get_wave_state;
		}
		break;
	case KFD_MQD_TYPE_HIQ:
		mqd->allocate_mqd = allocate_hiq_mqd;
		mqd->free_mqd = free_mqd_hiq_sdma;
		mqd->update_mqd = update_mqd;
		mqd->is_occupied = kfd_is_occupied_cp;
		mqd->mqd_size = sizeof(struct v9_mqd);
		mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		mqd->read_doorbell_id = read_doorbell_id;
		if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)) {
			mqd->init_mqd = init_mqd_hiq_v9_4_3;
			mqd->load_mqd = hiq_load_mqd_kiq_v9_4_3;
			mqd->destroy_mqd = destroy_hiq_mqd_v9_4_3;
		} else {
			mqd->init_mqd = init_mqd_hiq;
			mqd->load_mqd = kfd_hiq_load_mqd_kiq;
			mqd->destroy_mqd = destroy_hiq_mqd;
		}
		break;
	case KFD_MQD_TYPE_DIQ:
		mqd->allocate_mqd = allocate_mqd;
		mqd->init_mqd = init_mqd_hiq;
		mqd->free_mqd = kfd_free_mqd_cp;
		mqd->load_mqd = load_mqd;
		mqd->update_mqd = update_mqd;
		mqd->destroy_mqd = kfd_destroy_mqd_cp;
		mqd->is_occupied = kfd_is_occupied_cp;
		mqd->mqd_size = sizeof(struct v9_mqd);
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		break;
	case KFD_MQD_TYPE_SDMA:
		mqd->allocate_mqd = allocate_sdma_mqd;
		mqd->init_mqd = init_mqd_sdma;
		mqd->free_mqd = free_mqd_hiq_sdma;
		mqd->load_mqd = kfd_load_mqd_sdma;
		mqd->update_mqd = update_mqd_sdma;
		mqd->destroy_mqd = kfd_destroy_mqd_sdma;
		mqd->is_occupied = kfd_is_occupied_sdma;
		mqd->checkpoint_mqd = checkpoint_mqd_sdma;
		mqd->restore_mqd = restore_mqd_sdma;
		mqd->mqd_size = sizeof(struct v9_sdma_mqd);
		mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif
		break;
	default:
		kfree(mqd);
		return NULL;
	}

	return mqd;
}