1 | // SPDX-License-Identifier: GPL-2.0 OR MIT |
2 | /* |
3 | * Copyright 2014-2022 Advanced Micro Devices, Inc. |
4 | * |
5 | * Permission is hereby granted, free of charge, to any person obtaining a |
6 | * copy of this software and associated documentation files (the "Software"), |
7 | * to deal in the Software without restriction, including without limitation |
8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
9 | * and/or sell copies of the Software, and to permit persons to whom the |
10 | * Software is furnished to do so, subject to the following conditions: |
11 | * |
12 | * The above copyright notice and this permission notice shall be included in |
13 | * all copies or substantial portions of the Software. |
14 | * |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
18 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
19 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
20 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
21 | * OTHER DEALINGS IN THE SOFTWARE. |
22 | * |
23 | */ |
24 | |
25 | #include <linux/printk.h> |
26 | #include <linux/slab.h> |
27 | #include <linux/mm_types.h> |
28 | |
29 | #include "kfd_priv.h" |
30 | #include "kfd_mqd_manager.h" |
31 | #include "cik_regs.h" |
32 | #include "cik_structs.h" |
33 | #include "oss/oss_2_4_sh_mask.h" |
34 | |
/* View an opaque MQD pointer as a CIK compute MQD. */
static inline struct cik_mqd *get_mqd(void *mqd)
{
	struct cik_mqd *cik = mqd;

	return cik;
}
39 | |
/* View an opaque MQD pointer as a CIK SDMA RLC register block. */
static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
{
	struct cik_sdma_rlc_registers *sdma = mqd;

	return sdma;
}
44 | |
45 | static void update_cu_mask(struct mqd_manager *mm, void *mqd, |
46 | struct mqd_update_info *minfo) |
47 | { |
48 | struct cik_mqd *m; |
49 | uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */ |
50 | |
51 | if (!minfo || !minfo->cu_mask.ptr) |
52 | return; |
53 | |
54 | mqd_symmetrically_map_cu_mask(mm, |
55 | cu_mask: minfo->cu_mask.ptr, cu_mask_count: minfo->cu_mask.count, se_mask, inst: 0); |
56 | |
57 | m = get_mqd(mqd); |
58 | m->compute_static_thread_mgmt_se0 = se_mask[0]; |
59 | m->compute_static_thread_mgmt_se1 = se_mask[1]; |
60 | m->compute_static_thread_mgmt_se2 = se_mask[2]; |
61 | m->compute_static_thread_mgmt_se3 = se_mask[3]; |
62 | |
63 | pr_debug("Update cu mask to %#x %#x %#x %#x\n" , |
64 | m->compute_static_thread_mgmt_se0, |
65 | m->compute_static_thread_mgmt_se1, |
66 | m->compute_static_thread_mgmt_se2, |
67 | m->compute_static_thread_mgmt_se3); |
68 | } |
69 | |
70 | static void set_priority(struct cik_mqd *m, struct queue_properties *q) |
71 | { |
72 | m->cp_hqd_pipe_priority = pipe_priority_map[q->priority]; |
73 | m->cp_hqd_queue_priority = q->priority; |
74 | } |
75 | |
76 | static struct kfd_mem_obj *allocate_mqd(struct kfd_node *kfd, |
77 | struct queue_properties *q) |
78 | { |
79 | struct kfd_mem_obj *mqd_mem_obj; |
80 | |
81 | if (kfd_gtt_sa_allocate(node: kfd, size: sizeof(struct cik_mqd), |
82 | mem_obj: &mqd_mem_obj)) |
83 | return NULL; |
84 | |
85 | return mqd_mem_obj; |
86 | } |
87 | |
88 | static void init_mqd(struct mqd_manager *mm, void **mqd, |
89 | struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, |
90 | struct queue_properties *q) |
91 | { |
92 | uint64_t addr; |
93 | struct cik_mqd *m; |
94 | |
95 | m = (struct cik_mqd *) mqd_mem_obj->cpu_ptr; |
96 | addr = mqd_mem_obj->gpu_addr; |
97 | |
98 | memset(m, 0, ALIGN(sizeof(struct cik_mqd), 256)); |
99 | |
100 | m->header = 0xC0310800; |
101 | m->compute_pipelinestat_enable = 1; |
102 | m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF; |
103 | m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF; |
104 | m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF; |
105 | m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF; |
106 | |
107 | /* |
108 | * Make sure to use the last queue state saved on mqd when the cp |
109 | * reassigns the queue, so when queue is switched on/off (e.g over |
110 | * subscription or quantum timeout) the context will be consistent |
111 | */ |
112 | m->cp_hqd_persistent_state = |
113 | DEFAULT_CP_HQD_PERSISTENT_STATE | PRELOAD_REQ; |
114 | |
115 | m->cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN; |
116 | m->cp_mqd_base_addr_lo = lower_32_bits(addr); |
117 | m->cp_mqd_base_addr_hi = upper_32_bits(addr); |
118 | |
119 | m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS | |
120 | QUANTUM_DURATION(10); |
121 | |
122 | /* |
123 | * Pipe Priority |
124 | * Identifies the pipe relative priority when this queue is connected |
125 | * to the pipeline. The pipe priority is against the GFX pipe and HP3D. |
126 | * In KFD we are using a fixed pipe priority set to CS_MEDIUM. |
127 | * 0 = CS_LOW (typically below GFX) |
128 | * 1 = CS_MEDIUM (typically between HP3D and GFX |
129 | * 2 = CS_HIGH (typically above HP3D) |
130 | */ |
131 | set_priority(m, q); |
132 | |
133 | if (q->format == KFD_QUEUE_FORMAT_AQL) |
134 | m->cp_hqd_iq_rptr = AQL_ENABLE; |
135 | |
136 | *mqd = m; |
137 | if (gart_addr) |
138 | *gart_addr = addr; |
139 | mm->update_mqd(mm, m, q, NULL); |
140 | } |
141 | |
142 | static void init_mqd_sdma(struct mqd_manager *mm, void **mqd, |
143 | struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, |
144 | struct queue_properties *q) |
145 | { |
146 | struct cik_sdma_rlc_registers *m; |
147 | |
148 | m = (struct cik_sdma_rlc_registers *) mqd_mem_obj->cpu_ptr; |
149 | |
150 | memset(m, 0, sizeof(struct cik_sdma_rlc_registers)); |
151 | |
152 | *mqd = m; |
153 | if (gart_addr) |
154 | *gart_addr = mqd_mem_obj->gpu_addr; |
155 | |
156 | mm->update_mqd(mm, m, q, NULL); |
157 | } |
158 | |
159 | static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, |
160 | uint32_t queue_id, struct queue_properties *p, |
161 | struct mm_struct *mms) |
162 | { |
163 | /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ |
164 | uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); |
165 | uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1); |
166 | |
167 | return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id, |
168 | (uint32_t __user *)p->write_ptr, |
169 | wptr_shift, wptr_mask, mms, 0); |
170 | } |
171 | |
172 | static void __update_mqd(struct mqd_manager *mm, void *mqd, |
173 | struct queue_properties *q, struct mqd_update_info *minfo, |
174 | unsigned int atc_bit) |
175 | { |
176 | struct cik_mqd *m; |
177 | |
178 | m = get_mqd(mqd); |
179 | m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | |
180 | DEFAULT_MIN_AVAIL_SIZE; |
181 | m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE; |
182 | if (atc_bit) { |
183 | m->cp_hqd_pq_control |= PQ_ATC_EN; |
184 | m->cp_hqd_ib_control |= IB_ATC_EN; |
185 | } |
186 | |
187 | /* |
188 | * Calculating queue size which is log base 2 of actual queue size -1 |
189 | * dwords and another -1 for ffs |
190 | */ |
191 | m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1; |
192 | m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); |
193 | m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); |
194 | m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); |
195 | m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); |
196 | m->cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(q->doorbell_off); |
197 | |
198 | m->cp_hqd_vmid = q->vmid; |
199 | |
200 | if (q->format == KFD_QUEUE_FORMAT_AQL) |
201 | m->cp_hqd_pq_control |= NO_UPDATE_RPTR; |
202 | |
203 | update_cu_mask(mm, mqd, minfo); |
204 | set_priority(m, q); |
205 | |
206 | q->is_active = QUEUE_IS_ACTIVE(*q); |
207 | } |
208 | |
209 | static uint32_t read_doorbell_id(void *mqd) |
210 | { |
211 | struct cik_mqd *m = (struct cik_mqd *)mqd; |
212 | |
213 | return m->queue_doorbell_id0; |
214 | } |
215 | |
/*
 * update_mqd - update_mqd hook for CP queues.
 * @mm:    MQD manager
 * @mqd:   MQD to update
 * @q:     queue properties to apply
 * @minfo: optional update info (e.g. CU mask)
 *
 * Delegates to __update_mqd() with the ATC bit cleared.
 */
static void update_mqd(struct mqd_manager *mm, void *mqd,
			struct queue_properties *q,
			struct mqd_update_info *minfo)
{
	__update_mqd(mm, mqd, q, minfo, 0);
}
222 | |
223 | static void update_mqd_sdma(struct mqd_manager *mm, void *mqd, |
224 | struct queue_properties *q, |
225 | struct mqd_update_info *minfo) |
226 | { |
227 | struct cik_sdma_rlc_registers *m; |
228 | |
229 | m = get_sdma_mqd(mqd); |
230 | m->sdma_rlc_rb_cntl = order_base_2(q->queue_size / 4) |
231 | << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | |
232 | q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT | |
233 | 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT | |
234 | 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT; |
235 | |
236 | m->sdma_rlc_rb_base = lower_32_bits(q->queue_address >> 8); |
237 | m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8); |
238 | m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr); |
239 | m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr); |
240 | m->sdma_rlc_doorbell = |
241 | q->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT; |
242 | |
243 | m->sdma_rlc_virtual_addr = q->sdma_vm_addr; |
244 | |
245 | m->sdma_engine_id = q->sdma_engine_id; |
246 | m->sdma_queue_id = q->sdma_queue_id; |
247 | |
248 | q->is_active = QUEUE_IS_ACTIVE(*q); |
249 | } |
250 | |
251 | static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst) |
252 | { |
253 | struct cik_mqd *m; |
254 | |
255 | m = get_mqd(mqd); |
256 | |
257 | memcpy(mqd_dst, m, sizeof(struct cik_mqd)); |
258 | } |
259 | |
260 | static void restore_mqd(struct mqd_manager *mm, void **mqd, |
261 | struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, |
262 | struct queue_properties *qp, |
263 | const void *mqd_src, |
264 | const void *ctl_stack_src, const u32 ctl_stack_size) |
265 | { |
266 | uint64_t addr; |
267 | struct cik_mqd *m; |
268 | |
269 | m = (struct cik_mqd *) mqd_mem_obj->cpu_ptr; |
270 | addr = mqd_mem_obj->gpu_addr; |
271 | |
272 | memcpy(m, mqd_src, sizeof(*m)); |
273 | |
274 | *mqd = m; |
275 | if (gart_addr) |
276 | *gart_addr = addr; |
277 | |
278 | m->cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(qp->doorbell_off); |
279 | |
280 | pr_debug("cp_hqd_pq_doorbell_control 0x%x\n" , |
281 | m->cp_hqd_pq_doorbell_control); |
282 | |
283 | qp->is_active = 0; |
284 | } |
285 | |
286 | static void checkpoint_mqd_sdma(struct mqd_manager *mm, |
287 | void *mqd, |
288 | void *mqd_dst, |
289 | void *ctl_stack_dst) |
290 | { |
291 | struct cik_sdma_rlc_registers *m; |
292 | |
293 | m = get_sdma_mqd(mqd); |
294 | |
295 | memcpy(mqd_dst, m, sizeof(struct cik_sdma_rlc_registers)); |
296 | } |
297 | |
298 | static void restore_mqd_sdma(struct mqd_manager *mm, void **mqd, |
299 | struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, |
300 | struct queue_properties *qp, |
301 | const void *mqd_src, |
302 | const void *ctl_stack_src, const u32 ctl_stack_size) |
303 | { |
304 | uint64_t addr; |
305 | struct cik_sdma_rlc_registers *m; |
306 | |
307 | m = (struct cik_sdma_rlc_registers *) mqd_mem_obj->cpu_ptr; |
308 | addr = mqd_mem_obj->gpu_addr; |
309 | |
310 | memcpy(m, mqd_src, sizeof(*m)); |
311 | |
312 | m->sdma_rlc_doorbell = |
313 | qp->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT; |
314 | |
315 | *mqd = m; |
316 | if (gart_addr) |
317 | *gart_addr = addr; |
318 | |
319 | qp->is_active = 0; |
320 | } |
321 | |
/*
 * HIQ MQD implementation.
 * The HIQ queue in Kaveri uses the same MQD structure as all the user mode
 * queues, but with different initial values.
 */
327 | |
/*
 * init_mqd_hiq - init_mqd hook used for HIQ and DIQ queues.
 *
 * Forwards all arguments unchanged to init_mqd(); any type-specific
 * programming comes from the update_mqd hook that init_mqd() invokes
 * through @mm.
 */
static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
			 struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			 struct queue_properties *q)
{
	init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
}
334 | |
335 | static void update_mqd_hiq(struct mqd_manager *mm, void *mqd, |
336 | struct queue_properties *q, |
337 | struct mqd_update_info *minfo) |
338 | { |
339 | struct cik_mqd *m; |
340 | |
341 | m = get_mqd(mqd); |
342 | m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | |
343 | DEFAULT_MIN_AVAIL_SIZE | |
344 | PRIV_STATE | |
345 | KMD_QUEUE; |
346 | |
347 | /* |
348 | * Calculating queue size which is log base 2 of actual queue |
349 | * size -1 dwords |
350 | */ |
351 | m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1; |
352 | m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); |
353 | m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); |
354 | m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); |
355 | m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); |
356 | m->cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(q->doorbell_off); |
357 | |
358 | m->cp_hqd_vmid = q->vmid; |
359 | |
360 | q->is_active = QUEUE_IS_ACTIVE(*q); |
361 | |
362 | set_priority(m, q); |
363 | } |
364 | |
#if defined(CONFIG_DEBUG_FS)

/*
 * debugfs_show_mqd - hex-dump a compute MQD into a seq_file.
 * @m:    seq_file to write to
 * @data: MQD to dump; sizeof(struct cik_mqd) bytes are read
 *
 * Output uses offset prefixes, 32 bytes per row in 4-byte groups, with no
 * ASCII column.
 *
 * Return: always 0.
 */
static int debugfs_show_mqd(struct seq_file *m, void *data)
{
	seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
		     data, sizeof(struct cik_mqd), false);
	return 0;
}

/*
 * debugfs_show_mqd_sdma - hex-dump an SDMA MQD into a seq_file.
 * @m:    seq_file to write to
 * @data: MQD to dump; sizeof(struct cik_sdma_rlc_registers) bytes are read
 *
 * Same layout as debugfs_show_mqd().
 *
 * Return: always 0.
 */
static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
{
	seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
		     data, sizeof(struct cik_sdma_rlc_registers), false);
	return 0;
}

#endif
382 | |
383 | struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, |
384 | struct kfd_node *dev) |
385 | { |
386 | struct mqd_manager *mqd; |
387 | |
388 | if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) |
389 | return NULL; |
390 | |
391 | mqd = kzalloc(size: sizeof(*mqd), GFP_KERNEL); |
392 | if (!mqd) |
393 | return NULL; |
394 | |
395 | mqd->dev = dev; |
396 | |
397 | switch (type) { |
398 | case KFD_MQD_TYPE_CP: |
399 | mqd->allocate_mqd = allocate_mqd; |
400 | mqd->init_mqd = init_mqd; |
401 | mqd->free_mqd = kfd_free_mqd_cp; |
402 | mqd->load_mqd = load_mqd; |
403 | mqd->update_mqd = update_mqd; |
404 | mqd->destroy_mqd = kfd_destroy_mqd_cp; |
405 | mqd->is_occupied = kfd_is_occupied_cp; |
406 | mqd->checkpoint_mqd = checkpoint_mqd; |
407 | mqd->restore_mqd = restore_mqd; |
408 | mqd->mqd_size = sizeof(struct cik_mqd); |
409 | #if defined(CONFIG_DEBUG_FS) |
410 | mqd->debugfs_show_mqd = debugfs_show_mqd; |
411 | #endif |
412 | break; |
413 | case KFD_MQD_TYPE_HIQ: |
414 | mqd->allocate_mqd = allocate_hiq_mqd; |
415 | mqd->init_mqd = init_mqd_hiq; |
416 | mqd->free_mqd = free_mqd_hiq_sdma; |
417 | mqd->load_mqd = load_mqd; |
418 | mqd->update_mqd = update_mqd_hiq; |
419 | mqd->destroy_mqd = kfd_destroy_mqd_cp; |
420 | mqd->is_occupied = kfd_is_occupied_cp; |
421 | mqd->mqd_size = sizeof(struct cik_mqd); |
422 | mqd->mqd_stride = kfd_mqd_stride; |
423 | #if defined(CONFIG_DEBUG_FS) |
424 | mqd->debugfs_show_mqd = debugfs_show_mqd; |
425 | #endif |
426 | mqd->read_doorbell_id = read_doorbell_id; |
427 | break; |
428 | case KFD_MQD_TYPE_DIQ: |
429 | mqd->allocate_mqd = allocate_mqd; |
430 | mqd->init_mqd = init_mqd_hiq; |
431 | mqd->free_mqd = kfd_free_mqd_cp; |
432 | mqd->load_mqd = load_mqd; |
433 | mqd->update_mqd = update_mqd_hiq; |
434 | mqd->destroy_mqd = kfd_destroy_mqd_cp; |
435 | mqd->is_occupied = kfd_is_occupied_cp; |
436 | mqd->mqd_size = sizeof(struct cik_mqd); |
437 | mqd->mqd_stride = kfd_mqd_stride; |
438 | #if defined(CONFIG_DEBUG_FS) |
439 | mqd->debugfs_show_mqd = debugfs_show_mqd; |
440 | #endif |
441 | break; |
442 | case KFD_MQD_TYPE_SDMA: |
443 | mqd->allocate_mqd = allocate_sdma_mqd; |
444 | mqd->init_mqd = init_mqd_sdma; |
445 | mqd->free_mqd = free_mqd_hiq_sdma; |
446 | mqd->load_mqd = kfd_load_mqd_sdma; |
447 | mqd->update_mqd = update_mqd_sdma; |
448 | mqd->destroy_mqd = kfd_destroy_mqd_sdma; |
449 | mqd->is_occupied = kfd_is_occupied_sdma; |
450 | mqd->checkpoint_mqd = checkpoint_mqd_sdma; |
451 | mqd->restore_mqd = restore_mqd_sdma; |
452 | mqd->mqd_size = sizeof(struct cik_sdma_rlc_registers); |
453 | mqd->mqd_stride = kfd_mqd_stride; |
454 | #if defined(CONFIG_DEBUG_FS) |
455 | mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; |
456 | #endif |
457 | break; |
458 | default: |
459 | kfree(objp: mqd); |
460 | return NULL; |
461 | } |
462 | |
463 | return mqd; |
464 | } |
465 | |