1 | /* |
2 | * Copyright 2008 Advanced Micro Devices, Inc. |
3 | * Copyright 2008 Red Hat Inc. |
4 | * Copyright 2009 Jerome Glisse. |
5 | * |
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
7 | * copy of this software and associated documentation files (the "Software"), |
8 | * to deal in the Software without restriction, including without limitation |
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
10 | * and/or sell copies of the Software, and to permit persons to whom the |
11 | * Software is furnished to do so, subject to the following conditions: |
12 | * |
13 | * The above copyright notice and this permission notice shall be included in |
14 | * all copies or substantial portions of the Software. |
15 | * |
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
19 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
20 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
21 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
22 | * OTHER DEALINGS IN THE SOFTWARE. |
23 | * |
24 | * Authors: Dave Airlie |
25 | * Alex Deucher |
26 | * Jerome Glisse |
27 | * Christian König |
28 | */ |
29 | |
30 | #include <drm/drm_device.h> |
31 | #include <drm/drm_file.h> |
32 | |
33 | #include "radeon.h" |
34 | |
35 | /* |
36 | * Rings |
37 | * Most engines on the GPU are fed via ring buffers. Ring |
38 | * buffers are areas of GPU accessible memory that the host |
39 | * writes commands into and the GPU reads commands out of. |
40 | * There is a rptr (read pointer) that determines where the |
41 | * GPU is currently reading, and a wptr (write pointer) |
42 | * which determines where the host has written. When the |
43 | * pointers are equal, the ring is idle. When the host |
44 | * writes commands to the ring buffer, it increments the |
45 | * wptr. The GPU then starts fetching commands and executes |
46 | * them until the pointers are equal again. |
47 | */ |
/* Forward declaration; the definition lives further down in this file. */
static void radeon_debugfs_ring_init(struct radeon_device *rdev,
				     struct radeon_ring *ring);
49 | |
50 | /** |
51 | * radeon_ring_supports_scratch_reg - check if the ring supports |
52 | * writing to scratch registers |
53 | * |
54 | * @rdev: radeon_device pointer |
55 | * @ring: radeon_ring structure holding ring information |
56 | * |
57 | * Check if a specific ring supports writing to scratch registers (all asics). |
58 | * Returns true if the ring supports writing to scratch regs, false if not. |
59 | */ |
60 | bool radeon_ring_supports_scratch_reg(struct radeon_device *rdev, |
61 | struct radeon_ring *ring) |
62 | { |
63 | switch (ring->idx) { |
64 | case RADEON_RING_TYPE_GFX_INDEX: |
65 | case CAYMAN_RING_TYPE_CP1_INDEX: |
66 | case CAYMAN_RING_TYPE_CP2_INDEX: |
67 | return true; |
68 | default: |
69 | return false; |
70 | } |
71 | } |
72 | |
73 | /** |
74 | * radeon_ring_free_size - update the free size |
75 | * |
76 | * @rdev: radeon_device pointer |
77 | * @ring: radeon_ring structure holding ring information |
78 | * |
79 | * Update the free dw slots in the ring buffer (all asics). |
80 | */ |
81 | void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *ring) |
82 | { |
83 | uint32_t rptr = radeon_ring_get_rptr(rdev, ring); |
84 | |
85 | /* This works because ring_size is a power of 2 */ |
86 | ring->ring_free_dw = rptr + (ring->ring_size / 4); |
87 | ring->ring_free_dw -= ring->wptr; |
88 | ring->ring_free_dw &= ring->ptr_mask; |
89 | if (!ring->ring_free_dw) { |
90 | /* this is an empty ring */ |
91 | ring->ring_free_dw = ring->ring_size / 4; |
92 | /* update lockup info to avoid false positive */ |
93 | radeon_ring_lockup_update(rdev, ring); |
94 | } |
95 | } |
96 | |
97 | /** |
98 | * radeon_ring_alloc - allocate space on the ring buffer |
99 | * |
100 | * @rdev: radeon_device pointer |
101 | * @ring: radeon_ring structure holding ring information |
102 | * @ndw: number of dwords to allocate in the ring buffer |
103 | * |
104 | * Allocate @ndw dwords in the ring buffer (all asics). |
105 | * Returns 0 on success, error on failure. |
106 | */ |
107 | int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ndw) |
108 | { |
109 | int r; |
110 | |
111 | /* make sure we aren't trying to allocate more space than there is on the ring */ |
112 | if (ndw > (ring->ring_size / 4)) |
113 | return -ENOMEM; |
114 | /* Align requested size with padding so unlock_commit can |
115 | * pad safely */ |
116 | radeon_ring_free_size(rdev, ring); |
117 | ndw = (ndw + ring->align_mask) & ~ring->align_mask; |
118 | while (ndw > (ring->ring_free_dw - 1)) { |
119 | radeon_ring_free_size(rdev, ring); |
120 | if (ndw < ring->ring_free_dw) { |
121 | break; |
122 | } |
123 | r = radeon_fence_wait_next(rdev, ring: ring->idx); |
124 | if (r) |
125 | return r; |
126 | } |
127 | ring->count_dw = ndw; |
128 | ring->wptr_old = ring->wptr; |
129 | return 0; |
130 | } |
131 | |
132 | /** |
133 | * radeon_ring_lock - lock the ring and allocate space on it |
134 | * |
135 | * @rdev: radeon_device pointer |
136 | * @ring: radeon_ring structure holding ring information |
137 | * @ndw: number of dwords to allocate in the ring buffer |
138 | * |
139 | * Lock the ring and allocate @ndw dwords in the ring buffer |
140 | * (all asics). |
141 | * Returns 0 on success, error on failure. |
142 | */ |
143 | int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ndw) |
144 | { |
145 | int r; |
146 | |
147 | mutex_lock(&rdev->ring_lock); |
148 | r = radeon_ring_alloc(rdev, ring, ndw); |
149 | if (r) { |
150 | mutex_unlock(lock: &rdev->ring_lock); |
151 | return r; |
152 | } |
153 | return 0; |
154 | } |
155 | |
156 | /** |
157 | * radeon_ring_commit - tell the GPU to execute the new |
158 | * commands on the ring buffer |
159 | * |
160 | * @rdev: radeon_device pointer |
161 | * @ring: radeon_ring structure holding ring information |
162 | * @hdp_flush: Whether or not to perform an HDP cache flush |
163 | * |
164 | * Update the wptr (write pointer) to tell the GPU to |
165 | * execute new commands on the ring buffer (all asics). |
166 | */ |
167 | void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring, |
168 | bool hdp_flush) |
169 | { |
170 | /* If we are emitting the HDP flush via the ring buffer, we need to |
171 | * do it before padding. |
172 | */ |
173 | if (hdp_flush && rdev->asic->ring[ring->idx]->hdp_flush) |
174 | rdev->asic->ring[ring->idx]->hdp_flush(rdev, ring); |
175 | /* We pad to match fetch size */ |
176 | while (ring->wptr & ring->align_mask) { |
177 | radeon_ring_write(ring, v: ring->nop); |
178 | } |
179 | mb(); |
180 | /* If we are emitting the HDP flush via MMIO, we need to do it after |
181 | * all CPU writes to VRAM finished. |
182 | */ |
183 | if (hdp_flush && rdev->asic->mmio_hdp_flush) |
184 | rdev->asic->mmio_hdp_flush(rdev); |
185 | radeon_ring_set_wptr(rdev, ring); |
186 | } |
187 | |
188 | /** |
189 | * radeon_ring_unlock_commit - tell the GPU to execute the new |
190 | * commands on the ring buffer and unlock it |
191 | * |
192 | * @rdev: radeon_device pointer |
193 | * @ring: radeon_ring structure holding ring information |
194 | * @hdp_flush: Whether or not to perform an HDP cache flush |
195 | * |
196 | * Call radeon_ring_commit() then unlock the ring (all asics). |
197 | */ |
198 | void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *ring, |
199 | bool hdp_flush) |
200 | { |
201 | radeon_ring_commit(rdev, ring, hdp_flush); |
202 | mutex_unlock(lock: &rdev->ring_lock); |
203 | } |
204 | |
205 | /** |
206 | * radeon_ring_undo - reset the wptr |
207 | * |
208 | * @ring: radeon_ring structure holding ring information |
209 | * |
210 | * Reset the driver's copy of the wptr (all asics). |
211 | */ |
212 | void radeon_ring_undo(struct radeon_ring *ring) |
213 | { |
214 | ring->wptr = ring->wptr_old; |
215 | } |
216 | |
217 | /** |
218 | * radeon_ring_unlock_undo - reset the wptr and unlock the ring |
219 | * |
220 | * @rdev: radeon device structure |
221 | * @ring: radeon_ring structure holding ring information |
222 | * |
223 | * Call radeon_ring_undo() then unlock the ring (all asics). |
224 | */ |
225 | void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *ring) |
226 | { |
227 | radeon_ring_undo(ring); |
228 | mutex_unlock(lock: &rdev->ring_lock); |
229 | } |
230 | |
231 | /** |
232 | * radeon_ring_lockup_update - update lockup variables |
233 | * |
234 | * @rdev: radeon device structure |
235 | * @ring: radeon_ring structure holding ring information |
236 | * |
237 | * Update the last rptr value and timestamp (all asics). |
238 | */ |
239 | void radeon_ring_lockup_update(struct radeon_device *rdev, |
240 | struct radeon_ring *ring) |
241 | { |
242 | atomic_set(v: &ring->last_rptr, radeon_ring_get_rptr(rdev, ring)); |
243 | atomic64_set(v: &ring->last_activity, i: jiffies_64); |
244 | } |
245 | |
246 | /** |
247 | * radeon_ring_test_lockup() - check if ring is lockedup by recording information |
248 | * @rdev: radeon device structure |
249 | * @ring: radeon_ring structure holding ring information |
250 | * |
251 | */ |
252 | bool radeon_ring_test_lockup(struct radeon_device *rdev, struct radeon_ring *ring) |
253 | { |
254 | uint32_t rptr = radeon_ring_get_rptr(rdev, ring); |
255 | uint64_t last = atomic64_read(v: &ring->last_activity); |
256 | uint64_t elapsed; |
257 | |
258 | if (rptr != atomic_read(v: &ring->last_rptr)) { |
259 | /* ring is still working, no lockup */ |
260 | radeon_ring_lockup_update(rdev, ring); |
261 | return false; |
262 | } |
263 | |
264 | elapsed = jiffies_to_msecs(j: jiffies_64 - last); |
265 | if (radeon_lockup_timeout && elapsed >= radeon_lockup_timeout) { |
266 | dev_err(rdev->dev, "ring %d stalled for more than %llumsec\n" , |
267 | ring->idx, elapsed); |
268 | return true; |
269 | } |
270 | /* give a chance to the GPU ... */ |
271 | return false; |
272 | } |
273 | |
274 | /** |
275 | * radeon_ring_backup - Back up the content of a ring |
276 | * |
277 | * @rdev: radeon_device pointer |
278 | * @ring: the ring we want to back up |
279 | * @data: placeholder for returned commit data |
280 | * |
281 | * Saves all unprocessed commits from a ring, returns the number of dwords saved. |
282 | */ |
283 | unsigned radeon_ring_backup(struct radeon_device *rdev, struct radeon_ring *ring, |
284 | uint32_t **data) |
285 | { |
286 | unsigned size, ptr, i; |
287 | |
288 | /* just in case lock the ring */ |
289 | mutex_lock(&rdev->ring_lock); |
290 | *data = NULL; |
291 | |
292 | if (ring->ring_obj == NULL) { |
293 | mutex_unlock(lock: &rdev->ring_lock); |
294 | return 0; |
295 | } |
296 | |
297 | /* it doesn't make sense to save anything if all fences are signaled */ |
298 | if (!radeon_fence_count_emitted(rdev, ring: ring->idx)) { |
299 | mutex_unlock(lock: &rdev->ring_lock); |
300 | return 0; |
301 | } |
302 | |
303 | /* calculate the number of dw on the ring */ |
304 | if (ring->rptr_save_reg) |
305 | ptr = RREG32(ring->rptr_save_reg); |
306 | else if (rdev->wb.enabled) |
307 | ptr = le32_to_cpu(*ring->next_rptr_cpu_addr); |
308 | else { |
309 | /* no way to read back the next rptr */ |
310 | mutex_unlock(lock: &rdev->ring_lock); |
311 | return 0; |
312 | } |
313 | |
314 | size = ring->wptr + (ring->ring_size / 4); |
315 | size -= ptr; |
316 | size &= ring->ptr_mask; |
317 | if (size == 0) { |
318 | mutex_unlock(lock: &rdev->ring_lock); |
319 | return 0; |
320 | } |
321 | |
322 | /* and then save the content of the ring */ |
323 | *data = kvmalloc_array(n: size, size: sizeof(uint32_t), GFP_KERNEL); |
324 | if (!*data) { |
325 | mutex_unlock(lock: &rdev->ring_lock); |
326 | return 0; |
327 | } |
328 | for (i = 0; i < size; ++i) { |
329 | (*data)[i] = ring->ring[ptr++]; |
330 | ptr &= ring->ptr_mask; |
331 | } |
332 | |
333 | mutex_unlock(lock: &rdev->ring_lock); |
334 | return size; |
335 | } |
336 | |
/**
 * radeon_ring_restore - append saved commands to the ring again
 *
 * @rdev: radeon_device pointer
 * @ring: ring to append commands to
 * @size: number of dwords we want to write
 * @data: saved commands
 *
 * Allocates space on the ring and restores the previously saved
 * commands.  When both @size and @data are non-zero this function takes
 * ownership of @data and releases it with kvfree() whether or not the
 * ring could be locked; the caller must not touch @data afterwards.
 * NOTE(review): earlier versions leaked @data when radeon_ring_lock()
 * failed - confirm no caller frees @data itself on error.
 *
 * Returns 0 on success (or when there is nothing to restore), error on
 * failure.
 */
int radeon_ring_restore(struct radeon_device *rdev, struct radeon_ring *ring,
			unsigned size, uint32_t *data)
{
	unsigned i;
	int r;

	if (!size || !data)
		return 0;

	/* restore the saved ring content */
	r = radeon_ring_lock(rdev, ring, size);
	if (r)
		goto out_free;

	for (i = 0; i < size; ++i)
		radeon_ring_write(ring, data[i]);

	radeon_ring_unlock_commit(rdev, ring, false);

out_free:
	kvfree(data);
	return r;
}
368 | |
369 | /** |
370 | * radeon_ring_init - init driver ring struct. |
371 | * |
372 | * @rdev: radeon_device pointer |
373 | * @ring: radeon_ring structure holding ring information |
374 | * @ring_size: size of the ring |
375 | * @rptr_offs: offset of the rptr writeback location in the WB buffer |
376 | * @nop: nop packet for this ring |
377 | * |
378 | * Initialize the driver information for the selected ring (all asics). |
379 | * Returns 0 on success, error on failure. |
380 | */ |
381 | int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ring_size, |
382 | unsigned rptr_offs, u32 nop) |
383 | { |
384 | int r; |
385 | |
386 | ring->ring_size = ring_size; |
387 | ring->rptr_offs = rptr_offs; |
388 | ring->nop = nop; |
389 | ring->rdev = rdev; |
390 | /* Allocate ring buffer */ |
391 | if (ring->ring_obj == NULL) { |
392 | r = radeon_bo_create(rdev, size: ring->ring_size, PAGE_SIZE, kernel: true, |
393 | RADEON_GEM_DOMAIN_GTT, flags: 0, NULL, |
394 | NULL, bo_ptr: &ring->ring_obj); |
395 | if (r) { |
396 | dev_err(rdev->dev, "(%d) ring create failed\n" , r); |
397 | return r; |
398 | } |
399 | r = radeon_bo_reserve(bo: ring->ring_obj, no_intr: false); |
400 | if (unlikely(r != 0)) |
401 | return r; |
402 | r = radeon_bo_pin(bo: ring->ring_obj, RADEON_GEM_DOMAIN_GTT, |
403 | gpu_addr: &ring->gpu_addr); |
404 | if (r) { |
405 | radeon_bo_unreserve(bo: ring->ring_obj); |
406 | dev_err(rdev->dev, "(%d) ring pin failed\n" , r); |
407 | return r; |
408 | } |
409 | r = radeon_bo_kmap(bo: ring->ring_obj, |
410 | ptr: (void **)&ring->ring); |
411 | radeon_bo_unreserve(bo: ring->ring_obj); |
412 | if (r) { |
413 | dev_err(rdev->dev, "(%d) ring map failed\n" , r); |
414 | return r; |
415 | } |
416 | } |
417 | ring->ptr_mask = (ring->ring_size / 4) - 1; |
418 | ring->ring_free_dw = ring->ring_size / 4; |
419 | if (rdev->wb.enabled) { |
420 | u32 index = RADEON_WB_RING0_NEXT_RPTR + (ring->idx * 4); |
421 | ring->next_rptr_gpu_addr = rdev->wb.gpu_addr + index; |
422 | ring->next_rptr_cpu_addr = &rdev->wb.wb[index/4]; |
423 | } |
424 | radeon_debugfs_ring_init(rdev, ring); |
425 | radeon_ring_lockup_update(rdev, ring); |
426 | return 0; |
427 | } |
428 | |
429 | /** |
430 | * radeon_ring_fini - tear down the driver ring struct. |
431 | * |
432 | * @rdev: radeon_device pointer |
433 | * @ring: radeon_ring structure holding ring information |
434 | * |
435 | * Tear down the driver information for the selected ring (all asics). |
436 | */ |
437 | void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *ring) |
438 | { |
439 | int r; |
440 | struct radeon_bo *ring_obj; |
441 | |
442 | mutex_lock(&rdev->ring_lock); |
443 | ring_obj = ring->ring_obj; |
444 | ring->ready = false; |
445 | ring->ring = NULL; |
446 | ring->ring_obj = NULL; |
447 | mutex_unlock(lock: &rdev->ring_lock); |
448 | |
449 | if (ring_obj) { |
450 | r = radeon_bo_reserve(bo: ring_obj, no_intr: false); |
451 | if (likely(r == 0)) { |
452 | radeon_bo_kunmap(bo: ring_obj); |
453 | radeon_bo_unpin(bo: ring_obj); |
454 | radeon_bo_unreserve(bo: ring_obj); |
455 | } |
456 | radeon_bo_unref(bo: &ring_obj); |
457 | } |
458 | } |
459 | |
460 | /* |
461 | * Debugfs info |
462 | */ |
463 | #if defined(CONFIG_DEBUG_FS) |
464 | |
465 | static int radeon_debugfs_ring_info_show(struct seq_file *m, void *unused) |
466 | { |
467 | struct radeon_ring *ring = m->private; |
468 | struct radeon_device *rdev = ring->rdev; |
469 | |
470 | uint32_t rptr, wptr, rptr_next; |
471 | unsigned count, i, j; |
472 | |
473 | radeon_ring_free_size(rdev, ring); |
474 | count = (ring->ring_size / 4) - ring->ring_free_dw; |
475 | |
476 | wptr = radeon_ring_get_wptr(rdev, ring); |
477 | seq_printf(m, fmt: "wptr: 0x%08x [%5d]\n" , |
478 | wptr, wptr); |
479 | |
480 | rptr = radeon_ring_get_rptr(rdev, ring); |
481 | seq_printf(m, fmt: "rptr: 0x%08x [%5d]\n" , |
482 | rptr, rptr); |
483 | |
484 | if (ring->rptr_save_reg) { |
485 | rptr_next = RREG32(ring->rptr_save_reg); |
486 | seq_printf(m, fmt: "rptr next(0x%04x): 0x%08x [%5d]\n" , |
487 | ring->rptr_save_reg, rptr_next, rptr_next); |
488 | } else |
489 | rptr_next = ~0; |
490 | |
491 | seq_printf(m, fmt: "driver's copy of the wptr: 0x%08x [%5d]\n" , |
492 | ring->wptr, ring->wptr); |
493 | seq_printf(m, fmt: "last semaphore signal addr : 0x%016llx\n" , |
494 | ring->last_semaphore_signal_addr); |
495 | seq_printf(m, fmt: "last semaphore wait addr : 0x%016llx\n" , |
496 | ring->last_semaphore_wait_addr); |
497 | seq_printf(m, fmt: "%u free dwords in ring\n" , ring->ring_free_dw); |
498 | seq_printf(m, fmt: "%u dwords in ring\n" , count); |
499 | |
500 | if (!ring->ring) |
501 | return 0; |
502 | |
503 | /* print 8 dw before current rptr as often it's the last executed |
504 | * packet that is the root issue |
505 | */ |
506 | i = (rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask; |
507 | for (j = 0; j <= (count + 32); j++) { |
508 | seq_printf(m, fmt: "r[%5d]=0x%08x" , i, ring->ring[i]); |
509 | if (rptr == i) |
510 | seq_puts(m, s: " *" ); |
511 | if (rptr_next == i) |
512 | seq_puts(m, s: " #" ); |
513 | seq_puts(m, s: "\n" ); |
514 | i = (i + 1) & ring->ptr_mask; |
515 | } |
516 | return 0; |
517 | } |
518 | |
519 | DEFINE_SHOW_ATTRIBUTE(radeon_debugfs_ring_info); |
520 | |
521 | static const char *radeon_debugfs_ring_idx_to_name(uint32_t ridx) |
522 | { |
523 | switch (ridx) { |
524 | case RADEON_RING_TYPE_GFX_INDEX: |
525 | return "radeon_ring_gfx" ; |
526 | case CAYMAN_RING_TYPE_CP1_INDEX: |
527 | return "radeon_ring_cp1" ; |
528 | case CAYMAN_RING_TYPE_CP2_INDEX: |
529 | return "radeon_ring_cp2" ; |
530 | case R600_RING_TYPE_DMA_INDEX: |
531 | return "radeon_ring_dma1" ; |
532 | case CAYMAN_RING_TYPE_DMA1_INDEX: |
533 | return "radeon_ring_dma2" ; |
534 | case R600_RING_TYPE_UVD_INDEX: |
535 | return "radeon_ring_uvd" ; |
536 | case TN_RING_TYPE_VCE1_INDEX: |
537 | return "radeon_ring_vce1" ; |
538 | case TN_RING_TYPE_VCE2_INDEX: |
539 | return "radeon_ring_vce2" ; |
540 | default: |
541 | return NULL; |
542 | |
543 | } |
544 | } |
545 | #endif |
546 | |
/*
 * Create the read-only (0444) debugfs file for @ring under the DRM
 * primary minor's debugfs root.  Rings without a known name get no
 * file.  Compiles to an empty function without CONFIG_DEBUG_FS.
 */
static void radeon_debugfs_ring_init(struct radeon_device *rdev, struct radeon_ring *ring)
{
#if defined(CONFIG_DEBUG_FS)
	const char *ring_name = radeon_debugfs_ring_idx_to_name(ring->idx);
	struct dentry *root = rdev->ddev->primary->debugfs_root;

	if (ring_name)
		debugfs_create_file(ring_name, 0444, root, ring,
				    &radeon_debugfs_ring_info_fops);

#endif
}
559 | |