/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */

#include "radeon.h"
#include "radeon_asic.h"
#include "r600.h"
#include "r600d.h"
/*
 * DMA
 * Starting with R600, the GPU has an asynchronous
 * DMA engine. The programming model is very similar
 * to the 3D engine (ring buffer, IBs, etc.), but the
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
 * It supports copying data, writing embedded data,
 * solid fills, and a number of other things. It also
 * has support for tiling/detiling of buffers.
 */
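
/*
 * For reference, a minimal sketch of how a DMA packet header is encoded,
 * following the DMA_PACKET() macro in r600d.h (opcode, two flag bits, and
 * a 16-bit count packed into a single dword):
 *
 *   DMA_PACKET(cmd, t, s, n) = ((cmd & 0xF) << 28) | ((t & 0x1) << 23) |
 *                              ((s & 0x1) << 22) | ((n & 0xFFFF) << 0)
 *
 * e.g. DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1) builds a WRITE header with a
 * one-dword payload; the destination address dwords follow in the ring.
 */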

/**
 * r600_dma_get_rptr - get the current read pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current rptr from the hardware (r6xx+).
 */
uint32_t r600_dma_get_rptr(struct radeon_device *rdev,
			   struct radeon_ring *ring)
{
	u32 rptr;

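	/* DMA_RB_RPTR holds a dword-aligned byte offset into the ring;
	 * with writeback enabled the hardware mirrors it into system
	 * memory, which spares us an MMIO read */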
	if (rdev->wb.enabled)
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	else
		rptr = RREG32(DMA_RB_RPTR);

	return (rptr & 0x3fffc) >> 2;
}

/**
 * r600_dma_get_wptr - get the current write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current wptr from the hardware (r6xx+).
 */
uint32_t r600_dma_get_wptr(struct radeon_device *rdev,
			   struct radeon_ring *ring)
{
	return (RREG32(DMA_RB_WPTR) & 0x3fffc) >> 2;
}

/**
 * r600_dma_set_wptr - commit the write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Write the wptr back to the hardware (r6xx+).
 */
void r600_dma_set_wptr(struct radeon_device *rdev,
		       struct radeon_ring *ring)
{
	WREG32(DMA_RB_WPTR, (ring->wptr << 2) & 0x3fffc);
}

/**
 * r600_dma_stop - stop the async dma engine
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engine (r6xx-evergreen).
 */
void r600_dma_stop(struct radeon_device *rdev)
{
	u32 rb_cntl = RREG32(DMA_RB_CNTL);

	if (rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);

	rb_cntl &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL, rb_cntl);

	rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
}

/**
 * r600_dma_resume - setup and start the async dma engine
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA ring buffer and enable it (r6xx-evergreen).
 * Returns 0 for success, error for failure.
 */
int r600_dma_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	u32 rb_cntl, dma_cntl, ib_cntl;
	u32 rb_bufsz;
	int r;

	WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0);
	WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);

	/* Set ring buffer size in dwords */
	rb_bufsz = order_base_2(ring->ring_size / 4);
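	/* the size is programmed as log2 of the dword count, shifted into
	 * DMA_RB_CNTL's size field, which starts at bit 1 */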
	rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
	rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
	WREG32(DMA_RB_CNTL, rb_cntl);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(DMA_RB_RPTR, 0);
	WREG32(DMA_RB_WPTR, 0);

	/* set the wb address whether it's enabled or not */
	WREG32(DMA_RB_RPTR_ADDR_HI,
	       upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF);
	WREG32(DMA_RB_RPTR_ADDR_LO,
	       ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC));

	if (rdev->wb.enabled)
		rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;

	WREG32(DMA_RB_BASE, ring->gpu_addr >> 8);

	/* enable DMA IBs */
	ib_cntl = DMA_IB_ENABLE;
#ifdef __BIG_ENDIAN
	ib_cntl |= DMA_IB_SWAP_ENABLE;
#endif
	WREG32(DMA_IB_CNTL, ib_cntl);

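	/* disable context empty interrupts; the driver does not use them */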
	dma_cntl = RREG32(DMA_CNTL);
	dma_cntl &= ~CTXEMPTY_INT_ENABLE;
	WREG32(DMA_CNTL, dma_cntl);

	if (rdev->family >= CHIP_RV770)
		WREG32(DMA_MODE, 1);

	ring->wptr = 0;
	WREG32(DMA_RB_WPTR, ring->wptr << 2);

	WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE);

	ring->ready = true;

	r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring);
	if (r) {
		ring->ready = false;
		return r;
	}

	if (rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}

/**
 * r600_dma_fini - tear down the async dma engine
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engine and free the ring (r6xx-evergreen).
 */
void r600_dma_fini(struct radeon_device *rdev)
{
	r600_dma_stop(rdev);
	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
}

/**
 * r600_dma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the async DMA engine is locked up.
 * Returns true if the engine appears to be locked up, false if not.
 */
bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = r600_gpu_check_soft_reset(rdev);

	if (!(reset_mask & RADEON_RESET_DMA)) {
		radeon_ring_lockup_update(rdev, ring);
		return false;
	}
	return radeon_ring_test_lockup(rdev, ring);
}

/**
 * r600_dma_ring_test - simple async dma engine test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory (r6xx-SI).
 * Returns 0 for success, error for failure.
 */
int r600_dma_ring_test(struct radeon_device *rdev,
		       struct radeon_ring *ring)
{
	unsigned i;
	int r;
	unsigned index;
	u32 tmp;
	u64 gpu_addr;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		index = R600_WB_DMA_RING_TEST_OFFSET;
	else
		index = CAYMAN_WB_DMA1_RING_TEST_OFFSET;

	gpu_addr = rdev->wb.gpu_addr + index;

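	/* seed the writeback slot with a sentinel that the DMA WRITE
	 * packet below is expected to overwrite */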
	tmp = 0xCAFEDEAD;
	rdev->wb.wb[index/4] = cpu_to_le32(tmp);

	r = radeon_ring_lock(rdev, ring, 4);
	if (r) {
		DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
		return r;
	}
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
	radeon_ring_write(ring, lower_32_bits(gpu_addr));
	radeon_ring_write(ring, upper_32_bits(gpu_addr) & 0xff);
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring, false);

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = le32_to_cpu(rdev->wb.wb[index/4]);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
			  ring->idx, tmp);
		r = -EINVAL;
	}
	return r;
}

/**
 * r600_dma_fence_ring_emit - emit a fence on the DMA ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Add a DMA fence packet to the ring to write the fence seq number,
 * followed by a DMA trap packet to generate an interrupt if needed
 * (r6xx-r7xx).
 */
void r600_dma_fence_ring_emit(struct radeon_device *rdev,
			      struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* write the fence */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
	radeon_ring_write(ring, lower_32_bits(fence->seq));
	/* generate an interrupt */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
}

/**
 * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @semaphore: radeon semaphore object
 * @emit_wait: wait or signal semaphore
 *
 * Add a DMA semaphore packet to the ring to wait on or signal
 * other rings (r6xx-SI).
 */
bool r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
				  struct radeon_ring *ring,
				  struct radeon_semaphore *semaphore,
				  bool emit_wait)
{
	u64 addr = semaphore->gpu_addr;
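	/* the packet's 's' field selects the operation: 1 = signal, 0 = wait */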
	u32 s = emit_wait ? 0 : 1;

	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr) & 0xff);

	return true;
}

/**
 * r600_dma_ib_test - test an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Test a simple IB in the DMA ring (r6xx-SI).
 * Returns 0 on success, error on failure.
 */
int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	unsigned i;
	unsigned index;
	int r;
	u32 tmp = 0;
	u64 gpu_addr;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		index = R600_WB_DMA_RING_TEST_OFFSET;
	else
		index = CAYMAN_WB_DMA1_RING_TEST_OFFSET;

	gpu_addr = rdev->wb.gpu_addr + index;

	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		return r;
	}

	ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1);
	ib.ptr[1] = lower_32_bits(gpu_addr);
	ib.ptr[2] = upper_32_bits(gpu_addr) & 0xff;
	ib.ptr[3] = 0xDEADBEEF;
	ib.length_dw = 4;

	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	r = radeon_fence_wait_timeout(ib.fence, false,
				      usecs_to_jiffies(RADEON_USEC_IB_TEST_TIMEOUT));
	if (r < 0) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		return r;
	} else if (r == 0) {
		DRM_ERROR("radeon: fence wait timed out.\n");
		return -ETIMEDOUT;
	}
	r = 0;
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = le32_to_cpu(rdev->wb.wb[index/4]);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
		r = -EINVAL;
	}
	radeon_ib_free(rdev, &ib);
	return r;
}

/**
 * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (r6xx-r7xx).
 */
void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];

	if (rdev->wb.enabled) {
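		/* predict what rptr will be once this IB is fetched: the
		 * 4-dword WRITE packet below, NOP padding up to the
		 * (wptr & 7) == 5 alignment, then the 3-dword IB packet;
		 * the WRITE packet stores that value for the CPU to read */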
		u32 next_rptr = ring->wptr + 4;
		while ((next_rptr & 7) != 5)
			next_rptr++;
		next_rptr += 3;
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
		radeon_ring_write(ring, next_rptr);
	}

	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
	 * Pad as necessary with NOPs.
	 */
	while ((ring->wptr & 7) != 5)
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
	radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF));
}

/**
 * r600_copy_dma - copy pages using the DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @resv: reservation object to sync to
 *
 * Copy GPU pages using the DMA engine (r6xx).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 */
struct radeon_fence *r600_copy_dma(struct radeon_device *rdev,
				   uint64_t src_offset, uint64_t dst_offset,
				   unsigned num_gpu_pages,
				   struct dma_resv *resv)
{
	struct radeon_fence *fence;
	struct radeon_sync sync;
	int ring_index = rdev->asic->copy.dma_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_dw, cur_size_in_dw;
	int i, num_loops;
	int r = 0;

	radeon_sync_create(&sync);

	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
	num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE);
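	/* each COPY packet is 4 dwords and moves at most 0xFFFE dwords;
	 * reserve 8 extra dwords for the sync semaphores and the fence */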
	r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_dw = size_in_dw;
		if (cur_size_in_dw > 0xFFFE)
			cur_size_in_dw = 0xFFFE;
		size_in_dw -= cur_size_in_dw;
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
		radeon_ring_write(ring, dst_offset & 0xfffffffc);
		radeon_ring_write(ring, src_offset & 0xfffffffc);
		radeon_ring_write(ring, (((upper_32_bits(dst_offset) & 0xff) << 16) |
					 (upper_32_bits(src_offset) & 0xff)));
		src_offset += cur_size_in_dw * 4;
		dst_offset += cur_size_in_dw * 4;
	}

	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);

	return fence;
}