1 | /* |
2 | * Copyright 2009 Jerome Glisse. |
3 | * All Rights Reserved. |
4 | * |
5 | * Permission is hereby granted, free of charge, to any person obtaining a |
6 | * copy of this software and associated documentation files (the |
7 | * "Software"), to deal in the Software without restriction, including |
8 | * without limitation the rights to use, copy, modify, merge, publish, |
9 | * distribute, sub license, and/or sell copies of the Software, and to |
10 | * permit persons to whom the Software is furnished to do so, subject to |
11 | * the following conditions: |
12 | * |
13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
15 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
16 | * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, |
17 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
18 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
19 | * USE OR OTHER DEALINGS IN THE SOFTWARE. |
20 | * |
21 | * The above copyright notice and this permission notice (including the |
22 | * next paragraph) shall be included in all copies or substantial portions |
23 | * of the Software. |
24 | * |
25 | */ |
26 | /* |
27 | * Authors: |
28 | * Jerome Glisse <glisse@freedesktop.org> |
29 | * Thomas Hellstrom <thomas-at-tungstengraphics-dot-com> |
30 | * Dave Airlie |
31 | */ |
32 | |
33 | #include <linux/dma-mapping.h> |
34 | #include <linux/iommu.h> |
35 | #include <linux/pagemap.h> |
36 | #include <linux/sched/task.h> |
37 | #include <linux/sched/mm.h> |
38 | #include <linux/seq_file.h> |
39 | #include <linux/slab.h> |
40 | #include <linux/swap.h> |
41 | #include <linux/dma-buf.h> |
42 | #include <linux/sizes.h> |
43 | #include <linux/module.h> |
44 | |
45 | #include <drm/drm_drv.h> |
46 | #include <drm/ttm/ttm_bo.h> |
47 | #include <drm/ttm/ttm_placement.h> |
48 | #include <drm/ttm/ttm_range_manager.h> |
49 | #include <drm/ttm/ttm_tt.h> |
50 | |
51 | #include <drm/amdgpu_drm.h> |
52 | |
53 | #include "amdgpu.h" |
54 | #include "amdgpu_object.h" |
55 | #include "amdgpu_trace.h" |
56 | #include "amdgpu_amdkfd.h" |
57 | #include "amdgpu_sdma.h" |
58 | #include "amdgpu_ras.h" |
59 | #include "amdgpu_hmm.h" |
60 | #include "amdgpu_atomfirmware.h" |
61 | #include "amdgpu_res_cursor.h" |
62 | #include "bif/bif_4_1_d.h" |
63 | |
64 | MODULE_IMPORT_NS(DMA_BUF); |
65 | |
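/* Upper bound, in dwords, on a single chunk moved when the CPU reads VRAM */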
66 | #define AMDGPU_TTM_VRAM_MAX_DW_READ ((size_t)128) |
67 | |
68 | static int amdgpu_ttm_backend_bind(struct ttm_device *bdev, |
69 | struct ttm_tt *ttm, |
70 | struct ttm_resource *bo_mem); |
71 | static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev, |
72 | struct ttm_tt *ttm); |
73 | |
74 | static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev, |
75 | unsigned int type, |
76 | uint64_t size_in_page) |
77 | { |
	return ttm_range_man_init(&adev->mman.bdev, type,
				  false, size_in_page);
80 | } |
81 | |
82 | /** |
83 | * amdgpu_evict_flags - Compute placement flags |
84 | * |
85 | * @bo: The buffer object to evict |
86 | * @placement: Possible destination(s) for evicted BO |
87 | * |
88 | * Fill in placement data when ttm_bo_evict() is called |
89 | */ |
90 | static void amdgpu_evict_flags(struct ttm_buffer_object *bo, |
91 | struct ttm_placement *placement) |
92 | { |
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
94 | struct amdgpu_bo *abo; |
95 | static const struct ttm_place placements = { |
96 | .fpfn = 0, |
97 | .lpfn = 0, |
98 | .mem_type = TTM_PL_SYSTEM, |
99 | .flags = 0 |
100 | }; |
101 | |
102 | /* Don't handle scatter gather BOs */ |
103 | if (bo->type == ttm_bo_type_sg) { |
104 | placement->num_placement = 0; |
105 | placement->num_busy_placement = 0; |
106 | return; |
107 | } |
108 | |
109 | /* Object isn't an AMDGPU object so ignore */ |
110 | if (!amdgpu_bo_is_amdgpu_bo(bo)) { |
111 | placement->placement = &placements; |
112 | placement->busy_placement = &placements; |
113 | placement->num_placement = 1; |
114 | placement->num_busy_placement = 1; |
115 | return; |
116 | } |
117 | |
	abo = ttm_to_amdgpu_bo(bo);
119 | if (abo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) { |
120 | placement->num_placement = 0; |
121 | placement->num_busy_placement = 0; |
122 | return; |
123 | } |
124 | |
125 | switch (bo->resource->mem_type) { |
126 | case AMDGPU_PL_GDS: |
127 | case AMDGPU_PL_GWS: |
128 | case AMDGPU_PL_OA: |
129 | case AMDGPU_PL_DOORBELL: |
130 | placement->num_placement = 0; |
131 | placement->num_busy_placement = 0; |
132 | return; |
133 | |
134 | case TTM_PL_VRAM: |
135 | if (!adev->mman.buffer_funcs_enabled) { |
136 | /* Move to system memory */ |
137 | amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); |
		} else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
			   !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
			   amdgpu_bo_in_cpu_visible_vram(abo)) {
141 | |
142 | /* Try evicting to the CPU inaccessible part of VRAM |
143 | * first, but only set GTT as busy placement, so this |
144 | * BO will be evicted to GTT rather than causing other |
145 | * BOs to be evicted from VRAM |
146 | */ |
147 | amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM | |
148 | AMDGPU_GEM_DOMAIN_GTT | |
149 | AMDGPU_GEM_DOMAIN_CPU); |
150 | abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; |
151 | abo->placements[0].lpfn = 0; |
152 | abo->placement.busy_placement = &abo->placements[1]; |
153 | abo->placement.num_busy_placement = 1; |
154 | } else { |
155 | /* Move to GTT memory */ |
156 | amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT | |
157 | AMDGPU_GEM_DOMAIN_CPU); |
158 | } |
159 | break; |
160 | case TTM_PL_TT: |
161 | case AMDGPU_PL_PREEMPT: |
162 | default: |
163 | amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); |
164 | break; |
165 | } |
166 | *placement = abo->placement; |
167 | } |
168 | |
169 | /** |
170 | * amdgpu_ttm_map_buffer - Map memory into the GART windows |
171 | * @bo: buffer object to map |
172 | * @mem: memory object to map |
173 | * @mm_cur: range to map |
174 | * @window: which GART window to use |
175 | * @ring: DMA ring to use for the copy |
176 | * @tmz: if we should setup a TMZ enabled mapping |
177 | * @size: in number of bytes to map, out number of bytes mapped |
178 | * @addr: resulting address inside the MC address space |
179 | * |
180 | * Setup one of the GART windows to access a specific piece of memory or return |
181 | * the physical address for local memory. |
182 | */ |
183 | static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, |
184 | struct ttm_resource *mem, |
185 | struct amdgpu_res_cursor *mm_cur, |
186 | unsigned int window, struct amdgpu_ring *ring, |
187 | bool tmz, uint64_t *size, uint64_t *addr) |
188 | { |
189 | struct amdgpu_device *adev = ring->adev; |
190 | unsigned int offset, num_pages, num_dw, num_bytes; |
191 | uint64_t src_addr, dst_addr; |
192 | struct amdgpu_job *job; |
193 | void *cpu_addr; |
194 | uint64_t flags; |
195 | unsigned int i; |
196 | int r; |
197 | |
198 | BUG_ON(adev->mman.buffer_funcs->copy_max_bytes < |
199 | AMDGPU_GTT_MAX_TRANSFER_SIZE * 8); |
200 | |
201 | if (WARN_ON(mem->mem_type == AMDGPU_PL_PREEMPT)) |
202 | return -EINVAL; |
203 | |
204 | /* Map only what can't be accessed directly */ |
205 | if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) { |
		*addr = amdgpu_ttm_domain_start(adev, mem->mem_type) +
			mm_cur->start;
208 | return 0; |
209 | } |
210 | |
211 | |
212 | /* |
213 | * If start begins at an offset inside the page, then adjust the size |
214 | * and addr accordingly |
215 | */ |
216 | offset = mm_cur->start & ~PAGE_MASK; |
217 | |
218 | num_pages = PFN_UP(*size + offset); |
219 | num_pages = min_t(uint32_t, num_pages, AMDGPU_GTT_MAX_TRANSFER_SIZE); |
220 | |
221 | *size = min(*size, (uint64_t)num_pages * PAGE_SIZE - offset); |
222 | |
223 | *addr = adev->gmc.gart_start; |
224 | *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE * |
225 | AMDGPU_GPU_PAGE_SIZE; |
226 | *addr += offset; |
227 | |
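	/*
	 * The IB holds the copy command (num_dw dwords) followed by one
	 * 8-byte GART PTE per GPU page; the PTEs are written through
	 * cpu_addr below and the SDMA copy moves them into the GART table.
	 */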
228 | num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); |
229 | num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE; |
230 | |
	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
				     AMDGPU_FENCE_OWNER_UNDEFINED,
				     num_dw * 4 + num_bytes,
				     AMDGPU_IB_POOL_DELAYED, &job);
235 | if (r) |
236 | return r; |
237 | |
238 | src_addr = num_dw * 4; |
239 | src_addr += job->ibs[0].gpu_addr; |
240 | |
	dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
242 | dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8; |
243 | amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, |
244 | dst_addr, num_bytes, false); |
245 | |
246 | amdgpu_ring_pad_ib(ring, &job->ibs[0]); |
247 | WARN_ON(job->ibs[0].length_dw > num_dw); |
248 | |
	flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, mem);
250 | if (tmz) |
251 | flags |= AMDGPU_PTE_TMZ; |
252 | |
253 | cpu_addr = &job->ibs[0].ptr[num_dw]; |
254 | |
255 | if (mem->mem_type == TTM_PL_TT) { |
256 | dma_addr_t *dma_addr; |
257 | |
258 | dma_addr = &bo->ttm->dma_address[mm_cur->start >> PAGE_SHIFT]; |
		amdgpu_gart_map(adev, 0, num_pages, dma_addr, flags, cpu_addr);
260 | } else { |
261 | dma_addr_t dma_address; |
262 | |
263 | dma_address = mm_cur->start; |
264 | dma_address += adev->vm_manager.vram_base_offset; |
265 | |
266 | for (i = 0; i < num_pages; ++i) { |
			amdgpu_gart_map(adev, i << PAGE_SHIFT, 1, &dma_address,
					flags, cpu_addr);
269 | dma_address += PAGE_SIZE; |
270 | } |
271 | } |
272 | |
	dma_fence_put(amdgpu_job_submit(job));
274 | return 0; |
275 | } |
276 | |
277 | /** |
278 | * amdgpu_ttm_copy_mem_to_mem - Helper function for copy |
279 | * @adev: amdgpu device |
280 | * @src: buffer/address where to read from |
281 | * @dst: buffer/address where to write to |
282 | * @size: number of bytes to copy |
283 | * @tmz: if a secure copy should be used |
284 | * @resv: resv object to sync to |
285 | * @f: Returns the last fence if multiple jobs are submitted. |
286 | * |
287 | * The function copies @size bytes from {src->mem + src->offset} to |
288 | * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a |
289 | * move and different for a BO to BO copy. |
290 | * |
291 | */ |
292 | int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, |
293 | const struct amdgpu_copy_mem *src, |
294 | const struct amdgpu_copy_mem *dst, |
295 | uint64_t size, bool tmz, |
296 | struct dma_resv *resv, |
297 | struct dma_fence **f) |
298 | { |
299 | struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; |
300 | struct amdgpu_res_cursor src_mm, dst_mm; |
301 | struct dma_fence *fence = NULL; |
302 | int r = 0; |
303 | |
304 | if (!adev->mman.buffer_funcs_enabled) { |
305 | DRM_ERROR("Trying to move memory with ring turned off.\n" ); |
306 | return -EINVAL; |
307 | } |
308 | |
	amdgpu_res_first(src->mem, src->offset, size, &src_mm);
	amdgpu_res_first(dst->mem, dst->offset, size, &dst_mm);
311 | |
312 | mutex_lock(&adev->mman.gtt_window_lock); |
313 | while (src_mm.remaining) { |
314 | uint64_t from, to, cur_size; |
315 | struct dma_fence *next; |
316 | |
317 | /* Never copy more than 256MiB at once to avoid a timeout */ |
318 | cur_size = min3(src_mm.size, dst_mm.size, 256ULL << 20); |
319 | |
320 | /* Map src to window 0 and dst to window 1. */ |
		r = amdgpu_ttm_map_buffer(src->bo, src->mem, &src_mm,
					  0, ring, tmz, &cur_size, &from);
323 | if (r) |
324 | goto error; |
325 | |
		r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, &dst_mm,
					  1, ring, tmz, &cur_size, &to);
328 | if (r) |
329 | goto error; |
330 | |
		r = amdgpu_copy_buffer(ring, from, to, cur_size, resv,
				       &next, false, true, tmz);
333 | if (r) |
334 | goto error; |
335 | |
336 | dma_fence_put(fence); |
337 | fence = next; |
338 | |
		amdgpu_res_next(&src_mm, cur_size);
		amdgpu_res_next(&dst_mm, cur_size);
341 | } |
342 | error: |
	mutex_unlock(&adev->mman.gtt_window_lock);
344 | if (f) |
345 | *f = dma_fence_get(fence); |
346 | dma_fence_put(fence); |
347 | return r; |
348 | } |
349 | |
350 | /* |
351 | * amdgpu_move_blit - Copy an entire buffer to another buffer |
352 | * |
353 | * This is a helper called by amdgpu_bo_move() and amdgpu_move_vram_ram() to |
354 | * help move buffers to and from VRAM. |
355 | */ |
356 | static int amdgpu_move_blit(struct ttm_buffer_object *bo, |
357 | bool evict, |
358 | struct ttm_resource *new_mem, |
359 | struct ttm_resource *old_mem) |
360 | { |
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
363 | struct amdgpu_copy_mem src, dst; |
364 | struct dma_fence *fence = NULL; |
365 | int r; |
366 | |
367 | src.bo = bo; |
368 | dst.bo = bo; |
369 | src.mem = old_mem; |
370 | dst.mem = new_mem; |
371 | src.offset = 0; |
372 | dst.offset = 0; |
373 | |
	r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
				       new_mem->size,
				       amdgpu_bo_encrypted(abo),
				       bo->base.resv, &fence);
378 | if (r) |
379 | goto error; |
380 | |
381 | /* clear the space being freed */ |
382 | if (old_mem->mem_type == TTM_PL_VRAM && |
383 | (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) { |
384 | struct dma_fence *wipe_fence = NULL; |
385 | |
		r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
				       false);
388 | if (r) { |
389 | goto error; |
390 | } else if (wipe_fence) { |
391 | dma_fence_put(fence); |
392 | fence = wipe_fence; |
393 | } |
394 | } |
395 | |
396 | /* Always block for VM page tables before committing the new location */ |
397 | if (bo->type == ttm_bo_type_kernel) |
		r = ttm_bo_move_accel_cleanup(bo, fence, true, false, new_mem);
	else
		r = ttm_bo_move_accel_cleanup(bo, fence, evict, true, new_mem);
401 | dma_fence_put(fence); |
402 | return r; |
403 | |
404 | error: |
405 | if (fence) |
		dma_fence_wait(fence, false);
407 | dma_fence_put(fence); |
408 | return r; |
409 | } |
410 | |
411 | /* |
412 | * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy |
413 | * |
414 | * Called by amdgpu_bo_move() |
415 | */ |
416 | static bool amdgpu_mem_visible(struct amdgpu_device *adev, |
417 | struct ttm_resource *mem) |
418 | { |
419 | u64 mem_size = (u64)mem->size; |
420 | struct amdgpu_res_cursor cursor; |
421 | u64 end; |
422 | |
423 | if (mem->mem_type == TTM_PL_SYSTEM || |
424 | mem->mem_type == TTM_PL_TT) |
425 | return true; |
426 | if (mem->mem_type != TTM_PL_VRAM) |
427 | return false; |
428 | |
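	/* Walk the VRAM resource and require a single contiguous range that
	 * ends inside the CPU-visible aperture.
	 */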
	amdgpu_res_first(mem, 0, mem_size, &cursor);
430 | end = cursor.start + cursor.size; |
431 | while (cursor.remaining) { |
		amdgpu_res_next(&cursor, cursor.size);
433 | |
434 | if (!cursor.remaining) |
435 | break; |
436 | |
437 | /* ttm_resource_ioremap only supports contiguous memory */ |
438 | if (end != cursor.start) |
439 | return false; |
440 | |
441 | end = cursor.start + cursor.size; |
442 | } |
443 | |
444 | return end <= adev->gmc.visible_vram_size; |
445 | } |
446 | |
447 | /* |
448 | * amdgpu_bo_move - Move a buffer object to a new memory location |
449 | * |
450 | * Called by ttm_bo_handle_move_mem() |
451 | */ |
452 | static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, |
453 | struct ttm_operation_ctx *ctx, |
454 | struct ttm_resource *new_mem, |
455 | struct ttm_place *hop) |
456 | { |
457 | struct amdgpu_device *adev; |
458 | struct amdgpu_bo *abo; |
459 | struct ttm_resource *old_mem = bo->resource; |
460 | int r; |
461 | |
462 | if (new_mem->mem_type == TTM_PL_TT || |
463 | new_mem->mem_type == AMDGPU_PL_PREEMPT) { |
		r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, new_mem);
465 | if (r) |
466 | return r; |
467 | } |
468 | |
	abo = ttm_to_amdgpu_bo(bo);
	adev = amdgpu_ttm_adev(bo->bdev);
471 | |
472 | if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM && |
473 | bo->ttm == NULL)) { |
474 | ttm_bo_move_null(bo, new_mem); |
475 | goto out; |
476 | } |
477 | if (old_mem->mem_type == TTM_PL_SYSTEM && |
478 | (new_mem->mem_type == TTM_PL_TT || |
479 | new_mem->mem_type == AMDGPU_PL_PREEMPT)) { |
480 | ttm_bo_move_null(bo, new_mem); |
481 | goto out; |
482 | } |
483 | if ((old_mem->mem_type == TTM_PL_TT || |
484 | old_mem->mem_type == AMDGPU_PL_PREEMPT) && |
485 | new_mem->mem_type == TTM_PL_SYSTEM) { |
486 | r = ttm_bo_wait_ctx(bo, ctx); |
487 | if (r) |
488 | return r; |
489 | |
		amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
		ttm_resource_free(bo, &bo->resource);
492 | ttm_bo_assign_mem(bo, new_mem); |
493 | goto out; |
494 | } |
495 | |
496 | if (old_mem->mem_type == AMDGPU_PL_GDS || |
497 | old_mem->mem_type == AMDGPU_PL_GWS || |
498 | old_mem->mem_type == AMDGPU_PL_OA || |
499 | old_mem->mem_type == AMDGPU_PL_DOORBELL || |
500 | new_mem->mem_type == AMDGPU_PL_GDS || |
501 | new_mem->mem_type == AMDGPU_PL_GWS || |
502 | new_mem->mem_type == AMDGPU_PL_OA || |
503 | new_mem->mem_type == AMDGPU_PL_DOORBELL) { |
504 | /* Nothing to save here */ |
505 | ttm_bo_move_null(bo, new_mem); |
506 | goto out; |
507 | } |
508 | |
509 | if (bo->type == ttm_bo_type_device && |
510 | new_mem->mem_type == TTM_PL_VRAM && |
511 | old_mem->mem_type != TTM_PL_VRAM) { |
512 | /* amdgpu_bo_fault_reserve_notify will re-set this if the CPU |
513 | * accesses the BO after it's moved. |
514 | */ |
515 | abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; |
516 | } |
517 | |
518 | if (adev->mman.buffer_funcs_enabled) { |
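		/* Direct VRAM <-> system moves need a GTT bounce buffer, so
		 * ask TTM for a temporary hop through TTM_PL_TT.
		 */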
519 | if (((old_mem->mem_type == TTM_PL_SYSTEM && |
520 | new_mem->mem_type == TTM_PL_VRAM) || |
521 | (old_mem->mem_type == TTM_PL_VRAM && |
522 | new_mem->mem_type == TTM_PL_SYSTEM))) { |
523 | hop->fpfn = 0; |
524 | hop->lpfn = 0; |
525 | hop->mem_type = TTM_PL_TT; |
526 | hop->flags = TTM_PL_FLAG_TEMPORARY; |
527 | return -EMULTIHOP; |
528 | } |
529 | |
530 | r = amdgpu_move_blit(bo, evict, new_mem, old_mem); |
531 | } else { |
532 | r = -ENODEV; |
533 | } |
534 | |
535 | if (r) { |
536 | /* Check that all memory is CPU accessible */ |
		if (!amdgpu_mem_visible(adev, old_mem) ||
		    !amdgpu_mem_visible(adev, new_mem)) {
			pr_err("Move buffer fallback to memcpy unavailable\n");
540 | return r; |
541 | } |
542 | |
543 | r = ttm_bo_move_memcpy(bo, ctx, new_mem); |
544 | if (r) |
545 | return r; |
546 | } |
547 | |
548 | out: |
549 | /* update statistics */ |
	atomic64_add(bo->base.size, &adev->num_bytes_moved);
551 | amdgpu_bo_move_notify(bo, evict, new_mem); |
552 | return 0; |
553 | } |
554 | |
555 | /* |
556 | * amdgpu_ttm_io_mem_reserve - Reserve a block of memory during a fault |
557 | * |
558 | * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault() |
559 | */ |
560 | static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, |
561 | struct ttm_resource *mem) |
562 | { |
563 | struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); |
564 | size_t bus_size = (size_t)mem->size; |
565 | |
566 | switch (mem->mem_type) { |
567 | case TTM_PL_SYSTEM: |
568 | /* system memory */ |
569 | return 0; |
570 | case TTM_PL_TT: |
571 | case AMDGPU_PL_PREEMPT: |
572 | break; |
573 | case TTM_PL_VRAM: |
574 | mem->bus.offset = mem->start << PAGE_SHIFT; |
575 | /* check if it's visible */ |
576 | if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size) |
577 | return -EINVAL; |
578 | |
579 | if (adev->mman.aper_base_kaddr && |
580 | mem->placement & TTM_PL_FLAG_CONTIGUOUS) |
581 | mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr + |
582 | mem->bus.offset; |
583 | |
584 | mem->bus.offset += adev->gmc.aper_base; |
585 | mem->bus.is_iomem = true; |
586 | break; |
587 | case AMDGPU_PL_DOORBELL: |
588 | mem->bus.offset = mem->start << PAGE_SHIFT; |
589 | mem->bus.offset += adev->doorbell.base; |
590 | mem->bus.is_iomem = true; |
591 | mem->bus.caching = ttm_uncached; |
592 | break; |
593 | default: |
594 | return -EINVAL; |
595 | } |
596 | return 0; |
597 | } |
598 | |
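/*
 * amdgpu_ttm_io_mem_pfn - Return the PFN backing a page offset within a BO
 *
 * Used for CPU page faults on doorbell and VRAM mappings.
 */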
599 | static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, |
600 | unsigned long page_offset) |
601 | { |
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
603 | struct amdgpu_res_cursor cursor; |
604 | |
	amdgpu_res_first(bo->resource, (u64)page_offset << PAGE_SHIFT, 0,
			 &cursor);
607 | |
608 | if (bo->resource->mem_type == AMDGPU_PL_DOORBELL) |
609 | return ((uint64_t)(adev->doorbell.base + cursor.start)) >> PAGE_SHIFT; |
610 | |
611 | return (adev->gmc.aper_base + cursor.start) >> PAGE_SHIFT; |
612 | } |
613 | |
614 | /** |
615 | * amdgpu_ttm_domain_start - Returns GPU start address |
616 | * @adev: amdgpu device object |
617 | * @type: type of the memory |
618 | * |
619 | * Returns: |
620 | * GPU start address of a memory domain |
621 | */ |
622 | |
623 | uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type) |
624 | { |
625 | switch (type) { |
626 | case TTM_PL_TT: |
627 | return adev->gmc.gart_start; |
628 | case TTM_PL_VRAM: |
629 | return adev->gmc.vram_start; |
630 | } |
631 | |
632 | return 0; |
633 | } |
634 | |
635 | /* |
636 | * TTM backend functions. |
637 | */ |
struct amdgpu_ttm_tt {
	struct ttm_tt ttm;
	struct drm_gem_object *gobj;
	u64 offset;			/* GART offset of the bound range */
	uint64_t userptr;		/* start of the user VA range, if any */
	struct task_struct *usertask;	/* task owning the userptr */
	uint32_t userflags;		/* AMDGPU_GEM_USERPTR_* flags */
	bool bound;			/* pages are bound into the GART */
	int32_t pool_id;		/* memory partition pool, negative for the default pool */
};
648 | |
649 | #define ttm_to_amdgpu_ttm_tt(ptr) container_of(ptr, struct amdgpu_ttm_tt, ttm) |
650 | |
651 | #ifdef CONFIG_DRM_AMDGPU_USERPTR |
652 | /* |
653 | * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user |
654 | * memory and start HMM tracking CPU page table update |
655 | * |
 * Calling function must call amdgpu_ttm_tt_get_user_pages_done() once and
 * only once afterwards to stop HMM tracking
658 | */ |
659 | int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages, |
660 | struct hmm_range **range) |
661 | { |
662 | struct ttm_tt *ttm = bo->tbo.ttm; |
663 | struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); |
664 | unsigned long start = gtt->userptr; |
665 | struct vm_area_struct *vma; |
666 | struct mm_struct *mm; |
667 | bool readonly; |
668 | int r = 0; |
669 | |
670 | /* Make sure get_user_pages_done() can cleanup gracefully */ |
671 | *range = NULL; |
672 | |
673 | mm = bo->notifier.mm; |
674 | if (unlikely(!mm)) { |
675 | DRM_DEBUG_DRIVER("BO is not registered?\n" ); |
676 | return -EFAULT; |
677 | } |
678 | |
679 | if (!mmget_not_zero(mm)) /* Happens during process shutdown */ |
680 | return -ESRCH; |
681 | |
682 | mmap_read_lock(mm); |
	vma = vma_lookup(mm, start);
684 | if (unlikely(!vma)) { |
685 | r = -EFAULT; |
686 | goto out_unlock; |
687 | } |
688 | if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && |
689 | vma->vm_file)) { |
690 | r = -EPERM; |
691 | goto out_unlock; |
692 | } |
693 | |
694 | readonly = amdgpu_ttm_tt_is_readonly(ttm); |
	r = amdgpu_hmm_range_get_pages(&bo->notifier, start, ttm->num_pages,
				       readonly, NULL, pages, range);
697 | out_unlock: |
698 | mmap_read_unlock(mm); |
699 | if (r) |
700 | pr_debug("failed %d to get user pages 0x%lx\n" , r, start); |
701 | |
702 | mmput(mm); |
703 | |
704 | return r; |
705 | } |
706 | |
707 | /* amdgpu_ttm_tt_discard_user_pages - Discard range and pfn array allocations |
708 | */ |
709 | void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm, |
710 | struct hmm_range *range) |
711 | { |
712 | struct amdgpu_ttm_tt *gtt = (void *)ttm; |
713 | |
714 | if (gtt && gtt->userptr && range) |
		amdgpu_hmm_range_get_pages_done(range);
716 | } |
717 | |
718 | /* |
 * amdgpu_ttm_tt_get_user_pages_done - stop HMM tracking of CPU page table changes
720 | * Check if the pages backing this ttm range have been invalidated |
721 | * |
722 | * Returns: true if pages are still valid |
723 | */ |
724 | bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, |
725 | struct hmm_range *range) |
726 | { |
727 | struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); |
728 | |
729 | if (!gtt || !gtt->userptr || !range) |
730 | return false; |
731 | |
732 | DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%x\n" , |
733 | gtt->userptr, ttm->num_pages); |
734 | |
	WARN_ONCE(!range->hmm_pfns, "No user pages to check\n");
736 | |
	return !amdgpu_hmm_range_get_pages_done(range);
738 | } |
739 | #endif |
740 | |
741 | /* |
742 | * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary. |
743 | * |
744 | * Called by amdgpu_cs_list_validate(). This creates the page list |
745 | * that backs user memory and will ultimately be mapped into the device |
746 | * address space. |
747 | */ |
748 | void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) |
749 | { |
750 | unsigned long i; |
751 | |
752 | for (i = 0; i < ttm->num_pages; ++i) |
753 | ttm->pages[i] = pages ? pages[i] : NULL; |
754 | } |
755 | |
756 | /* |
757 | * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the user pages |
758 | * |
759 | * Called by amdgpu_ttm_backend_bind() |
760 | **/ |
761 | static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev, |
762 | struct ttm_tt *ttm) |
763 | { |
764 | struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); |
765 | struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); |
766 | int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); |
767 | enum dma_data_direction direction = write ? |
768 | DMA_BIDIRECTIONAL : DMA_TO_DEVICE; |
769 | int r; |
770 | |
771 | /* Allocate an SG array and squash pages into it */ |
	r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
				      (u64)ttm->num_pages << PAGE_SHIFT,
				      GFP_KERNEL);
775 | if (r) |
776 | goto release_sg; |
777 | |
778 | /* Map SG to device */ |
	r = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
780 | if (r) |
781 | goto release_sg; |
782 | |
783 | /* convert SG to linear array of pages and dma addresses */ |
	drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
				       ttm->num_pages);
786 | |
787 | return 0; |
788 | |
789 | release_sg: |
	kfree(ttm->sg);
791 | ttm->sg = NULL; |
792 | return r; |
793 | } |
794 | |
795 | /* |
796 | * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages |
797 | */ |
798 | static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev, |
799 | struct ttm_tt *ttm) |
800 | { |
801 | struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); |
802 | struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); |
803 | int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); |
804 | enum dma_data_direction direction = write ? |
805 | DMA_BIDIRECTIONAL : DMA_TO_DEVICE; |
806 | |
807 | /* double check that we don't free the table twice */ |
808 | if (!ttm->sg || !ttm->sg->sgl) |
809 | return; |
810 | |
811 | /* unmap the pages mapped to the device */ |
	dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
813 | sg_free_table(ttm->sg); |
814 | } |
815 | |
816 | /* |
817 | * total_pages is constructed as MQD0+CtrlStack0 + MQD1+CtrlStack1 + ... |
818 | * MQDn+CtrlStackn where n is the number of XCCs per partition. |
819 | * pages_per_xcc is the size of one MQD+CtrlStack. The first page is MQD |
820 | * and uses memory type default, UC. The rest of pages_per_xcc are |
821 | * Ctrl stack and modify their memory type to NC. |
822 | */ |
823 | static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev, |
824 | struct ttm_tt *ttm, uint64_t flags) |
825 | { |
826 | struct amdgpu_ttm_tt *gtt = (void *)ttm; |
827 | uint64_t total_pages = ttm->num_pages; |
828 | int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp); |
829 | uint64_t page_idx, pages_per_xcc; |
830 | int i; |
831 | uint64_t ctrl_flags = (flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | |
832 | AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC); |
833 | |
834 | pages_per_xcc = total_pages; |
835 | do_div(pages_per_xcc, num_xcc); |
836 | |
837 | for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) { |
838 | /* MQD page: use default flags */ |
		amdgpu_gart_bind(adev,
				 gtt->offset + (page_idx << PAGE_SHIFT),
				 1, &gtt->ttm.dma_address[page_idx], flags);
842 | /* |
843 | * Ctrl pages - modify the memory type to NC (ctrl_flags) from |
844 | * the second page of the BO onward. |
845 | */ |
		amdgpu_gart_bind(adev,
				 gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
				 pages_per_xcc - 1,
				 &gtt->ttm.dma_address[page_idx + 1],
				 ctrl_flags);
851 | } |
852 | } |
853 | |
854 | static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev, |
855 | struct ttm_buffer_object *tbo, |
856 | uint64_t flags) |
857 | { |
858 | struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo); |
859 | struct ttm_tt *ttm = tbo->ttm; |
860 | struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); |
861 | |
	if (amdgpu_bo_encrypted(abo))
863 | flags |= AMDGPU_PTE_TMZ; |
864 | |
865 | if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) { |
866 | amdgpu_ttm_gart_bind_gfx9_mqd(adev, ttm, flags); |
867 | } else { |
		amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
				 gtt->ttm.dma_address, flags);
870 | } |
871 | } |
872 | |
873 | /* |
874 | * amdgpu_ttm_backend_bind - Bind GTT memory |
875 | * |
876 | * Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem(). |
877 | * This handles binding GTT memory to the device address space. |
878 | */ |
879 | static int amdgpu_ttm_backend_bind(struct ttm_device *bdev, |
880 | struct ttm_tt *ttm, |
881 | struct ttm_resource *bo_mem) |
882 | { |
883 | struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); |
884 | struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); |
885 | uint64_t flags; |
886 | int r; |
887 | |
888 | if (!bo_mem) |
889 | return -EINVAL; |
890 | |
891 | if (gtt->bound) |
892 | return 0; |
893 | |
894 | if (gtt->userptr) { |
895 | r = amdgpu_ttm_tt_pin_userptr(bdev, ttm); |
896 | if (r) { |
897 | DRM_ERROR("failed to pin userptr\n" ); |
898 | return r; |
899 | } |
900 | } else if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) { |
901 | if (!ttm->sg) { |
902 | struct dma_buf_attachment *attach; |
903 | struct sg_table *sgt; |
904 | |
905 | attach = gtt->gobj->import_attach; |
906 | sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL); |
			if (IS_ERR(sgt))
				return PTR_ERR(sgt);
909 | |
910 | ttm->sg = sgt; |
911 | } |
912 | |
		drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
					       ttm->num_pages);
915 | } |
916 | |
917 | if (!ttm->num_pages) { |
918 | WARN(1, "nothing to bind %u pages for mreg %p back %p!\n" , |
919 | ttm->num_pages, bo_mem, ttm); |
920 | } |
921 | |
	if (bo_mem->mem_type != TTM_PL_TT ||
	    !amdgpu_gtt_mgr_has_gart_addr(bo_mem)) {
924 | gtt->offset = AMDGPU_BO_INVALID_OFFSET; |
925 | return 0; |
926 | } |
927 | |
928 | /* compute PTE flags relevant to this BO memory */ |
	flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);
930 | |
931 | /* bind pages into GART page tables */ |
932 | gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; |
	amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
			 gtt->ttm.dma_address, flags);
935 | gtt->bound = true; |
936 | return 0; |
937 | } |
938 | |
939 | /* |
940 | * amdgpu_ttm_alloc_gart - Make sure buffer object is accessible either |
941 | * through AGP or GART aperture. |
942 | * |
943 | * If bo is accessible through AGP aperture, then use AGP aperture |
944 | * to access bo; otherwise allocate logical space in GART aperture |
945 | * and map bo to GART aperture. |
946 | */ |
947 | int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) |
948 | { |
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
950 | struct ttm_operation_ctx ctx = { false, false }; |
951 | struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(bo->ttm); |
952 | struct ttm_placement placement; |
953 | struct ttm_place placements; |
954 | struct ttm_resource *tmp; |
955 | uint64_t addr, flags; |
956 | int r; |
957 | |
958 | if (bo->resource->start != AMDGPU_BO_INVALID_OFFSET) |
959 | return 0; |
960 | |
961 | addr = amdgpu_gmc_agp_addr(bo); |
962 | if (addr != AMDGPU_BO_INVALID_OFFSET) { |
963 | bo->resource->start = addr >> PAGE_SHIFT; |
964 | return 0; |
965 | } |
966 | |
967 | /* allocate GART space */ |
968 | placement.num_placement = 1; |
969 | placement.placement = &placements; |
970 | placement.num_busy_placement = 1; |
971 | placement.busy_placement = &placements; |
972 | placements.fpfn = 0; |
973 | placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT; |
974 | placements.mem_type = TTM_PL_TT; |
975 | placements.flags = bo->resource->placement; |
976 | |
	r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
978 | if (unlikely(r)) |
979 | return r; |
980 | |
981 | /* compute PTE flags for this buffer object */ |
	flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, tmp);
983 | |
984 | /* Bind pages */ |
985 | gtt->offset = (u64)tmp->start << PAGE_SHIFT; |
	amdgpu_ttm_gart_bind(adev, bo, flags);
987 | amdgpu_gart_invalidate_tlb(adev); |
	ttm_resource_free(bo, &bo->resource);
	ttm_bo_assign_mem(bo, tmp);
990 | |
991 | return 0; |
992 | } |
993 | |
994 | /* |
995 | * amdgpu_ttm_recover_gart - Rebind GTT pages |
996 | * |
997 | * Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to |
998 | * rebind GTT pages during a GPU reset. |
999 | */ |
1000 | void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo) |
1001 | { |
	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
1003 | uint64_t flags; |
1004 | |
1005 | if (!tbo->ttm) |
1006 | return; |
1007 | |
	flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, tbo->resource);
1009 | amdgpu_ttm_gart_bind(adev, tbo, flags); |
1010 | } |
1011 | |
1012 | /* |
1013 | * amdgpu_ttm_backend_unbind - Unbind GTT mapped pages |
1014 | * |
1015 | * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and |
1016 | * ttm_tt_destroy(). |
1017 | */ |
1018 | static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev, |
1019 | struct ttm_tt *ttm) |
1020 | { |
1021 | struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); |
1022 | struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); |
1023 | |
1024 | /* if the pages have userptr pinning then clear that first */ |
1025 | if (gtt->userptr) { |
1026 | amdgpu_ttm_tt_unpin_userptr(bdev, ttm); |
1027 | } else if (ttm->sg && gtt->gobj->import_attach) { |
1028 | struct dma_buf_attachment *attach; |
1029 | |
1030 | attach = gtt->gobj->import_attach; |
1031 | dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL); |
1032 | ttm->sg = NULL; |
1033 | } |
1034 | |
1035 | if (!gtt->bound) |
1036 | return; |
1037 | |
1038 | if (gtt->offset == AMDGPU_BO_INVALID_OFFSET) |
1039 | return; |
1040 | |
1041 | /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */ |
	amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
1043 | gtt->bound = false; |
1044 | } |
1045 | |
1046 | static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev, |
1047 | struct ttm_tt *ttm) |
1048 | { |
1049 | struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); |
1050 | |
1051 | if (gtt->usertask) |
		put_task_struct(gtt->usertask);
1053 | |
	ttm_tt_fini(&gtt->ttm);
	kfree(gtt);
1056 | } |
1057 | |
1058 | /** |
1059 | * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO |
1060 | * |
1061 | * @bo: The buffer object to create a GTT ttm_tt object around |
1062 | * @page_flags: Page flags to be added to the ttm_tt object |
1063 | * |
1064 | * Called by ttm_tt_create(). |
1065 | */ |
1066 | static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, |
1067 | uint32_t page_flags) |
1068 | { |
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
1071 | struct amdgpu_ttm_tt *gtt; |
1072 | enum ttm_caching caching; |
1073 | |
	gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
1075 | if (!gtt) |
1076 | return NULL; |
1077 | |
1078 | gtt->gobj = &bo->base; |
1079 | if (adev->gmc.mem_partitions && abo->xcp_id >= 0) |
1080 | gtt->pool_id = KFD_XCP_MEM_ID(adev, abo->xcp_id); |
1081 | else |
1082 | gtt->pool_id = abo->xcp_id; |
1083 | |
1084 | if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) |
1085 | caching = ttm_write_combined; |
1086 | else |
1087 | caching = ttm_cached; |
1088 | |
1089 | /* allocate space for the uninitialized page entries */ |
	if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags, caching)) {
		kfree(gtt);
1092 | return NULL; |
1093 | } |
	return &gtt->ttm;
1095 | } |
1096 | |
1097 | /* |
1098 | * amdgpu_ttm_tt_populate - Map GTT pages visible to the device |
1099 | * |
1100 | * Map the pages of a ttm_tt object to an address space visible |
1101 | * to the underlying device. |
1102 | */ |
1103 | static int amdgpu_ttm_tt_populate(struct ttm_device *bdev, |
1104 | struct ttm_tt *ttm, |
1105 | struct ttm_operation_ctx *ctx) |
1106 | { |
1107 | struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); |
1108 | struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); |
1109 | struct ttm_pool *pool; |
1110 | pgoff_t i; |
1111 | int ret; |
1112 | |
1113 | /* user pages are bound by amdgpu_ttm_tt_pin_userptr() */ |
1114 | if (gtt->userptr) { |
		ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
1116 | if (!ttm->sg) |
1117 | return -ENOMEM; |
1118 | return 0; |
1119 | } |
1120 | |
1121 | if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) |
1122 | return 0; |
1123 | |
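	/* Prefer the per-memory-partition pool when this BO belongs to one */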
1124 | if (adev->mman.ttm_pools && gtt->pool_id >= 0) |
1125 | pool = &adev->mman.ttm_pools[gtt->pool_id]; |
1126 | else |
1127 | pool = &adev->mman.bdev.pool; |
	ret = ttm_pool_alloc(pool, ttm, ctx);
1129 | if (ret) |
1130 | return ret; |
1131 | |
1132 | for (i = 0; i < ttm->num_pages; ++i) |
1133 | ttm->pages[i]->mapping = bdev->dev_mapping; |
1134 | |
1135 | return 0; |
1136 | } |
1137 | |
1138 | /* |
1139 | * amdgpu_ttm_tt_unpopulate - unmap GTT pages and unpopulate page arrays |
1140 | * |
1141 | * Unmaps pages of a ttm_tt object from the device address space and |
1142 | * unpopulates the page array backing it. |
1143 | */ |
1144 | static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev, |
1145 | struct ttm_tt *ttm) |
1146 | { |
1147 | struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); |
1148 | struct amdgpu_device *adev; |
1149 | struct ttm_pool *pool; |
1150 | pgoff_t i; |
1151 | |
1152 | amdgpu_ttm_backend_unbind(bdev, ttm); |
1153 | |
1154 | if (gtt->userptr) { |
1155 | amdgpu_ttm_tt_set_user_pages(ttm, NULL); |
		kfree(ttm->sg);
1157 | ttm->sg = NULL; |
1158 | return; |
1159 | } |
1160 | |
1161 | if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) |
1162 | return; |
1163 | |
1164 | for (i = 0; i < ttm->num_pages; ++i) |
1165 | ttm->pages[i]->mapping = NULL; |
1166 | |
1167 | adev = amdgpu_ttm_adev(bdev); |
1168 | |
1169 | if (adev->mman.ttm_pools && gtt->pool_id >= 0) |
1170 | pool = &adev->mman.ttm_pools[gtt->pool_id]; |
1171 | else |
1172 | pool = &adev->mman.bdev.pool; |
1173 | |
	return ttm_pool_free(pool, ttm);
1175 | } |
1176 | |
1177 | /** |
1178 | * amdgpu_ttm_tt_get_userptr - Return the userptr GTT ttm_tt for the current |
1179 | * task |
1180 | * |
1181 | * @tbo: The ttm_buffer_object that contains the userptr |
1182 | * @user_addr: The returned value |
1183 | */ |
1184 | int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo, |
1185 | uint64_t *user_addr) |
1186 | { |
1187 | struct amdgpu_ttm_tt *gtt; |
1188 | |
1189 | if (!tbo->ttm) |
1190 | return -EINVAL; |
1191 | |
1192 | gtt = (void *)tbo->ttm; |
1193 | *user_addr = gtt->userptr; |
1194 | return 0; |
1195 | } |
1196 | |
1197 | /** |
1198 | * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt for the current |
1199 | * task |
1200 | * |
1201 | * @bo: The ttm_buffer_object to bind this userptr to |
1202 | * @addr: The address in the current tasks VM space to use |
1203 | * @flags: Requirements of userptr object. |
1204 | * |
1205 | * Called by amdgpu_gem_userptr_ioctl() and kfd_ioctl_alloc_memory_of_gpu() to |
1206 | * bind userptr pages to current task and by kfd_ioctl_acquire_vm() to |
1207 | * initialize GPU VM for a KFD process. |
1208 | */ |
1209 | int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo, |
1210 | uint64_t addr, uint32_t flags) |
1211 | { |
1212 | struct amdgpu_ttm_tt *gtt; |
1213 | |
1214 | if (!bo->ttm) { |
1215 | /* TODO: We want a separate TTM object type for userptrs */ |
		bo->ttm = amdgpu_ttm_tt_create(bo, 0);
1217 | if (bo->ttm == NULL) |
1218 | return -ENOMEM; |
1219 | } |
1220 | |
1221 | /* Set TTM_TT_FLAG_EXTERNAL before populate but after create. */ |
1222 | bo->ttm->page_flags |= TTM_TT_FLAG_EXTERNAL; |
1223 | |
1224 | gtt = ttm_to_amdgpu_ttm_tt(bo->ttm); |
1225 | gtt->userptr = addr; |
1226 | gtt->userflags = flags; |
1227 | |
1228 | if (gtt->usertask) |
		put_task_struct(gtt->usertask);
	gtt->usertask = current->group_leader;
	get_task_struct(gtt->usertask);
1232 | |
1233 | return 0; |
1234 | } |
1235 | |
1236 | /* |
1237 | * amdgpu_ttm_tt_get_usermm - Return memory manager for ttm_tt object |
1238 | */ |
1239 | struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) |
1240 | { |
1241 | struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); |
1242 | |
1243 | if (gtt == NULL) |
1244 | return NULL; |
1245 | |
1246 | if (gtt->usertask == NULL) |
1247 | return NULL; |
1248 | |
1249 | return gtt->usertask->mm; |
1250 | } |
1251 | |
1252 | /* |
1253 | * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lays inside an |
1254 | * address range for the current task. |
1255 | * |
1256 | */ |
1257 | bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, |
1258 | unsigned long end, unsigned long *userptr) |
1259 | { |
1260 | struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); |
1261 | unsigned long size; |
1262 | |
1263 | if (gtt == NULL || !gtt->userptr) |
1264 | return false; |
1265 | |
1266 | /* Return false if no part of the ttm_tt object lies within |
1267 | * the range |
1268 | */ |
1269 | size = (unsigned long)gtt->ttm.num_pages * PAGE_SIZE; |
1270 | if (gtt->userptr > end || gtt->userptr + size <= start) |
1271 | return false; |
1272 | |
1273 | if (userptr) |
1274 | *userptr = gtt->userptr; |
1275 | return true; |
1276 | } |
1277 | |
1278 | /* |
 * amdgpu_ttm_tt_is_userptr - Are the pages backed by a userptr?
1280 | */ |
1281 | bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) |
1282 | { |
1283 | struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); |
1284 | |
1285 | if (gtt == NULL || !gtt->userptr) |
1286 | return false; |
1287 | |
1288 | return true; |
1289 | } |
1290 | |
1291 | /* |
1292 | * amdgpu_ttm_tt_is_readonly - Is the ttm_tt object read only? |
1293 | */ |
1294 | bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) |
1295 | { |
1296 | struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); |
1297 | |
1298 | if (gtt == NULL) |
1299 | return false; |
1300 | |
1301 | return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); |
1302 | } |
1303 | |
1304 | /** |
1305 | * amdgpu_ttm_tt_pde_flags - Compute PDE flags for ttm_tt object |
1306 | * |
1307 | * @ttm: The ttm_tt object to compute the flags for |
1308 | * @mem: The memory registry backing this ttm_tt object |
1309 | * |
1310 | * Figure out the flags to use for a VM PDE (Page Directory Entry). |
1311 | */ |
1312 | uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem) |
1313 | { |
1314 | uint64_t flags = 0; |
1315 | |
1316 | if (mem && mem->mem_type != TTM_PL_SYSTEM) |
1317 | flags |= AMDGPU_PTE_VALID; |
1318 | |
1319 | if (mem && (mem->mem_type == TTM_PL_TT || |
1320 | mem->mem_type == AMDGPU_PL_DOORBELL || |
1321 | mem->mem_type == AMDGPU_PL_PREEMPT)) { |
1322 | flags |= AMDGPU_PTE_SYSTEM; |
1323 | |
1324 | if (ttm->caching == ttm_cached) |
1325 | flags |= AMDGPU_PTE_SNOOPED; |
1326 | } |
1327 | |
1328 | if (mem && mem->mem_type == TTM_PL_VRAM && |
1329 | mem->bus.caching == ttm_cached) |
1330 | flags |= AMDGPU_PTE_SNOOPED; |
1331 | |
1332 | return flags; |
1333 | } |
1334 | |
1335 | /** |
1336 | * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object |
1337 | * |
1338 | * @adev: amdgpu_device pointer |
1339 | * @ttm: The ttm_tt object to compute the flags for |
1340 | * @mem: The memory registry backing this ttm_tt object |
1341 | * |
1342 | * Figure out the flags to use for a VM PTE (Page Table Entry). |
1343 | */ |
1344 | uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, |
1345 | struct ttm_resource *mem) |
1346 | { |
1347 | uint64_t flags = amdgpu_ttm_tt_pde_flags(ttm, mem); |
1348 | |
1349 | flags |= adev->gart.gart_pte_flags; |
1350 | flags |= AMDGPU_PTE_READABLE; |
1351 | |
1352 | if (!amdgpu_ttm_tt_is_readonly(ttm)) |
1353 | flags |= AMDGPU_PTE_WRITEABLE; |
1354 | |
1355 | return flags; |
1356 | } |
1357 | |
1358 | /* |
1359 | * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict a buffer |
1360 | * object. |
1361 | * |
1362 | * Return true if eviction is sensible. Called by ttm_mem_evict_first() on |
1363 | * behalf of ttm_bo_mem_force_space() which tries to evict buffer objects until |
1364 | * it can find space for a new object and by ttm_bo_force_list_clean() which is |
1365 | * used to clean out a memory space. |
1366 | */ |
1367 | static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, |
1368 | const struct ttm_place *place) |
1369 | { |
1370 | struct dma_resv_iter resv_cursor; |
1371 | struct dma_fence *f; |
1372 | |
1373 | if (!amdgpu_bo_is_amdgpu_bo(bo)) |
1374 | return ttm_bo_eviction_valuable(bo, place); |
1375 | |
1376 | /* Swapout? */ |
1377 | if (bo->resource->mem_type == TTM_PL_SYSTEM) |
1378 | return true; |
1379 | |
1380 | if (bo->type == ttm_bo_type_kernel && |
	    !amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo)))
1382 | return false; |
1383 | |
1384 | /* If bo is a KFD BO, check if the bo belongs to the current process. |
1385 | * If true, then return false as any KFD process needs all its BOs to |
1386 | * be resident to run successfully |
1387 | */ |
1388 | dma_resv_for_each_fence(&resv_cursor, bo->base.resv, |
1389 | DMA_RESV_USAGE_BOOKKEEP, f) { |
1390 | if (amdkfd_fence_check_mm(f, current->mm)) |
1391 | return false; |
1392 | } |
1393 | |
1394 | /* Preemptible BOs don't own system resources managed by the |
1395 | * driver (pages, VRAM, GART space). They point to resources |
1396 | * owned by someone else (e.g. pageable memory in user mode |
1397 | * or a DMABuf). They are used in a preemptible context so we |
1398 | * can guarantee no deadlocks and good QoS in case of MMU |
1399 | * notifiers or DMABuf move notifiers from the resource owner. |
1400 | */ |
1401 | if (bo->resource->mem_type == AMDGPU_PL_PREEMPT) |
1402 | return false; |
1403 | |
1404 | if (bo->resource->mem_type == TTM_PL_TT && |
	    amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
1406 | return false; |
1407 | |
1408 | return ttm_bo_eviction_valuable(bo, place); |
1409 | } |
1410 | |
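/*
 * amdgpu_ttm_vram_mm_access - access VRAM through the MM register path
 *
 * Performs dword-aligned read-modify-write cycles so byte-granular,
 * unaligned accesses work on top of the 32-bit register interface.
 */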
1411 | static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos, |
1412 | void *buf, size_t size, bool write) |
1413 | { |
1414 | while (size) { |
1415 | uint64_t aligned_pos = ALIGN_DOWN(pos, 4); |
1416 | uint64_t bytes = 4 - (pos & 0x3); |
1417 | uint32_t shift = (pos & 0x3) * 8; |
1418 | uint32_t mask = 0xffffffff << shift; |
1419 | uint32_t value = 0; |
1420 | |
1421 | if (size < bytes) { |
1422 | mask &= 0xffffffff >> (bytes - size) * 8; |
1423 | bytes = size; |
1424 | } |
1425 | |
1426 | if (mask != 0xffffffff) { |
			amdgpu_device_mm_access(adev, aligned_pos, &value, 4, false);
1428 | if (write) { |
1429 | value &= ~mask; |
1430 | value |= (*(uint32_t *)buf << shift) & mask; |
				amdgpu_device_mm_access(adev, aligned_pos, &value, 4, true);
1432 | } else { |
1433 | value = (value & mask) >> shift; |
1434 | memcpy(buf, &value, bytes); |
1435 | } |
1436 | } else { |
			amdgpu_device_mm_access(adev, aligned_pos, buf, 4, write);
1438 | } |
1439 | |
1440 | pos += bytes; |
1441 | buf += bytes; |
1442 | size -= bytes; |
1443 | } |
1444 | } |
1445 | |
1446 | static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo, |
1447 | unsigned long offset, void *buf, |
1448 | int len, int write) |
1449 | { |
	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
1452 | struct amdgpu_res_cursor src_mm; |
1453 | struct amdgpu_job *job; |
1454 | struct dma_fence *fence; |
1455 | uint64_t src_addr, dst_addr; |
1456 | unsigned int num_dw; |
1457 | int r, idx; |
1458 | |
1459 | if (len != PAGE_SIZE) |
1460 | return -EINVAL; |
1461 | |
1462 | if (!adev->mman.sdma_access_ptr) |
1463 | return -EACCES; |
1464 | |
	if (!drm_dev_enter(adev_to_drm(adev), &idx))
1466 | return -ENODEV; |
1467 | |
1468 | if (write) |
1469 | memcpy(adev->mman.sdma_access_ptr, buf, len); |
1470 | |
1471 | num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); |
	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
				     AMDGPU_FENCE_OWNER_UNDEFINED,
				     num_dw * 4, AMDGPU_IB_POOL_DELAYED,
				     &job);
1476 | if (r) |
1477 | goto out; |
1478 | |
	amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm);
	src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) +
		   src_mm.start;
	dst_addr = amdgpu_bo_gpu_offset(adev->mman.sdma_access_bo);
1483 | if (write) |
1484 | swap(src_addr, dst_addr); |
1485 | |
1486 | amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, |
1487 | PAGE_SIZE, false); |
1488 | |
1489 | amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]); |
1490 | WARN_ON(job->ibs[0].length_dw > num_dw); |
1491 | |
1492 | fence = amdgpu_job_submit(job); |
1493 | |
	if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
1495 | r = -ETIMEDOUT; |
1496 | dma_fence_put(fence); |
1497 | |
1498 | if (!(r || write)) |
1499 | memcpy(buf, adev->mman.sdma_access_ptr, len); |
1500 | out: |
1501 | drm_dev_exit(idx); |
1502 | return r; |
1503 | } |
1504 | |
1505 | /** |
1506 | * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object. |
1507 | * |
1508 | * @bo: The buffer object to read/write |
1509 | * @offset: Offset into buffer object |
1510 | * @buf: Secondary buffer to write/read from |
1511 | * @len: Length in bytes of access |
1512 | * @write: true if writing |
1513 | * |
1514 | * This is used to access VRAM that backs a buffer object via MMIO |
1515 | * access for debugging purposes. |
1516 | */ |
1517 | static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, |
1518 | unsigned long offset, void *buf, int len, |
1519 | int write) |
1520 | { |
	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
1523 | struct amdgpu_res_cursor cursor; |
1524 | int ret = 0; |
1525 | |
1526 | if (bo->resource->mem_type != TTM_PL_VRAM) |
1527 | return -EIO; |
1528 | |
1529 | if (amdgpu_device_has_timeouts_enabled(adev) && |
1530 | !amdgpu_ttm_access_memory_sdma(bo, offset, buf, len, write)) |
1531 | return len; |
1532 | |
	amdgpu_res_first(bo->resource, offset, len, &cursor);
1534 | while (cursor.remaining) { |
1535 | size_t count, size = cursor.size; |
1536 | loff_t pos = cursor.start; |
1537 | |
1538 | count = amdgpu_device_aper_access(adev, pos, buf, size, write); |
1539 | size -= count; |
1540 | if (size) { |
1541 | /* using MM to access rest vram and handle un-aligned address */ |
1542 | pos += count; |
1543 | buf += count; |
1544 | amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write); |
1545 | } |
1546 | |
1547 | ret += cursor.size; |
1548 | buf += cursor.size; |
		amdgpu_res_next(&cursor, cursor.size);
1550 | } |
1551 | |
1552 | return ret; |
1553 | } |
1554 | |
1555 | static void |
1556 | amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo) |
1557 | { |
	amdgpu_bo_move_notify(bo, false, NULL);
1559 | } |
1560 | |
1561 | static struct ttm_device_funcs amdgpu_bo_driver = { |
1562 | .ttm_tt_create = &amdgpu_ttm_tt_create, |
1563 | .ttm_tt_populate = &amdgpu_ttm_tt_populate, |
1564 | .ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate, |
1565 | .ttm_tt_destroy = &amdgpu_ttm_backend_destroy, |
1566 | .eviction_valuable = amdgpu_ttm_bo_eviction_valuable, |
1567 | .evict_flags = &amdgpu_evict_flags, |
1568 | .move = &amdgpu_bo_move, |
1569 | .delete_mem_notify = &amdgpu_bo_delete_mem_notify, |
1570 | .release_notify = &amdgpu_bo_release_notify, |
1571 | .io_mem_reserve = &amdgpu_ttm_io_mem_reserve, |
1572 | .io_mem_pfn = amdgpu_ttm_io_mem_pfn, |
1573 | .access_memory = &amdgpu_ttm_access_memory, |
1574 | }; |
1575 | |
1576 | /* |
1577 | * Firmware Reservation functions |
1578 | */ |
1579 | /** |
1580 | * amdgpu_ttm_fw_reserve_vram_fini - free fw reserved vram |
1581 | * |
1582 | * @adev: amdgpu_device pointer |
1583 | * |
1584 | * free fw reserved vram if it has been reserved. |
1585 | */ |
1586 | static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev) |
1587 | { |
	amdgpu_bo_free_kernel(&adev->mman.fw_vram_usage_reserved_bo,
			      NULL, &adev->mman.fw_vram_usage_va);
1590 | } |
1591 | |
1592 | /* |
1593 | * Driver Reservation functions |
1594 | */ |
1595 | /** |
1596 | * amdgpu_ttm_drv_reserve_vram_fini - free drv reserved vram |
1597 | * |
1598 | * @adev: amdgpu_device pointer |
1599 | * |
1600 | * free drv reserved vram if it has been reserved. |
1601 | */ |
1602 | static void amdgpu_ttm_drv_reserve_vram_fini(struct amdgpu_device *adev) |
1603 | { |
	amdgpu_bo_free_kernel(&adev->mman.drv_vram_usage_reserved_bo,
			      NULL,
			      &adev->mman.drv_vram_usage_va);
1607 | } |
1608 | |
1609 | /** |
1610 | * amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw |
1611 | * |
1612 | * @adev: amdgpu_device pointer |
1613 | * |
1614 | * create bo vram reservation from fw. |
1615 | */ |
1616 | static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) |
1617 | { |
1618 | uint64_t vram_size = adev->gmc.visible_vram_size; |
1619 | |
1620 | adev->mman.fw_vram_usage_va = NULL; |
1621 | adev->mman.fw_vram_usage_reserved_bo = NULL; |
1622 | |
1623 | if (adev->mman.fw_vram_usage_size == 0 || |
1624 | adev->mman.fw_vram_usage_size > vram_size) |
1625 | return 0; |
1626 | |
	return amdgpu_bo_create_kernel_at(adev,
					  adev->mman.fw_vram_usage_start_offset,
					  adev->mman.fw_vram_usage_size,
					  &adev->mman.fw_vram_usage_reserved_bo,
					  &adev->mman.fw_vram_usage_va);
1632 | } |
1633 | |
1634 | /** |
1635 | * amdgpu_ttm_drv_reserve_vram_init - create bo vram reservation from driver |
1636 | * |
1637 | * @adev: amdgpu_device pointer |
1638 | * |
1639 | * create bo vram reservation from drv. |
1640 | */ |
1641 | static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev) |
1642 | { |
1643 | u64 vram_size = adev->gmc.visible_vram_size; |
1644 | |
1645 | adev->mman.drv_vram_usage_va = NULL; |
1646 | adev->mman.drv_vram_usage_reserved_bo = NULL; |
1647 | |
1648 | if (adev->mman.drv_vram_usage_size == 0 || |
1649 | adev->mman.drv_vram_usage_size > vram_size) |
1650 | return 0; |
1651 | |
	return amdgpu_bo_create_kernel_at(adev,
					  adev->mman.drv_vram_usage_start_offset,
					  adev->mman.drv_vram_usage_size,
					  &adev->mman.drv_vram_usage_reserved_bo,
					  &adev->mman.drv_vram_usage_va);
1657 | } |
1658 | |
1659 | /* |
 * Memory training reservation functions
1661 | */ |
1662 | |
1663 | /** |
1664 | * amdgpu_ttm_training_reserve_vram_fini - free memory training reserved vram |
1665 | * |
1666 | * @adev: amdgpu_device pointer |
1667 | * |
1668 | * free memory training reserved vram if it has been reserved. |
1669 | */ |
1670 | static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev) |
1671 | { |
1672 | struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; |
1673 | |
1674 | ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT; |
	amdgpu_bo_free_kernel(&ctx->c2p_bo, NULL, NULL);
1676 | ctx->c2p_bo = NULL; |
1677 | |
1678 | return 0; |
1679 | } |
1680 | |
1681 | static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev, |
1682 | uint32_t reserve_size) |
1683 | { |
1684 | struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; |
1685 | |
1686 | memset(ctx, 0, sizeof(*ctx)); |
1687 | |
1688 | ctx->c2p_train_data_offset = |
1689 | ALIGN((adev->gmc.mc_vram_size - reserve_size - SZ_1M), SZ_1M); |
1690 | ctx->p2c_train_data_offset = |
1691 | (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET); |
1692 | ctx->train_data_size = |
1693 | GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES; |
1694 | |
1695 | DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n" , |
1696 | ctx->train_data_size, |
1697 | ctx->p2c_train_data_offset, |
1698 | ctx->c2p_train_data_offset); |
1699 | } |
1700 | |
1701 | /* |
1702 | * reserve TMR memory at the top of VRAM which holds |
1703 | * IP Discovery data and is protected by PSP. |
1704 | */ |
1705 | static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) |
1706 | { |
1707 | struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; |
1708 | bool mem_train_support = false; |
1709 | uint32_t reserve_size = 0; |
1710 | int ret; |
1711 | |
1712 | if (adev->bios && !amdgpu_sriov_vf(adev)) { |
1713 | if (amdgpu_atomfirmware_mem_training_supported(adev)) |
1714 | mem_train_support = true; |
1715 | else |
1716 | DRM_DEBUG("memory training does not support!\n" ); |
1717 | } |
1718 | |
1719 | /* |
1720 | * Query reserved tmr size through atom firmwareinfo for Sienna_Cichlid and onwards for all |
1721 | * the use cases (IP discovery/G6 memory training/profiling/diagnostic data.etc) |
1722 | * |
1723 | * Otherwise, fallback to legacy approach to check and reserve tmr block for ip |
1724 | * discovery data and G6 memory training data respectively |
1725 | */ |
1726 | if (adev->bios) |
1727 | reserve_size = |
1728 | amdgpu_atomfirmware_get_fw_reserved_fb_size(adev); |
1729 | |
1730 | if (!adev->bios && |
	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3))
1732 | reserve_size = max(reserve_size, (uint32_t)280 << 20); |
1733 | else if (!reserve_size) |
1734 | reserve_size = DISCOVERY_TMR_OFFSET; |
1735 | |
1736 | if (mem_train_support) { |
1737 | /* reserve vram for mem train according to TMR location */ |
1738 | amdgpu_ttm_training_data_block_init(adev, reserve_size); |
		ret = amdgpu_bo_create_kernel_at(adev,
						 ctx->c2p_train_data_offset,
						 ctx->train_data_size,
						 &ctx->c2p_bo,
						 NULL);
		if (ret) {
			DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
1746 | amdgpu_ttm_training_reserve_vram_fini(adev); |
1747 | return ret; |
1748 | } |
1749 | ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS; |
1750 | } |
1751 | |
1752 | if (!adev->gmc.is_app_apu) { |
		ret = amdgpu_bo_create_kernel_at(
			adev, adev->gmc.real_vram_size - reserve_size,
			reserve_size, &adev->mman.fw_reserved_memory, NULL);
		if (ret) {
			DRM_ERROR("alloc tmr failed(%d)!\n", ret);
			amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory,
					      NULL, NULL);
			return ret;
		}
	} else {
		DRM_DEBUG_DRIVER("backdoor fw loading path for PSP TMR, no reservation needed\n");
1764 | } |
1765 | |
1766 | return 0; |
1767 | } |
1768 | |
1769 | static int amdgpu_ttm_pools_init(struct amdgpu_device *adev) |
1770 | { |
1771 | int i; |
1772 | |
1773 | if (!adev->gmc.is_app_apu || !adev->gmc.num_mem_partitions) |
1774 | return 0; |
1775 | |
	adev->mman.ttm_pools = kcalloc(adev->gmc.num_mem_partitions,
				       sizeof(*adev->mman.ttm_pools),
				       GFP_KERNEL);
1779 | if (!adev->mman.ttm_pools) |
1780 | return -ENOMEM; |
1781 | |
1782 | for (i = 0; i < adev->gmc.num_mem_partitions; i++) { |
		ttm_pool_init(&adev->mman.ttm_pools[i], adev->dev,
			      adev->gmc.mem_partitions[i].numa.node,
			      false, false);
1786 | } |
1787 | return 0; |
1788 | } |
1789 | |
1790 | static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev) |
1791 | { |
1792 | int i; |
1793 | |
1794 | if (!adev->gmc.is_app_apu || !adev->mman.ttm_pools) |
1795 | return; |
1796 | |
1797 | for (i = 0; i < adev->gmc.num_mem_partitions; i++) |
		ttm_pool_fini(&adev->mman.ttm_pools[i]);

	kfree(adev->mman.ttm_pools);
1801 | adev->mman.ttm_pools = NULL; |
1802 | } |
1803 | |
1804 | /* |
1805 | * amdgpu_ttm_init - Init the memory management (ttm) as well as various |
1806 | * gtt/vram related fields. |
1807 | * |
1808 | * This initializes all of the memory space pools that the TTM layer |
1809 | * will need such as the GTT space (system memory mapped to the device), |
1810 | * VRAM (on-board memory), and on-chip memories (GDS, GWS, OA) which |
1811 | * can be mapped per VMID. |
1812 | */ |
1813 | int amdgpu_ttm_init(struct amdgpu_device *adev) |
1814 | { |
1815 | uint64_t gtt_size; |
1816 | int r; |
1817 | |
1818 | mutex_init(&adev->mman.gtt_window_lock); |
1819 | |
	/* No other user of the address space, so set it to 0 */
	r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
			    adev_to_drm(adev)->anon_inode->i_mapping,
			    adev_to_drm(adev)->vma_offset_manager,
			    adev->need_swiotlb,
			    dma_addressing_limited(adev->dev));
1826 | if (r) { |
1827 | DRM_ERROR("failed initializing buffer object driver(%d).\n" , r); |
1828 | return r; |
1829 | } |
1830 | |
1831 | r = amdgpu_ttm_pools_init(adev); |
1832 | if (r) { |
1833 | DRM_ERROR("failed to init ttm pools(%d).\n" , r); |
1834 | return r; |
1835 | } |
1836 | adev->mman.initialized = true; |
1837 | |
1838 | /* Initialize VRAM pool with all of VRAM divided into pages */ |
1839 | r = amdgpu_vram_mgr_init(adev); |
1840 | if (r) { |
1841 | DRM_ERROR("Failed initializing VRAM heap.\n" ); |
1842 | return r; |
1843 | } |
1844 | |
1845 | /* Change the size here instead of the init above so only lpfn is affected */ |
	amdgpu_ttm_set_buffer_funcs_status(adev, false);
1847 | #ifdef CONFIG_64BIT |
1848 | #ifdef CONFIG_X86 |
1849 | if (adev->gmc.xgmi.connected_to_cpu) |
		adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
				adev->gmc.visible_vram_size);
1852 | |
1853 | else if (adev->gmc.is_app_apu) |
		DRM_DEBUG_DRIVER(
			"No need to ioremap when real vram size is 0\n");
1856 | else |
1857 | #endif |
		adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
				adev->gmc.visible_vram_size);
1860 | #endif |
1861 | |
1862 | /* |
1863 | *The reserved vram for firmware must be pinned to the specified |
1864 | *place on the VRAM, so reserve it early. |
1865 | */ |
1866 | r = amdgpu_ttm_fw_reserve_vram_init(adev); |
1867 | if (r) |
1868 | return r; |
1869 | |
1870 | /* |
1871 | *The reserved vram for driver must be pinned to the specified |
1872 | *place on the VRAM, so reserve it early. |
1873 | */ |
1874 | r = amdgpu_ttm_drv_reserve_vram_init(adev); |
1875 | if (r) |
1876 | return r; |
1877 | |
1878 | /* |
1879 | * only NAVI10 and onwards ASIC support for IP discovery. |
1880 | * If IP discovery enabled, a block of memory should be |
1881 | * reserved for IP discovey. |
1882 | */ |
1883 | if (adev->mman.discovery_bin) { |
1884 | r = amdgpu_ttm_reserve_tmr(adev); |
1885 | if (r) |
1886 | return r; |
1887 | } |
1888 | |
1889 | /* allocate memory as required for VGA |
1890 | * This is used for VGA emulation and pre-OS scanout buffers to |
1891 | * avoid display artifacts while transitioning between pre-OS |
1892 | * and driver. |
1893 | */ |
1894 | if (!adev->gmc.is_app_apu) { |
		r = amdgpu_bo_create_kernel_at(adev, 0,
					       adev->mman.stolen_vga_size,
					       &adev->mman.stolen_vga_memory,
					       NULL);
1899 | if (r) |
1900 | return r; |
1901 | |
		r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
					       adev->mman.stolen_extended_size,
					       &adev->mman.stolen_extended_memory,
					       NULL);
1906 | |
1907 | if (r) |
1908 | return r; |
1909 | |
		r = amdgpu_bo_create_kernel_at(adev,
					       adev->mman.stolen_reserved_offset,
					       adev->mman.stolen_reserved_size,
					       &adev->mman.stolen_reserved_memory,
					       NULL);
1915 | if (r) |
1916 | return r; |
1917 | } else { |
1918 | DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n" ); |
1919 | } |
1920 | |
1921 | DRM_INFO("amdgpu: %uM of VRAM memory ready\n" , |
1922 | (unsigned int)(adev->gmc.real_vram_size / (1024 * 1024))); |
1923 | |
1924 | /* Compute GTT size, either based on TTM limit |
1925 | * or whatever the user passed on module init. |
1926 | */ |
1927 | if (amdgpu_gtt_size == -1) |
1928 | gtt_size = ttm_tt_pages_limit() << PAGE_SHIFT; |
1929 | else |
1930 | gtt_size = (uint64_t)amdgpu_gtt_size << 20; |
1931 | |
1932 | /* Initialize GTT memory pool */ |
1933 | r = amdgpu_gtt_mgr_init(adev, gtt_size); |
1934 | if (r) { |
1935 | DRM_ERROR("Failed initializing GTT heap.\n" ); |
1936 | return r; |
1937 | } |
1938 | DRM_INFO("amdgpu: %uM of GTT memory ready.\n" , |
1939 | (unsigned int)(gtt_size / (1024 * 1024))); |
1940 | |
	/* Initialize doorbell pool on PCI BAR */
	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.size / PAGE_SIZE);
	if (r) {
		DRM_ERROR("Failed initializing doorbell heap.\n");
1945 | return r; |
1946 | } |
1947 | |
	/* Create a doorbell page for kernel use */
1949 | r = amdgpu_doorbell_create_kernel_doorbells(adev); |
1950 | if (r) { |
1951 | DRM_ERROR("Failed to initialize kernel doorbells.\n" ); |
1952 | return r; |
1953 | } |
1954 | |
1955 | /* Initialize preemptible memory pool */ |
1956 | r = amdgpu_preempt_mgr_init(adev); |
1957 | if (r) { |
1958 | DRM_ERROR("Failed initializing PREEMPT heap.\n" ); |
1959 | return r; |
1960 | } |
1961 | |
1962 | /* Initialize various on-chip memory pools */ |
	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GDS, adev->gds.gds_size);
	if (r) {
		DRM_ERROR("Failed initializing GDS heap.\n");
1966 | return r; |
1967 | } |
1968 | |
	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GWS, adev->gds.gws_size);
	if (r) {
		DRM_ERROR("Failed initializing gws heap.\n");
1972 | return r; |
1973 | } |
1974 | |
	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_OA, adev->gds.oa_size);
	if (r) {
		DRM_ERROR("Failed initializing oa heap.\n");
1978 | return r; |
1979 | } |
	if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
				AMDGPU_GEM_DOMAIN_GTT,
				&adev->mman.sdma_access_bo, NULL,
				&adev->mman.sdma_access_ptr))
		DRM_WARN("Debug VRAM access will use slowpath MM access\n");
1985 | |
1986 | return 0; |
1987 | } |
1988 | |
1989 | /* |
1990 | * amdgpu_ttm_fini - De-initialize the TTM memory pools |
1991 | */ |
1992 | void amdgpu_ttm_fini(struct amdgpu_device *adev) |
1993 | { |
1994 | int idx; |
1995 | |
1996 | if (!adev->mman.initialized) |
1997 | return; |
1998 | |
1999 | amdgpu_ttm_pools_fini(adev); |
2000 | |
2001 | amdgpu_ttm_training_reserve_vram_fini(adev); |
2002 | /* return the stolen vga memory back to VRAM */ |
2003 | if (!adev->gmc.is_app_apu) { |
		amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
		amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
		/* return the FW reserved memory back to VRAM */
		amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL,
				      NULL);
		if (adev->mman.stolen_reserved_size)
			amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
					      NULL, NULL);
	}
	amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
			      &adev->mman.sdma_access_ptr);
2015 | amdgpu_ttm_fw_reserve_vram_fini(adev); |
2016 | amdgpu_ttm_drv_reserve_vram_fini(adev); |
2017 | |
	if (drm_dev_enter(adev_to_drm(adev), &idx)) {

		if (adev->mman.aper_base_kaddr)
			iounmap(adev->mman.aper_base_kaddr);
2022 | adev->mman.aper_base_kaddr = NULL; |
2023 | |
2024 | drm_dev_exit(idx); |
2025 | } |
2026 | |
2027 | amdgpu_vram_mgr_fini(adev); |
2028 | amdgpu_gtt_mgr_fini(adev); |
2029 | amdgpu_preempt_mgr_fini(adev); |
	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
	ttm_device_fini(&adev->mman.bdev);
	adev->mman.initialized = false;
	DRM_INFO("amdgpu: ttm finalized\n");
2036 | } |
2037 | |
2038 | /** |
2039 | * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer functions |
2040 | * |
2041 | * @adev: amdgpu_device pointer |
2042 | * @enable: true when we can use buffer functions. |
2043 | * |
2044 | * Enable/disable use of buffer functions during suspend/resume. This should |
2045 | * only be called at bootup or when userspace isn't running. |
2046 | */ |
2047 | void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) |
2048 | { |
	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
2050 | uint64_t size; |
2051 | int r; |
2052 | |
2053 | if (!adev->mman.initialized || amdgpu_in_reset(adev) || |
2054 | adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu) |
2055 | return; |
2056 | |
2057 | if (enable) { |
2058 | struct amdgpu_ring *ring; |
2059 | struct drm_gpu_scheduler *sched; |
2060 | |
2061 | ring = adev->mman.buffer_funcs_ring; |
2062 | sched = &ring->sched; |
		r = drm_sched_entity_init(&adev->mman.high_pr,
					  DRM_SCHED_PRIORITY_KERNEL, &sched,
					  1, NULL);
		if (r) {
			DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
2068 | r); |
2069 | return; |
2070 | } |
2071 | |
		r = drm_sched_entity_init(&adev->mman.low_pr,
					  DRM_SCHED_PRIORITY_NORMAL, &sched,
					  1, NULL);
		if (r) {
			DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
2077 | r); |
2078 | goto error_free_entity; |
2079 | } |
2080 | } else { |
		drm_sched_entity_destroy(&adev->mman.high_pr);
		drm_sched_entity_destroy(&adev->mman.low_pr);
		dma_fence_put(man->move);
		man->move = NULL;
2085 | } |
2086 | |
	/* this just adjusts TTM's idea of the size, which sets lpfn to the correct value */
2088 | if (enable) |
2089 | size = adev->gmc.real_vram_size; |
2090 | else |
2091 | size = adev->gmc.visible_vram_size; |
2092 | man->size = size; |
2093 | adev->mman.buffer_funcs_enabled = enable; |
2094 | |
2095 | return; |
2096 | |
2097 | error_free_entity: |
	drm_sched_entity_destroy(&adev->mman.high_pr);
2099 | } |
2100 | |
2101 | static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, |
2102 | bool direct_submit, |
2103 | unsigned int num_dw, |
2104 | struct dma_resv *resv, |
2105 | bool vm_needs_flush, |
2106 | struct amdgpu_job **job, |
2107 | bool delayed) |
2108 | { |
2109 | enum amdgpu_ib_pool_type pool = direct_submit ? |
2110 | AMDGPU_IB_POOL_DIRECT : |
2111 | AMDGPU_IB_POOL_DELAYED; |
2112 | int r; |
2113 | struct drm_sched_entity *entity = delayed ? &adev->mman.low_pr : |
2114 | &adev->mman.high_pr; |
	r = amdgpu_job_alloc_with_ib(adev, entity,
				     AMDGPU_FENCE_OWNER_UNDEFINED,
				     num_dw * 4, pool, job);
2118 | if (r) |
2119 | return r; |
2120 | |
2121 | if (vm_needs_flush) { |
		(*job)->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo ?
							adev->gmc.pdb0_bo :
							adev->gart.bo);
2125 | (*job)->vm_needs_flush = true; |
2126 | } |
2127 | if (!resv) |
2128 | return 0; |
2129 | |
	return drm_sched_job_add_resv_dependencies(&(*job)->base, resv,
						   DMA_RESV_USAGE_BOOKKEEP);
2132 | } |
2133 | |
2134 | int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, |
2135 | uint64_t dst_offset, uint32_t byte_count, |
2136 | struct dma_resv *resv, |
2137 | struct dma_fence **fence, bool direct_submit, |
2138 | bool vm_needs_flush, bool tmz) |
2139 | { |
2140 | struct amdgpu_device *adev = ring->adev; |
2141 | unsigned int num_loops, num_dw; |
2142 | struct amdgpu_job *job; |
2143 | uint32_t max_bytes; |
2144 | unsigned int i; |
2145 | int r; |
2146 | |
2147 | if (!direct_submit && !ring->sched.ready) { |
2148 | DRM_ERROR("Trying to move memory with ring turned off.\n" ); |
2149 | return -EINVAL; |
2150 | } |
2151 | |
2152 | max_bytes = adev->mman.buffer_funcs->copy_max_bytes; |
2153 | num_loops = DIV_ROUND_UP(byte_count, max_bytes); |
2154 | num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8); |
	r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw,
				   resv, vm_needs_flush, &job, false);
2157 | if (r) |
2158 | return r; |
2159 | |
2160 | for (i = 0; i < num_loops; i++) { |
2161 | uint32_t cur_size_in_bytes = min(byte_count, max_bytes); |
2162 | |
2163 | amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset, |
2164 | dst_offset, cur_size_in_bytes, tmz); |
2165 | |
2166 | src_offset += cur_size_in_bytes; |
2167 | dst_offset += cur_size_in_bytes; |
2168 | byte_count -= cur_size_in_bytes; |
2169 | } |
2170 | |
2171 | amdgpu_ring_pad_ib(ring, &job->ibs[0]); |
2172 | WARN_ON(job->ibs[0].length_dw > num_dw); |
2173 | if (direct_submit) |
2174 | r = amdgpu_job_submit_direct(job, ring, fence); |
2175 | else |
2176 | *fence = amdgpu_job_submit(job); |
2177 | if (r) |
2178 | goto error_free; |
2179 | |
2180 | return r; |
2181 | |
2182 | error_free: |
2183 | amdgpu_job_free(job); |
2184 | DRM_ERROR("Error scheduling IBs (%d)\n" , r); |
2185 | return r; |
2186 | } |
2187 | |
2188 | static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, |
2189 | uint64_t dst_addr, uint32_t byte_count, |
2190 | struct dma_resv *resv, |
2191 | struct dma_fence **fence, |
2192 | bool vm_needs_flush, bool delayed) |
2193 | { |
2194 | struct amdgpu_device *adev = ring->adev; |
2195 | unsigned int num_loops, num_dw; |
2196 | struct amdgpu_job *job; |
2197 | uint32_t max_bytes; |
2198 | unsigned int i; |
2199 | int r; |
2200 | |
2201 | max_bytes = adev->mman.buffer_funcs->fill_max_bytes; |
2202 | num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes); |
2203 | num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8); |
	r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush,
				   &job, delayed);
2206 | if (r) |
2207 | return r; |
2208 | |
2209 | for (i = 0; i < num_loops; i++) { |
2210 | uint32_t cur_size = min(byte_count, max_bytes); |
2211 | |
2212 | amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, dst_addr, |
2213 | cur_size); |
2214 | |
2215 | dst_addr += cur_size; |
2216 | byte_count -= cur_size; |
2217 | } |
2218 | |
2219 | amdgpu_ring_pad_ib(ring, &job->ibs[0]); |
2220 | WARN_ON(job->ibs[0].length_dw > num_dw); |
2221 | *fence = amdgpu_job_submit(job); |
2222 | return 0; |
2223 | } |
2224 | |
2225 | int amdgpu_fill_buffer(struct amdgpu_bo *bo, |
2226 | uint32_t src_data, |
2227 | struct dma_resv *resv, |
2228 | struct dma_fence **f, |
2229 | bool delayed) |
2230 | { |
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
2232 | struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; |
2233 | struct dma_fence *fence = NULL; |
2234 | struct amdgpu_res_cursor dst; |
2235 | int r; |
2236 | |
2237 | if (!adev->mman.buffer_funcs_enabled) { |
2238 | DRM_ERROR("Trying to clear memory with ring turned off.\n" ); |
2239 | return -EINVAL; |
2240 | } |
2241 | |
	amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &dst);
2243 | |
2244 | mutex_lock(&adev->mman.gtt_window_lock); |
2245 | while (dst.remaining) { |
2246 | struct dma_fence *next; |
2247 | uint64_t cur_size, to; |
2248 | |
2249 | /* Never fill more than 256MiB at once to avoid timeouts */ |
2250 | cur_size = min(dst.size, 256ULL << 20); |
2251 | |
		r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &dst,
					  1, ring, false, &cur_size, &to);
2254 | if (r) |
2255 | goto error; |
2256 | |
		r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
					&next, true, delayed);
2259 | if (r) |
2260 | goto error; |
2261 | |
2262 | dma_fence_put(fence); |
2263 | fence = next; |
2264 | |
		amdgpu_res_next(&dst, cur_size);
2266 | } |
2267 | error: |
	mutex_unlock(&adev->mman.gtt_window_lock);
2269 | if (f) |
2270 | *f = dma_fence_get(fence); |
2271 | dma_fence_put(fence); |
2272 | return r; |
2273 | } |
2274 | |
2275 | /** |
2276 | * amdgpu_ttm_evict_resources - evict memory buffers |
2277 | * @adev: amdgpu device object |
2278 | * @mem_type: evicted BO's memory type |
2279 | * |
2280 | * Evicts all @mem_type buffers on the lru list of the memory type. |
2281 | * |
2282 | * Returns: |
2283 | * 0 for success or a negative error code on failure. |
2284 | */ |
2285 | int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type) |
2286 | { |
2287 | struct ttm_resource_manager *man; |
2288 | |
2289 | switch (mem_type) { |
2290 | case TTM_PL_VRAM: |
2291 | case TTM_PL_TT: |
2292 | case AMDGPU_PL_GWS: |
2293 | case AMDGPU_PL_GDS: |
2294 | case AMDGPU_PL_OA: |
		man = ttm_manager_type(&adev->mman.bdev, mem_type);
2296 | break; |
2297 | default: |
2298 | DRM_ERROR("Trying to evict invalid memory type\n" ); |
2299 | return -EINVAL; |
2300 | } |
2301 | |
	return ttm_resource_manager_evict_all(&adev->mman.bdev, man);
2303 | } |
2304 | |
2305 | #if defined(CONFIG_DEBUG_FS) |
2306 | |
2307 | static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused) |
2308 | { |
2309 | struct amdgpu_device *adev = m->private; |
2310 | |
	return ttm_pool_debugfs(&adev->mman.bdev.pool, m);
2312 | } |
2313 | |
2314 | DEFINE_SHOW_ATTRIBUTE(amdgpu_ttm_page_pool); |
2315 | |
2316 | /* |
2317 | * amdgpu_ttm_vram_read - Linear read access to VRAM |
2318 | * |
2319 | * Accesses VRAM via MMIO for debugging purposes. |
2320 | */ |
2321 | static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf, |
2322 | size_t size, loff_t *pos) |
2323 | { |
2324 | struct amdgpu_device *adev = file_inode(f)->i_private; |
2325 | ssize_t result = 0; |
2326 | |
2327 | if (size & 0x3 || *pos & 0x3) |
2328 | return -EINVAL; |
2329 | |
2330 | if (*pos >= adev->gmc.mc_vram_size) |
2331 | return -ENXIO; |
2332 | |
2333 | size = min(size, (size_t)(adev->gmc.mc_vram_size - *pos)); |
2334 | while (size) { |
2335 | size_t bytes = min(size, AMDGPU_TTM_VRAM_MAX_DW_READ * 4); |
2336 | uint32_t value[AMDGPU_TTM_VRAM_MAX_DW_READ]; |
2337 | |
		amdgpu_device_vram_access(adev, *pos, value, bytes, false);
		if (copy_to_user(buf, value, bytes))
2340 | return -EFAULT; |
2341 | |
2342 | result += bytes; |
2343 | buf += bytes; |
2344 | *pos += bytes; |
2345 | size -= bytes; |
2346 | } |
2347 | |
2348 | return result; |
2349 | } |
2350 | |
2351 | /* |
2352 | * amdgpu_ttm_vram_write - Linear write access to VRAM |
2353 | * |
2354 | * Accesses VRAM via MMIO for debugging purposes. |
2355 | */ |
2356 | static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf, |
2357 | size_t size, loff_t *pos) |
2358 | { |
2359 | struct amdgpu_device *adev = file_inode(f)->i_private; |
2360 | ssize_t result = 0; |
2361 | int r; |
2362 | |
2363 | if (size & 0x3 || *pos & 0x3) |
2364 | return -EINVAL; |
2365 | |
2366 | if (*pos >= adev->gmc.mc_vram_size) |
2367 | return -ENXIO; |
2368 | |
2369 | while (size) { |
2370 | uint32_t value; |
2371 | |
2372 | if (*pos >= adev->gmc.mc_vram_size) |
2373 | return result; |
2374 | |
2375 | r = get_user(value, (uint32_t *)buf); |
2376 | if (r) |
2377 | return r; |
2378 | |
		amdgpu_device_mm_access(adev, *pos, &value, 4, true);
2380 | |
2381 | result += 4; |
2382 | buf += 4; |
2383 | *pos += 4; |
2384 | size -= 4; |
2385 | } |
2386 | |
2387 | return result; |
2388 | } |
2389 | |
2390 | static const struct file_operations amdgpu_ttm_vram_fops = { |
2391 | .owner = THIS_MODULE, |
2392 | .read = amdgpu_ttm_vram_read, |
2393 | .write = amdgpu_ttm_vram_write, |
2394 | .llseek = default_llseek, |
2395 | }; |
2396 | |
2397 | /* |
2398 | * amdgpu_iomem_read - Virtual read access to GPU mapped memory |
2399 | * |
2400 | * This function is used to read memory that has been mapped to the |
2401 | * GPU and the known addresses are not physical addresses but instead |
2402 | * bus addresses (e.g., what you'd put in an IB or ring buffer). |
2403 | */ |
2404 | static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, |
2405 | size_t size, loff_t *pos) |
2406 | { |
2407 | struct amdgpu_device *adev = file_inode(f)->i_private; |
2408 | struct iommu_domain *dom; |
2409 | ssize_t result = 0; |
2410 | int r; |
2411 | |
2412 | /* retrieve the IOMMU domain if any for this device */ |
	dom = iommu_get_domain_for_dev(adev->dev);
2414 | |
2415 | while (size) { |
2416 | phys_addr_t addr = *pos & PAGE_MASK; |
2417 | loff_t off = *pos & ~PAGE_MASK; |
2418 | size_t bytes = PAGE_SIZE - off; |
2419 | unsigned long pfn; |
2420 | struct page *p; |
2421 | void *ptr; |
2422 | |
2423 | bytes = min(bytes, size); |
2424 | |
2425 | /* Translate the bus address to a physical address. If |
2426 | * the domain is NULL it means there is no IOMMU active |
2427 | * and the address translation is the identity |
2428 | */ |
		addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
2430 | |
2431 | pfn = addr >> PAGE_SHIFT; |
2432 | if (!pfn_valid(pfn)) |
2433 | return -EPERM; |
2434 | |
2435 | p = pfn_to_page(pfn); |
2436 | if (p->mapping != adev->mman.bdev.dev_mapping) |
2437 | return -EPERM; |
2438 | |
		ptr = kmap_local_page(p);
		r = copy_to_user(buf, ptr + off, bytes);
2441 | kunmap_local(ptr); |
2442 | if (r) |
2443 | return -EFAULT; |
2444 | |
2445 | size -= bytes; |
2446 | *pos += bytes; |
2447 | result += bytes; |
2448 | } |
2449 | |
2450 | return result; |
2451 | } |
2452 | |
2453 | /* |
2454 | * amdgpu_iomem_write - Virtual write access to GPU mapped memory |
2455 | * |
2456 | * This function is used to write memory that has been mapped to the |
2457 | * GPU and the known addresses are not physical addresses but instead |
2458 | * bus addresses (e.g., what you'd put in an IB or ring buffer). |
2459 | */ |
2460 | static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf, |
2461 | size_t size, loff_t *pos) |
2462 | { |
2463 | struct amdgpu_device *adev = file_inode(f)->i_private; |
2464 | struct iommu_domain *dom; |
2465 | ssize_t result = 0; |
2466 | int r; |
2467 | |
	dom = iommu_get_domain_for_dev(adev->dev);
2469 | |
2470 | while (size) { |
2471 | phys_addr_t addr = *pos & PAGE_MASK; |
2472 | loff_t off = *pos & ~PAGE_MASK; |
2473 | size_t bytes = PAGE_SIZE - off; |
2474 | unsigned long pfn; |
2475 | struct page *p; |
2476 | void *ptr; |
2477 | |
2478 | bytes = min(bytes, size); |
2479 | |
		addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
2481 | |
2482 | pfn = addr >> PAGE_SHIFT; |
2483 | if (!pfn_valid(pfn)) |
2484 | return -EPERM; |
2485 | |
2486 | p = pfn_to_page(pfn); |
2487 | if (p->mapping != adev->mman.bdev.dev_mapping) |
2488 | return -EPERM; |
2489 | |
		ptr = kmap_local_page(p);
		r = copy_from_user(ptr + off, buf, bytes);
2492 | kunmap_local(ptr); |
2493 | if (r) |
2494 | return -EFAULT; |
2495 | |
2496 | size -= bytes; |
2497 | *pos += bytes; |
2498 | result += bytes; |
2499 | } |
2500 | |
2501 | return result; |
2502 | } |
2503 | |
2504 | static const struct file_operations amdgpu_ttm_iomem_fops = { |
2505 | .owner = THIS_MODULE, |
2506 | .read = amdgpu_iomem_read, |
2507 | .write = amdgpu_iomem_write, |
2508 | .llseek = default_llseek |
2509 | }; |
2510 | |
2511 | #endif |
2512 | |
2513 | void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) |
2514 | { |
2515 | #if defined(CONFIG_DEBUG_FS) |
2516 | struct drm_minor *minor = adev_to_drm(adev)->primary; |
2517 | struct dentry *root = minor->debugfs_root; |
2518 | |
	debugfs_create_file_size("amdgpu_vram", 0444, root, adev,
				 &amdgpu_ttm_vram_fops, adev->gmc.mc_vram_size);
	debugfs_create_file("amdgpu_iomem", 0444, root, adev,
			    &amdgpu_ttm_iomem_fops);
	debugfs_create_file("ttm_page_pool", 0444, root, adev,
			    &amdgpu_ttm_page_pool_fops);
	ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
							     TTM_PL_VRAM),
					    root, "amdgpu_vram_mm");
	ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
							     TTM_PL_TT),
					    root, "amdgpu_gtt_mm");
	ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
							     AMDGPU_PL_GDS),
					    root, "amdgpu_gds_mm");
	ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
							     AMDGPU_PL_GWS),
					    root, "amdgpu_gws_mm");
	ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
							     AMDGPU_PL_OA),
					    root, "amdgpu_oa_mm");
2540 | |
2541 | #endif |
2542 | } |
2543 | |