1 | /* |
2 | * Copyright 2008 Advanced Micro Devices, Inc. |
3 | * Copyright 2008 Red Hat Inc. |
4 | * Copyright 2009 Jerome Glisse. |
5 | * |
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
7 | * copy of this software and associated documentation files (the "Software"), |
8 | * to deal in the Software without restriction, including without limitation |
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
10 | * and/or sell copies of the Software, and to permit persons to whom the |
11 | * Software is furnished to do so, subject to the following conditions: |
12 | * |
13 | * The above copyright notice and this permission notice shall be included in |
14 | * all copies or substantial portions of the Software. |
15 | * |
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
19 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
20 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
21 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
22 | * OTHER DEALINGS IN THE SOFTWARE. |
23 | * |
24 | * Authors: Dave Airlie |
25 | * Alex Deucher |
26 | * Jerome Glisse |
27 | */ |
28 | |
29 | #include <drm/radeon_drm.h> |
30 | #include "radeon.h" |
31 | #include "radeon_trace.h" |
32 | |
33 | /* |
34 | * GPUVM |
35 | * GPUVM is similar to the legacy gart on older asics, however |
36 | * rather than there being a single global gart table |
37 | * for the entire GPU, there are multiple VM page tables active |
38 | * at any given time. The VM page tables can contain a mix of |
39 | * VRAM pages and system memory pages, and system memory pages |
40 | * can be mapped as snooped (cached system pages) or unsnooped |
41 | * (uncached system pages). |
42 | * Each VM has an ID associated with it and there is a page table |
43 | * associated with each VMID. When executing a command buffer, |
44 | * the kernel tells the ring what VMID to use for that command |
45 | * buffer. VMIDs are allocated dynamically as commands are submitted. |
46 | * The userspace drivers maintain their own address space and the kernel |
47 | * sets up their page tables accordingly when they submit their |
48 | * command buffers and a VMID is assigned. |
49 | * Cayman/Trinity support up to 8 active VMs at any given time; |
50 | * SI supports 16. |
51 | */ |
52 | |
53 | /** |
54 | * radeon_vm_num_pdes - return the number of page directory entries |
55 | * |
56 | * @rdev: radeon_device pointer |
57 | * |
58 | * Calculate the number of page directory entries (cayman+). |
59 | */ |
60 | static unsigned radeon_vm_num_pdes(struct radeon_device *rdev) |
61 | { |
62 | return rdev->vm_manager.max_pfn >> radeon_vm_block_size; |
63 | } |
64 | |
65 | /** |
66 | * radeon_vm_directory_size - returns the size of the page directory in bytes |
67 | * |
68 | * @rdev: radeon_device pointer |
69 | * |
70 | * Calculate the size of the page directory in bytes (cayman+). |
71 | */ |
72 | static unsigned radeon_vm_directory_size(struct radeon_device *rdev) |
73 | { |
74 | return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8); |
75 | } |
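/*
 * Illustrative sizing (the real numbers depend on the vm_size and
 * vm_block_size module parameters; a 9-bit block size is only an
 * assumption here): with max_pfn = 1 << 20 and radeon_vm_block_size = 9
 * there are 2048 PDEs, the page directory is 2048 * 8 = 16KB and each
 * page table covers 512 pages (2MB) of address space.
 */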
76 | |
77 | /** |
78 | * radeon_vm_manager_init - init the vm manager |
79 | * |
80 | * @rdev: radeon_device pointer |
81 | * |
82 | * Init the vm manager (cayman+). |
83 | * Returns 0 for success, error for failure. |
84 | */ |
85 | int radeon_vm_manager_init(struct radeon_device *rdev) |
86 | { |
87 | int r; |
88 | |
89 | if (!rdev->vm_manager.enabled) { |
90 | r = radeon_asic_vm_init(rdev); |
91 | if (r) |
92 | return r; |
93 | |
94 | rdev->vm_manager.enabled = true; |
95 | } |
96 | return 0; |
97 | } |
98 | |
99 | /** |
100 | * radeon_vm_manager_fini - tear down the vm manager |
101 | * |
102 | * @rdev: radeon_device pointer |
103 | * |
104 | * Tear down the VM manager (cayman+). |
105 | */ |
106 | void radeon_vm_manager_fini(struct radeon_device *rdev) |
107 | { |
108 | int i; |
109 | |
110 | if (!rdev->vm_manager.enabled) |
111 | return; |
112 | |
113 | for (i = 0; i < RADEON_NUM_VM; ++i) |
114 | radeon_fence_unref(&rdev->vm_manager.active[i]); |
115 | radeon_asic_vm_fini(rdev); |
116 | rdev->vm_manager.enabled = false; |
117 | } |
118 | |
119 | /** |
120 | * radeon_vm_get_bos - add the vm BOs to a validation list |
121 | * |
122 | * @rdev: radeon_device pointer |
123 | * @vm: vm providing the BOs |
124 | * @head: head of validation list |
125 | * |
126 | * Add the page directory to the list of BOs to |
127 | * validate for command submission (cayman+). |
128 | */ |
129 | struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev, |
130 | struct radeon_vm *vm, |
131 | struct list_head *head) |
132 | { |
133 | struct radeon_bo_list *list; |
134 | unsigned i, idx; |
135 | |
136 | list = kvmalloc_array(vm->max_pde_used + 2, |
137 | sizeof(struct radeon_bo_list), GFP_KERNEL); |
138 | if (!list) |
139 | return NULL; |
140 | |
141 | /* add the vm page table to the list */ |
142 | list[0].robj = vm->page_directory; |
143 | list[0].preferred_domains = RADEON_GEM_DOMAIN_VRAM; |
144 | list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM; |
145 | list[0].tv.bo = &vm->page_directory->tbo; |
146 | list[0].tv.num_shared = 1; |
147 | list[0].tiling_flags = 0; |
148 | list_add(&list[0].tv.head, head); |
149 | |
150 | for (i = 0, idx = 1; i <= vm->max_pde_used; i++) { |
151 | if (!vm->page_tables[i].bo) |
152 | continue; |
153 | |
154 | list[idx].robj = vm->page_tables[i].bo; |
155 | list[idx].preferred_domains = RADEON_GEM_DOMAIN_VRAM; |
156 | list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM; |
157 | list[idx].tv.bo = &list[idx].robj->tbo; |
158 | list[idx].tv.num_shared = 1; |
159 | list[idx].tiling_flags = 0; |
160 | list_add(&list[idx++].tv.head, head); |
161 | } |
162 | |
163 | return list; |
164 | } |
165 | |
166 | /** |
167 | * radeon_vm_grab_id - allocate the next free VMID |
168 | * |
169 | * @rdev: radeon_device pointer |
170 | * @vm: vm to allocate id for |
171 | * @ring: ring we want to submit job to |
172 | * |
173 | * Allocate an id for the vm (cayman+). |
174 | * Returns the fence we need to sync to (if any). |
175 | * |
176 | * Global and local mutex must be locked! |
177 | */ |
178 | struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, |
179 | struct radeon_vm *vm, int ring) |
180 | { |
181 | struct radeon_fence *best[RADEON_NUM_RINGS] = {}; |
182 | struct radeon_vm_id *vm_id = &vm->ids[ring]; |
183 | |
184 | unsigned choices[2] = {}; |
185 | unsigned i; |
186 | |
187 | /* check if the id is still valid */ |
188 | if (vm_id->id && vm_id->last_id_use && |
189 | vm_id->last_id_use == rdev->vm_manager.active[vm_id->id]) |
190 | return NULL; |
191 | |
192 | /* we definitely need to flush */ |
193 | vm_id->pd_gpu_addr = ~0ll; |
194 | |
195 | /* skip over VMID 0, since it is the system VM */ |
196 | for (i = 1; i < rdev->vm_manager.nvm; ++i) { |
197 | struct radeon_fence *fence = rdev->vm_manager.active[i]; |
198 | |
199 | if (fence == NULL) { |
200 | /* found a free one */ |
201 | vm_id->id = i; |
202 | trace_radeon_vm_grab_id(i, ring); |
203 | return NULL; |
204 | } |
205 | |
206 | if (radeon_fence_is_earlier(fence, best[fence->ring])) { |
207 | best[fence->ring] = fence; |
208 | choices[fence->ring == ring ? 0 : 1] = i; |
209 | } |
210 | } |
211 | |
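/*
 * No VMID is free: pick one to reuse. choices[0] is a candidate whose
 * last use was on the ring we are submitting to (cheapest to wait for),
 * choices[1] is the best candidate from any other ring.
 */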
212 | for (i = 0; i < 2; ++i) { |
213 | if (choices[i]) { |
214 | vm_id->id = choices[i]; |
215 | trace_radeon_vm_grab_id(choices[i], ring); |
216 | return rdev->vm_manager.active[choices[i]]; |
217 | } |
218 | } |
219 | |
220 | /* should never happen */ |
221 | BUG(); |
222 | return NULL; |
223 | } |
224 | |
225 | /** |
226 | * radeon_vm_flush - hardware flush the vm |
227 | * |
228 | * @rdev: radeon_device pointer |
229 | * @vm: vm we want to flush |
230 | * @ring: ring to use for flush |
231 | * @updates: last vm update that is waited for |
232 | * |
233 | * Flush the vm (cayman+). |
234 | * |
235 | * Global and local mutex must be locked! |
236 | */ |
237 | void radeon_vm_flush(struct radeon_device *rdev, |
238 | struct radeon_vm *vm, |
239 | int ring, struct radeon_fence *updates) |
240 | { |
241 | uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory); |
242 | struct radeon_vm_id *vm_id = &vm->ids[ring]; |
243 | |
244 | if (pd_addr != vm_id->pd_gpu_addr || !vm_id->flushed_updates || |
245 | radeon_fence_is_earlier(vm_id->flushed_updates, updates)) { |
246 | |
247 | trace_radeon_vm_flush(pd_addr, ring, vm->ids[ring].id); |
248 | radeon_fence_unref(&vm_id->flushed_updates); |
249 | vm_id->flushed_updates = radeon_fence_ref(updates); |
250 | vm_id->pd_gpu_addr = pd_addr; |
251 | radeon_ring_vm_flush(rdev, &rdev->ring[ring], |
252 | vm_id->id, vm_id->pd_gpu_addr); |
253 | |
254 | } |
255 | } |
256 | |
257 | /** |
258 | * radeon_vm_fence - remember fence for vm |
259 | * |
260 | * @rdev: radeon_device pointer |
261 | * @vm: vm we want to fence |
262 | * @fence: fence to remember |
263 | * |
264 | * Fence the vm (cayman+). |
265 | * Set the fence used to protect page table and id. |
266 | * |
267 | * Global and local mutex must be locked! |
268 | */ |
269 | void radeon_vm_fence(struct radeon_device *rdev, |
270 | struct radeon_vm *vm, |
271 | struct radeon_fence *fence) |
272 | { |
273 | unsigned vm_id = vm->ids[fence->ring].id; |
274 | |
275 | radeon_fence_unref(&rdev->vm_manager.active[vm_id]); |
276 | rdev->vm_manager.active[vm_id] = radeon_fence_ref(fence); |
277 | |
278 | radeon_fence_unref(&vm->ids[fence->ring].last_id_use); |
279 | vm->ids[fence->ring].last_id_use = radeon_fence_ref(fence); |
280 | } |
281 | |
282 | /** |
283 | * radeon_vm_bo_find - find the bo_va for a specific vm & bo |
284 | * |
285 | * @vm: requested vm |
286 | * @bo: requested buffer object |
287 | * |
288 | * Find @bo inside the requested vm (cayman+). |
289 | * Search inside the @bo's vm list for the requested vm |
290 | * Returns the found bo_va or NULL if none is found |
291 | * |
292 | * Object has to be reserved! |
293 | */ |
294 | struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm, |
295 | struct radeon_bo *bo) |
296 | { |
297 | struct radeon_bo_va *bo_va; |
298 | |
299 | list_for_each_entry(bo_va, &bo->va, bo_list) { |
300 | if (bo_va->vm == vm) |
301 | return bo_va; |
302 | |
303 | } |
304 | return NULL; |
305 | } |
306 | |
307 | /** |
308 | * radeon_vm_bo_add - add a bo to a specific vm |
309 | * |
310 | * @rdev: radeon_device pointer |
311 | * @vm: requested vm |
312 | * @bo: radeon buffer object |
313 | * |
314 | * Add @bo into the requested vm (cayman+). |
315 | * Add @bo to the list of bos associated with the vm |
316 | * Returns newly added bo_va or NULL for failure |
317 | * |
318 | * Object has to be reserved! |
319 | */ |
320 | struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev, |
321 | struct radeon_vm *vm, |
322 | struct radeon_bo *bo) |
323 | { |
324 | struct radeon_bo_va *bo_va; |
325 | |
326 | bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); |
327 | if (bo_va == NULL) |
328 | return NULL; |
329 | |
330 | bo_va->vm = vm; |
331 | bo_va->bo = bo; |
332 | bo_va->it.start = 0; |
333 | bo_va->it.last = 0; |
334 | bo_va->flags = 0; |
335 | bo_va->ref_count = 1; |
336 | INIT_LIST_HEAD(&bo_va->bo_list); |
337 | INIT_LIST_HEAD(&bo_va->vm_status); |
338 | |
339 | mutex_lock(&vm->mutex); |
340 | list_add_tail(&bo_va->bo_list, &bo->va); |
341 | mutex_unlock(&vm->mutex); |
342 | |
343 | return bo_va; |
344 | } |
345 | |
346 | /** |
347 | * radeon_vm_set_pages - helper to call the right asic function |
348 | * |
349 | * @rdev: radeon_device pointer |
350 | * @ib: indirect buffer to fill with commands |
351 | * @pe: addr of the page entry |
352 | * @addr: dst addr to write into pe |
353 | * @count: number of page entries to update |
354 | * @incr: increase next addr by incr bytes |
355 | * @flags: hw access flags |
356 | * |
357 | * Traces the parameters and calls the right asic functions |
358 | * to setup the page table using the DMA. |
359 | */ |
360 | static void radeon_vm_set_pages(struct radeon_device *rdev, |
361 | struct radeon_ib *ib, |
362 | uint64_t pe, |
363 | uint64_t addr, unsigned count, |
364 | uint32_t incr, uint32_t flags) |
365 | { |
366 | trace_radeon_vm_set_page(pe, addr, count, incr, flags); |
367 | |
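/*
 * GART-backed mappings are copied straight out of the GART table;
 * system pages and very small updates go through the write-pages path,
 * everything else through the set-pages path.
 */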
368 | if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) { |
369 | uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8; |
370 | radeon_asic_vm_copy_pages(rdev, ib, pe, src, count); |
371 | |
372 | } else if ((flags & R600_PTE_SYSTEM) || (count < 3)) { |
373 | radeon_asic_vm_write_pages(rdev, ib, pe, addr, |
374 | count, incr, flags); |
375 | |
376 | } else { |
377 | radeon_asic_vm_set_pages(rdev, ib, pe, addr, |
378 | count, incr, flags); |
379 | } |
380 | } |
381 | |
382 | /** |
383 | * radeon_vm_clear_bo - initially clear the page dir/table |
384 | * |
385 | * @rdev: radeon_device pointer |
386 | * @bo: bo to clear |
387 | */ |
388 | static int radeon_vm_clear_bo(struct radeon_device *rdev, |
389 | struct radeon_bo *bo) |
390 | { |
391 | struct ttm_operation_ctx ctx = { true, false }; |
392 | struct radeon_ib ib; |
393 | unsigned entries; |
394 | uint64_t addr; |
395 | int r; |
396 | |
397 | r = radeon_bo_reserve(bo, false); |
398 | if (r) |
399 | return r; |
400 | |
401 | r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); |
402 | if (r) |
403 | goto error_unreserve; |
404 | |
405 | addr = radeon_bo_gpu_offset(bo); |
406 | entries = radeon_bo_size(bo) / 8; |
407 | |
408 | r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, 256); |
409 | if (r) |
410 | goto error_unreserve; |
411 | |
412 | ib.length_dw = 0; |
413 | |
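/* write invalid (zero) entries over the whole BO so it starts out empty */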
414 | radeon_vm_set_pages(rdev, &ib, addr, 0, entries, 0, 0); |
415 | radeon_asic_vm_pad_ib(rdev, &ib); |
416 | WARN_ON(ib.length_dw > 64); |
417 | |
418 | r = radeon_ib_schedule(rdev, &ib, NULL, false); |
419 | if (r) |
420 | goto error_free; |
421 | |
422 | ib.fence->is_vm_update = true; |
423 | radeon_bo_fence(bo, ib.fence, false); |
424 | |
425 | error_free: |
426 | radeon_ib_free(rdev, &ib); |
427 | |
428 | error_unreserve: |
429 | radeon_bo_unreserve(bo); |
430 | return r; |
431 | } |
432 | |
433 | /** |
434 | * radeon_vm_bo_set_addr - set bos virtual address inside a vm |
435 | * |
436 | * @rdev: radeon_device pointer |
437 | * @bo_va: bo_va to store the address |
438 | * @soffset: requested offset of the buffer in the VM address space |
439 | * @flags: attributes of pages (read/write/valid/etc.) |
440 | * |
441 | * Set offset of @bo_va (cayman+). |
442 | * Validate and set the offset requested within the vm address space. |
443 | * Returns 0 for success, error for failure. |
444 | * |
445 | * Object has to be reserved and gets unreserved by this function! |
446 | */ |
447 | int radeon_vm_bo_set_addr(struct radeon_device *rdev, |
448 | struct radeon_bo_va *bo_va, |
449 | uint64_t soffset, |
450 | uint32_t flags) |
451 | { |
452 | uint64_t size = radeon_bo_size(bo_va->bo); |
453 | struct radeon_vm *vm = bo_va->vm; |
454 | unsigned last_pfn, pt_idx; |
455 | uint64_t eoffset; |
456 | int r; |
457 | |
458 | if (soffset) { |
459 | /* make sure object fit at this offset */ |
460 | eoffset = soffset + size - 1; |
461 | if (soffset >= eoffset) { |
462 | r = -EINVAL; |
463 | goto error_unreserve; |
464 | } |
465 | |
466 | last_pfn = eoffset / RADEON_GPU_PAGE_SIZE; |
467 | if (last_pfn >= rdev->vm_manager.max_pfn) { |
468 | dev_err(rdev->dev, "va above limit (0x%08X >= 0x%08X)\n", |
469 | last_pfn, rdev->vm_manager.max_pfn); |
470 | r = -EINVAL; |
471 | goto error_unreserve; |
472 | } |
473 | |
474 | } else { |
475 | eoffset = last_pfn = 0; |
476 | } |
477 | |
478 | mutex_lock(&vm->mutex); |
479 | soffset /= RADEON_GPU_PAGE_SIZE; |
480 | eoffset /= RADEON_GPU_PAGE_SIZE; |
481 | if (soffset || eoffset) { |
482 | struct interval_tree_node *it; |
483 | it = interval_tree_iter_first(&vm->va, soffset, eoffset); |
484 | if (it && it != &bo_va->it) { |
485 | struct radeon_bo_va *tmp; |
486 | tmp = container_of(it, struct radeon_bo_va, it); |
487 | /* bo and tmp overlap, invalid offset */ |
488 | dev_err(rdev->dev, "bo %p va 0x%010Lx conflict with " |
489 | "(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo, |
490 | soffset, tmp->bo, tmp->it.start, tmp->it.last); |
491 | mutex_unlock(&vm->mutex); |
492 | r = -EINVAL; |
493 | goto error_unreserve; |
494 | } |
495 | } |
496 | |
497 | if (bo_va->it.start || bo_va->it.last) { |
498 | /* add a clone of the bo_va to clear the old address */ |
499 | struct radeon_bo_va *tmp; |
500 | tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); |
501 | if (!tmp) { |
502 | mutex_unlock(&vm->mutex); |
503 | r = -ENOMEM; |
504 | goto error_unreserve; |
505 | } |
506 | tmp->it.start = bo_va->it.start; |
507 | tmp->it.last = bo_va->it.last; |
508 | tmp->vm = vm; |
509 | tmp->bo = radeon_bo_ref(bo_va->bo); |
510 | |
511 | interval_tree_remove(&bo_va->it, &vm->va); |
512 | spin_lock(&vm->status_lock); |
513 | bo_va->it.start = 0; |
514 | bo_va->it.last = 0; |
515 | list_del_init(&bo_va->vm_status); |
516 | list_add(&tmp->vm_status, &vm->freed); |
517 | spin_unlock(&vm->status_lock); |
518 | } |
519 | |
520 | if (soffset || eoffset) { |
521 | spin_lock(&vm->status_lock); |
522 | bo_va->it.start = soffset; |
523 | bo_va->it.last = eoffset; |
524 | list_add(&bo_va->vm_status, &vm->cleared); |
525 | spin_unlock(&vm->status_lock); |
526 | interval_tree_insert(&bo_va->it, &vm->va); |
527 | } |
528 | |
529 | bo_va->flags = flags; |
530 | |
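/* convert the range from page numbers to page directory indices */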
531 | soffset >>= radeon_vm_block_size; |
532 | eoffset >>= radeon_vm_block_size; |
533 | |
534 | BUG_ON(eoffset >= radeon_vm_num_pdes(rdev)); |
535 | |
536 | if (eoffset > vm->max_pde_used) |
537 | vm->max_pde_used = eoffset; |
538 | |
539 | radeon_bo_unreserve(bo_va->bo); |
540 | |
541 | /* walk over the address space and allocate the page tables */ |
542 | for (pt_idx = soffset; pt_idx <= eoffset; ++pt_idx) { |
543 | struct radeon_bo *pt; |
544 | |
545 | if (vm->page_tables[pt_idx].bo) |
546 | continue; |
547 | |
548 | /* drop mutex to allocate and clear page table */ |
549 | mutex_unlock(&vm->mutex); |
550 | |
551 | r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8, |
552 | RADEON_GPU_PAGE_SIZE, true, |
553 | RADEON_GEM_DOMAIN_VRAM, 0, |
554 | NULL, NULL, &pt); |
555 | if (r) |
556 | return r; |
557 | |
558 | r = radeon_vm_clear_bo(rdev, pt); |
559 | if (r) { |
560 | radeon_bo_unref(&pt); |
561 | return r; |
562 | } |
563 | |
564 | /* acquire mutex again */ |
565 | mutex_lock(&vm->mutex); |
566 | if (vm->page_tables[pt_idx].bo) { |
567 | /* someone else allocated the pt in the meantime */ |
568 | mutex_unlock(&vm->mutex); |
569 | radeon_bo_unref(&pt); |
570 | mutex_lock(&vm->mutex); |
571 | continue; |
572 | } |
573 | |
574 | vm->page_tables[pt_idx].addr = 0; |
575 | vm->page_tables[pt_idx].bo = pt; |
576 | } |
577 | |
578 | mutex_unlock(&vm->mutex); |
579 | return 0; |
580 | |
581 | error_unreserve: |
582 | radeon_bo_unreserve(bo_va->bo); |
583 | return r; |
584 | } |
585 | |
586 | /** |
587 | * radeon_vm_map_gart - get the physical address of a gart page |
588 | * |
589 | * @rdev: radeon_device pointer |
590 | * @addr: the unmapped addr |
591 | * |
592 | * Look up the physical address of the page that the pte resolves |
593 | * to (cayman+). |
594 | * Returns the physical address of the page. |
595 | */ |
596 | uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr) |
597 | { |
598 | uint64_t result; |
599 | |
600 | /* page table offset */ |
601 | result = rdev->gart.pages_entry[addr >> RADEON_GPU_PAGE_SHIFT]; |
602 | result &= ~RADEON_GPU_PAGE_MASK; |
603 | |
604 | return result; |
605 | } |
606 | |
607 | /** |
608 | * radeon_vm_page_flags - translate page flags to what the hw uses |
609 | * |
610 | * @flags: flags coming from userspace |
611 | * |
612 | * Translate the flags the userspace ABI uses to hw flags. |
613 | */ |
614 | static uint32_t radeon_vm_page_flags(uint32_t flags) |
615 | { |
616 | uint32_t hw_flags = 0; |
617 | |
618 | hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0; |
619 | hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0; |
620 | hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0; |
621 | if (flags & RADEON_VM_PAGE_SYSTEM) { |
622 | hw_flags |= R600_PTE_SYSTEM; |
623 | hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0; |
624 | } |
625 | return hw_flags; |
626 | } |
627 | |
628 | /** |
629 | * radeon_vm_update_page_directory - make sure that page directory is valid |
630 | * |
631 | * @rdev: radeon_device pointer |
632 | * @vm: requested vm |
633 | * |
634 | * Allocates new page tables if necessary |
635 | * and updates the page directory (cayman+). |
636 | * Returns 0 for success, error for failure. |
637 | * |
638 | * Global and local mutex must be locked! |
639 | */ |
640 | int radeon_vm_update_page_directory(struct radeon_device *rdev, |
641 | struct radeon_vm *vm) |
642 | { |
643 | struct radeon_bo *pd = vm->page_directory; |
644 | uint64_t pd_addr = radeon_bo_gpu_offset(pd); |
645 | uint32_t incr = RADEON_VM_PTE_COUNT * 8; |
646 | uint64_t last_pde = ~0, last_pt = ~0; |
647 | unsigned count = 0, pt_idx, ndw; |
648 | struct radeon_ib ib; |
649 | int r; |
650 | |
651 | /* padding, etc. */ |
652 | ndw = 64; |
653 | |
654 | /* assume the worst case */ |
655 | ndw += vm->max_pde_used * 6; |
656 | |
657 | /* update too big for an IB */ |
658 | if (ndw > 0xfffff) |
659 | return -ENOMEM; |
660 | |
661 | r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4); |
662 | if (r) |
663 | return r; |
664 | ib.length_dw = 0; |
665 | |
666 | /* walk over the address space and update the page directory */ |
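/* contiguous PDE updates are coalesced into a single set-pages call */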
667 | for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) { |
668 | struct radeon_bo *bo = vm->page_tables[pt_idx].bo; |
669 | uint64_t pde, pt; |
670 | |
671 | if (bo == NULL) |
672 | continue; |
673 | |
674 | pt = radeon_bo_gpu_offset(bo); |
675 | if (vm->page_tables[pt_idx].addr == pt) |
676 | continue; |
677 | vm->page_tables[pt_idx].addr = pt; |
678 | |
679 | pde = pd_addr + pt_idx * 8; |
680 | if (((last_pde + 8 * count) != pde) || |
681 | ((last_pt + incr * count) != pt)) { |
682 | |
683 | if (count) { |
684 | radeon_vm_set_pages(rdev, &ib, last_pde, |
685 | last_pt, count, incr, |
686 | R600_PTE_VALID); |
687 | } |
688 | |
689 | count = 1; |
690 | last_pde = pde; |
691 | last_pt = pt; |
692 | } else { |
693 | ++count; |
694 | } |
695 | } |
696 | |
697 | if (count) |
698 | radeon_vm_set_pages(rdev, &ib, last_pde, last_pt, count, |
699 | incr, R600_PTE_VALID); |
700 | |
701 | if (ib.length_dw != 0) { |
702 | radeon_asic_vm_pad_ib(rdev, &ib); |
703 | |
704 | radeon_sync_resv(rdev, &ib.sync, pd->tbo.base.resv, true); |
705 | WARN_ON(ib.length_dw > ndw); |
706 | r = radeon_ib_schedule(rdev, &ib, NULL, false); |
707 | if (r) { |
708 | radeon_ib_free(rdev, &ib); |
709 | return r; |
710 | } |
711 | ib.fence->is_vm_update = true; |
712 | radeon_bo_fence(pd, ib.fence, false); |
713 | } |
714 | radeon_ib_free(rdev, &ib); |
715 | |
716 | return 0; |
717 | } |
718 | |
719 | /** |
720 | * radeon_vm_frag_ptes - add fragment information to PTEs |
721 | * |
722 | * @rdev: radeon_device pointer |
723 | * @ib: IB for the update |
724 | * @pe_start: first PTE to handle |
725 | * @pe_end: last PTE to handle |
726 | * @addr: addr those PTEs should point to |
727 | * @flags: hw mapping flags |
728 | * |
729 | * Global and local mutex must be locked! |
730 | */ |
731 | static void radeon_vm_frag_ptes(struct radeon_device *rdev, |
732 | struct radeon_ib *ib, |
733 | uint64_t pe_start, uint64_t pe_end, |
734 | uint64_t addr, uint32_t flags) |
735 | { |
736 | /** |
737 | * The MC L1 TLB supports variable sized pages, based on a fragment |
738 | * field in the PTE. When this field is set to a non-zero value, page |
739 | * granularity is increased from 4KB to (1 << (12 + frag)). The PTE |
740 | * flags are considered valid for all PTEs within the fragment range |
741 | * and corresponding mappings are assumed to be physically contiguous. |
742 | * |
743 | * The L1 TLB can store a single PTE for the whole fragment, |
744 | * significantly increasing the space available for translation |
745 | * caching. This leads to large improvements in throughput when the |
746 | * TLB is under pressure. |
747 | * |
748 | * The L2 TLB distributes small and large fragments into two |
749 | * asymmetric partitions. The large fragment cache is significantly |
750 | * larger. Thus, we try to use large fragments wherever possible. |
751 | * Userspace can support this by aligning virtual base address and |
752 | * allocation size to the fragment size. |
753 | */ |
754 | |
755 | /* NI is optimized for 256KB fragments, SI and newer for 64KB */ |
756 | uint64_t frag_flags = ((rdev->family == CHIP_CAYMAN) || |
757 | (rdev->family == CHIP_ARUBA)) ? |
758 | R600_PTE_FRAG_256KB : R600_PTE_FRAG_64KB; |
759 | uint64_t frag_align = ((rdev->family == CHIP_CAYMAN) || |
760 | (rdev->family == CHIP_ARUBA)) ? 0x200 : 0x80; |
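/*
 * frag_align is in bytes of PTEs: 0x80 bytes = 16 PTEs = 64KB of address
 * space, 0x200 bytes = 64 PTEs = 256KB.
 */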
761 | |
762 | uint64_t frag_start = ALIGN(pe_start, frag_align); |
763 | uint64_t frag_end = pe_end & ~(frag_align - 1); |
764 | |
765 | unsigned count; |
766 | |
767 | /* system pages are not necessarily contiguous */ |
768 | if ((flags & R600_PTE_SYSTEM) || !(flags & R600_PTE_VALID) || |
769 | (frag_start >= frag_end)) { |
770 | |
771 | count = (pe_end - pe_start) / 8; |
772 | radeon_vm_set_pages(rdev, ib, pe_start, addr, count, |
773 | RADEON_GPU_PAGE_SIZE, flags); |
774 | return; |
775 | } |
776 | |
777 | /* handle the 4K area at the beginning */ |
778 | if (pe_start != frag_start) { |
779 | count = (frag_start - pe_start) / 8; |
780 | radeon_vm_set_pages(rdev, ib, pe_start, addr, count, |
781 | RADEON_GPU_PAGE_SIZE, flags); |
782 | addr += RADEON_GPU_PAGE_SIZE * count; |
783 | } |
784 | |
785 | /* handle the area in the middle */ |
786 | count = (frag_end - frag_start) / 8; |
787 | radeon_vm_set_pages(rdev, ib, frag_start, addr, count, |
788 | RADEON_GPU_PAGE_SIZE, flags | frag_flags); |
789 | |
790 | /* handle the 4K area at the end */ |
791 | if (frag_end != pe_end) { |
792 | addr += RADEON_GPU_PAGE_SIZE * count; |
793 | count = (pe_end - frag_end) / 8; |
794 | radeon_vm_set_pages(rdev, ib, frag_end, addr, count, |
795 | RADEON_GPU_PAGE_SIZE, flags); |
796 | } |
797 | } |
798 | |
799 | /** |
800 | * radeon_vm_update_ptes - make sure that page tables are valid |
801 | * |
802 | * @rdev: radeon_device pointer |
803 | * @vm: requested vm |
804 | * @ib: indirect buffer to use for the update |
805 | * @start: start of GPU address range |
806 | * @end: end of GPU address range |
807 | * @dst: destination address to map to |
808 | * @flags: mapping flags |
809 | * |
810 | * Update the page tables in the range @start - @end (cayman+). |
811 | * |
812 | * Global and local mutex must be locked! |
813 | */ |
814 | static int radeon_vm_update_ptes(struct radeon_device *rdev, |
815 | struct radeon_vm *vm, |
816 | struct radeon_ib *ib, |
817 | uint64_t start, uint64_t end, |
818 | uint64_t dst, uint32_t flags) |
819 | { |
820 | uint64_t mask = RADEON_VM_PTE_COUNT - 1; |
821 | uint64_t last_pte = ~0, last_dst = ~0; |
822 | unsigned count = 0; |
823 | uint64_t addr; |
824 | |
825 | /* walk over the address space and update the page tables */ |
826 | for (addr = start; addr < end; ) { |
827 | uint64_t pt_idx = addr >> radeon_vm_block_size; |
828 | struct radeon_bo *pt = vm->page_tables[pt_idx].bo; |
829 | unsigned nptes; |
830 | uint64_t pte; |
831 | int r; |
832 | |
833 | radeon_sync_resv(rdev, &ib->sync, pt->tbo.base.resv, true); |
834 | r = dma_resv_reserve_fences(pt->tbo.base.resv, 1); |
835 | if (r) |
836 | return r; |
837 | |
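/* clamp nptes so a single iteration never crosses a page table boundary */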
838 | if ((addr & ~mask) == (end & ~mask)) |
839 | nptes = end - addr; |
840 | else |
841 | nptes = RADEON_VM_PTE_COUNT - (addr & mask); |
842 | |
843 | pte = radeon_bo_gpu_offset(pt); |
844 | pte += (addr & mask) * 8; |
845 | |
846 | if ((last_pte + 8 * count) != pte) { |
847 | |
848 | if (count) { |
849 | radeon_vm_frag_ptes(rdev, ib, last_pte, |
850 | last_pte + 8 * count, |
851 | last_dst, flags); |
852 | } |
853 | |
854 | count = nptes; |
855 | last_pte = pte; |
856 | last_dst = dst; |
857 | } else { |
858 | count += nptes; |
859 | } |
860 | |
861 | addr += nptes; |
862 | dst += nptes * RADEON_GPU_PAGE_SIZE; |
863 | } |
864 | |
865 | if (count) { |
866 | radeon_vm_frag_ptes(rdev, ib, last_pte, |
867 | last_pte + 8 * count, |
868 | last_dst, flags); |
869 | } |
870 | |
871 | return 0; |
872 | } |
873 | |
874 | /** |
875 | * radeon_vm_fence_pts - fence page tables after an update |
876 | * |
877 | * @vm: requested vm |
878 | * @start: start of GPU address range |
879 | * @end: end of GPU address range |
880 | * @fence: fence to use |
881 | * |
882 | * Fence the page tables in the range @start - @end (cayman+). |
883 | * |
884 | * Global and local mutex must be locked! |
885 | */ |
886 | static void radeon_vm_fence_pts(struct radeon_vm *vm, |
887 | uint64_t start, uint64_t end, |
888 | struct radeon_fence *fence) |
889 | { |
890 | unsigned i; |
891 | |
892 | start >>= radeon_vm_block_size; |
893 | end = (end - 1) >> radeon_vm_block_size; |
894 | |
895 | for (i = start; i <= end; ++i) |
896 | radeon_bo_fence(vm->page_tables[i].bo, fence, true); |
897 | } |
898 | |
899 | /** |
900 | * radeon_vm_bo_update - map a bo into the vm page table |
901 | * |
902 | * @rdev: radeon_device pointer |
903 | * @bo_va: radeon buffer virtual address object |
904 | * @mem: ttm mem |
905 | * |
906 | * Fill in the page table entries for @bo (cayman+). |
907 | * Returns 0 for success, -EINVAL for failure. |
908 | * |
909 | * Object has to be reserved and mutex must be locked! |
910 | */ |
911 | int radeon_vm_bo_update(struct radeon_device *rdev, |
912 | struct radeon_bo_va *bo_va, |
913 | struct ttm_resource *mem) |
914 | { |
915 | struct radeon_vm *vm = bo_va->vm; |
916 | struct radeon_ib ib; |
917 | unsigned nptes, ncmds, ndw; |
918 | uint64_t addr; |
919 | uint32_t flags; |
920 | int r; |
921 | |
922 | if (!bo_va->it.start) { |
923 | dev_err(rdev->dev, "bo %p doesn't have a mapping in vm %p\n", |
924 | bo_va->bo, vm); |
925 | return -EINVAL; |
926 | } |
927 | |
928 | spin_lock(&vm->status_lock); |
929 | if (mem) { |
930 | if (list_empty(&bo_va->vm_status)) { |
931 | spin_unlock(&vm->status_lock); |
932 | return 0; |
933 | } |
934 | list_del_init(&bo_va->vm_status); |
935 | } else { |
936 | list_del(&bo_va->vm_status); |
937 | list_add(&bo_va->vm_status, &vm->cleared); |
938 | } |
939 | spin_unlock(&vm->status_lock); |
940 | |
941 | bo_va->flags &= ~RADEON_VM_PAGE_VALID; |
942 | bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM; |
943 | bo_va->flags &= ~RADEON_VM_PAGE_SNOOPED; |
944 | if (bo_va->bo && radeon_ttm_tt_is_readonly(rdev, bo_va->bo->tbo.ttm)) |
945 | bo_va->flags &= ~RADEON_VM_PAGE_WRITEABLE; |
946 | |
947 | if (mem) { |
948 | addr = (u64)mem->start << PAGE_SHIFT; |
949 | if (mem->mem_type != TTM_PL_SYSTEM) |
950 | bo_va->flags |= RADEON_VM_PAGE_VALID; |
951 | |
952 | if (mem->mem_type == TTM_PL_TT) { |
953 | bo_va->flags |= RADEON_VM_PAGE_SYSTEM; |
954 | if (!(bo_va->bo->flags & (RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC))) |
955 | bo_va->flags |= RADEON_VM_PAGE_SNOOPED; |
956 | |
957 | } else { |
958 | addr += rdev->vm_manager.vram_base_offset; |
959 | } |
960 | } else { |
961 | addr = 0; |
962 | } |
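/* when mem is NULL, addr == 0 and the cleared VALID/SYSTEM flags leave the range unmapped */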
963 | |
964 | trace_radeon_vm_bo_update(bo_va); |
965 | |
966 | nptes = bo_va->it.last - bo_va->it.start + 1; |
967 | |
968 | /* reserve space for one command every (1 << BLOCK_SIZE) entries |
969 | or 2k dwords (whatever is smaller) */ |
970 | ncmds = (nptes >> min(radeon_vm_block_size, 11)) + 1; |
971 | |
972 | /* padding, etc. */ |
973 | ndw = 64; |
974 | |
975 | flags = radeon_vm_page_flags(bo_va->flags); |
976 | if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) { |
977 | /* only copy commands needed */ |
978 | ndw += ncmds * 7; |
979 | |
980 | } else if (flags & R600_PTE_SYSTEM) { |
981 | /* header for write data commands */ |
982 | ndw += ncmds * 4; |
983 | |
984 | /* body of write data command */ |
985 | ndw += nptes * 2; |
986 | |
987 | } else { |
988 | /* set page commands needed */ |
989 | ndw += ncmds * 10; |
990 | |
991 | /* two extra commands for begin/end of fragment */ |
992 | ndw += 2 * 10; |
993 | } |
994 | |
995 | /* update too big for an IB */ |
996 | if (ndw > 0xfffff) |
997 | return -ENOMEM; |
998 | |
999 | r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4); |
1000 | if (r) |
1001 | return r; |
1002 | ib.length_dw = 0; |
1003 | |
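/*
 * When unmapping, sync to the VMIDs this VM last used on every ring so
 * that no in-flight command submission can still reference the old
 * mapping.
 */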
1004 | if (!(bo_va->flags & RADEON_VM_PAGE_VALID)) { |
1005 | unsigned i; |
1006 | |
1007 | for (i = 0; i < RADEON_NUM_RINGS; ++i) |
1008 | radeon_sync_fence(&ib.sync, vm->ids[i].last_id_use); |
1009 | } |
1010 | |
1011 | r = radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start, |
1012 | bo_va->it.last + 1, addr, |
1013 | radeon_vm_page_flags(bo_va->flags)); |
1014 | if (r) { |
1015 | radeon_ib_free(rdev, &ib); |
1016 | return r; |
1017 | } |
1018 | |
1019 | radeon_asic_vm_pad_ib(rdev, &ib); |
1020 | WARN_ON(ib.length_dw > ndw); |
1021 | |
1022 | r = radeon_ib_schedule(rdev, &ib, NULL, false); |
1023 | if (r) { |
1024 | radeon_ib_free(rdev, &ib); |
1025 | return r; |
1026 | } |
1027 | ib.fence->is_vm_update = true; |
1028 | radeon_vm_fence_pts(vm, bo_va->it.start, bo_va->it.last + 1, ib.fence); |
1029 | radeon_fence_unref(&bo_va->last_pt_update); |
1030 | bo_va->last_pt_update = radeon_fence_ref(ib.fence); |
1031 | radeon_ib_free(rdev, &ib); |
1032 | |
1033 | return 0; |
1034 | } |
1035 | |
1036 | /** |
1037 | * radeon_vm_clear_freed - clear freed BOs in the PT |
1038 | * |
1039 | * @rdev: radeon_device pointer |
1040 | * @vm: requested vm |
1041 | * |
1042 | * Make sure all freed BOs are cleared in the PT. |
1043 | * Returns 0 for success. |
1044 | * |
1045 | * PTs have to be reserved and mutex must be locked! |
1046 | */ |
1047 | int radeon_vm_clear_freed(struct radeon_device *rdev, |
1048 | struct radeon_vm *vm) |
1049 | { |
1050 | struct radeon_bo_va *bo_va; |
1051 | int r = 0; |
1052 | |
1053 | spin_lock(&vm->status_lock); |
1054 | while (!list_empty(&vm->freed)) { |
1055 | bo_va = list_first_entry(&vm->freed, |
1056 | struct radeon_bo_va, vm_status); |
1057 | spin_unlock(&vm->status_lock); |
1058 | |
1059 | r = radeon_vm_bo_update(rdev, bo_va, NULL); |
1060 | radeon_bo_unref(&bo_va->bo); |
1061 | radeon_fence_unref(&bo_va->last_pt_update); |
1062 | spin_lock(&vm->status_lock); |
1063 | list_del(&bo_va->vm_status); |
1064 | kfree(bo_va); |
1065 | if (r) |
1066 | break; |
1067 | |
1068 | } |
1069 | spin_unlock(&vm->status_lock); |
1070 | return r; |
1071 | |
1072 | } |
1073 | |
1074 | /** |
1075 | * radeon_vm_clear_invalids - clear invalidated BOs in the PT |
1076 | * |
1077 | * @rdev: radeon_device pointer |
1078 | * @vm: requested vm |
1079 | * |
1080 | * Make sure all invalidated BOs are cleared in the PT. |
1081 | * Returns 0 for success. |
1082 | * |
1083 | * PTs have to be reserved and mutex must be locked! |
1084 | */ |
1085 | int radeon_vm_clear_invalids(struct radeon_device *rdev, |
1086 | struct radeon_vm *vm) |
1087 | { |
1088 | struct radeon_bo_va *bo_va; |
1089 | int r; |
1090 | |
1091 | spin_lock(&vm->status_lock); |
1092 | while (!list_empty(&vm->invalidated)) { |
1093 | bo_va = list_first_entry(&vm->invalidated, |
1094 | struct radeon_bo_va, vm_status); |
1095 | spin_unlock(&vm->status_lock); |
1096 | |
1097 | r = radeon_vm_bo_update(rdev, bo_va, NULL); |
1098 | if (r) |
1099 | return r; |
1100 | |
1101 | spin_lock(&vm->status_lock); |
1102 | } |
1103 | spin_unlock(&vm->status_lock); |
1104 | |
1105 | return 0; |
1106 | } |
1107 | |
1108 | /** |
1109 | * radeon_vm_bo_rmv - remove a bo from a specific vm |
1110 | * |
1111 | * @rdev: radeon_device pointer |
1112 | * @bo_va: requested bo_va |
1113 | * |
1114 | * Remove @bo_va->bo from the requested vm (cayman+). |
1115 | * |
1116 | * Object has to be reserved! |
1117 | */ |
1118 | void radeon_vm_bo_rmv(struct radeon_device *rdev, |
1119 | struct radeon_bo_va *bo_va) |
1120 | { |
1121 | struct radeon_vm *vm = bo_va->vm; |
1122 | |
1123 | list_del(&bo_va->bo_list); |
1124 | |
1125 | mutex_lock(&vm->mutex); |
1126 | if (bo_va->it.start || bo_va->it.last) |
1127 | interval_tree_remove(&bo_va->it, &vm->va); |
1128 | |
1129 | spin_lock(&vm->status_lock); |
1130 | list_del(&bo_va->vm_status); |
1131 | if (bo_va->it.start || bo_va->it.last) { |
1132 | bo_va->bo = radeon_bo_ref(bo_va->bo); |
1133 | list_add(&bo_va->vm_status, &vm->freed); |
1134 | } else { |
1135 | radeon_fence_unref(&bo_va->last_pt_update); |
1136 | kfree(bo_va); |
1137 | } |
1138 | spin_unlock(&vm->status_lock); |
1139 | |
1140 | mutex_unlock(&vm->mutex); |
1141 | } |
1142 | |
1143 | /** |
1144 | * radeon_vm_bo_invalidate - mark the bo as invalid |
1145 | * |
1146 | * @rdev: radeon_device pointer |
1147 | * @bo: radeon buffer object |
1148 | * |
1149 | * Mark @bo as invalid (cayman+). |
1150 | */ |
1151 | void radeon_vm_bo_invalidate(struct radeon_device *rdev, |
1152 | struct radeon_bo *bo) |
1153 | { |
1154 | struct radeon_bo_va *bo_va; |
1155 | |
1156 | list_for_each_entry(bo_va, &bo->va, bo_list) { |
1157 | spin_lock(&bo_va->vm->status_lock); |
1158 | if (list_empty(&bo_va->vm_status) && |
1159 | (bo_va->it.start || bo_va->it.last)) |
1160 | list_add(&bo_va->vm_status, &bo_va->vm->invalidated); |
1161 | spin_unlock(&bo_va->vm->status_lock); |
1162 | } |
1163 | } |
1164 | |
1165 | /** |
1166 | * radeon_vm_init - initialize a vm instance |
1167 | * |
1168 | * @rdev: radeon_device pointer |
1169 | * @vm: requested vm |
1170 | * |
1171 | * Init @vm fields (cayman+). |
1172 | */ |
1173 | int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) |
1174 | { |
1175 | const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE, |
1176 | RADEON_VM_PTE_COUNT * 8); |
1177 | unsigned pd_size, pd_entries, pts_size; |
1178 | int i, r; |
1179 | |
1180 | vm->ib_bo_va = NULL; |
1181 | for (i = 0; i < RADEON_NUM_RINGS; ++i) { |
1182 | vm->ids[i].id = 0; |
1183 | vm->ids[i].flushed_updates = NULL; |
1184 | vm->ids[i].last_id_use = NULL; |
1185 | } |
1186 | mutex_init(&vm->mutex); |
1187 | vm->va = RB_ROOT_CACHED; |
1188 | spin_lock_init(&vm->status_lock); |
1189 | INIT_LIST_HEAD(&vm->invalidated); |
1190 | INIT_LIST_HEAD(&vm->freed); |
1191 | INIT_LIST_HEAD(&vm->cleared); |
1192 | |
1193 | pd_size = radeon_vm_directory_size(rdev); |
1194 | pd_entries = radeon_vm_num_pdes(rdev); |
1195 | |
1196 | /* allocate page table array */ |
1197 | pts_size = pd_entries * sizeof(struct radeon_vm_pt); |
1198 | vm->page_tables = kzalloc(pts_size, GFP_KERNEL); |
1199 | if (vm->page_tables == NULL) { |
1200 | DRM_ERROR("Cannot allocate memory for page table array\n"); |
1201 | return -ENOMEM; |
1202 | } |
1203 | |
1204 | r = radeon_bo_create(rdev, pd_size, align, true, |
1205 | RADEON_GEM_DOMAIN_VRAM, 0, NULL, |
1206 | NULL, &vm->page_directory); |
1207 | if (r) |
1208 | return r; |
1209 | |
1210 | r = radeon_vm_clear_bo(rdev, vm->page_directory); |
1211 | if (r) { |
1212 | radeon_bo_unref(&vm->page_directory); |
1213 | vm->page_directory = NULL; |
1214 | return r; |
1215 | } |
1216 | |
1217 | return 0; |
1218 | } |
1219 | |
1220 | /** |
1221 | * radeon_vm_fini - tear down a vm instance |
1222 | * |
1223 | * @rdev: radeon_device pointer |
1224 | * @vm: requested vm |
1225 | * |
1226 | * Tear down @vm (cayman+). |
1227 | * Unbind the VM and remove all bos from the vm bo list |
1228 | */ |
1229 | void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) |
1230 | { |
1231 | struct radeon_bo_va *bo_va, *tmp; |
1232 | int i, r; |
1233 | |
1234 | if (!RB_EMPTY_ROOT(&vm->va.rb_root)) |
1235 | dev_err(rdev->dev, "still active bo inside vm\n"); |
1236 | |
1237 | rbtree_postorder_for_each_entry_safe(bo_va, tmp, |
1238 | &vm->va.rb_root, it.rb) { |
1239 | interval_tree_remove(&bo_va->it, &vm->va); |
1240 | r = radeon_bo_reserve(bo_va->bo, false); |
1241 | if (!r) { |
1242 | list_del_init(&bo_va->bo_list); |
1243 | radeon_bo_unreserve(bo_va->bo); |
1244 | radeon_fence_unref(&bo_va->last_pt_update); |
1245 | kfree(bo_va); |
1246 | } |
1247 | } |
1248 | list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) { |
1249 | radeon_bo_unref(&bo_va->bo); |
1250 | radeon_fence_unref(&bo_va->last_pt_update); |
1251 | kfree(bo_va); |
1252 | } |
1253 | |
1254 | for (i = 0; i < radeon_vm_num_pdes(rdev); i++) |
1255 | radeon_bo_unref(&vm->page_tables[i].bo); |
1256 | kfree(vm->page_tables); |
1257 | |
1258 | radeon_bo_unref(&vm->page_directory); |
1259 | |
1260 | for (i = 0; i < RADEON_NUM_RINGS; ++i) { |
1261 | radeon_fence_unref(&vm->ids[i].flushed_updates); |
1262 | radeon_fence_unref(&vm->ids[i].last_id_use); |
1263 | } |
1264 | |
1265 | mutex_destroy(&vm->mutex); |
1266 | } |
1267 | |