1 | /* |
2 | * Copyright 2018 Red Hat Inc. |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice shall be included in |
12 | * all copies or substantial portions of the Software. |
13 | * |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
20 | * OTHER DEALINGS IN THE SOFTWARE. |
21 | */ |
22 | #include "nouveau_dmem.h" |
23 | #include "nouveau_drv.h" |
24 | #include "nouveau_chan.h" |
25 | #include "nouveau_dma.h" |
26 | #include "nouveau_mem.h" |
27 | #include "nouveau_bo.h" |
28 | #include "nouveau_svm.h" |
29 | |
30 | #include <nvif/class.h> |
31 | #include <nvif/object.h> |
32 | #include <nvif/push906f.h> |
33 | #include <nvif/if000c.h> |
34 | #include <nvif/if500b.h> |
35 | #include <nvif/if900b.h> |
36 | |
37 | #include <nvhw/class/cla0b5.h> |
38 | |
39 | #include <linux/sched/mm.h> |
40 | #include <linux/hmm.h> |
41 | #include <linux/memremap.h> |
42 | #include <linux/migrate.h> |
43 | |
44 | /* |
45 | * FIXME: this is ugly right now we are using TTM to allocate vram and we pin |
46 | * it in vram while in use. We likely want to overhaul memory management for |
47 | * nouveau to be more page like (not necessarily with system page size but a |
48 | * bigger page size) at lowest level and have some shim layer on top that would |
49 | * provide the same functionality as TTM. |
50 | */ |
51 | #define DMEM_CHUNK_SIZE (2UL << 20) |
52 | #define DMEM_CHUNK_NPAGES (DMEM_CHUNK_SIZE >> PAGE_SHIFT) |
53 | |
54 | enum nouveau_aper { |
55 | NOUVEAU_APER_VIRT, |
56 | NOUVEAU_APER_VRAM, |
57 | NOUVEAU_APER_HOST, |
58 | }; |
59 | |
60 | typedef int (*nouveau_migrate_copy_t)(struct nouveau_drm *drm, u64 npages, |
61 | enum nouveau_aper, u64 dst_addr, |
62 | enum nouveau_aper, u64 src_addr); |
63 | typedef int (*nouveau_clear_page_t)(struct nouveau_drm *drm, u32 length, |
64 | enum nouveau_aper, u64 dst_addr); |
65 | |
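/*
 * A chunk is one contiguous DMEM_CHUNK_SIZE slice of VRAM, backed by a
 * pinned TTM buffer object, whose pages are handed out through the
 * device-private pagemap below. callocated counts the pages currently
 * allocated out of the chunk.
 */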
66 | struct nouveau_dmem_chunk { |
67 | struct list_head list; |
68 | struct nouveau_bo *bo; |
69 | struct nouveau_drm *drm; |
70 | unsigned long callocated; |
71 | struct dev_pagemap pagemap; |
72 | }; |
73 | |
74 | struct nouveau_dmem_migrate { |
75 | nouveau_migrate_copy_t copy_func; |
76 | nouveau_clear_page_t clear_func; |
77 | struct nouveau_channel *chan; |
78 | }; |
79 | |
80 | struct nouveau_dmem { |
81 | struct nouveau_drm *drm; |
82 | struct nouveau_dmem_migrate migrate; |
83 | struct list_head chunks; |
84 | struct mutex mutex; |
85 | struct page *free_pages; |
86 | spinlock_t lock; |
87 | }; |
88 | |
89 | static struct nouveau_dmem_chunk *nouveau_page_to_chunk(struct page *page) |
90 | { |
91 | return container_of(page->pgmap, struct nouveau_dmem_chunk, pagemap); |
92 | } |
93 | |
94 | static struct nouveau_drm *page_to_drm(struct page *page) |
95 | { |
96 | struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page); |
97 | |
98 | return chunk->drm; |
99 | } |
100 | |
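/*
 * Translate a device-private struct page to its VRAM address: the page's
 * offset within the chunk's pagemap range plus the chunk BO's offset in
 * VRAM.
 */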
101 | unsigned long nouveau_dmem_page_addr(struct page *page) |
102 | { |
103 | struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page); |
104 | unsigned long off = (page_to_pfn(page) << PAGE_SHIFT) - |
105 | chunk->pagemap.range.start; |
106 | |
107 | return chunk->bo->offset + off; |
108 | } |
109 | |
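/*
 * dev_pagemap_ops.page_free() callback: the last reference to a device
 * page has been dropped, so put the page back on the free list under
 * dmem->lock.
 */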
110 | static void nouveau_dmem_page_free(struct page *page) |
111 | { |
112 | struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page); |
113 | struct nouveau_dmem *dmem = chunk->drm->dmem; |
114 | |
	spin_lock(&dmem->lock);
	page->zone_device_data = dmem->free_pages;
	dmem->free_pages = page;

	WARN_ON(!chunk->callocated);
	chunk->callocated--;
	/*
	 * FIXME: when chunk->callocated reaches 0 we should add the chunk to
	 * a reclaim list so that it can be freed in case of memory pressure.
	 */
	spin_unlock(&dmem->lock);
126 | } |
127 | |
128 | static void nouveau_dmem_fence_done(struct nouveau_fence **fence) |
129 | { |
130 | if (fence) { |
		nouveau_fence_wait(*fence, true, false);
		nouveau_fence_unref(fence);
	} else {
		/*
		 * FIXME: wait for the channel to be IDLE before finalizing
		 * the hmem object.
		 */
138 | } |
139 | } |
140 | |
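/*
 * Copy one VRAM page back to system memory with the GPU copy engine. The
 * destination page is locked and DMA-mapped here; the caller is expected
 * to wait on the copy fence and then unmap *dma_addr.
 */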
141 | static int nouveau_dmem_copy_one(struct nouveau_drm *drm, struct page *spage, |
142 | struct page *dpage, dma_addr_t *dma_addr) |
143 | { |
144 | struct device *dev = drm->dev->dev; |
145 | |
	lock_page(dpage);
147 | |
148 | *dma_addr = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); |
149 | if (dma_mapping_error(dev, *dma_addr)) |
150 | return -EIO; |
151 | |
152 | if (drm->dmem->migrate.copy_func(drm, 1, NOUVEAU_APER_HOST, *dma_addr, |
			NOUVEAU_APER_VRAM, nouveau_dmem_page_addr(spage))) {
154 | dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); |
155 | return -EIO; |
156 | } |
157 | |
158 | return 0; |
159 | } |
160 | |
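/*
 * dev_pagemap_ops.migrate_to_ram() callback: a CPU fault hit a device
 * page, so migrate that one page back to system memory, invalidating the
 * GPU's mapping first so the SVMM stays coherent.
 */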
161 | static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf) |
162 | { |
	struct nouveau_drm *drm = page_to_drm(vmf->page);
164 | struct nouveau_dmem *dmem = drm->dmem; |
165 | struct nouveau_fence *fence; |
166 | struct nouveau_svmm *svmm; |
167 | struct page *spage, *dpage; |
168 | unsigned long src = 0, dst = 0; |
169 | dma_addr_t dma_addr = 0; |
170 | vm_fault_t ret = 0; |
171 | struct migrate_vma args = { |
172 | .vma = vmf->vma, |
173 | .start = vmf->address, |
174 | .end = vmf->address + PAGE_SIZE, |
175 | .src = &src, |
176 | .dst = &dst, |
177 | .pgmap_owner = drm->dev, |
178 | .fault_page = vmf->page, |
179 | .flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE, |
180 | }; |
181 | |
182 | /* |
183 | * FIXME what we really want is to find some heuristic to migrate more |
184 | * than just one page on CPU fault. When such fault happens it is very |
185 | * likely that more surrounding page will CPU fault too. |
186 | */ |
187 | if (migrate_vma_setup(args: &args) < 0) |
188 | return VM_FAULT_SIGBUS; |
189 | if (!args.cpages) |
190 | return 0; |
191 | |
	spage = migrate_pfn_to_page(src);
193 | if (!spage || !(src & MIGRATE_PFN_MIGRATE)) |
194 | goto done; |
195 | |
	dpage = alloc_page_vma(GFP_HIGHUSER, vmf->vma, vmf->address);
197 | if (!dpage) |
198 | goto done; |
199 | |
200 | dst = migrate_pfn(page_to_pfn(dpage)); |
201 | |
202 | svmm = spage->zone_device_data; |
203 | mutex_lock(&svmm->mutex); |
	nouveau_svmm_invalidate(svmm, args.start, args.end);
	ret = nouveau_dmem_copy_one(drm, spage, dpage, &dma_addr);
	mutex_unlock(&svmm->mutex);
207 | if (ret) { |
208 | ret = VM_FAULT_SIGBUS; |
209 | goto done; |
210 | } |
211 | |
212 | nouveau_fence_new(&fence, dmem->migrate.chan); |
	migrate_vma_pages(&args);
	nouveau_dmem_fence_done(&fence);
	dma_unmap_page(drm->dev->dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
done:
	migrate_vma_finalize(&args);
218 | return ret; |
219 | } |
220 | |
221 | static const struct dev_pagemap_ops nouveau_dmem_pagemap_ops = { |
222 | .page_free = nouveau_dmem_page_free, |
223 | .migrate_to_ram = nouveau_dmem_migrate_to_ram, |
224 | }; |
225 | |
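/*
 * Grow the pool by one chunk: reserve a free physical address range for
 * the device-private struct pages, back it with a pinned VRAM buffer
 * object, register the pagemap, and thread all but the last new page onto
 * the free list. The last page is handed back through *ppage.
 */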
226 | static int |
227 | nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage) |
228 | { |
229 | struct nouveau_dmem_chunk *chunk; |
230 | struct resource *res; |
231 | struct page *page; |
232 | void *ptr; |
233 | unsigned long i, pfn_first; |
234 | int ret; |
235 | |
	chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
237 | if (chunk == NULL) { |
238 | ret = -ENOMEM; |
239 | goto out; |
240 | } |
241 | |
242 | /* Allocate unused physical address space for device private pages. */ |
	res = request_free_mem_region(&iomem_resource, DMEM_CHUNK_SIZE,
				      "nouveau_dmem");
	if (IS_ERR(res)) {
		ret = PTR_ERR(res);
247 | goto out_free; |
248 | } |
249 | |
250 | chunk->drm = drm; |
251 | chunk->pagemap.type = MEMORY_DEVICE_PRIVATE; |
252 | chunk->pagemap.range.start = res->start; |
253 | chunk->pagemap.range.end = res->end; |
254 | chunk->pagemap.nr_range = 1; |
255 | chunk->pagemap.ops = &nouveau_dmem_pagemap_ops; |
256 | chunk->pagemap.owner = drm->dev; |
257 | |
	ret = nouveau_bo_new(&drm->client, DMEM_CHUNK_SIZE, 0,
			     NOUVEAU_GEM_DOMAIN_VRAM, 0, 0, NULL, NULL,
			     &chunk->bo);
261 | if (ret) |
262 | goto out_release; |
263 | |
	ret = nouveau_bo_pin(chunk->bo, NOUVEAU_GEM_DOMAIN_VRAM, false);
265 | if (ret) |
266 | goto out_bo_free; |
267 | |
	ptr = memremap_pages(&chunk->pagemap, numa_node_id());
269 | if (IS_ERR(ptr)) { |
270 | ret = PTR_ERR(ptr); |
271 | goto out_bo_unpin; |
272 | } |
273 | |
274 | mutex_lock(&drm->dmem->mutex); |
	list_add(&chunk->list, &drm->dmem->chunks);
	mutex_unlock(&drm->dmem->mutex);
277 | |
278 | pfn_first = chunk->pagemap.range.start >> PAGE_SHIFT; |
279 | page = pfn_to_page(pfn_first); |
	spin_lock(&drm->dmem->lock);
281 | for (i = 0; i < DMEM_CHUNK_NPAGES - 1; ++i, ++page) { |
282 | page->zone_device_data = drm->dmem->free_pages; |
283 | drm->dmem->free_pages = page; |
284 | } |
285 | *ppage = page; |
286 | chunk->callocated++; |
	spin_unlock(&drm->dmem->lock);
288 | |
	NV_INFO(drm, "DMEM: registered %ldMB of device memory\n",
290 | DMEM_CHUNK_SIZE >> 20); |
291 | |
292 | return 0; |
293 | |
294 | out_bo_unpin: |
295 | nouveau_bo_unpin(chunk->bo); |
296 | out_bo_free: |
	nouveau_bo_ref(NULL, &chunk->bo);
out_release:
	release_mem_region(chunk->pagemap.range.start, range_len(&chunk->pagemap.range));
out_free:
	kfree(chunk);
302 | out: |
303 | return ret; |
304 | } |
305 | |
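/*
 * Pop a page off the free list, allocating a fresh chunk if the list is
 * empty. The returned page has been initialised and locked by
 * zone_device_page_init().
 */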
306 | static struct page * |
307 | nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm) |
308 | { |
309 | struct nouveau_dmem_chunk *chunk; |
310 | struct page *page = NULL; |
311 | int ret; |
312 | |
	spin_lock(&drm->dmem->lock);
	if (drm->dmem->free_pages) {
		page = drm->dmem->free_pages;
		drm->dmem->free_pages = page->zone_device_data;
		chunk = nouveau_page_to_chunk(page);
		chunk->callocated++;
		spin_unlock(&drm->dmem->lock);
	} else {
		spin_unlock(&drm->dmem->lock);
		ret = nouveau_dmem_chunk_alloc(drm, &page);
323 | if (ret) |
324 | return NULL; |
325 | } |
326 | |
327 | zone_device_page_init(page); |
328 | return page; |
329 | } |
330 | |
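/*
 * Undo nouveau_dmem_page_alloc_locked(): dropping the last reference
 * returns the page through nouveau_dmem_page_free().
 */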
331 | static void |
332 | nouveau_dmem_page_free_locked(struct nouveau_drm *drm, struct page *page) |
333 | { |
334 | unlock_page(page); |
335 | put_page(page); |
336 | } |
337 | |
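/* On resume, re-pin every chunk's buffer object back into VRAM. */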
338 | void |
339 | nouveau_dmem_resume(struct nouveau_drm *drm) |
340 | { |
341 | struct nouveau_dmem_chunk *chunk; |
342 | int ret; |
343 | |
344 | if (drm->dmem == NULL) |
345 | return; |
346 | |
347 | mutex_lock(&drm->dmem->mutex); |
348 | list_for_each_entry(chunk, &drm->dmem->chunks, list) { |
		ret = nouveau_bo_pin(chunk->bo, NOUVEAU_GEM_DOMAIN_VRAM, false);
		/* FIXME handle pin failure */
		WARN_ON(ret);
	}
	mutex_unlock(&drm->dmem->mutex);
354 | } |
355 | |
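/*
 * On suspend, unpin the chunk buffer objects, presumably so TTM can evict
 * or save them while the device is down.
 */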
356 | void |
357 | nouveau_dmem_suspend(struct nouveau_drm *drm) |
358 | { |
359 | struct nouveau_dmem_chunk *chunk; |
360 | |
361 | if (drm->dmem == NULL) |
362 | return; |
363 | |
364 | mutex_lock(&drm->dmem->mutex); |
365 | list_for_each_entry(chunk, &drm->dmem->chunks, list) |
366 | nouveau_bo_unpin(chunk->bo); |
	mutex_unlock(&drm->dmem->mutex);
368 | } |
369 | |
370 | /* |
371 | * Evict all pages mapping a chunk. |
372 | */ |
373 | static void |
374 | nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk) |
375 | { |
	unsigned long i, npages = range_len(&chunk->pagemap.range) >> PAGE_SHIFT;
377 | unsigned long *src_pfns, *dst_pfns; |
378 | dma_addr_t *dma_addrs; |
379 | struct nouveau_fence *fence; |
380 | |
	src_pfns = kcalloc(npages, sizeof(*src_pfns), GFP_KERNEL);
	dst_pfns = kcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL);
	dma_addrs = kcalloc(npages, sizeof(*dma_addrs), GFP_KERNEL);

	migrate_device_range(src_pfns, chunk->pagemap.range.start >> PAGE_SHIFT,
			npages);
387 | |
388 | for (i = 0; i < npages; i++) { |
389 | if (src_pfns[i] & MIGRATE_PFN_MIGRATE) { |
390 | struct page *dpage; |
391 | |
392 | /* |
393 | * _GFP_NOFAIL because the GPU is going away and there |
394 | * is nothing sensible we can do if we can't copy the |
395 | * data back. |
396 | */ |
397 | dpage = alloc_page(GFP_HIGHUSER | __GFP_NOFAIL); |
398 | dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)); |
399 | nouveau_dmem_copy_one(drm: chunk->drm, |
400 | spage: migrate_pfn_to_page(mpfn: src_pfns[i]), dpage, |
401 | dma_addr: &dma_addrs[i]); |
402 | } |
403 | } |
404 | |
405 | nouveau_fence_new(&fence, chunk->drm->dmem->migrate.chan); |
406 | migrate_device_pages(src_pfns, dst_pfns, npages); |
	nouveau_dmem_fence_done(&fence);
	migrate_device_finalize(src_pfns, dst_pfns, npages);
	kfree(src_pfns);
	kfree(dst_pfns);
	for (i = 0; i < npages; i++)
		dma_unmap_page(chunk->drm->dev->dev, dma_addrs[i], PAGE_SIZE, DMA_BIDIRECTIONAL);
	kfree(dma_addrs);
414 | } |
415 | |
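/*
 * Tear everything down: evict all device pages back to system memory,
 * then unpin and release each chunk along with its pagemap and address
 * range.
 */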
416 | void |
417 | nouveau_dmem_fini(struct nouveau_drm *drm) |
418 | { |
419 | struct nouveau_dmem_chunk *chunk, *tmp; |
420 | |
421 | if (drm->dmem == NULL) |
422 | return; |
423 | |
424 | mutex_lock(&drm->dmem->mutex); |
425 | |
426 | list_for_each_entry_safe(chunk, tmp, &drm->dmem->chunks, list) { |
427 | nouveau_dmem_evict_chunk(chunk); |
428 | nouveau_bo_unpin(chunk->bo); |
		nouveau_bo_ref(NULL, &chunk->bo);
		WARN_ON(chunk->callocated);
		list_del(&chunk->list);
		memunmap_pages(&chunk->pagemap);
		release_mem_region(chunk->pagemap.range.start,
				   range_len(&chunk->pagemap.range));
		kfree(chunk);
	}

	mutex_unlock(&drm->dmem->mutex);
439 | } |
440 | |
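/*
 * Issue an npages x PAGE_SIZE copy between the given apertures on the GPU
 * copy engine. Virtual addresses need no PHYS_MODE setup; physical ones
 * select local framebuffer (VRAM) or coherent system memory.
 */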
441 | static int |
442 | nvc0b5_migrate_copy(struct nouveau_drm *drm, u64 npages, |
443 | enum nouveau_aper dst_aper, u64 dst_addr, |
444 | enum nouveau_aper src_aper, u64 src_addr) |
445 | { |
446 | struct nvif_push *push = drm->dmem->migrate.chan->chan.push; |
447 | u32 launch_dma = 0; |
448 | int ret; |
449 | |
450 | ret = PUSH_WAIT(push, 13); |
451 | if (ret) |
452 | return ret; |
453 | |
454 | if (src_aper != NOUVEAU_APER_VIRT) { |
455 | switch (src_aper) { |
456 | case NOUVEAU_APER_VRAM: |
457 | PUSH_IMMD(push, NVA0B5, SET_SRC_PHYS_MODE, |
458 | NVDEF(NVA0B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB)); |
459 | break; |
460 | case NOUVEAU_APER_HOST: |
461 | PUSH_IMMD(push, NVA0B5, SET_SRC_PHYS_MODE, |
462 | NVDEF(NVA0B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM)); |
463 | break; |
464 | default: |
465 | return -EINVAL; |
466 | } |
467 | |
468 | launch_dma |= NVDEF(NVA0B5, LAUNCH_DMA, SRC_TYPE, PHYSICAL); |
469 | } |
470 | |
471 | if (dst_aper != NOUVEAU_APER_VIRT) { |
472 | switch (dst_aper) { |
473 | case NOUVEAU_APER_VRAM: |
474 | PUSH_IMMD(push, NVA0B5, SET_DST_PHYS_MODE, |
475 | NVDEF(NVA0B5, SET_DST_PHYS_MODE, TARGET, LOCAL_FB)); |
476 | break; |
477 | case NOUVEAU_APER_HOST: |
478 | PUSH_IMMD(push, NVA0B5, SET_DST_PHYS_MODE, |
479 | NVDEF(NVA0B5, SET_DST_PHYS_MODE, TARGET, COHERENT_SYSMEM)); |
480 | break; |
481 | default: |
482 | return -EINVAL; |
483 | } |
484 | |
485 | launch_dma |= NVDEF(NVA0B5, LAUNCH_DMA, DST_TYPE, PHYSICAL); |
486 | } |
487 | |
488 | PUSH_MTHD(push, NVA0B5, OFFSET_IN_UPPER, |
489 | NVVAL(NVA0B5, OFFSET_IN_UPPER, UPPER, upper_32_bits(src_addr)), |
490 | |
491 | OFFSET_IN_LOWER, lower_32_bits(src_addr), |
492 | |
493 | OFFSET_OUT_UPPER, |
494 | NVVAL(NVA0B5, OFFSET_OUT_UPPER, UPPER, upper_32_bits(dst_addr)), |
495 | |
496 | OFFSET_OUT_LOWER, lower_32_bits(dst_addr), |
497 | PITCH_IN, PAGE_SIZE, |
498 | PITCH_OUT, PAGE_SIZE, |
499 | LINE_LENGTH_IN, PAGE_SIZE, |
500 | LINE_COUNT, npages); |
501 | |
502 | PUSH_MTHD(push, NVA0B5, LAUNCH_DMA, launch_dma | |
503 | NVDEF(NVA0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED) | |
504 | NVDEF(NVA0B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE) | |
505 | NVDEF(NVA0B5, LAUNCH_DMA, SEMAPHORE_TYPE, NONE) | |
506 | NVDEF(NVA0B5, LAUNCH_DMA, INTERRUPT_TYPE, NONE) | |
507 | NVDEF(NVA0B5, LAUNCH_DMA, SRC_MEMORY_LAYOUT, PITCH) | |
508 | NVDEF(NVA0B5, LAUNCH_DMA, DST_MEMORY_LAYOUT, PITCH) | |
509 | NVDEF(NVA0B5, LAUNCH_DMA, MULTI_LINE_ENABLE, TRUE) | |
510 | NVDEF(NVA0B5, LAUNCH_DMA, REMAP_ENABLE, FALSE) | |
511 | NVDEF(NVA0B5, LAUNCH_DMA, BYPASS_L2, USE_PTE_SETTING)); |
512 | return 0; |
513 | } |
514 | |
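/*
 * Clear length bytes at dst_addr using the copy engine's remap path:
 * constants A/B are zero and each element writes two 4-byte components,
 * which is presumably why LINE_LENGTH_IN is length >> 3 (8 bytes per
 * element).
 */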
515 | static int |
516 | nvc0b5_migrate_clear(struct nouveau_drm *drm, u32 length, |
517 | enum nouveau_aper dst_aper, u64 dst_addr) |
518 | { |
519 | struct nvif_push *push = drm->dmem->migrate.chan->chan.push; |
520 | u32 launch_dma = 0; |
521 | int ret; |
522 | |
523 | ret = PUSH_WAIT(push, 12); |
524 | if (ret) |
525 | return ret; |
526 | |
527 | switch (dst_aper) { |
528 | case NOUVEAU_APER_VRAM: |
529 | PUSH_IMMD(push, NVA0B5, SET_DST_PHYS_MODE, |
530 | NVDEF(NVA0B5, SET_DST_PHYS_MODE, TARGET, LOCAL_FB)); |
531 | break; |
532 | case NOUVEAU_APER_HOST: |
533 | PUSH_IMMD(push, NVA0B5, SET_DST_PHYS_MODE, |
534 | NVDEF(NVA0B5, SET_DST_PHYS_MODE, TARGET, COHERENT_SYSMEM)); |
535 | break; |
536 | default: |
537 | return -EINVAL; |
538 | } |
539 | |
540 | launch_dma |= NVDEF(NVA0B5, LAUNCH_DMA, DST_TYPE, PHYSICAL); |
541 | |
542 | PUSH_MTHD(push, NVA0B5, SET_REMAP_CONST_A, 0, |
543 | SET_REMAP_CONST_B, 0, |
544 | |
545 | SET_REMAP_COMPONENTS, |
546 | NVDEF(NVA0B5, SET_REMAP_COMPONENTS, DST_X, CONST_A) | |
547 | NVDEF(NVA0B5, SET_REMAP_COMPONENTS, DST_Y, CONST_B) | |
548 | NVDEF(NVA0B5, SET_REMAP_COMPONENTS, COMPONENT_SIZE, FOUR) | |
549 | NVDEF(NVA0B5, SET_REMAP_COMPONENTS, NUM_DST_COMPONENTS, TWO)); |
550 | |
551 | PUSH_MTHD(push, NVA0B5, OFFSET_OUT_UPPER, |
552 | NVVAL(NVA0B5, OFFSET_OUT_UPPER, UPPER, upper_32_bits(dst_addr)), |
553 | |
554 | OFFSET_OUT_LOWER, lower_32_bits(dst_addr)); |
555 | |
556 | PUSH_MTHD(push, NVA0B5, LINE_LENGTH_IN, length >> 3); |
557 | |
558 | PUSH_MTHD(push, NVA0B5, LAUNCH_DMA, launch_dma | |
559 | NVDEF(NVA0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED) | |
560 | NVDEF(NVA0B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE) | |
561 | NVDEF(NVA0B5, LAUNCH_DMA, SEMAPHORE_TYPE, NONE) | |
562 | NVDEF(NVA0B5, LAUNCH_DMA, INTERRUPT_TYPE, NONE) | |
563 | NVDEF(NVA0B5, LAUNCH_DMA, SRC_MEMORY_LAYOUT, PITCH) | |
564 | NVDEF(NVA0B5, LAUNCH_DMA, DST_MEMORY_LAYOUT, PITCH) | |
565 | NVDEF(NVA0B5, LAUNCH_DMA, MULTI_LINE_ENABLE, FALSE) | |
566 | NVDEF(NVA0B5, LAUNCH_DMA, REMAP_ENABLE, TRUE) | |
567 | NVDEF(NVA0B5, LAUNCH_DMA, BYPASS_L2, USE_PTE_SETTING)); |
568 | return 0; |
569 | } |
570 | |
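/*
 * Hook up the copy/clear helpers: the nvc0b5 paths cover the Pascal,
 * Volta and Turing copy-engine classes, reusing the channel that TTM has
 * already set up.
 */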
571 | static int |
572 | nouveau_dmem_migrate_init(struct nouveau_drm *drm) |
573 | { |
574 | switch (drm->ttm.copy.oclass) { |
575 | case PASCAL_DMA_COPY_A: |
576 | case PASCAL_DMA_COPY_B: |
577 | case VOLTA_DMA_COPY_A: |
578 | case TURING_DMA_COPY_A: |
579 | drm->dmem->migrate.copy_func = nvc0b5_migrate_copy; |
580 | drm->dmem->migrate.clear_func = nvc0b5_migrate_clear; |
581 | drm->dmem->migrate.chan = drm->ttm.chan; |
582 | return 0; |
583 | default: |
584 | break; |
585 | } |
586 | return -ENODEV; |
587 | } |
588 | |
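/*
 * Set up dmem state at driver load. Failure is not fatal: drm->dmem just
 * stays NULL and migration to VRAM is simply unavailable.
 */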
589 | void |
590 | nouveau_dmem_init(struct nouveau_drm *drm) |
591 | { |
592 | int ret; |
593 | |
	/* This only makes sense on Pascal or newer. */
	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_PASCAL)
		return;

	if (!(drm->dmem = kzalloc(sizeof(*drm->dmem), GFP_KERNEL)))
		return;
599 | return; |
600 | |
601 | drm->dmem->drm = drm; |
	mutex_init(&drm->dmem->mutex);
	INIT_LIST_HEAD(&drm->dmem->chunks);
605 | spin_lock_init(&drm->dmem->lock); |
606 | |
607 | /* Initialize migration dma helpers before registering memory */ |
608 | ret = nouveau_dmem_migrate_init(drm); |
609 | if (ret) { |
		kfree(drm->dmem);
611 | drm->dmem = NULL; |
612 | } |
613 | } |
614 | |
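/*
 * Migrate one page of system memory into a freshly allocated VRAM page.
 * When there is no source page (a hole in the mapping), the VRAM page is
 * cleared instead of copied into. Also composes the NVIF PFN-map entry
 * for the new page in *pfn.
 */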
615 | static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm, |
616 | struct nouveau_svmm *svmm, unsigned long src, |
617 | dma_addr_t *dma_addr, u64 *pfn) |
618 | { |
619 | struct device *dev = drm->dev->dev; |
620 | struct page *dpage, *spage; |
621 | unsigned long paddr; |
622 | |
	spage = migrate_pfn_to_page(src);
624 | if (!(src & MIGRATE_PFN_MIGRATE)) |
625 | goto out; |
626 | |
627 | dpage = nouveau_dmem_page_alloc_locked(drm); |
628 | if (!dpage) |
629 | goto out; |
630 | |
	paddr = nouveau_dmem_page_addr(dpage);
	if (spage) {
		*dma_addr = dma_map_page(dev, spage, 0, page_size(spage),
634 | DMA_BIDIRECTIONAL); |
635 | if (dma_mapping_error(dev, *dma_addr)) |
636 | goto out_free_page; |
637 | if (drm->dmem->migrate.copy_func(drm, 1, |
638 | NOUVEAU_APER_VRAM, paddr, NOUVEAU_APER_HOST, *dma_addr)) |
639 | goto out_dma_unmap; |
640 | } else { |
641 | *dma_addr = DMA_MAPPING_ERROR; |
		if (drm->dmem->migrate.clear_func(drm, page_size(dpage),
643 | NOUVEAU_APER_VRAM, paddr)) |
644 | goto out_free_page; |
645 | } |
646 | |
647 | dpage->zone_device_data = svmm; |
648 | *pfn = NVIF_VMM_PFNMAP_V0_V | NVIF_VMM_PFNMAP_V0_VRAM | |
649 | ((paddr >> PAGE_SHIFT) << NVIF_VMM_PFNMAP_V0_ADDR_SHIFT); |
650 | if (src & MIGRATE_PFN_WRITE) |
651 | *pfn |= NVIF_VMM_PFNMAP_V0_W; |
652 | return migrate_pfn(page_to_pfn(dpage)); |
653 | |
654 | out_dma_unmap: |
655 | dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); |
656 | out_free_page: |
	nouveau_dmem_page_free_locked(drm, dpage);
658 | out: |
659 | *pfn = NVIF_VMM_PFNMAP_V0_NONE; |
660 | return 0; |
661 | } |
662 | |
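/*
 * Copy one batch of pages prepared by migrate_vma_setup() into VRAM,
 * fence the copies, and point the GPU page tables at the new pages via
 * nouveau_pfns_map().
 */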
663 | static void nouveau_dmem_migrate_chunk(struct nouveau_drm *drm, |
664 | struct nouveau_svmm *svmm, struct migrate_vma *args, |
665 | dma_addr_t *dma_addrs, u64 *pfns) |
666 | { |
667 | struct nouveau_fence *fence; |
668 | unsigned long addr = args->start, nr_dma = 0, i; |
669 | |
670 | for (i = 0; addr < args->end; i++) { |
		args->dst[i] = nouveau_dmem_migrate_copy_one(drm, svmm,
				args->src[i], dma_addrs + nr_dma, pfns + i);
673 | if (!dma_mapping_error(drm->dev->dev, dma_addrs[nr_dma])) |
674 | nr_dma++; |
675 | addr += PAGE_SIZE; |
676 | } |
677 | |
678 | nouveau_fence_new(&fence, drm->dmem->migrate.chan); |
	migrate_vma_pages(args);
	nouveau_dmem_fence_done(&fence);
	nouveau_pfns_map(svmm, args->vma->vm_mm, args->start, pfns, i);
682 | |
683 | while (nr_dma--) { |
684 | dma_unmap_page(drm->dev->dev, dma_addrs[nr_dma], PAGE_SIZE, |
685 | DMA_BIDIRECTIONAL); |
686 | } |
	migrate_vma_finalize(args);
688 | } |
689 | |
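/*
 * Migrate a range of a process address space into VRAM, in batches of at
 * most SG_MAX_SINGLE_ALLOC pages. Pages that cannot be migrated are
 * simply skipped; only setup or allocation failures abort the whole
 * operation.
 */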
690 | int |
691 | nouveau_dmem_migrate_vma(struct nouveau_drm *drm, |
692 | struct nouveau_svmm *svmm, |
693 | struct vm_area_struct *vma, |
694 | unsigned long start, |
695 | unsigned long end) |
696 | { |
697 | unsigned long npages = (end - start) >> PAGE_SHIFT; |
698 | unsigned long max = min(SG_MAX_SINGLE_ALLOC, npages); |
699 | dma_addr_t *dma_addrs; |
700 | struct migrate_vma args = { |
701 | .vma = vma, |
702 | .start = start, |
703 | .pgmap_owner = drm->dev, |
704 | .flags = MIGRATE_VMA_SELECT_SYSTEM, |
705 | }; |
706 | unsigned long i; |
707 | u64 *pfns; |
708 | int ret = -ENOMEM; |
709 | |
710 | if (drm->dmem == NULL) |
711 | return -ENODEV; |
712 | |
	args.src = kcalloc(max, sizeof(*args.src), GFP_KERNEL);
	if (!args.src)
		goto out;
	args.dst = kcalloc(max, sizeof(*args.dst), GFP_KERNEL);
	if (!args.dst)
		goto out_free_src;

	dma_addrs = kmalloc_array(max, sizeof(*dma_addrs), GFP_KERNEL);
	if (!dma_addrs)
		goto out_free_dst;

	pfns = nouveau_pfns_alloc(max);
725 | if (!pfns) |
726 | goto out_free_dma; |
727 | |
728 | for (i = 0; i < npages; i += max) { |
729 | if (args.start + (max << PAGE_SHIFT) > end) |
730 | args.end = end; |
731 | else |
732 | args.end = args.start + (max << PAGE_SHIFT); |
733 | |
		ret = migrate_vma_setup(&args);
735 | if (ret) |
736 | goto out_free_pfns; |
737 | |
738 | if (args.cpages) |
			nouveau_dmem_migrate_chunk(drm, svmm, &args, dma_addrs,
					pfns);
741 | args.start = args.end; |
742 | } |
743 | |
744 | ret = 0; |
745 | out_free_pfns: |
746 | nouveau_pfns_free(pfns); |
747 | out_free_dma: |
	kfree(dma_addrs);
out_free_dst:
	kfree(args.dst);
out_free_src:
	kfree(args.src);
753 | out: |
754 | return ret; |
755 | } |
756 | |