1 | /* |
2 | * Copyright 2018 Red Hat Inc. |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice shall be included in |
12 | * all copies or substantial portions of the Software. |
13 | * |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
20 | * OTHER DEALINGS IN THE SOFTWARE. |
21 | */ |
22 | #include "nouveau_svm.h" |
23 | #include "nouveau_drv.h" |
24 | #include "nouveau_chan.h" |
25 | #include "nouveau_dmem.h" |
26 | |
27 | #include <nvif/event.h> |
28 | #include <nvif/object.h> |
29 | #include <nvif/vmm.h> |
30 | |
31 | #include <nvif/class.h> |
32 | #include <nvif/clb069.h> |
33 | #include <nvif/ifc00d.h> |
34 | |
35 | #include <linux/sched/mm.h> |
36 | #include <linux/sort.h> |
37 | #include <linux/hmm.h> |
38 | #include <linux/memremap.h> |
39 | #include <linux/rmap.h> |
40 | |
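/* Per-device SVM state: the list of channel instances linked to SVMMs and
 * the replayable fault buffer(s) used to service GPU page faults.
 */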
41 | struct nouveau_svm { |
42 | struct nouveau_drm *drm; |
43 | struct mutex mutex; |
44 | struct list_head inst; |
45 | |
46 | struct nouveau_svm_fault_buffer { |
47 | int id; |
48 | struct nvif_object object; |
49 | u32 entries; |
50 | u32 getaddr; |
51 | u32 putaddr; |
52 | u32 get; |
53 | u32 put; |
54 | struct nvif_event notify; |
55 | struct work_struct work; |
56 | |
57 | struct nouveau_svm_fault { |
58 | u64 inst; |
59 | u64 addr; |
60 | u64 time; |
61 | u32 engine; |
62 | u8 gpc; |
63 | u8 hub; |
64 | u8 access; |
65 | u8 client; |
66 | u8 fault; |
67 | struct nouveau_svmm *svmm; |
68 | } **fault; |
69 | int fault_nr; |
70 | } buffer[]; |
71 | }; |
72 | |
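/* Fault access types, as decoded from a fault buffer entry's info word. */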
73 | #define FAULT_ACCESS_READ 0 |
74 | #define FAULT_ACCESS_WRITE 1 |
75 | #define FAULT_ACCESS_ATOMIC 2 |
76 | #define FAULT_ACCESS_PREFETCH 3 |
77 | |
78 | #define SVM_DBG(s,f,a...) NV_DEBUG((s)->drm, "svm: "f"\n", ##a) |
79 | #define SVM_ERR(s,f,a...) NV_WARN((s)->drm, "svm: "f"\n", ##a) |
80 | |
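/* Argument layout for NVIF_VMM_V0_PFNMAP method calls issued through
 * nvif_object_ioctl().
 */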
81 | struct nouveau_pfnmap_args { |
82 | struct nvif_ioctl_v0 i; |
83 | struct nvif_ioctl_mthd_v0 m; |
84 | struct nvif_vmm_pfnmap_v0 p; |
85 | }; |
86 | |
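/* Link between a channel instance pointer and its owning SVMM. */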
87 | struct nouveau_ivmm { |
88 | struct nouveau_svmm *svmm; |
89 | u64 inst; |
90 | struct list_head head; |
91 | }; |
92 | |
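/* Find the SVMM linked to a channel instance; caller holds svm->mutex. */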
93 | static struct nouveau_ivmm * |
94 | nouveau_ivmm_find(struct nouveau_svm *svm, u64 inst) |
95 | { |
96 | struct nouveau_ivmm *ivmm; |
97 | list_for_each_entry(ivmm, &svm->inst, head) { |
98 | if (ivmm->inst == inst) |
99 | return ivmm; |
100 | } |
101 | return NULL; |
102 | } |
103 | |
104 | #define SVMM_DBG(s,f,a...) \ |
105 | NV_DEBUG((s)->vmm->cli->drm, "svm-%p: "f"\n", (s), ##a) |
106 | #define SVMM_ERR(s,f,a...) \ |
107 | NV_WARN((s)->vmm->cli->drm, "svm-%p: "f"\n", (s), ##a) |
108 | |
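/* DRM_NOUVEAU_SVM_BIND ioctl: currently only supports migrating a range of
 * the process address space to GPU VRAM, on a best-effort basis.
 */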
109 | int |
110 | nouveau_svmm_bind(struct drm_device *dev, void *data, |
111 | struct drm_file *file_priv) |
112 | { |
	struct nouveau_cli *cli = nouveau_cli(file_priv);
114 | struct drm_nouveau_svm_bind *args = data; |
115 | unsigned target, cmd; |
116 | unsigned long addr, end; |
117 | struct mm_struct *mm; |
118 | |
119 | args->va_start &= PAGE_MASK; |
120 | args->va_end = ALIGN(args->va_end, PAGE_SIZE); |
121 | |
122 | /* Sanity check arguments */ |
123 | if (args->reserved0 || args->reserved1) |
124 | return -EINVAL; |
125 | if (args->header & (~NOUVEAU_SVM_BIND_VALID_MASK)) |
126 | return -EINVAL; |
127 | if (args->va_start >= args->va_end) |
128 | return -EINVAL; |
129 | |
130 | cmd = args->header >> NOUVEAU_SVM_BIND_COMMAND_SHIFT; |
131 | cmd &= NOUVEAU_SVM_BIND_COMMAND_MASK; |
132 | switch (cmd) { |
133 | case NOUVEAU_SVM_BIND_COMMAND__MIGRATE: |
134 | break; |
135 | default: |
136 | return -EINVAL; |
137 | } |
138 | |
	/* FIXME: support CPU targets, i.e. all target values < GPU_VRAM */
140 | target = args->header >> NOUVEAU_SVM_BIND_TARGET_SHIFT; |
141 | target &= NOUVEAU_SVM_BIND_TARGET_MASK; |
142 | switch (target) { |
143 | case NOUVEAU_SVM_BIND_TARGET__GPU_VRAM: |
144 | break; |
145 | default: |
146 | return -EINVAL; |
147 | } |
148 | |
	/*
	 * FIXME: For now refuse a non-zero stride; the migrate kernel
	 * function needs to be changed to handle strides before we can
	 * support them without creating a mess within each device driver.
	 */
154 | if (args->stride) |
155 | return -EINVAL; |
156 | |
	/*
	 * OK, we are asked to do something sane. For now we only support
	 * migrate commands, but we will add things like memory policy
	 * (what to do on page fault) and maybe some other commands later.
	 */
162 | |
163 | mm = get_task_mm(current); |
164 | if (!mm) { |
165 | return -EINVAL; |
166 | } |
167 | mmap_read_lock(mm); |
168 | |
169 | if (!cli->svm.svmm) { |
170 | mmap_read_unlock(mm); |
171 | mmput(mm); |
172 | return -EINVAL; |
173 | } |
174 | |
175 | for (addr = args->va_start, end = args->va_end; addr < end;) { |
176 | struct vm_area_struct *vma; |
177 | unsigned long next; |
178 | |
		vma = find_vma_intersection(mm, addr, end);
180 | if (!vma) |
181 | break; |
182 | |
183 | addr = max(addr, vma->vm_start); |
184 | next = min(vma->vm_end, end); |
185 | /* This is a best effort so we ignore errors */ |
		nouveau_dmem_migrate_vma(cli->drm, cli->svm.svmm, vma, addr,
					 next);
188 | addr = next; |
189 | } |
190 | |
	/*
	 * FIXME: Return the number of pages we have migrated. Again, we need
	 * to update the migrate API to return that information so that we
	 * can report it to user space.
	 */
196 | args->result = 0; |
197 | |
198 | mmap_read_unlock(mm); |
199 | mmput(mm); |
200 | |
201 | return 0; |
202 | } |
203 | |
204 | /* Unlink channel instance from SVMM. */ |
205 | void |
206 | nouveau_svmm_part(struct nouveau_svmm *svmm, u64 inst) |
207 | { |
208 | struct nouveau_ivmm *ivmm; |
209 | if (svmm) { |
210 | mutex_lock(&svmm->vmm->cli->drm->svm->mutex); |
		ivmm = nouveau_ivmm_find(svmm->vmm->cli->drm->svm, inst);
		if (ivmm) {
			list_del(&ivmm->head);
			kfree(ivmm);
		}
		mutex_unlock(&svmm->vmm->cli->drm->svm->mutex);
217 | } |
218 | } |
219 | |
220 | /* Link channel instance to SVMM. */ |
221 | int |
222 | nouveau_svmm_join(struct nouveau_svmm *svmm, u64 inst) |
223 | { |
224 | struct nouveau_ivmm *ivmm; |
225 | if (svmm) { |
		if (!(ivmm = kmalloc(sizeof(*ivmm), GFP_KERNEL)))
227 | return -ENOMEM; |
228 | ivmm->svmm = svmm; |
229 | ivmm->inst = inst; |
230 | |
231 | mutex_lock(&svmm->vmm->cli->drm->svm->mutex); |
		list_add(&ivmm->head, &svmm->vmm->cli->drm->svm->inst);
		mutex_unlock(&svmm->vmm->cli->drm->svm->mutex);
234 | } |
235 | return 0; |
236 | } |
237 | |
238 | /* Invalidate SVMM address-range on GPU. */ |
239 | void |
240 | nouveau_svmm_invalidate(struct nouveau_svmm *svmm, u64 start, u64 limit) |
241 | { |
242 | if (limit > start) { |
243 | nvif_object_mthd(&svmm->vmm->vmm.object, NVIF_VMM_V0_PFNCLR, |
244 | &(struct nvif_vmm_pfnclr_v0) { |
245 | .addr = start, |
246 | .size = limit - start, |
247 | }, sizeof(struct nvif_vmm_pfnclr_v0)); |
248 | } |
249 | } |
250 | |
251 | static int |
252 | nouveau_svmm_invalidate_range_start(struct mmu_notifier *mn, |
253 | const struct mmu_notifier_range *update) |
254 | { |
255 | struct nouveau_svmm *svmm = |
256 | container_of(mn, struct nouveau_svmm, notifier); |
257 | unsigned long start = update->start; |
258 | unsigned long limit = update->end; |
259 | |
	if (!mmu_notifier_range_blockable(update))
261 | return -EAGAIN; |
262 | |
	SVMM_DBG(svmm, "invalidate %016lx-%016lx", start, limit);
264 | |
265 | mutex_lock(&svmm->mutex); |
266 | if (unlikely(!svmm->vmm)) |
267 | goto out; |
268 | |
269 | /* |
270 | * Ignore invalidation callbacks for device private pages since |
271 | * the invalidation is handled as part of the migration process. |
272 | */ |
273 | if (update->event == MMU_NOTIFY_MIGRATE && |
274 | update->owner == svmm->vmm->cli->drm->dev) |
275 | goto out; |
276 | |
277 | if (limit > svmm->unmanaged.start && start < svmm->unmanaged.limit) { |
278 | if (start < svmm->unmanaged.start) { |
			nouveau_svmm_invalidate(svmm, start,
						svmm->unmanaged.limit);
281 | } |
282 | start = svmm->unmanaged.limit; |
283 | } |
284 | |
285 | nouveau_svmm_invalidate(svmm, start, limit); |
286 | |
287 | out: |
	mutex_unlock(&svmm->mutex);
289 | return 0; |
290 | } |
291 | |
292 | static void nouveau_svmm_free_notifier(struct mmu_notifier *mn) |
293 | { |
294 | kfree(container_of(mn, struct nouveau_svmm, notifier)); |
295 | } |
296 | |
297 | static const struct mmu_notifier_ops nouveau_mn_ops = { |
298 | .invalidate_range_start = nouveau_svmm_invalidate_range_start, |
299 | .free_notifier = nouveau_svmm_free_notifier, |
300 | }; |
301 | |
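/* Detach the SVMM from its VMM and drop the mmu_notifier reference; the
 * structure itself is freed by nouveau_svmm_free_notifier().
 */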
302 | void |
303 | nouveau_svmm_fini(struct nouveau_svmm **psvmm) |
304 | { |
305 | struct nouveau_svmm *svmm = *psvmm; |
306 | if (svmm) { |
307 | mutex_lock(&svmm->mutex); |
308 | svmm->vmm = NULL; |
		mutex_unlock(&svmm->mutex);
		mmu_notifier_put(&svmm->notifier);
311 | *psvmm = NULL; |
312 | } |
313 | } |
314 | |
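/* DRM_NOUVEAU_SVM_INIT ioctl: allocate an SVM-capable VMM for the client and
 * register an mmu_notifier against the current process address space.
 */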
315 | int |
316 | nouveau_svmm_init(struct drm_device *dev, void *data, |
317 | struct drm_file *file_priv) |
318 | { |
	struct nouveau_cli *cli = nouveau_cli(file_priv);
320 | struct nouveau_svmm *svmm; |
321 | struct drm_nouveau_svm_init *args = data; |
322 | int ret; |
323 | |
324 | /* We need to fail if svm is disabled */ |
325 | if (!cli->drm->svm) |
326 | return -ENOSYS; |
327 | |
328 | /* Allocate tracking for SVM-enabled VMM. */ |
	if (!(svmm = kzalloc(sizeof(*svmm), GFP_KERNEL)))
330 | return -ENOMEM; |
331 | svmm->vmm = &cli->svm; |
332 | svmm->unmanaged.start = args->unmanaged_addr; |
333 | svmm->unmanaged.limit = args->unmanaged_addr + args->unmanaged_size; |
334 | mutex_init(&svmm->mutex); |
335 | |
336 | /* Check that SVM isn't already enabled for the client. */ |
337 | mutex_lock(&cli->mutex); |
338 | if (cli->svm.cli) { |
339 | ret = -EBUSY; |
340 | goto out_free; |
341 | } |
342 | |
343 | /* Allocate a new GPU VMM that can support SVM (managed by the |
344 | * client, with replayable faults enabled). |
345 | * |
346 | * All future channel/memory allocations will make use of this |
347 | * VMM instead of the standard one. |
348 | */ |
	ret = nvif_vmm_ctor(&cli->mmu, "svmVmm",
350 | cli->vmm.vmm.object.oclass, MANAGED, |
351 | args->unmanaged_addr, args->unmanaged_size, |
352 | &(struct gp100_vmm_v0) { |
353 | .fault_replay = true, |
354 | }, sizeof(struct gp100_vmm_v0), &cli->svm.vmm); |
355 | if (ret) |
356 | goto out_free; |
357 | |
358 | mmap_write_lock(current->mm); |
359 | svmm->notifier.ops = &nouveau_mn_ops; |
	ret = __mmu_notifier_register(&svmm->notifier, current->mm);
361 | if (ret) |
362 | goto out_mm_unlock; |
363 | /* Note, ownership of svmm transfers to mmu_notifier */ |
364 | |
365 | cli->svm.svmm = svmm; |
366 | cli->svm.cli = cli; |
367 | mmap_write_unlock(current->mm); |
	mutex_unlock(&cli->mutex);
369 | return 0; |
370 | |
371 | out_mm_unlock: |
372 | mmap_write_unlock(current->mm); |
373 | out_free: |
	mutex_unlock(&cli->mutex);
	kfree(svmm);
376 | return ret; |
377 | } |
378 | |
379 | /* Issue fault replay for GPU to retry accesses that faulted previously. */ |
380 | static void |
381 | nouveau_svm_fault_replay(struct nouveau_svm *svm) |
382 | { |
383 | SVM_DBG(svm, "replay" ); |
384 | WARN_ON(nvif_object_mthd(&svm->drm->client.vmm.vmm.object, |
385 | GP100_VMM_VN_FAULT_REPLAY, |
386 | &(struct gp100_vmm_fault_replay_vn) {}, |
387 | sizeof(struct gp100_vmm_fault_replay_vn))); |
388 | } |
389 | |
390 | /* Cancel a replayable fault that could not be handled. |
391 | * |
392 | * Cancelling the fault will trigger recovery to reset the engine |
393 | * and kill the offending channel (ie. GPU SIGSEGV). |
394 | */ |
395 | static void |
396 | nouveau_svm_fault_cancel(struct nouveau_svm *svm, |
397 | u64 inst, u8 hub, u8 gpc, u8 client) |
398 | { |
399 | SVM_DBG(svm, "cancel %016llx %d %02x %02x" , inst, hub, gpc, client); |
400 | WARN_ON(nvif_object_mthd(&svm->drm->client.vmm.vmm.object, |
401 | GP100_VMM_VN_FAULT_CANCEL, |
402 | &(struct gp100_vmm_fault_cancel_v0) { |
403 | .hub = hub, |
404 | .gpc = gpc, |
405 | .client = client, |
406 | .inst = inst, |
407 | }, sizeof(struct gp100_vmm_fault_cancel_v0))); |
408 | } |
409 | |
410 | static void |
411 | nouveau_svm_fault_cancel_fault(struct nouveau_svm *svm, |
412 | struct nouveau_svm_fault *fault) |
413 | { |
	nouveau_svm_fault_cancel(svm, fault->inst,
				 fault->hub,
				 fault->gpc,
				 fault->client);
418 | } |
419 | |
420 | static int |
421 | nouveau_svm_fault_priority(u8 fault) |
422 | { |
423 | switch (fault) { |
424 | case FAULT_ACCESS_PREFETCH: |
425 | return 0; |
426 | case FAULT_ACCESS_READ: |
427 | return 1; |
428 | case FAULT_ACCESS_WRITE: |
429 | return 2; |
430 | case FAULT_ACCESS_ATOMIC: |
431 | return 3; |
432 | default: |
433 | WARN_ON_ONCE(1); |
434 | return -1; |
435 | } |
436 | } |
437 | |
438 | static int |
439 | nouveau_svm_fault_cmp(const void *a, const void *b) |
440 | { |
441 | const struct nouveau_svm_fault *fa = *(struct nouveau_svm_fault **)a; |
442 | const struct nouveau_svm_fault *fb = *(struct nouveau_svm_fault **)b; |
443 | int ret; |
444 | if ((ret = (s64)fa->inst - fb->inst)) |
445 | return ret; |
446 | if ((ret = (s64)fa->addr - fb->addr)) |
447 | return ret; |
	return nouveau_svm_fault_priority(fa->access) -
	       nouveau_svm_fault_priority(fb->access);
450 | } |
451 | |
452 | static void |
453 | nouveau_svm_fault_cache(struct nouveau_svm *svm, |
454 | struct nouveau_svm_fault_buffer *buffer, u32 offset) |
455 | { |
456 | struct nvif_object *memory = &buffer->object; |
457 | const u32 instlo = nvif_rd32(memory, offset + 0x00); |
458 | const u32 insthi = nvif_rd32(memory, offset + 0x04); |
459 | const u32 addrlo = nvif_rd32(memory, offset + 0x08); |
460 | const u32 addrhi = nvif_rd32(memory, offset + 0x0c); |
461 | const u32 timelo = nvif_rd32(memory, offset + 0x10); |
462 | const u32 timehi = nvif_rd32(memory, offset + 0x14); |
463 | const u32 engine = nvif_rd32(memory, offset + 0x18); |
464 | const u32 info = nvif_rd32(memory, offset + 0x1c); |
465 | const u64 inst = (u64)insthi << 32 | instlo; |
466 | const u8 gpc = (info & 0x1f000000) >> 24; |
467 | const u8 hub = (info & 0x00100000) >> 20; |
468 | const u8 client = (info & 0x00007f00) >> 8; |
469 | struct nouveau_svm_fault *fault; |
470 | |
	/* XXX: I think we're supposed to spin waiting. */
472 | if (WARN_ON(!(info & 0x80000000))) |
473 | return; |
474 | |
475 | nvif_mask(memory, offset + 0x1c, 0x80000000, 0x00000000); |
476 | |
477 | if (!buffer->fault[buffer->fault_nr]) { |
		fault = kmalloc(sizeof(*fault), GFP_KERNEL);
479 | if (WARN_ON(!fault)) { |
480 | nouveau_svm_fault_cancel(svm, inst, hub, gpc, client); |
481 | return; |
482 | } |
483 | buffer->fault[buffer->fault_nr] = fault; |
484 | } |
485 | |
486 | fault = buffer->fault[buffer->fault_nr++]; |
487 | fault->inst = inst; |
488 | fault->addr = (u64)addrhi << 32 | addrlo; |
489 | fault->time = (u64)timehi << 32 | timelo; |
490 | fault->engine = engine; |
491 | fault->gpc = gpc; |
492 | fault->hub = hub; |
493 | fault->access = (info & 0x000f0000) >> 16; |
494 | fault->client = client; |
495 | fault->fault = (info & 0x0000001f); |
496 | |
497 | SVM_DBG(svm, "fault %016llx %016llx %02x" , |
498 | fault->inst, fault->addr, fault->access); |
499 | } |
500 | |
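/* Per-fault interval notifier, used to detect invalidations racing with the
 * GPU mapping being programmed.
 */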
501 | struct svm_notifier { |
502 | struct mmu_interval_notifier notifier; |
503 | struct nouveau_svmm *svmm; |
504 | }; |
505 | |
506 | static bool nouveau_svm_range_invalidate(struct mmu_interval_notifier *mni, |
507 | const struct mmu_notifier_range *range, |
508 | unsigned long cur_seq) |
509 | { |
510 | struct svm_notifier *sn = |
511 | container_of(mni, struct svm_notifier, notifier); |
512 | |
513 | if (range->event == MMU_NOTIFY_EXCLUSIVE && |
514 | range->owner == sn->svmm->vmm->cli->drm->dev) |
515 | return true; |
516 | |
517 | /* |
518 | * serializes the update to mni->invalidate_seq done by caller and |
519 | * prevents invalidation of the PTE from progressing while HW is being |
520 | * programmed. This is very hacky and only works because the normal |
521 | * notifier that does invalidation is always called after the range |
522 | * notifier. |
523 | */ |
524 | if (mmu_notifier_range_blockable(range)) |
525 | mutex_lock(&sn->svmm->mutex); |
	else if (!mutex_trylock(&sn->svmm->mutex))
		return false;
	mmu_interval_set_seq(mni, cur_seq);
	mutex_unlock(&sn->svmm->mutex);
530 | return true; |
531 | } |
532 | |
533 | static const struct mmu_interval_notifier_ops nouveau_svm_mni_ops = { |
534 | .invalidate = nouveau_svm_range_invalidate, |
535 | }; |
536 | |
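/* Convert an hmm_range result into the nvif PFNMAP encoding expected by the
 * VMM, adjusting address/size when a compound page is mapped.
 */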
537 | static void nouveau_hmm_convert_pfn(struct nouveau_drm *drm, |
538 | struct hmm_range *range, |
539 | struct nouveau_pfnmap_args *args) |
540 | { |
541 | struct page *page; |
542 | |
543 | /* |
544 | * The address prepared here is passed through nvif_object_ioctl() |
545 | * to an eventual DMA map in something like gp100_vmm_pgt_pfn() |
546 | * |
547 | * This is all just encoding the internal hmm representation into a |
548 | * different nouveau internal representation. |
549 | */ |
550 | if (!(range->hmm_pfns[0] & HMM_PFN_VALID)) { |
551 | args->p.phys[0] = 0; |
552 | return; |
553 | } |
554 | |
	page = hmm_pfn_to_page(range->hmm_pfns[0]);
556 | /* |
557 | * Only map compound pages to the GPU if the CPU is also mapping the |
558 | * page as a compound page. Otherwise, the PTE protections might not be |
559 | * consistent (e.g., CPU only maps part of a compound page). |
560 | * Note that the underlying page might still be larger than the |
561 | * CPU mapping (e.g., a PUD sized compound page partially mapped with |
562 | * a PMD sized page table entry). |
563 | */ |
	if (hmm_pfn_to_map_order(range->hmm_pfns[0])) {
		unsigned long addr = args->p.addr;

		args->p.page = hmm_pfn_to_map_order(range->hmm_pfns[0]) +
				PAGE_SHIFT;
569 | args->p.size = 1UL << args->p.page; |
570 | args->p.addr &= ~(args->p.size - 1); |
571 | page -= (addr - args->p.addr) >> PAGE_SHIFT; |
572 | } |
573 | if (is_device_private_page(page)) |
574 | args->p.phys[0] = nouveau_dmem_page_addr(page) | |
575 | NVIF_VMM_PFNMAP_V0_V | |
576 | NVIF_VMM_PFNMAP_V0_VRAM; |
577 | else |
578 | args->p.phys[0] = page_to_phys(page) | |
579 | NVIF_VMM_PFNMAP_V0_V | |
580 | NVIF_VMM_PFNMAP_V0_HOST; |
581 | if (range->hmm_pfns[0] & HMM_PFN_WRITE) |
582 | args->p.phys[0] |= NVIF_VMM_PFNMAP_V0_W; |
583 | } |
584 | |
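/* Handle an ATOMIC fault: make the page exclusive to the device, then map it
 * on the GPU with write and atomic access enabled.
 */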
585 | static int nouveau_atomic_range_fault(struct nouveau_svmm *svmm, |
586 | struct nouveau_drm *drm, |
587 | struct nouveau_pfnmap_args *args, u32 size, |
588 | struct svm_notifier *notifier) |
589 | { |
590 | unsigned long timeout = |
591 | jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); |
592 | struct mm_struct *mm = svmm->notifier.mm; |
593 | struct page *page; |
594 | unsigned long start = args->p.addr; |
595 | unsigned long notifier_seq; |
596 | int ret = 0; |
597 | |
	ret = mmu_interval_notifier_insert(&notifier->notifier, mm,
					   args->p.addr, args->p.size,
					   &nouveau_svm_mni_ops);
601 | if (ret) |
602 | return ret; |
603 | |
604 | while (true) { |
605 | if (time_after(jiffies, timeout)) { |
606 | ret = -EBUSY; |
607 | goto out; |
608 | } |
609 | |
		notifier_seq = mmu_interval_read_begin(&notifier->notifier);
		mmap_read_lock(mm);
		ret = make_device_exclusive_range(mm, start, start + PAGE_SIZE,
						  &page, drm->dev);
614 | mmap_read_unlock(mm); |
615 | if (ret <= 0 || !page) { |
616 | ret = -EINVAL; |
617 | goto out; |
618 | } |
619 | |
620 | mutex_lock(&svmm->mutex); |
		if (!mmu_interval_read_retry(&notifier->notifier,
					     notifier_seq))
			break;
		mutex_unlock(&svmm->mutex);
625 | } |
626 | |
627 | /* Map the page on the GPU. */ |
628 | args->p.page = 12; |
629 | args->p.size = PAGE_SIZE; |
630 | args->p.addr = start; |
631 | args->p.phys[0] = page_to_phys(page) | |
632 | NVIF_VMM_PFNMAP_V0_V | |
633 | NVIF_VMM_PFNMAP_V0_W | |
634 | NVIF_VMM_PFNMAP_V0_A | |
635 | NVIF_VMM_PFNMAP_V0_HOST; |
636 | |
637 | ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, size, NULL); |
	mutex_unlock(&svmm->mutex);
639 | |
640 | unlock_page(page); |
641 | put_page(page); |
642 | |
643 | out: |
	mmu_interval_notifier_remove(&notifier->notifier);
645 | return ret; |
646 | } |
647 | |
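/* Handle a READ/WRITE/PREFETCH fault: fault the page through HMM and program
 * the GPU mapping under the SVMM mutex, retrying if the interval notifier
 * reports a concurrent invalidation.
 */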
648 | static int nouveau_range_fault(struct nouveau_svmm *svmm, |
649 | struct nouveau_drm *drm, |
650 | struct nouveau_pfnmap_args *args, u32 size, |
651 | unsigned long hmm_flags, |
652 | struct svm_notifier *notifier) |
653 | { |
654 | unsigned long timeout = |
655 | jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); |
656 | /* Have HMM fault pages within the fault window to the GPU. */ |
657 | unsigned long hmm_pfns[1]; |
658 | struct hmm_range range = { |
		.notifier = &notifier->notifier,
660 | .default_flags = hmm_flags, |
661 | .hmm_pfns = hmm_pfns, |
662 | .dev_private_owner = drm->dev, |
663 | }; |
664 | struct mm_struct *mm = svmm->notifier.mm; |
665 | int ret; |
666 | |
	ret = mmu_interval_notifier_insert(&notifier->notifier, mm,
					   args->p.addr, args->p.size,
					   &nouveau_svm_mni_ops);
670 | if (ret) |
671 | return ret; |
672 | |
673 | range.start = notifier->notifier.interval_tree.start; |
674 | range.end = notifier->notifier.interval_tree.last + 1; |
675 | |
676 | while (true) { |
677 | if (time_after(jiffies, timeout)) { |
678 | ret = -EBUSY; |
679 | goto out; |
680 | } |
681 | |
		range.notifier_seq = mmu_interval_read_begin(range.notifier);
		mmap_read_lock(mm);
		ret = hmm_range_fault(&range);
685 | mmap_read_unlock(mm); |
686 | if (ret) { |
687 | if (ret == -EBUSY) |
688 | continue; |
689 | goto out; |
690 | } |
691 | |
692 | mutex_lock(&svmm->mutex); |
		if (mmu_interval_read_retry(range.notifier,
					    range.notifier_seq)) {
			mutex_unlock(&svmm->mutex);
696 | continue; |
697 | } |
698 | break; |
699 | } |
700 | |
	nouveau_hmm_convert_pfn(drm, &range, args);
702 | |
703 | ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, size, NULL); |
	mutex_unlock(&svmm->mutex);
705 | |
706 | out: |
	mmu_interval_notifier_remove(&notifier->notifier);
708 | |
709 | return ret; |
710 | } |
711 | |
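/* Fault buffer worker: drain pending entries into the fault cache, resolve
 * them against their owning SVMMs, then replay or cancel as appropriate.
 */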
712 | static void |
713 | nouveau_svm_fault(struct work_struct *work) |
714 | { |
715 | struct nouveau_svm_fault_buffer *buffer = container_of(work, typeof(*buffer), work); |
716 | struct nouveau_svm *svm = container_of(buffer, typeof(*svm), buffer[buffer->id]); |
717 | struct nvif_object *device = &svm->drm->client.device.object; |
718 | struct nouveau_svmm *svmm; |
719 | struct { |
720 | struct nouveau_pfnmap_args i; |
721 | u64 phys[1]; |
722 | } args; |
723 | unsigned long hmm_flags; |
724 | u64 inst, start, limit; |
725 | int fi, fn; |
726 | int replay = 0, atomic = 0, ret; |
727 | |
728 | /* Parse available fault buffer entries into a cache, and update |
729 | * the GET pointer so HW can reuse the entries. |
730 | */ |
731 | SVM_DBG(svm, "fault handler" ); |
732 | if (buffer->get == buffer->put) { |
733 | buffer->put = nvif_rd32(device, buffer->putaddr); |
734 | buffer->get = nvif_rd32(device, buffer->getaddr); |
735 | if (buffer->get == buffer->put) |
736 | return; |
737 | } |
738 | buffer->fault_nr = 0; |
739 | |
740 | SVM_DBG(svm, "get %08x put %08x" , buffer->get, buffer->put); |
741 | while (buffer->get != buffer->put) { |
742 | nouveau_svm_fault_cache(svm, buffer, offset: buffer->get * 0x20); |
743 | if (++buffer->get == buffer->entries) |
744 | buffer->get = 0; |
745 | } |
746 | nvif_wr32(device, buffer->getaddr, buffer->get); |
747 | SVM_DBG(svm, "%d fault(s) pending" , buffer->fault_nr); |
748 | |
749 | /* Sort parsed faults by instance pointer to prevent unnecessary |
750 | * instance to SVMM translations, followed by address and access |
751 | * type to reduce the amount of work when handling the faults. |
752 | */ |
	sort(buffer->fault, buffer->fault_nr, sizeof(*buffer->fault),
	     nouveau_svm_fault_cmp, NULL);
755 | |
756 | /* Lookup SVMM structure for each unique instance pointer. */ |
757 | mutex_lock(&svm->mutex); |
758 | for (fi = 0, svmm = NULL; fi < buffer->fault_nr; fi++) { |
759 | if (!svmm || buffer->fault[fi]->inst != inst) { |
760 | struct nouveau_ivmm *ivmm = |
				nouveau_ivmm_find(svm, buffer->fault[fi]->inst);
762 | svmm = ivmm ? ivmm->svmm : NULL; |
763 | inst = buffer->fault[fi]->inst; |
764 | SVM_DBG(svm, "inst %016llx -> svm-%p" , inst, svmm); |
765 | } |
766 | buffer->fault[fi]->svmm = svmm; |
767 | } |
	mutex_unlock(&svm->mutex);
769 | |
770 | /* Process list of faults. */ |
771 | args.i.i.version = 0; |
772 | args.i.i.type = NVIF_IOCTL_V0_MTHD; |
773 | args.i.m.version = 0; |
774 | args.i.m.method = NVIF_VMM_V0_PFNMAP; |
775 | args.i.p.version = 0; |
776 | |
777 | for (fi = 0; fn = fi + 1, fi < buffer->fault_nr; fi = fn) { |
778 | struct svm_notifier notifier; |
779 | struct mm_struct *mm; |
780 | |
781 | /* Cancel any faults from non-SVM channels. */ |
782 | if (!(svmm = buffer->fault[fi]->svmm)) { |
			nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
784 | continue; |
785 | } |
786 | SVMM_DBG(svmm, "addr %016llx" , buffer->fault[fi]->addr); |
787 | |
788 | /* We try and group handling of faults within a small |
789 | * window into a single update. |
790 | */ |
791 | start = buffer->fault[fi]->addr; |
792 | limit = start + PAGE_SIZE; |
793 | if (start < svmm->unmanaged.limit) |
794 | limit = min_t(u64, limit, svmm->unmanaged.start); |
795 | |
796 | /* |
797 | * Prepare the GPU-side update of all pages within the |
798 | * fault window, determining required pages and access |
799 | * permissions based on pending faults. |
800 | */ |
801 | args.i.p.addr = start; |
802 | args.i.p.page = PAGE_SHIFT; |
803 | args.i.p.size = PAGE_SIZE; |
804 | /* |
805 | * Determine required permissions based on GPU fault |
806 | * access flags. |
807 | */ |
808 | switch (buffer->fault[fi]->access) { |
809 | case 0: /* READ. */ |
810 | hmm_flags = HMM_PFN_REQ_FAULT; |
811 | break; |
812 | case 2: /* ATOMIC. */ |
813 | atomic = true; |
814 | break; |
815 | case 3: /* PREFETCH. */ |
816 | hmm_flags = 0; |
817 | break; |
818 | default: |
819 | hmm_flags = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE; |
820 | break; |
821 | } |
822 | |
823 | mm = svmm->notifier.mm; |
824 | if (!mmget_not_zero(mm)) { |
			nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
826 | continue; |
827 | } |
828 | |
829 | notifier.svmm = svmm; |
830 | if (atomic) |
			ret = nouveau_atomic_range_fault(svmm, svm->drm,
							 &args.i, sizeof(args),
							 &notifier);
		else
			ret = nouveau_range_fault(svmm, svm->drm, &args.i,
						  sizeof(args), hmm_flags,
						  &notifier);
838 | mmput(mm); |
839 | |
840 | limit = args.i.p.addr + args.i.p.size; |
841 | for (fn = fi; ++fn < buffer->fault_nr; ) { |
842 | /* It's okay to skip over duplicate addresses from the |
843 | * same SVMM as faults are ordered by access type such |
844 | * that only the first one needs to be handled. |
845 | * |
846 | * ie. WRITE faults appear first, thus any handling of |
847 | * pending READ faults will already be satisfied. |
848 | * But if a large page is mapped, make sure subsequent |
849 | * fault addresses have sufficient access permission. |
850 | */ |
851 | if (buffer->fault[fn]->svmm != svmm || |
852 | buffer->fault[fn]->addr >= limit || |
853 | (buffer->fault[fi]->access == FAULT_ACCESS_READ && |
854 | !(args.phys[0] & NVIF_VMM_PFNMAP_V0_V)) || |
855 | (buffer->fault[fi]->access != FAULT_ACCESS_READ && |
856 | buffer->fault[fi]->access != FAULT_ACCESS_PREFETCH && |
857 | !(args.phys[0] & NVIF_VMM_PFNMAP_V0_W)) || |
858 | (buffer->fault[fi]->access != FAULT_ACCESS_READ && |
859 | buffer->fault[fi]->access != FAULT_ACCESS_WRITE && |
860 | buffer->fault[fi]->access != FAULT_ACCESS_PREFETCH && |
861 | !(args.phys[0] & NVIF_VMM_PFNMAP_V0_A))) |
862 | break; |
863 | } |
864 | |
865 | /* If handling failed completely, cancel all faults. */ |
866 | if (ret) { |
867 | while (fi < fn) { |
868 | struct nouveau_svm_fault *fault = |
869 | buffer->fault[fi++]; |
870 | |
871 | nouveau_svm_fault_cancel_fault(svm, fault); |
872 | } |
873 | } else |
874 | replay++; |
875 | } |
876 | |
877 | /* Issue fault replay to the GPU. */ |
878 | if (replay) |
879 | nouveau_svm_fault_replay(svm); |
880 | } |
881 | |
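/* Fault buffer event handler: defer fault processing to the workqueue. */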
882 | static int |
883 | nouveau_svm_event(struct nvif_event *event, void *argv, u32 argc) |
884 | { |
885 | struct nouveau_svm_fault_buffer *buffer = container_of(event, typeof(*buffer), notify); |
886 | |
	schedule_work(&buffer->work);
888 | return NVIF_EVENT_KEEP; |
889 | } |
890 | |
891 | static struct nouveau_pfnmap_args * |
892 | nouveau_pfns_to_args(void *pfns) |
893 | { |
894 | return container_of(pfns, struct nouveau_pfnmap_args, p.phys); |
895 | } |
896 | |
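/* Allocate a PFN array embedded in a nouveau_pfnmap_args structure, for use
 * with nouveau_pfns_map() and nouveau_pfns_free().
 */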
897 | u64 * |
898 | nouveau_pfns_alloc(unsigned long npages) |
899 | { |
900 | struct nouveau_pfnmap_args *args; |
901 | |
902 | args = kzalloc(struct_size(args, p.phys, npages), GFP_KERNEL); |
903 | if (!args) |
904 | return NULL; |
905 | |
906 | args->i.type = NVIF_IOCTL_V0_MTHD; |
907 | args->m.method = NVIF_VMM_V0_PFNMAP; |
908 | args->p.page = PAGE_SHIFT; |
909 | |
910 | return args->p.phys; |
911 | } |
912 | |
913 | void |
914 | nouveau_pfns_free(u64 *pfns) |
915 | { |
916 | struct nouveau_pfnmap_args *args = nouveau_pfns_to_args(pfns); |
917 | |
	kfree(args);
919 | } |
920 | |
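/* Program GPU page tables for npages PFNs previously encoded by the caller. */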
921 | void |
922 | nouveau_pfns_map(struct nouveau_svmm *svmm, struct mm_struct *mm, |
923 | unsigned long addr, u64 *pfns, unsigned long npages) |
924 | { |
925 | struct nouveau_pfnmap_args *args = nouveau_pfns_to_args(pfns); |
926 | |
927 | args->p.addr = addr; |
928 | args->p.size = npages << PAGE_SHIFT; |
929 | |
930 | mutex_lock(&svmm->mutex); |
931 | |
932 | nvif_object_ioctl(&svmm->vmm->vmm.object, args, |
933 | struct_size(args, p.phys, npages), NULL); |
934 | |
	mutex_unlock(&svmm->mutex);
936 | } |
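
/*
 * Typical usage of the helpers above (a minimal sketch; variable names and
 * the way the PFN encodings are produced are illustrative only):
 *
 *	u64 *pfns = nouveau_pfns_alloc(npages);
 *
 *	if (pfns) {
 *		... fill pfns[] with NVIF_VMM_PFNMAP_V0_* encoded addresses ...
 *		nouveau_pfns_map(svmm, mm, addr, pfns, npages);
 *		nouveau_pfns_free(pfns);
 *	}
 */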
937 | |
938 | static void |
939 | nouveau_svm_fault_buffer_fini(struct nouveau_svm *svm, int id) |
940 | { |
941 | struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id]; |
942 | |
943 | nvif_event_block(&buffer->notify); |
	flush_work(&buffer->work);
945 | } |
946 | |
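/* Resynchronise the cached GET/PUT pointers and re-enable fault notifications. */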
947 | static int |
948 | nouveau_svm_fault_buffer_init(struct nouveau_svm *svm, int id) |
949 | { |
950 | struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id]; |
951 | struct nvif_object *device = &svm->drm->client.device.object; |
952 | |
953 | buffer->get = nvif_rd32(device, buffer->getaddr); |
954 | buffer->put = nvif_rd32(device, buffer->putaddr); |
955 | SVM_DBG(svm, "get %08x put %08x (init)" , buffer->get, buffer->put); |
956 | |
957 | return nvif_event_allow(&buffer->notify); |
958 | } |
959 | |
960 | static void |
961 | nouveau_svm_fault_buffer_dtor(struct nouveau_svm *svm, int id) |
962 | { |
963 | struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id]; |
964 | int i; |
965 | |
966 | if (!nvif_object_constructed(&buffer->object)) |
967 | return; |
968 | |
969 | nouveau_svm_fault_buffer_fini(svm, id); |
970 | |
971 | if (buffer->fault) { |
972 | for (i = 0; buffer->fault[i] && i < buffer->entries; i++) |
973 | kfree(objp: buffer->fault[i]); |
974 | kvfree(addr: buffer->fault); |
975 | } |
976 | |
977 | nvif_event_dtor(&buffer->notify); |
978 | nvif_object_dtor(&buffer->object); |
979 | } |
980 | |
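/* Allocate and map the replayable fault buffer object, hook up its notify
 * event and fault cache, then enable it.
 */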
981 | static int |
982 | nouveau_svm_fault_buffer_ctor(struct nouveau_svm *svm, s32 oclass, int id) |
983 | { |
984 | struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id]; |
985 | struct nouveau_drm *drm = svm->drm; |
986 | struct nvif_object *device = &drm->client.device.object; |
987 | struct nvif_clb069_v0 args = {}; |
988 | int ret; |
989 | |
990 | buffer->id = id; |
991 | |
	ret = nvif_object_ctor(device, "svmFaultBuffer", 0, oclass, &args,
993 | sizeof(args), &buffer->object); |
994 | if (ret < 0) { |
		SVM_ERR(svm, "Fault buffer allocation failed: %d", ret);
996 | return ret; |
997 | } |
998 | |
999 | nvif_object_map(&buffer->object, NULL, 0); |
1000 | buffer->entries = args.entries; |
1001 | buffer->getaddr = args.get; |
1002 | buffer->putaddr = args.put; |
1003 | INIT_WORK(&buffer->work, nouveau_svm_fault); |
1004 | |
	ret = nvif_event_ctor(&buffer->object, "svmFault", id, nouveau_svm_event, true, NULL, 0,
1006 | &buffer->notify); |
1007 | if (ret) |
1008 | return ret; |
1009 | |
1010 | buffer->fault = kvcalloc(n: buffer->entries, size: sizeof(*buffer->fault), GFP_KERNEL); |
1011 | if (!buffer->fault) |
1012 | return -ENOMEM; |
1013 | |
1014 | return nouveau_svm_fault_buffer_init(svm, id); |
1015 | } |
1016 | |
1017 | void |
1018 | nouveau_svm_resume(struct nouveau_drm *drm) |
1019 | { |
1020 | struct nouveau_svm *svm = drm->svm; |
1021 | if (svm) |
		nouveau_svm_fault_buffer_init(svm, 0);
1023 | } |
1024 | |
1025 | void |
1026 | nouveau_svm_suspend(struct nouveau_drm *drm) |
1027 | { |
1028 | struct nouveau_svm *svm = drm->svm; |
1029 | if (svm) |
		nouveau_svm_fault_buffer_fini(svm, 0);
1031 | } |
1032 | |
1033 | void |
1034 | nouveau_svm_fini(struct nouveau_drm *drm) |
1035 | { |
1036 | struct nouveau_svm *svm = drm->svm; |
1037 | if (svm) { |
		nouveau_svm_fault_buffer_dtor(svm, 0);
		kfree(drm->svm);
1040 | drm->svm = NULL; |
1041 | } |
1042 | } |
1043 | |
1044 | void |
1045 | nouveau_svm_init(struct nouveau_drm *drm) |
1046 | { |
1047 | static const struct nvif_mclass buffers[] = { |
1048 | { VOLTA_FAULT_BUFFER_A, 0 }, |
1049 | { MAXWELL_FAULT_BUFFER_A, 0 }, |
1050 | {} |
1051 | }; |
1052 | struct nouveau_svm *svm; |
1053 | int ret; |
1054 | |
1055 | /* Disable on Volta and newer until channel recovery is fixed, |
1056 | * otherwise clients will have a trivial way to trash the GPU |
1057 | * for everyone. |
1058 | */ |
1059 | if (drm->client.device.info.family > NV_DEVICE_INFO_V0_PASCAL) |
1060 | return; |
1061 | |
1062 | drm->svm = svm = kzalloc(struct_size(drm->svm, buffer, 1), GFP_KERNEL); |
1063 | if (!drm->svm) |
1064 | return; |
1065 | |
1066 | drm->svm->drm = drm; |
1067 | mutex_init(&drm->svm->mutex); |
	INIT_LIST_HEAD(&drm->svm->inst);
1069 | |
1070 | ret = nvif_mclass(&drm->client.device.object, buffers); |
1071 | if (ret < 0) { |
1072 | SVM_DBG(svm, "No supported fault buffer class" ); |
1073 | nouveau_svm_fini(drm); |
1074 | return; |
1075 | } |
1076 | |
	ret = nouveau_svm_fault_buffer_ctor(svm, buffers[ret].oclass, 0);
1078 | if (ret) { |
1079 | nouveau_svm_fini(drm); |
1080 | return; |
1081 | } |
1082 | |
1083 | SVM_DBG(svm, "Initialised" ); |
1084 | } |
1085 | |