/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <christian.koenig@amd.com>
 */

/**
 * DOC: MMU Notifier
 *
 * For coherent userptr handling the driver registers an MMU notifier to be
 * informed about updates to the page tables of a process.
 *
 * When somebody tries to invalidate the page tables we block the update until
 * all operations on the pages in question are completed, then those pages are
 * marked as accessed and also dirty if it wasn't a read only access.
 *
 * New command submissions using the userptrs in question are delayed until all
 * page table invalidations are completed and we once more see a coherent
 * process address space.
 */
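
/*
 * A minimal usage sketch (call sites simplified; in practice registration
 * happens from the userptr BO creation path and unregistration from BO
 * destruction):
 *
 *	r = amdgpu_mn_register(bo, userptr_addr);
 *	if (r)
 *		goto error_free;
 *
 *	...
 *
 *	amdgpu_mn_unregister(bo);
 *
 * Between those two calls every invalidation of the registered range is
 * serialized against command submission through the notifier lock below.
 */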

#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/mmu_notifier.h>
#include <linux/interval_tree.h>
#include <drm/drmP.h>
#include <drm/drm.h>

#include "amdgpu.h"
#include "amdgpu_amdkfd.h"

/**
 * struct amdgpu_mn
 *
 * @adev: amdgpu device pointer
 * @mm: process address space
 * @mn: MMU notifier structure
 * @type: type of MMU notifier
 * @work: destruction work item
 * @node: hash table node to find structure by adev and mn
 * @lock: rw semaphore protecting the notifier nodes
 * @objects: interval tree containing amdgpu_mn_nodes
 * @read_lock: mutex protecting the recursive acquisition of @lock
 * @recursion: number of concurrent invalidations currently sharing @lock
 *
 * Data for each amdgpu device and process address space.
 */
struct amdgpu_mn {
	/* constant after initialisation */
	struct amdgpu_device *adev;
	struct mm_struct *mm;
	struct mmu_notifier mn;
	enum amdgpu_mn_type type;

	/* only used on destruction */
	struct work_struct work;

	/* protected by adev->mn_lock */
	struct hlist_node node;

	/* objects protected by lock */
	struct rw_semaphore lock;
	struct rb_root_cached objects;
	struct mutex read_lock;
	atomic_t recursion;
};

/**
 * struct amdgpu_mn_node
 *
 * @it: interval node defining start-last of the affected address range
 * @bos: list of all BOs in the affected address range
 *
 * Manages all BOs which are affected by a certain range of address space.
 */
struct amdgpu_mn_node {
	struct interval_tree_node it;
	struct list_head bos;
};

/**
 * amdgpu_mn_destroy - destroy the MMU notifier
 *
 * @work: previously scheduled work item
 *
 * Lazily destroys the notifier from a work item
 */
static void amdgpu_mn_destroy(struct work_struct *work)
{
	struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work);
	struct amdgpu_device *adev = amn->adev;
	struct amdgpu_mn_node *node, *next_node;
	struct amdgpu_bo *bo, *next_bo;

	mutex_lock(&adev->mn_lock);
	down_write(&amn->lock);
	hash_del(&amn->node);
	rbtree_postorder_for_each_entry_safe(node, next_node,
					     &amn->objects.rb_root, it.rb) {
		list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
			bo->mn = NULL;
			list_del_init(&bo->mn_list);
		}
		kfree(node);
	}
	up_write(&amn->lock);
	mutex_unlock(&adev->mn_lock);
	mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
	kfree(amn);
}

/**
 * amdgpu_mn_release - callback to notify about mm destruction
 *
 * @mn: our notifier
 * @mm: the mm this callback is about
 *
 * Schedule a work item to lazily destroy our notifier.
 */
static void amdgpu_mn_release(struct mmu_notifier *mn,
			      struct mm_struct *mm)
{
	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);

	INIT_WORK(&amn->work, amdgpu_mn_destroy);
	schedule_work(&amn->work);
}


/**
 * amdgpu_mn_lock - take the write side lock for this notifier
 *
 * @mn: our notifier
 */
void amdgpu_mn_lock(struct amdgpu_mn *mn)
{
	if (mn)
		down_write(&mn->lock);
}

/**
 * amdgpu_mn_unlock - drop the write side lock for this notifier
 *
 * @mn: our notifier
 */
void amdgpu_mn_unlock(struct amdgpu_mn *mn)
{
	if (mn)
		up_write(&mn->lock);
}
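
/*
 * amdgpu_mn_lock()/amdgpu_mn_unlock() bracket work that must not race with
 * page table invalidations, e.g. validating userptr BOs before a command
 * submission. A sketch of the expected pattern (callers simplified):
 *
 *	amdgpu_mn_lock(bo->mn);
 *	... validate and use the user pages, no invalidation can run ...
 *	amdgpu_mn_unlock(bo->mn);
 */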

/**
 * amdgpu_mn_read_lock - take the read side lock for this notifier
 *
 * @amn: our notifier
 * @blockable: whether we are allowed to block
 *
 * Returns 0 on success, -EAGAIN if @blockable is false and the lock is
 * contended.
 */
static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
{
	if (blockable)
		mutex_lock(&amn->read_lock);
	else if (!mutex_trylock(&amn->read_lock))
		return -EAGAIN;

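	/*
	 * Only the first invalidation actually takes the rwsem; nested and
	 * concurrent invalidations just bump the recursion counter. The
	 * non-owner variants are used because the task doing the final
	 * amdgpu_mn_read_unlock() is not necessarily the one that took the
	 * lock here.
	 */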
	if (atomic_inc_return(&amn->recursion) == 1)
		down_read_non_owner(&amn->lock);
	mutex_unlock(&amn->read_lock);

	return 0;
}

/**
 * amdgpu_mn_read_unlock - drop the read side lock for this notifier
 *
 * @amn: our notifier
 */
static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
{
	if (atomic_dec_return(&amn->recursion) == 0)
		up_read_non_owner(&amn->lock);
}

/**
 * amdgpu_mn_invalidate_node - unmap all BOs of a node
 *
 * @node: the node with the BOs to unmap
 * @start: start of address range affected
 * @end: end of address range affected
 *
 * Block for operations on BOs to finish and mark pages as accessed and
 * potentially dirty.
 */
static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
				      unsigned long start,
				      unsigned long end)
{
	struct amdgpu_bo *bo;
	long r;

	list_for_each_entry(bo, &node->bos, mn_list) {

		if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end))
			continue;

		r = reservation_object_wait_timeout_rcu(bo->tbo.resv,
			true, false, MAX_SCHEDULE_TIMEOUT);
		if (r <= 0)
			DRM_ERROR("(%ld) failed to wait for user bo\n", r);

		amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
	}
}

/**
 * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
 *
 * @mn: our notifier
 * @range: mmu notifier context
 *
 * Block for operations on BOs to finish and mark pages as accessed and
 * potentially dirty.
 */
static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
			const struct mmu_notifier_range *range)
{
	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
	struct interval_tree_node *it;
	unsigned long end;

	/* notification is exclusive, but interval is inclusive */
	end = range->end - 1;

	/* TODO we should be able to split locking for interval tree and
	 * amdgpu_mn_invalidate_node
	 */
	if (amdgpu_mn_read_lock(amn, range->blockable))
		return -EAGAIN;

	it = interval_tree_iter_first(&amn->objects, range->start, end);
	while (it) {
		struct amdgpu_mn_node *node;

		if (!range->blockable) {
			amdgpu_mn_read_unlock(amn);
			return -EAGAIN;
		}

		node = container_of(it, struct amdgpu_mn_node, it);
		it = interval_tree_iter_next(it, range->start, end);

		amdgpu_mn_invalidate_node(node, range->start, end);
	}

	return 0;
}

/**
 * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
 *
 * @mn: our notifier
 * @range: mmu notifier context
 *
 * We temporarily evict all BOs in the affected range. This requires
 * evicting all user-mode queues of the process. The BOs are restored
 * again later, after the invalidation has completed.
 */
static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
			const struct mmu_notifier_range *range)
{
	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
	struct interval_tree_node *it;
	unsigned long end;

	/* notification is exclusive, but interval is inclusive */
	end = range->end - 1;

	if (amdgpu_mn_read_lock(amn, range->blockable))
		return -EAGAIN;

	it = interval_tree_iter_first(&amn->objects, range->start, end);
	while (it) {
		struct amdgpu_mn_node *node;
		struct amdgpu_bo *bo;

		if (!range->blockable) {
			amdgpu_mn_read_unlock(amn);
			return -EAGAIN;
		}

		node = container_of(it, struct amdgpu_mn_node, it);
		it = interval_tree_iter_next(it, range->start, end);

		list_for_each_entry(bo, &node->bos, mn_list) {
			struct kgd_mem *mem = bo->kfd_bo;

			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
							 range->start,
							 end))
				amdgpu_amdkfd_evict_userptr(mem, range->mm);
		}
	}

	return 0;
}

/**
 * amdgpu_mn_invalidate_range_end - callback to notify about mm change
 *
 * @mn: our notifier
 * @range: mmu notifier context
 *
 * Release the lock again to allow new command submissions.
 */
static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
			const struct mmu_notifier_range *range)
{
	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);

	amdgpu_mn_read_unlock(amn);
}

static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
	[AMDGPU_MN_TYPE_GFX] = {
		.release = amdgpu_mn_release,
		.invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
	},
	[AMDGPU_MN_TYPE_HSA] = {
		.release = amdgpu_mn_release,
		.invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
	},
};

/* Low bits of any reasonable mm pointer will be unused due to struct
 * alignment. Use these bits to make a unique key from the mm pointer
 * and notifier type.
 */
#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))

/**
 * amdgpu_mn_get - create notifier context
 *
 * @adev: amdgpu device pointer
 * @type: type of MMU notifier context
 *
 * Returns the notifier context for current->mm and the given type, creating
 * it if it does not exist yet, or an ERR_PTR on failure.
 */
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
				enum amdgpu_mn_type type)
{
	struct mm_struct *mm = current->mm;
	struct amdgpu_mn *amn;
	unsigned long key = AMDGPU_MN_KEY(mm, type);
	int r;

	mutex_lock(&adev->mn_lock);
	if (down_write_killable(&mm->mmap_sem)) {
		mutex_unlock(&adev->mn_lock);
		return ERR_PTR(-EINTR);
	}

	hash_for_each_possible(adev->mn_hash, amn, node, key)
		if (AMDGPU_MN_KEY(amn->mm, amn->type) == key)
			goto release_locks;

	amn = kzalloc(sizeof(*amn), GFP_KERNEL);
	if (!amn) {
		amn = ERR_PTR(-ENOMEM);
		goto release_locks;
	}

	amn->adev = adev;
	amn->mm = mm;
	init_rwsem(&amn->lock);
	amn->type = type;
	amn->mn.ops = &amdgpu_mn_ops[type];
	amn->objects = RB_ROOT_CACHED;
	mutex_init(&amn->read_lock);
	atomic_set(&amn->recursion, 0);

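	/* __mmu_notifier_register() expects mmap_sem to be held for write */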
	r = __mmu_notifier_register(&amn->mn, mm);
	if (r)
		goto free_amn;

	hash_add(adev->mn_hash, &amn->node, AMDGPU_MN_KEY(mm, type));

release_locks:
	up_write(&mm->mmap_sem);
	mutex_unlock(&adev->mn_lock);

	return amn;

free_amn:
	up_write(&mm->mmap_sem);
	mutex_unlock(&adev->mn_lock);
	kfree(amn);

	return ERR_PTR(r);
}

/**
 * amdgpu_mn_register - register a BO for notifier updates
 *
 * @bo: amdgpu buffer object
 * @addr: userptr addr we should monitor
 *
 * Registers an MMU notifier for the given BO at the specified address.
 * Returns 0 on success, -ERRNO if anything goes wrong.
 */
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
{
	unsigned long end = addr + amdgpu_bo_size(bo) - 1;
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	enum amdgpu_mn_type type =
		bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
	struct amdgpu_mn *amn;
	struct amdgpu_mn_node *node = NULL, *new_node;
	struct list_head bos;
	struct interval_tree_node *it;

	amn = amdgpu_mn_get(adev, type);
	if (IS_ERR(amn))
		return PTR_ERR(amn);

	new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
	if (!new_node)
		return -ENOMEM;

	INIT_LIST_HEAD(&bos);

	down_write(&amn->lock);

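	/*
	 * Merge the new range with any existing nodes it overlaps: remove
	 * them from the interval tree, collect their BOs and widen
	 * [addr, end] to cover the union of all affected ranges.
	 */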
	while ((it = interval_tree_iter_first(&amn->objects, addr, end))) {
		kfree(node);
		node = container_of(it, struct amdgpu_mn_node, it);
		interval_tree_remove(&node->it, &amn->objects);
		addr = min(it->start, addr);
		end = max(it->last, end);
		list_splice(&node->bos, &bos);
	}

	if (!node)
		node = new_node;
	else
		kfree(new_node);

	bo->mn = amn;

	node->it.start = addr;
	node->it.last = end;
	INIT_LIST_HEAD(&node->bos);
	list_splice(&bos, &node->bos);
	list_add(&bo->mn_list, &node->bos);

	interval_tree_insert(&node->it, &amn->objects);

	up_write(&amn->lock);

	return 0;
}

/**
 * amdgpu_mn_unregister - unregister a BO for notifier updates
 *
 * @bo: amdgpu buffer object
 *
 * Remove any registration of MMU notifier updates from the buffer object.
 */
void amdgpu_mn_unregister(struct amdgpu_bo *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	struct amdgpu_mn *amn;
	struct list_head *head;

	mutex_lock(&adev->mn_lock);

	amn = bo->mn;
	if (amn == NULL) {
		mutex_unlock(&adev->mn_lock);
		return;
	}

	down_write(&amn->lock);

	/* save the next list entry for later */
	head = bo->mn_list.next;

	bo->mn = NULL;
	list_del_init(&bo->mn_list);

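	/* if this was the last BO in the node, drop the node as well */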
	if (list_empty(head)) {
		struct amdgpu_mn_node *node;

		node = container_of(head, struct amdgpu_mn_node, bos);
		interval_tree_remove(&node->it, &amn->objects);
		kfree(node);
	}

	up_write(&amn->lock);
	mutex_unlock(&adev->mn_lock);
}