// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO: IOMMU DMA mapping support for TCE on POWER
 *
 * Copyright (C) 2013 IBM Corp. All rights reserved.
 * Author: Alexey Kardashevskiy <aik@ozlabs.ru>
 * Copyright Gavin Shan, IBM Corporation 2014.
 *
 * Derived from original vfio_iommu_type1.c:
 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
 * Author: Alex Williamson <alex.williamson@redhat.com>
 */

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/err.h>
#include <linux/vfio.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/mm.h>
#include "vfio.h"

#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/mmu_context.h>

#define DRIVER_VERSION	"0.1"
#define DRIVER_AUTHOR	"aik@ozlabs.ru"
#define DRIVER_DESC	"VFIO IOMMU SPAPR TCE"
static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group);

/*
 * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
 *
 * This code handles mapping and unmapping of user data buffers
 * into DMA'ble space using the IOMMU
 */

struct tce_iommu_group {
	struct list_head next;
	struct iommu_group *grp;
};

/*
 * A container needs to remember which preregistered region it has
 * referenced to do proper cleanup at the userspace process exit.
 */
struct tce_iommu_prereg {
	struct list_head next;
	struct mm_iommu_table_group_mem_t *mem;
};

/*
 * The container descriptor supports only a single group per container.
 * Required by the API as the container is not supplied with the IOMMU group
 * at the moment of initialization.
 */
struct tce_container {
	struct mutex lock;
	bool enabled;
	bool v2;
	bool def_window_pending;
	unsigned long locked_pages;
	struct mm_struct *mm;
	struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
	struct list_head group_list;
	struct list_head prereg_list;
};

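/*
 * Binds the container to the mm of the calling process on first use and
 * grabs a reference on it; callers from a different process are rejected
 * with -EPERM so one userspace cannot manipulate another's mappings.
 */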
static long tce_iommu_mm_set(struct tce_container *container)
{
	if (container->mm) {
		if (container->mm == current->mm)
			return 0;
		return -EPERM;
	}
	BUG_ON(!current->mm);
	container->mm = current->mm;
	mmgrab(container->mm);

	return 0;
}

static long tce_iommu_prereg_free(struct tce_container *container,
		struct tce_iommu_prereg *tcemem)
{
	long ret;

	ret = mm_iommu_put(container->mm, tcemem->mem);
	if (ret)
		return ret;

	list_del(&tcemem->next);
	kfree(tcemem);

	return 0;
}

static long tce_iommu_unregister_pages(struct tce_container *container,
		__u64 vaddr, __u64 size)
{
	struct mm_iommu_table_group_mem_t *mem;
	struct tce_iommu_prereg *tcemem;
	bool found = false;
	long ret;

	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK))
		return -EINVAL;

	mem = mm_iommu_get(container->mm, vaddr, size >> PAGE_SHIFT);
	if (!mem)
		return -ENOENT;

	list_for_each_entry(tcemem, &container->prereg_list, next) {
		if (tcemem->mem == mem) {
			found = true;
			break;
		}
	}

	if (!found)
		ret = -ENOENT;
	else
		ret = tce_iommu_prereg_free(container, tcemem);

	mm_iommu_put(container->mm, mem);

	return ret;
}

static long tce_iommu_register_pages(struct tce_container *container,
		__u64 vaddr, __u64 size)
{
	long ret = 0;
	struct mm_iommu_table_group_mem_t *mem = NULL;
	struct tce_iommu_prereg *tcemem;
	unsigned long entries = size >> PAGE_SHIFT;

	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) ||
			((vaddr + size) < vaddr))
		return -EINVAL;

	mem = mm_iommu_get(container->mm, vaddr, entries);
	if (mem) {
		list_for_each_entry(tcemem, &container->prereg_list, next) {
			if (tcemem->mem == mem) {
				ret = -EBUSY;
				goto put_exit;
			}
		}
	} else {
		ret = mm_iommu_new(container->mm, vaddr, entries, &mem);
		if (ret)
			return ret;
	}

	tcemem = kzalloc(sizeof(*tcemem), GFP_KERNEL);
	if (!tcemem) {
		ret = -ENOMEM;
		goto put_exit;
	}

	tcemem->mem = mem;
	list_add(&tcemem->next, &container->prereg_list);

	container->enabled = true;

	return 0;

put_exit:
	mm_iommu_put(container->mm, mem);
	return ret;
}
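
/*
 * A minimal sketch of how userspace is expected to drive the v2
 * preregistration path above; field names follow include/uapi/linux/vfio.h,
 * while the buffer and container fd are illustrative:
 *
 *	struct vfio_iommu_spapr_register_memory reg = {
 *		.argsz = sizeof(reg),
 *		.flags = 0,
 *		.vaddr = (__u64)(uintptr_t)buf,	// page-aligned buffer
 *		.size = bufsz,			// multiple of PAGE_SIZE
 *	};
 *
 *	ioctl(container_fd, VFIO_IOMMU_SPAPR_REGISTER_MEMORY, &reg);
 *	...
 *	ioctl(container_fd, VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
 */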

static bool tce_page_is_contained(struct mm_struct *mm, unsigned long hpa,
		unsigned int it_page_shift)
{
	struct page *page;
	unsigned long size = 0;

	if (mm_iommu_is_devmem(mm, hpa, it_page_shift, &size))
		return size == (1UL << it_page_shift);

	page = pfn_to_page(hpa >> PAGE_SHIFT);
	/*
	 * Check that the TCE table granularity is not bigger than the size of
	 * a page we just found. Otherwise the hardware can get access to
	 * a bigger memory chunk than it should.
	 */
	return page_shift(compound_head(page)) >= it_page_shift;
}

static inline bool tce_groups_attached(struct tce_container *container)
{
	return !list_empty(&container->group_list);
}

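/*
 * Returns the index in container->tables[] of the DMA window covering @ioba,
 * or -1 if no window contains that bus address.
 */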
static long tce_iommu_find_table(struct tce_container *container,
		phys_addr_t ioba, struct iommu_table **ptbl)
{
	long i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (tbl) {
			unsigned long entry = ioba >> tbl->it_page_shift;
			unsigned long start = tbl->it_offset;
			unsigned long end = start + tbl->it_size;

			if ((start <= entry) && (entry < end)) {
				*ptbl = tbl;
				return i;
			}
		}
	}

	return -1;
}

static int tce_iommu_find_free_table(struct tce_container *container)
{
	int i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		if (!container->tables[i])
			return i;
	}

	return -ENOSPC;
}

static int tce_iommu_enable(struct tce_container *container)
{
	int ret = 0;
	unsigned long locked;
	struct iommu_table_group *table_group;
	struct tce_iommu_group *tcegrp;

	if (container->enabled)
		return -EBUSY;

	/*
	 * When userspace pages are mapped into the IOMMU, they are effectively
	 * locked memory, so, theoretically, we need to update the accounting
	 * of locked pages on each map and unmap. For powerpc, the map/unmap
	 * paths can be very hot, though, and the accounting would kill
	 * performance, especially since it would be difficult, if not
	 * impossible, to handle the accounting in real mode only.
	 *
	 * To address that, rather than precisely accounting every page, we
	 * instead account for a worst case on locked memory when the iommu is
	 * enabled and disabled. The worst case upper bound on locked memory
	 * is the size of the whole iommu window, which is usually relatively
	 * small (compared to total memory sizes) on POWER hardware.
	 *
	 * Also we don't have a nice way to fail on H_PUT_TCE due to ulimits:
	 * that would effectively kill the guest at random points, so it is
	 * much better to enforce the limit based on the maximum that the
	 * guest can map.
	 *
	 * Unfortunately at the moment it counts whole tables, no matter how
	 * much memory the guest has. I.e. for a 4GB guest and 4 IOMMU groups,
	 * each with a 2GB DMA window, 8GB will be counted here. The reason for
	 * this is that we cannot tell here the amount of RAM used by the guest
	 * as this information is only available from KVM and VFIO is
	 * KVM agnostic.
	 *
	 * So we do not allow enabling a container without a group attached
	 * as there is no way to know how much we should increment
	 * the locked_vm counter.
	 */
	if (!tce_groups_attached(container))
		return -ENODEV;

	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -ENODEV;

	if (!table_group->tce32_size)
		return -EPERM;

	ret = tce_iommu_mm_set(container);
	if (ret)
		return ret;

	locked = table_group->tce32_size >> PAGE_SHIFT;
	ret = account_locked_vm(container->mm, locked, true);
	if (ret)
		return ret;

	container->locked_pages = locked;

	container->enabled = true;

	return ret;
}
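
/*
 * Note: tce_iommu_enable() charges the entire 32-bit window to locked_vm up
 * front; tce_iommu_disable() below uncharges exactly container->locked_pages,
 * keeping the accounting balanced.
 */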

static void tce_iommu_disable(struct tce_container *container)
{
	if (!container->enabled)
		return;

	container->enabled = false;

	BUG_ON(!container->mm);
	account_locked_vm(container->mm, container->locked_pages, false);
}

static void *tce_iommu_open(unsigned long arg)
{
	struct tce_container *container;

	if ((arg != VFIO_SPAPR_TCE_IOMMU) && (arg != VFIO_SPAPR_TCE_v2_IOMMU)) {
		pr_err("tce_vfio: Wrong IOMMU type\n");
		return ERR_PTR(-EINVAL);
	}

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return ERR_PTR(-ENOMEM);

	mutex_init(&container->lock);
	INIT_LIST_HEAD_RCU(&container->group_list);
	INIT_LIST_HEAD_RCU(&container->prereg_list);

	container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU;

	return container;
}

static int tce_iommu_clear(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long pages);
static void tce_iommu_free_table(struct tce_container *container,
		struct iommu_table *tbl);

static void tce_iommu_release(void *iommu_data)
{
	struct tce_container *container = iommu_data;
	struct tce_iommu_group *tcegrp;
	struct tce_iommu_prereg *tcemem, *tmtmp;
	long i;

	while (tce_groups_attached(container)) {
		tcegrp = list_first_entry(&container->group_list,
				struct tce_iommu_group, next);
		tce_iommu_detach_group(iommu_data, tcegrp->grp);
	}

	/*
	 * If VFIO created a table, it was not disposed
	 * by tce_iommu_detach_group() so do it now.
	 */
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
		tce_iommu_free_table(container, tbl);
	}

	list_for_each_entry_safe(tcemem, tmtmp, &container->prereg_list, next)
		WARN_ON(tce_iommu_prereg_free(container, tcemem));

	tce_iommu_disable(container);
	if (container->mm)
		mmdrop(container->mm);
	mutex_destroy(&container->lock);

	kfree(container);
}

static void tce_iommu_unuse_page(unsigned long hpa)
{
	struct page *page;

	page = pfn_to_page(hpa >> PAGE_SHIFT);
	unpin_user_page(page);
}

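/*
 * Translates a userspace address in a preregistered region into a host
 * physical address; on success *pmem points to the backing region so that
 * the caller can adjust its "mapped" counter.
 */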
static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container,
		unsigned long tce, unsigned long shift,
		unsigned long *phpa, struct mm_iommu_table_group_mem_t **pmem)
{
	long ret = 0;
	struct mm_iommu_table_group_mem_t *mem;

	mem = mm_iommu_lookup(container->mm, tce, 1ULL << shift);
	if (!mem)
		return -EINVAL;

	ret = mm_iommu_ua_to_hpa(mem, tce, shift, phpa);
	if (ret)
		return -EINVAL;

	*pmem = mem;

	return 0;
}

static void tce_iommu_unuse_page_v2(struct tce_container *container,
		struct iommu_table *tbl, unsigned long entry)
{
	struct mm_iommu_table_group_mem_t *mem = NULL;
	int ret;
	unsigned long hpa = 0;
	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);

	if (!pua)
		return;

	ret = tce_iommu_prereg_ua_to_hpa(container, be64_to_cpu(*pua),
			tbl->it_page_shift, &hpa, &mem);
	if (ret)
		pr_debug("%s: tce %llx at #%lx was not cached, ret=%d\n",
				__func__, be64_to_cpu(*pua), entry, ret);
	if (mem)
		mm_iommu_mapped_dec(mem);

	*pua = cpu_to_be64(0);
}

static int tce_iommu_clear(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long pages)
{
	unsigned long oldhpa;
	long ret;
	enum dma_data_direction direction;
	unsigned long lastentry = entry + pages, firstentry = entry;

	for ( ; entry < lastentry; ++entry) {
		if (tbl->it_indirect_levels && tbl->it_userspace) {
			/*
			 * For multilevel tables, we can take a shortcut here
			 * and skip some TCEs as we know that the userspace
			 * addresses cache is a mirror of the real TCE table
			 * and if it is missing some indirect levels, then
			 * the hardware table does not have them allocated
			 * either and therefore does not require updating.
			 */
			__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl,
					entry);
			if (!pua) {
				/* align to level_size which is power of two */
				entry |= tbl->it_level_size - 1;
				continue;
			}
		}

		cond_resched();

		direction = DMA_NONE;
		oldhpa = 0;
		ret = iommu_tce_xchg_no_kill(container->mm, tbl, entry, &oldhpa,
				&direction);
		if (ret)
			continue;

		if (direction == DMA_NONE)
			continue;

		if (container->v2) {
			tce_iommu_unuse_page_v2(container, tbl, entry);
			continue;
		}

		tce_iommu_unuse_page(oldhpa);
	}

	iommu_tce_kill(tbl, firstentry, pages);

	return 0;
}

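/*
 * Pins the single userspace page backing @tce and returns its host physical
 * address; the pin is dropped later via tce_iommu_unuse_page().
 */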
static int tce_iommu_use_page(unsigned long tce, unsigned long *hpa)
{
	struct page *page = NULL;
	enum dma_data_direction direction = iommu_tce_direction(tce);

	if (pin_user_pages_fast(tce & PAGE_MASK, 1,
			direction != DMA_TO_DEVICE ? FOLL_WRITE : 0,
			&page) != 1)
		return -EFAULT;

	*hpa = __pa((unsigned long) page_address(page));

	return 0;
}

static long tce_iommu_build(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long tce, unsigned long pages,
		enum dma_data_direction direction)
{
	long i, ret = 0;
	unsigned long hpa;
	enum dma_data_direction dirtmp;

	for (i = 0; i < pages; ++i) {
		unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;

		ret = tce_iommu_use_page(tce, &hpa);
		if (ret)
			break;

		if (!tce_page_is_contained(container->mm, hpa,
				tbl->it_page_shift)) {
			ret = -EPERM;
			break;
		}

		hpa |= offset;
		dirtmp = direction;
		ret = iommu_tce_xchg_no_kill(container->mm, tbl, entry + i,
				&hpa, &dirtmp);
		if (ret) {
			tce_iommu_unuse_page(hpa);
			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
					__func__, entry << tbl->it_page_shift,
					tce, ret);
			break;
		}

		if (dirtmp != DMA_NONE)
			tce_iommu_unuse_page(hpa);

		tce += IOMMU_PAGE_SIZE(tbl);
	}

	if (ret)
		tce_iommu_clear(container, tbl, entry, i);
	else
		iommu_tce_kill(tbl, entry, pages);

	return ret;
}

static long tce_iommu_build_v2(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long tce, unsigned long pages,
		enum dma_data_direction direction)
{
	long i, ret = 0;
	unsigned long hpa;
	enum dma_data_direction dirtmp;

	for (i = 0; i < pages; ++i) {
		struct mm_iommu_table_group_mem_t *mem = NULL;
		__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry + i);

		ret = tce_iommu_prereg_ua_to_hpa(container,
				tce, tbl->it_page_shift, &hpa, &mem);
		if (ret)
			break;

		if (!tce_page_is_contained(container->mm, hpa,
				tbl->it_page_shift)) {
			ret = -EPERM;
			break;
		}

		/* Preserve offset within IOMMU page */
		hpa |= tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
		dirtmp = direction;

		/* The registered region is being unregistered */
		if (mm_iommu_mapped_inc(mem))
			break;

		ret = iommu_tce_xchg_no_kill(container->mm, tbl, entry + i,
				&hpa, &dirtmp);
		if (ret) {
			/* dirtmp cannot be DMA_NONE here */
			tce_iommu_unuse_page_v2(container, tbl, entry + i);
			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
					__func__, entry << tbl->it_page_shift,
					tce, ret);
			break;
		}

		if (dirtmp != DMA_NONE)
			tce_iommu_unuse_page_v2(container, tbl, entry + i);

		*pua = cpu_to_be64(tce);

		tce += IOMMU_PAGE_SIZE(tbl);
	}

	if (ret)
		tce_iommu_clear(container, tbl, entry, i);
	else
		iommu_tce_kill(tbl, entry, pages);

	return ret;
}

static long tce_iommu_create_table(struct tce_container *container,
		struct iommu_table_group *table_group,
		int num,
		__u32 page_shift,
		__u64 window_size,
		__u32 levels,
		struct iommu_table **ptbl)
{
	long ret, table_size;

	table_size = table_group->ops->get_table_size(page_shift, window_size,
			levels);
	if (!table_size)
		return -EINVAL;

	ret = account_locked_vm(container->mm, table_size >> PAGE_SHIFT, true);
	if (ret)
		return ret;

	ret = table_group->ops->create_table(table_group, num,
			page_shift, window_size, levels, ptbl);

	WARN_ON(!ret && !(*ptbl)->it_ops->free);
	WARN_ON(!ret && ((*ptbl)->it_allocated_size > table_size));

	return ret;
}

static void tce_iommu_free_table(struct tce_container *container,
		struct iommu_table *tbl)
{
	unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;

	iommu_tce_table_put(tbl);
	account_locked_vm(container->mm, pages, false);
}

static long tce_iommu_create_window(struct tce_container *container,
		__u32 page_shift, __u64 window_size, __u32 levels,
		__u64 *start_addr)
{
	struct tce_iommu_group *tcegrp;
	struct iommu_table_group *table_group;
	struct iommu_table *tbl = NULL;
	long ret, num;

	num = tce_iommu_find_free_table(container);
	if (num < 0)
		return num;

	/* Get the first group for ops::create_table */
	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -EFAULT;

	if (!(table_group->pgsizes & (1ULL << page_shift)))
		return -EINVAL;

	if (!table_group->ops->set_window || !table_group->ops->unset_window ||
			!table_group->ops->get_table_size ||
			!table_group->ops->create_table)
		return -EPERM;

	/* Create TCE table */
	ret = tce_iommu_create_table(container, table_group, num,
			page_shift, window_size, levels, &tbl);
	if (ret)
		return ret;

	BUG_ON(!tbl->it_ops->free);

	/*
	 * Program the table to every group.
	 * Groups have been tested for compatibility at the attach time.
	 */
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		ret = table_group->ops->set_window(table_group, num, tbl);
		if (ret)
			goto unset_exit;
	}

	container->tables[num] = tbl;

	/* Return start address assigned by platform in create_table() */
	*start_addr = tbl->it_offset << tbl->it_page_shift;

	return 0;

unset_exit:
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);
		table_group->ops->unset_window(table_group, num);
	}
	tce_iommu_free_table(container, tbl);

	return ret;
}
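
/*
 * A minimal sketch of creating a DMA window from userspace via
 * tce_iommu_create_window() above and removing it again; the window
 * parameters are illustrative:
 *
 *	struct vfio_iommu_spapr_tce_create create = {
 *		.argsz = sizeof(create),
 *		.page_shift = 16,		// 64K IOMMU pages
 *		.window_size = 1ULL << 30,	// 1GB window
 *		.levels = 1,
 *	};
 *	struct vfio_iommu_spapr_tce_remove remove = {
 *		.argsz = sizeof(remove),
 *	};
 *
 *	ioctl(container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create);
 *	// create.start_addr now holds the bus address chosen by the platform
 *	remove.start_addr = create.start_addr;
 *	ioctl(container_fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove);
 */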

static long tce_iommu_remove_window(struct tce_container *container,
		__u64 start_addr)
{
	struct iommu_table_group *table_group = NULL;
	struct iommu_table *tbl;
	struct tce_iommu_group *tcegrp;
	int num;

	num = tce_iommu_find_table(container, start_addr, &tbl);
	if (num < 0)
		return -EINVAL;

	BUG_ON(!tbl->it_size);

	/* Detach groups from IOMMUs */
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		/*
		 * SPAPR TCE IOMMU exposes the default DMA window to
		 * the guest via dma32_window_start/size of
		 * VFIO_IOMMU_SPAPR_TCE_GET_INFO. Some platforms allow
		 * the userspace to remove this window, some do not so
		 * here we check for the platform capability.
		 */
		if (!table_group->ops || !table_group->ops->unset_window)
			return -EPERM;

		table_group->ops->unset_window(table_group, num);
	}

	/* Free table */
	tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
	tce_iommu_free_table(container, tbl);
	container->tables[num] = NULL;

	return 0;
}

static long tce_iommu_create_default_window(struct tce_container *container)
{
	long ret;
	__u64 start_addr = 0;
	struct tce_iommu_group *tcegrp;
	struct iommu_table_group *table_group;

	if (!container->def_window_pending)
		return 0;

	if (!tce_groups_attached(container))
		return -ENODEV;

	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -ENODEV;

	ret = tce_iommu_create_window(container, IOMMU_PAGE_SHIFT_4K,
			table_group->tce32_size, 1, &start_addr);
	WARN_ON_ONCE(!ret && start_addr);

	if (!ret)
		container->def_window_pending = false;

	return ret;
}

static long vfio_spapr_ioctl_eeh_pe_op(struct iommu_group *group,
		unsigned long arg)
{
	struct eeh_pe *pe;
	struct vfio_eeh_pe_op op;
	unsigned long minsz;

	pe = eeh_iommu_group_to_pe(group);
	if (!pe)
		return -ENODEV;

	minsz = offsetofend(struct vfio_eeh_pe_op, op);
	if (copy_from_user(&op, (void __user *)arg, minsz))
		return -EFAULT;
	if (op.argsz < minsz || op.flags)
		return -EINVAL;

	switch (op.op) {
	case VFIO_EEH_PE_DISABLE:
		return eeh_pe_set_option(pe, EEH_OPT_DISABLE);
	case VFIO_EEH_PE_ENABLE:
		return eeh_pe_set_option(pe, EEH_OPT_ENABLE);
	case VFIO_EEH_PE_UNFREEZE_IO:
		return eeh_pe_set_option(pe, EEH_OPT_THAW_MMIO);
	case VFIO_EEH_PE_UNFREEZE_DMA:
		return eeh_pe_set_option(pe, EEH_OPT_THAW_DMA);
	case VFIO_EEH_PE_GET_STATE:
		return eeh_pe_get_state(pe);
	case VFIO_EEH_PE_RESET_DEACTIVATE:
		return eeh_pe_reset(pe, EEH_RESET_DEACTIVATE, true);
	case VFIO_EEH_PE_RESET_HOT:
		return eeh_pe_reset(pe, EEH_RESET_HOT, true);
	case VFIO_EEH_PE_RESET_FUNDAMENTAL:
		return eeh_pe_reset(pe, EEH_RESET_FUNDAMENTAL, true);
	case VFIO_EEH_PE_CONFIGURE:
		return eeh_pe_configure(pe);
	case VFIO_EEH_PE_INJECT_ERR:
		minsz = offsetofend(struct vfio_eeh_pe_op, err.mask);
		if (op.argsz < minsz)
			return -EINVAL;
		if (copy_from_user(&op, (void __user *)arg, minsz))
			return -EFAULT;

		return eeh_pe_inject_err(pe, op.err.type, op.err.func,
				op.err.addr, op.err.mask);
	default:
		return -EINVAL;
	}
}
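
/*
 * A minimal sketch of querying the EEH state of a PE through the handler
 * above; the container fd is illustrative:
 *
 *	struct vfio_eeh_pe_op op = {
 *		.argsz = sizeof(op),
 *		.flags = 0,
 *		.op = VFIO_EEH_PE_GET_STATE,
 *	};
 *
 *	int state = ioctl(container_fd, VFIO_EEH_PE_OP, &op);
 */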

static long tce_iommu_ioctl(void *iommu_data,
		unsigned int cmd, unsigned long arg)
{
	struct tce_container *container = iommu_data;
	unsigned long minsz, ddwsz;
	long ret;

	switch (cmd) {
	case VFIO_CHECK_EXTENSION:
		switch (arg) {
		case VFIO_SPAPR_TCE_IOMMU:
		case VFIO_SPAPR_TCE_v2_IOMMU:
			return 1;
		case VFIO_EEH:
			return eeh_enabled();
		default:
			return 0;
		}
	}

	/*
	 * Sanity check to prevent one userspace from manipulating
	 * another userspace mm.
	 */
	BUG_ON(!container);
	if (container->mm && container->mm != current->mm)
		return -EPERM;

	switch (cmd) {
	case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
		struct vfio_iommu_spapr_tce_info info;
		struct tce_iommu_group *tcegrp;
		struct iommu_table_group *table_group;

		if (!tce_groups_attached(container))
			return -ENXIO;

		tcegrp = list_first_entry(&container->group_list,
				struct tce_iommu_group, next);
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		if (!table_group)
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_info,
				dma32_window_size);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.dma32_window_start = table_group->tce32_start;
		info.dma32_window_size = table_group->tce32_size;
		info.flags = 0;
		memset(&info.ddw, 0, sizeof(info.ddw));

		if (table_group->max_dynamic_windows_supported &&
				container->v2) {
			info.flags |= VFIO_IOMMU_SPAPR_INFO_DDW;
			info.ddw.pgsizes = table_group->pgsizes;
			info.ddw.max_dynamic_windows_supported =
				table_group->max_dynamic_windows_supported;
			info.ddw.levels = table_group->max_levels;
		}

		ddwsz = offsetofend(struct vfio_iommu_spapr_tce_info, ddw);

		if (info.argsz >= ddwsz)
			minsz = ddwsz;

		if (copy_to_user((void __user *)arg, &info, minsz))
			return -EFAULT;

		return 0;
	}
	case VFIO_IOMMU_MAP_DMA: {
		struct vfio_iommu_type1_dma_map param;
		struct iommu_table *tbl = NULL;
		long num;
		enum dma_data_direction direction;

		if (!container->enabled)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ |
				VFIO_DMA_MAP_FLAG_WRITE))
			return -EINVAL;

		ret = tce_iommu_create_default_window(container);
		if (ret)
			return ret;

		num = tce_iommu_find_table(container, param.iova, &tbl);
		if (num < 0)
			return -ENXIO;

		if ((param.size & ~IOMMU_PAGE_MASK(tbl)) ||
				(param.vaddr & ~IOMMU_PAGE_MASK(tbl)))
			return -EINVAL;

		/* iova is checked by the IOMMU API */
		if (param.flags & VFIO_DMA_MAP_FLAG_READ) {
			if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
				direction = DMA_BIDIRECTIONAL;
			else
				direction = DMA_TO_DEVICE;
		} else {
			if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
				direction = DMA_FROM_DEVICE;
			else
				return -EINVAL;
		}

		ret = iommu_tce_put_param_check(tbl, param.iova, param.vaddr);
		if (ret)
			return ret;

		if (container->v2)
			ret = tce_iommu_build_v2(container, tbl,
					param.iova >> tbl->it_page_shift,
					param.vaddr,
					param.size >> tbl->it_page_shift,
					direction);
		else
			ret = tce_iommu_build(container, tbl,
					param.iova >> tbl->it_page_shift,
					param.vaddr,
					param.size >> tbl->it_page_shift,
					direction);

		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_UNMAP_DMA: {
		struct vfio_iommu_type1_dma_unmap param;
		struct iommu_table *tbl = NULL;
		long num;

		if (!container->enabled)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_type1_dma_unmap,
				size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		ret = tce_iommu_create_default_window(container);
		if (ret)
			return ret;

		num = tce_iommu_find_table(container, param.iova, &tbl);
		if (num < 0)
			return -ENXIO;

		if (param.size & ~IOMMU_PAGE_MASK(tbl))
			return -EINVAL;

		ret = iommu_tce_clear_param_check(tbl, param.iova, 0,
				param.size >> tbl->it_page_shift);
		if (ret)
			return ret;

		ret = tce_iommu_clear(container, tbl,
				param.iova >> tbl->it_page_shift,
				param.size >> tbl->it_page_shift);
		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_SPAPR_REGISTER_MEMORY: {
		struct vfio_iommu_spapr_register_memory param;

		if (!container->v2)
			break;

		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
				size);

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		mutex_lock(&container->lock);
		ret = tce_iommu_register_pages(container, param.vaddr,
				param.size);
		mutex_unlock(&container->lock);

		return ret;
	}
	case VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: {
		struct vfio_iommu_spapr_register_memory param;

		if (!container->v2)
			break;

		if (!container->mm)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
				size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		mutex_lock(&container->lock);
		ret = tce_iommu_unregister_pages(container, param.vaddr,
				param.size);
		mutex_unlock(&container->lock);

		return ret;
	}
	case VFIO_IOMMU_ENABLE:
		if (container->v2)
			break;

		mutex_lock(&container->lock);
		ret = tce_iommu_enable(container);
		mutex_unlock(&container->lock);
		return ret;

	case VFIO_IOMMU_DISABLE:
		if (container->v2)
			break;

		mutex_lock(&container->lock);
		tce_iommu_disable(container);
		mutex_unlock(&container->lock);
		return 0;

	case VFIO_EEH_PE_OP: {
		struct tce_iommu_group *tcegrp;

		ret = 0;
		list_for_each_entry(tcegrp, &container->group_list, next) {
			ret = vfio_spapr_ioctl_eeh_pe_op(tcegrp->grp, arg);
			if (ret)
				return ret;
		}
		return ret;
	}

	case VFIO_IOMMU_SPAPR_TCE_CREATE: {
		struct vfio_iommu_spapr_tce_create create;

		if (!container->v2)
			break;

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (!tce_groups_attached(container))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_create,
				start_addr);

		if (copy_from_user(&create, (void __user *)arg, minsz))
			return -EFAULT;

		if (create.argsz < minsz)
			return -EINVAL;

		if (create.flags)
			return -EINVAL;

		mutex_lock(&container->lock);

		ret = tce_iommu_create_default_window(container);
		if (!ret)
			ret = tce_iommu_create_window(container,
					create.page_shift,
					create.window_size, create.levels,
					&create.start_addr);

		mutex_unlock(&container->lock);

		if (!ret && copy_to_user((void __user *)arg, &create, minsz))
			ret = -EFAULT;

		return ret;
	}
	case VFIO_IOMMU_SPAPR_TCE_REMOVE: {
		struct vfio_iommu_spapr_tce_remove remove;

		if (!container->v2)
			break;

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (!tce_groups_attached(container))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_remove,
				start_addr);

		if (copy_from_user(&remove, (void __user *)arg, minsz))
			return -EFAULT;

		if (remove.argsz < minsz)
			return -EINVAL;

		if (remove.flags)
			return -EINVAL;

		if (container->def_window_pending && !remove.start_addr) {
			container->def_window_pending = false;
			return 0;
		}

		mutex_lock(&container->lock);

		ret = tce_iommu_remove_window(container, remove.start_addr);

		mutex_unlock(&container->lock);

		return ret;
	}
	}

	return -ENOTTY;
}

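/*
 * A minimal sketch of a complete userspace map/unmap cycle through the ioctl
 * handler above; paths, fds and mapping parameters are illustrative, and a
 * v2 container would also need VFIO_IOMMU_SPAPR_REGISTER_MEMORY first:
 *
 *	int container_fd = open("/dev/vfio/vfio", O_RDWR);
 *	int group_fd = open("/dev/vfio/26", O_RDWR);
 *
 *	ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container_fd);
 *	ioctl(container_fd, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU);
 *	ioctl(container_fd, VFIO_IOMMU_ENABLE);
 *
 *	struct vfio_iommu_spapr_tce_info info = { .argsz = sizeof(info) };
 *	ioctl(container_fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
 *
 *	struct vfio_iommu_type1_dma_map map = {
 *		.argsz = sizeof(map),
 *		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
 *		.vaddr = (__u64)(uintptr_t)buf,
 *		.iova = info.dma32_window_start,
 *		.size = bufsz,
 *	};
 *	ioctl(container_fd, VFIO_IOMMU_MAP_DMA, &map);
 *
 *	struct vfio_iommu_type1_dma_unmap unmap = {
 *		.argsz = sizeof(unmap),
 *		.iova = map.iova,
 *		.size = map.size,
 *	};
 *	ioctl(container_fd, VFIO_IOMMU_UNMAP_DMA, &unmap);
 */
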
static void tce_iommu_release_ownership(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	long i;

	if (!table_group->ops->unset_window) {
		WARN_ON_ONCE(1);
		return;
	}

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		if (container->tables[i])
			table_group->ops->unset_window(table_group, i);
}

static long tce_iommu_take_ownership(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	long i, ret = 0;

	/* Set all windows to the new group */
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		ret = table_group->ops->set_window(table_group, i, tbl);
		if (ret)
			goto release_exit;
	}

	return 0;

release_exit:
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		table_group->ops->unset_window(table_group, i);

	return ret;
}

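/*
 * Called when a group is added to the container: validates that the new
 * group uses the same iommu_table_group_ops as any group already attached,
 * then programs the container's existing windows into it.
 */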
static int tce_iommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group, enum vfio_group_type type)
{
	int ret = 0;
	struct tce_container *container = iommu_data;
	struct iommu_table_group *table_group;
	struct tce_iommu_group *tcegrp = NULL;

	if (type == VFIO_EMULATED_IOMMU)
		return -EINVAL;

	mutex_lock(&container->lock);

	/* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n",
			iommu_group_id(iommu_group), iommu_group); */
	table_group = iommu_group_get_iommudata(iommu_group);
	if (!table_group) {
		ret = -ENODEV;
		goto unlock_exit;
	}

	/* v2 requires full support of dynamic DMA windows */
	if (container->v2 && table_group->max_dynamic_windows_supported == 0) {
		ret = -EINVAL;
		goto unlock_exit;
	}

	/* v1 reuses TCE tables and does not share them among PEs */
	if (!container->v2 && tce_groups_attached(container)) {
		ret = -EBUSY;
		goto unlock_exit;
	}

	/*
	 * Check if new group has the same iommu_table_group_ops
	 * (i.e. compatible)
	 */
	list_for_each_entry(tcegrp, &container->group_list, next) {
		struct iommu_table_group *table_group_tmp;

		if (tcegrp->grp == iommu_group) {
			pr_warn("tce_vfio: Group %d is already attached\n",
					iommu_group_id(iommu_group));
			ret = -EBUSY;
			goto unlock_exit;
		}
		table_group_tmp = iommu_group_get_iommudata(tcegrp->grp);
		if (table_group_tmp->ops->create_table !=
				table_group->ops->create_table) {
			pr_warn("tce_vfio: Group %d is incompatible with group %d\n",
					iommu_group_id(iommu_group),
					iommu_group_id(tcegrp->grp));
			ret = -EPERM;
			goto unlock_exit;
		}
	}

	tcegrp = kzalloc(sizeof(*tcegrp), GFP_KERNEL);
	if (!tcegrp) {
		ret = -ENOMEM;
		goto unlock_exit;
	}

	ret = tce_iommu_take_ownership(container, table_group);
	if (!tce_groups_attached(container) && !container->tables[0])
		container->def_window_pending = true;

	if (!ret) {
		tcegrp->grp = iommu_group;
		list_add(&tcegrp->next, &container->group_list);
	}

	if (ret && tcegrp)
		kfree(tcegrp);

unlock_exit:
	mutex_unlock(&container->lock);

	return ret;
}

static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	struct tce_container *container = iommu_data;
	struct iommu_table_group *table_group;
	bool found = false;
	struct tce_iommu_group *tcegrp;

	mutex_lock(&container->lock);

	list_for_each_entry(tcegrp, &container->group_list, next) {
		if (tcegrp->grp == iommu_group) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_warn("tce_vfio: detaching unattached group #%u\n",
				iommu_group_id(iommu_group));
		goto unlock_exit;
	}

	list_del(&tcegrp->next);
	kfree(tcegrp);

	table_group = iommu_group_get_iommudata(iommu_group);
	BUG_ON(!table_group);

	tce_iommu_release_ownership(container, table_group);

unlock_exit:
	mutex_unlock(&container->lock);
}

static const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
	.name		= "iommu-vfio-powerpc",
	.owner		= THIS_MODULE,
	.open		= tce_iommu_open,
	.release	= tce_iommu_release,
	.ioctl		= tce_iommu_ioctl,
	.attach_group	= tce_iommu_attach_group,
	.detach_group	= tce_iommu_detach_group,
};

static int __init tce_iommu_init(void)
{
	return vfio_register_iommu_driver(&tce_iommu_driver_ops);
}

static void __exit tce_iommu_cleanup(void)
{
	vfio_unregister_iommu_driver(&tce_iommu_driver_ops);
}

module_init(tce_iommu_init);
module_exit(tce_iommu_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);