1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright (C) 2012 Red Hat, Inc. All rights reserved. |
4 | * |
5 | * VFIO container (/dev/vfio/vfio) |
6 | */ |
7 | #include <linux/file.h> |
8 | #include <linux/slab.h> |
9 | #include <linux/fs.h> |
10 | #include <linux/capability.h> |
11 | #include <linux/iommu.h> |
12 | #include <linux/miscdevice.h> |
13 | #include <linux/vfio.h> |
14 | #include <uapi/linux/vfio.h> |
15 | |
16 | #include "vfio.h" |
17 | |
18 | struct vfio_container { |
19 | struct kref kref; |
20 | struct list_head group_list; |
21 | struct rw_semaphore group_lock; |
22 | struct vfio_iommu_driver *iommu_driver; |
23 | void *iommu_data; |
24 | bool noiommu; |
25 | }; |
26 | |
27 | static struct vfio { |
28 | struct list_head iommu_drivers_list; |
29 | struct mutex iommu_drivers_lock; |
30 | } vfio; |
31 | |
32 | static void *vfio_noiommu_open(unsigned long arg) |
33 | { |
34 | if (arg != VFIO_NOIOMMU_IOMMU) |
35 | return ERR_PTR(error: -EINVAL); |
36 | if (!capable(CAP_SYS_RAWIO)) |
37 | return ERR_PTR(error: -EPERM); |
38 | |
39 | return NULL; |
40 | } |
41 | |
/*
 * .release callback of the no-IOMMU backend.  vfio_noiommu_open() hands
 * out no state, so there is nothing to tear down here.
 */
static void vfio_noiommu_release(void *iommu_data)
{
	/* intentionally empty */
}
45 | |
46 | static long vfio_noiommu_ioctl(void *iommu_data, |
47 | unsigned int cmd, unsigned long arg) |
48 | { |
49 | if (cmd == VFIO_CHECK_EXTENSION) |
50 | return vfio_noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0; |
51 | |
52 | return -ENOTTY; |
53 | } |
54 | |
55 | static int vfio_noiommu_attach_group(void *iommu_data, |
56 | struct iommu_group *iommu_group, enum vfio_group_type type) |
57 | { |
58 | return 0; |
59 | } |
60 | |
/*
 * .detach_group callback of the no-IOMMU backend.  Mirrors
 * vfio_noiommu_attach_group(): nothing was set up, nothing to undo.
 */
static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
	/* intentionally empty */
}
65 | |
66 | static const struct vfio_iommu_driver_ops vfio_noiommu_ops = { |
67 | .name = "vfio-noiommu" , |
68 | .owner = THIS_MODULE, |
69 | .open = vfio_noiommu_open, |
70 | .release = vfio_noiommu_release, |
71 | .ioctl = vfio_noiommu_ioctl, |
72 | .attach_group = vfio_noiommu_attach_group, |
73 | .detach_group = vfio_noiommu_detach_group, |
74 | }; |
75 | |
76 | /* |
77 | * Only noiommu containers can use vfio-noiommu and noiommu containers can only |
78 | * use vfio-noiommu. |
79 | */ |
80 | static bool vfio_iommu_driver_allowed(struct vfio_container *container, |
81 | const struct vfio_iommu_driver *driver) |
82 | { |
83 | if (!IS_ENABLED(CONFIG_VFIO_NOIOMMU)) |
84 | return true; |
85 | return container->noiommu == (driver->ops == &vfio_noiommu_ops); |
86 | } |
87 | |
88 | /* |
89 | * IOMMU driver registration |
90 | */ |
91 | int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops) |
92 | { |
93 | struct vfio_iommu_driver *driver, *tmp; |
94 | |
95 | if (WARN_ON(!ops->register_device != !ops->unregister_device)) |
96 | return -EINVAL; |
97 | |
98 | driver = kzalloc(size: sizeof(*driver), GFP_KERNEL); |
99 | if (!driver) |
100 | return -ENOMEM; |
101 | |
102 | driver->ops = ops; |
103 | |
104 | mutex_lock(&vfio.iommu_drivers_lock); |
105 | |
106 | /* Check for duplicates */ |
107 | list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) { |
108 | if (tmp->ops == ops) { |
109 | mutex_unlock(lock: &vfio.iommu_drivers_lock); |
110 | kfree(objp: driver); |
111 | return -EINVAL; |
112 | } |
113 | } |
114 | |
115 | list_add(new: &driver->vfio_next, head: &vfio.iommu_drivers_list); |
116 | |
117 | mutex_unlock(lock: &vfio.iommu_drivers_lock); |
118 | |
119 | return 0; |
120 | } |
121 | EXPORT_SYMBOL_GPL(vfio_register_iommu_driver); |
122 | |
123 | void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops) |
124 | { |
125 | struct vfio_iommu_driver *driver; |
126 | |
127 | mutex_lock(&vfio.iommu_drivers_lock); |
128 | list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) { |
129 | if (driver->ops == ops) { |
130 | list_del(entry: &driver->vfio_next); |
131 | mutex_unlock(lock: &vfio.iommu_drivers_lock); |
132 | kfree(objp: driver); |
133 | return; |
134 | } |
135 | } |
136 | mutex_unlock(lock: &vfio.iommu_drivers_lock); |
137 | } |
138 | EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver); |
139 | |
140 | /* |
141 | * Container objects - containers are created when /dev/vfio/vfio is |
142 | * opened, but their lifecycle extends until the last user is done, so |
143 | * it's freed via kref. Must support container/group/device being |
144 | * closed in any order. |
145 | */ |
146 | static void vfio_container_release(struct kref *kref) |
147 | { |
148 | struct vfio_container *container; |
149 | container = container_of(kref, struct vfio_container, kref); |
150 | |
151 | kfree(objp: container); |
152 | } |
153 | |
154 | static void vfio_container_get(struct vfio_container *container) |
155 | { |
156 | kref_get(kref: &container->kref); |
157 | } |
158 | |
159 | static void vfio_container_put(struct vfio_container *container) |
160 | { |
161 | kref_put(kref: &container->kref, release: vfio_container_release); |
162 | } |
163 | |
164 | void vfio_device_container_register(struct vfio_device *device) |
165 | { |
166 | struct vfio_iommu_driver *iommu_driver = |
167 | device->group->container->iommu_driver; |
168 | |
169 | if (iommu_driver && iommu_driver->ops->register_device) |
170 | iommu_driver->ops->register_device( |
171 | device->group->container->iommu_data, device); |
172 | } |
173 | |
174 | void vfio_device_container_unregister(struct vfio_device *device) |
175 | { |
176 | struct vfio_iommu_driver *iommu_driver = |
177 | device->group->container->iommu_driver; |
178 | |
179 | if (iommu_driver && iommu_driver->ops->unregister_device) |
180 | iommu_driver->ops->unregister_device( |
181 | device->group->container->iommu_data, device); |
182 | } |
183 | |
184 | static long |
185 | vfio_container_ioctl_check_extension(struct vfio_container *container, |
186 | unsigned long arg) |
187 | { |
188 | struct vfio_iommu_driver *driver; |
189 | long ret = 0; |
190 | |
191 | down_read(sem: &container->group_lock); |
192 | |
193 | driver = container->iommu_driver; |
194 | |
195 | switch (arg) { |
196 | /* No base extensions yet */ |
197 | default: |
198 | /* |
199 | * If no driver is set, poll all registered drivers for |
200 | * extensions and return the first positive result. If |
201 | * a driver is already set, further queries will be passed |
202 | * only to that driver. |
203 | */ |
204 | if (!driver) { |
205 | mutex_lock(&vfio.iommu_drivers_lock); |
206 | list_for_each_entry(driver, &vfio.iommu_drivers_list, |
207 | vfio_next) { |
208 | |
209 | if (!list_empty(head: &container->group_list) && |
210 | !vfio_iommu_driver_allowed(container, |
211 | driver)) |
212 | continue; |
213 | if (!try_module_get(module: driver->ops->owner)) |
214 | continue; |
215 | |
216 | ret = driver->ops->ioctl(NULL, |
217 | VFIO_CHECK_EXTENSION, |
218 | arg); |
219 | module_put(module: driver->ops->owner); |
220 | if (ret > 0) |
221 | break; |
222 | } |
223 | mutex_unlock(lock: &vfio.iommu_drivers_lock); |
224 | } else |
225 | ret = driver->ops->ioctl(container->iommu_data, |
226 | VFIO_CHECK_EXTENSION, arg); |
227 | } |
228 | |
229 | up_read(sem: &container->group_lock); |
230 | |
231 | return ret; |
232 | } |
233 | |
234 | /* hold write lock on container->group_lock */ |
235 | static int __vfio_container_attach_groups(struct vfio_container *container, |
236 | struct vfio_iommu_driver *driver, |
237 | void *data) |
238 | { |
239 | struct vfio_group *group; |
240 | int ret = -ENODEV; |
241 | |
242 | list_for_each_entry(group, &container->group_list, container_next) { |
243 | ret = driver->ops->attach_group(data, group->iommu_group, |
244 | group->type); |
245 | if (ret) |
246 | goto unwind; |
247 | } |
248 | |
249 | return ret; |
250 | |
251 | unwind: |
252 | list_for_each_entry_continue_reverse(group, &container->group_list, |
253 | container_next) { |
254 | driver->ops->detach_group(data, group->iommu_group); |
255 | } |
256 | |
257 | return ret; |
258 | } |
259 | |
260 | static long vfio_ioctl_set_iommu(struct vfio_container *container, |
261 | unsigned long arg) |
262 | { |
263 | struct vfio_iommu_driver *driver; |
264 | long ret = -ENODEV; |
265 | |
266 | down_write(sem: &container->group_lock); |
267 | |
268 | /* |
269 | * The container is designed to be an unprivileged interface while |
270 | * the group can be assigned to specific users. Therefore, only by |
271 | * adding a group to a container does the user get the privilege of |
272 | * enabling the iommu, which may allocate finite resources. There |
273 | * is no unset_iommu, but by removing all the groups from a container, |
274 | * the container is deprivileged and returns to an unset state. |
275 | */ |
276 | if (list_empty(head: &container->group_list) || container->iommu_driver) { |
277 | up_write(sem: &container->group_lock); |
278 | return -EINVAL; |
279 | } |
280 | |
281 | mutex_lock(&vfio.iommu_drivers_lock); |
282 | list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) { |
283 | void *data; |
284 | |
285 | if (!vfio_iommu_driver_allowed(container, driver)) |
286 | continue; |
287 | if (!try_module_get(module: driver->ops->owner)) |
288 | continue; |
289 | |
290 | /* |
291 | * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION, |
292 | * so test which iommu driver reported support for this |
293 | * extension and call open on them. We also pass them the |
294 | * magic, allowing a single driver to support multiple |
295 | * interfaces if they'd like. |
296 | */ |
297 | if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) { |
298 | module_put(module: driver->ops->owner); |
299 | continue; |
300 | } |
301 | |
302 | data = driver->ops->open(arg); |
303 | if (IS_ERR(ptr: data)) { |
304 | ret = PTR_ERR(ptr: data); |
305 | module_put(module: driver->ops->owner); |
306 | continue; |
307 | } |
308 | |
309 | ret = __vfio_container_attach_groups(container, driver, data); |
310 | if (ret) { |
311 | driver->ops->release(data); |
312 | module_put(module: driver->ops->owner); |
313 | continue; |
314 | } |
315 | |
316 | container->iommu_driver = driver; |
317 | container->iommu_data = data; |
318 | break; |
319 | } |
320 | |
321 | mutex_unlock(lock: &vfio.iommu_drivers_lock); |
322 | up_write(sem: &container->group_lock); |
323 | |
324 | return ret; |
325 | } |
326 | |
327 | static long vfio_fops_unl_ioctl(struct file *filep, |
328 | unsigned int cmd, unsigned long arg) |
329 | { |
330 | struct vfio_container *container = filep->private_data; |
331 | struct vfio_iommu_driver *driver; |
332 | void *data; |
333 | long ret = -EINVAL; |
334 | |
335 | if (!container) |
336 | return ret; |
337 | |
338 | switch (cmd) { |
339 | case VFIO_GET_API_VERSION: |
340 | ret = VFIO_API_VERSION; |
341 | break; |
342 | case VFIO_CHECK_EXTENSION: |
343 | ret = vfio_container_ioctl_check_extension(container, arg); |
344 | break; |
345 | case VFIO_SET_IOMMU: |
346 | ret = vfio_ioctl_set_iommu(container, arg); |
347 | break; |
348 | default: |
349 | driver = container->iommu_driver; |
350 | data = container->iommu_data; |
351 | |
352 | if (driver) /* passthrough all unrecognized ioctls */ |
353 | ret = driver->ops->ioctl(data, cmd, arg); |
354 | } |
355 | |
356 | return ret; |
357 | } |
358 | |
359 | static int vfio_fops_open(struct inode *inode, struct file *filep) |
360 | { |
361 | struct vfio_container *container; |
362 | |
363 | container = kzalloc(size: sizeof(*container), GFP_KERNEL_ACCOUNT); |
364 | if (!container) |
365 | return -ENOMEM; |
366 | |
367 | INIT_LIST_HEAD(list: &container->group_list); |
368 | init_rwsem(&container->group_lock); |
369 | kref_init(kref: &container->kref); |
370 | |
371 | filep->private_data = container; |
372 | |
373 | return 0; |
374 | } |
375 | |
376 | static int vfio_fops_release(struct inode *inode, struct file *filep) |
377 | { |
378 | struct vfio_container *container = filep->private_data; |
379 | |
380 | filep->private_data = NULL; |
381 | |
382 | vfio_container_put(container); |
383 | |
384 | return 0; |
385 | } |
386 | |
387 | static const struct file_operations vfio_fops = { |
388 | .owner = THIS_MODULE, |
389 | .open = vfio_fops_open, |
390 | .release = vfio_fops_release, |
391 | .unlocked_ioctl = vfio_fops_unl_ioctl, |
392 | .compat_ioctl = compat_ptr_ioctl, |
393 | }; |
394 | |
395 | struct vfio_container *vfio_container_from_file(struct file *file) |
396 | { |
397 | struct vfio_container *container; |
398 | |
399 | /* Sanity check, is this really our fd? */ |
400 | if (file->f_op != &vfio_fops) |
401 | return NULL; |
402 | |
403 | container = file->private_data; |
404 | WARN_ON(!container); /* fget ensures we don't race vfio_release */ |
405 | return container; |
406 | } |
407 | |
408 | static struct miscdevice vfio_dev = { |
409 | .minor = VFIO_MINOR, |
410 | .name = "vfio" , |
411 | .fops = &vfio_fops, |
412 | .nodename = "vfio/vfio" , |
413 | .mode = S_IRUGO | S_IWUGO, |
414 | }; |
415 | |
416 | int vfio_container_attach_group(struct vfio_container *container, |
417 | struct vfio_group *group) |
418 | { |
419 | struct vfio_iommu_driver *driver; |
420 | int ret = 0; |
421 | |
422 | lockdep_assert_held(&group->group_lock); |
423 | |
424 | if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) |
425 | return -EPERM; |
426 | |
427 | down_write(sem: &container->group_lock); |
428 | |
429 | /* Real groups and fake groups cannot mix */ |
430 | if (!list_empty(head: &container->group_list) && |
431 | container->noiommu != (group->type == VFIO_NO_IOMMU)) { |
432 | ret = -EPERM; |
433 | goto out_unlock_container; |
434 | } |
435 | |
436 | if (group->type == VFIO_IOMMU) { |
437 | ret = iommu_group_claim_dma_owner(group: group->iommu_group, owner: group); |
438 | if (ret) |
439 | goto out_unlock_container; |
440 | } |
441 | |
442 | driver = container->iommu_driver; |
443 | if (driver) { |
444 | ret = driver->ops->attach_group(container->iommu_data, |
445 | group->iommu_group, |
446 | group->type); |
447 | if (ret) { |
448 | if (group->type == VFIO_IOMMU) |
449 | iommu_group_release_dma_owner( |
450 | group: group->iommu_group); |
451 | goto out_unlock_container; |
452 | } |
453 | } |
454 | |
455 | group->container = container; |
456 | group->container_users = 1; |
457 | container->noiommu = (group->type == VFIO_NO_IOMMU); |
458 | list_add(new: &group->container_next, head: &container->group_list); |
459 | |
460 | /* Get a reference on the container and mark a user within the group */ |
461 | vfio_container_get(container); |
462 | |
463 | out_unlock_container: |
464 | up_write(sem: &container->group_lock); |
465 | return ret; |
466 | } |
467 | |
468 | void vfio_group_detach_container(struct vfio_group *group) |
469 | { |
470 | struct vfio_container *container = group->container; |
471 | struct vfio_iommu_driver *driver; |
472 | |
473 | lockdep_assert_held(&group->group_lock); |
474 | WARN_ON(group->container_users != 1); |
475 | |
476 | down_write(sem: &container->group_lock); |
477 | |
478 | driver = container->iommu_driver; |
479 | if (driver) |
480 | driver->ops->detach_group(container->iommu_data, |
481 | group->iommu_group); |
482 | |
483 | if (group->type == VFIO_IOMMU) |
484 | iommu_group_release_dma_owner(group: group->iommu_group); |
485 | |
486 | group->container = NULL; |
487 | group->container_users = 0; |
488 | list_del(entry: &group->container_next); |
489 | |
490 | /* Detaching the last group deprivileges a container, remove iommu */ |
491 | if (driver && list_empty(head: &container->group_list)) { |
492 | driver->ops->release(container->iommu_data); |
493 | module_put(module: driver->ops->owner); |
494 | container->iommu_driver = NULL; |
495 | container->iommu_data = NULL; |
496 | } |
497 | |
498 | up_write(sem: &container->group_lock); |
499 | |
500 | vfio_container_put(container); |
501 | } |
502 | |
503 | int vfio_group_use_container(struct vfio_group *group) |
504 | { |
505 | lockdep_assert_held(&group->group_lock); |
506 | |
507 | /* |
508 | * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but |
509 | * VFIO_SET_IOMMU hasn't been done yet. |
510 | */ |
511 | if (!group->container->iommu_driver) |
512 | return -EINVAL; |
513 | |
514 | if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) |
515 | return -EPERM; |
516 | |
517 | get_file(f: group->opened_file); |
518 | group->container_users++; |
519 | return 0; |
520 | } |
521 | |
522 | void vfio_group_unuse_container(struct vfio_group *group) |
523 | { |
524 | lockdep_assert_held(&group->group_lock); |
525 | |
526 | WARN_ON(group->container_users <= 1); |
527 | group->container_users--; |
528 | fput(group->opened_file); |
529 | } |
530 | |
531 | int vfio_device_container_pin_pages(struct vfio_device *device, |
532 | dma_addr_t iova, int npage, |
533 | int prot, struct page **pages) |
534 | { |
535 | struct vfio_container *container = device->group->container; |
536 | struct iommu_group *iommu_group = device->group->iommu_group; |
537 | struct vfio_iommu_driver *driver = container->iommu_driver; |
538 | |
539 | if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) |
540 | return -E2BIG; |
541 | |
542 | if (unlikely(!driver || !driver->ops->pin_pages)) |
543 | return -ENOTTY; |
544 | return driver->ops->pin_pages(container->iommu_data, iommu_group, iova, |
545 | npage, prot, pages); |
546 | } |
547 | |
548 | void vfio_device_container_unpin_pages(struct vfio_device *device, |
549 | dma_addr_t iova, int npage) |
550 | { |
551 | struct vfio_container *container = device->group->container; |
552 | |
553 | if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES)) |
554 | return; |
555 | |
556 | container->iommu_driver->ops->unpin_pages(container->iommu_data, iova, |
557 | npage); |
558 | } |
559 | |
560 | int vfio_device_container_dma_rw(struct vfio_device *device, |
561 | dma_addr_t iova, void *data, |
562 | size_t len, bool write) |
563 | { |
564 | struct vfio_container *container = device->group->container; |
565 | struct vfio_iommu_driver *driver = container->iommu_driver; |
566 | |
567 | if (unlikely(!driver || !driver->ops->dma_rw)) |
568 | return -ENOTTY; |
569 | return driver->ops->dma_rw(container->iommu_data, iova, data, len, |
570 | write); |
571 | } |
572 | |
573 | int __init vfio_container_init(void) |
574 | { |
575 | int ret; |
576 | |
577 | mutex_init(&vfio.iommu_drivers_lock); |
578 | INIT_LIST_HEAD(list: &vfio.iommu_drivers_list); |
579 | |
580 | ret = misc_register(misc: &vfio_dev); |
581 | if (ret) { |
582 | pr_err("vfio: misc device register failed\n" ); |
583 | return ret; |
584 | } |
585 | |
586 | if (IS_ENABLED(CONFIG_VFIO_NOIOMMU)) { |
587 | ret = vfio_register_iommu_driver(&vfio_noiommu_ops); |
588 | if (ret) |
589 | goto err_misc; |
590 | } |
591 | return 0; |
592 | |
593 | err_misc: |
594 | misc_deregister(misc: &vfio_dev); |
595 | return ret; |
596 | } |
597 | |
598 | void vfio_container_cleanup(void) |
599 | { |
600 | if (IS_ENABLED(CONFIG_VFIO_NOIOMMU)) |
601 | vfio_unregister_iommu_driver(&vfio_noiommu_ops); |
602 | misc_deregister(misc: &vfio_dev); |
603 | mutex_destroy(lock: &vfio.iommu_drivers_lock); |
604 | } |
605 | |
606 | MODULE_ALIAS_MISCDEV(VFIO_MINOR); |
607 | MODULE_ALIAS("devname:vfio/vfio" ); |
608 | |