1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
4 *
5 * VFIO container (/dev/vfio/vfio)
6 */
7#include <linux/file.h>
8#include <linux/slab.h>
9#include <linux/fs.h>
10#include <linux/capability.h>
11#include <linux/iommu.h>
12#include <linux/miscdevice.h>
13#include <linux/vfio.h>
14#include <uapi/linux/vfio.h>
15
16#include "vfio.h"
17
/*
 * State behind one open of the container device: the groups attached to
 * it and the IOMMU backend driver, if any, currently serving them.
 */
struct vfio_container {
	/* Reference count; vfio_container_release() frees the container at zero */
	struct kref kref;
	/* Groups attached to this container, linked via group->container_next */
	struct list_head group_list;
	/* Serializes changes to group_list, iommu_driver and iommu_data */
	struct rw_semaphore group_lock;
	/* Backend chosen by VFIO_SET_IOMMU; NULL while no backend is set */
	struct vfio_iommu_driver *iommu_driver;
	/* Opaque cookie returned by iommu_driver->ops->open() */
	void *iommu_data;
	/* True when the most recently attached group is of type VFIO_NO_IOMMU */
	bool noiommu;
};
26
/* File-global registry of IOMMU backend drivers available to containers */
static struct vfio {
	/* Registered struct vfio_iommu_driver entries, linked via vfio_next */
	struct list_head iommu_drivers_list;
	/* Held while walking or modifying iommu_drivers_list */
	struct mutex iommu_drivers_lock;
} vfio;
31
32static void *vfio_noiommu_open(unsigned long arg)
33{
34 if (arg != VFIO_NOIOMMU_IOMMU)
35 return ERR_PTR(error: -EINVAL);
36 if (!capable(CAP_SYS_RAWIO))
37 return ERR_PTR(error: -EPERM);
38
39 return NULL;
40}
41
/* release() callback of the no-IOMMU backend; open() allocated nothing. */
static void vfio_noiommu_release(void *iommu_data)
{
	/* Intentionally empty */
}
45
46static long vfio_noiommu_ioctl(void *iommu_data,
47 unsigned int cmd, unsigned long arg)
48{
49 if (cmd == VFIO_CHECK_EXTENSION)
50 return vfio_noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
51
52 return -ENOTTY;
53}
54
55static int vfio_noiommu_attach_group(void *iommu_data,
56 struct iommu_group *iommu_group, enum vfio_group_type type)
57{
58 return 0;
59}
60
/* detach_group() callback of the no-IOMMU backend; nothing to undo. */
static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
	/* Intentionally empty */
}
65
/*
 * Callback table of the built-in no-IOMMU backend. Its address is also
 * used by vfio_iommu_driver_allowed() to recognize this backend.
 */
static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
	.name = "vfio-noiommu",
	.owner = THIS_MODULE,
	.open = vfio_noiommu_open,
	.release = vfio_noiommu_release,
	.ioctl = vfio_noiommu_ioctl,
	.attach_group = vfio_noiommu_attach_group,
	.detach_group = vfio_noiommu_detach_group,
};
75
76/*
77 * Only noiommu containers can use vfio-noiommu and noiommu containers can only
78 * use vfio-noiommu.
79 */
80static bool vfio_iommu_driver_allowed(struct vfio_container *container,
81 const struct vfio_iommu_driver *driver)
82{
83 if (!IS_ENABLED(CONFIG_VFIO_NOIOMMU))
84 return true;
85 return container->noiommu == (driver->ops == &vfio_noiommu_ops);
86}
87
88/*
89 * IOMMU driver registration
90 */
91int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
92{
93 struct vfio_iommu_driver *driver, *tmp;
94
95 if (WARN_ON(!ops->register_device != !ops->unregister_device))
96 return -EINVAL;
97
98 driver = kzalloc(size: sizeof(*driver), GFP_KERNEL);
99 if (!driver)
100 return -ENOMEM;
101
102 driver->ops = ops;
103
104 mutex_lock(&vfio.iommu_drivers_lock);
105
106 /* Check for duplicates */
107 list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
108 if (tmp->ops == ops) {
109 mutex_unlock(lock: &vfio.iommu_drivers_lock);
110 kfree(objp: driver);
111 return -EINVAL;
112 }
113 }
114
115 list_add(new: &driver->vfio_next, head: &vfio.iommu_drivers_list);
116
117 mutex_unlock(lock: &vfio.iommu_drivers_lock);
118
119 return 0;
120}
121EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
122
123void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
124{
125 struct vfio_iommu_driver *driver;
126
127 mutex_lock(&vfio.iommu_drivers_lock);
128 list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
129 if (driver->ops == ops) {
130 list_del(entry: &driver->vfio_next);
131 mutex_unlock(lock: &vfio.iommu_drivers_lock);
132 kfree(objp: driver);
133 return;
134 }
135 }
136 mutex_unlock(lock: &vfio.iommu_drivers_lock);
137}
138EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
139
140/*
141 * Container objects - containers are created when /dev/vfio/vfio is
142 * opened, but their lifecycle extends until the last user is done, so
143 * it's freed via kref. Must support container/group/device being
144 * closed in any order.
145 */
146static void vfio_container_release(struct kref *kref)
147{
148 struct vfio_container *container;
149 container = container_of(kref, struct vfio_container, kref);
150
151 kfree(objp: container);
152}
153
154static void vfio_container_get(struct vfio_container *container)
155{
156 kref_get(kref: &container->kref);
157}
158
159static void vfio_container_put(struct vfio_container *container)
160{
161 kref_put(kref: &container->kref, release: vfio_container_release);
162}
163
164void vfio_device_container_register(struct vfio_device *device)
165{
166 struct vfio_iommu_driver *iommu_driver =
167 device->group->container->iommu_driver;
168
169 if (iommu_driver && iommu_driver->ops->register_device)
170 iommu_driver->ops->register_device(
171 device->group->container->iommu_data, device);
172}
173
174void vfio_device_container_unregister(struct vfio_device *device)
175{
176 struct vfio_iommu_driver *iommu_driver =
177 device->group->container->iommu_driver;
178
179 if (iommu_driver && iommu_driver->ops->unregister_device)
180 iommu_driver->ops->unregister_device(
181 device->group->container->iommu_data, device);
182}
183
184static long
185vfio_container_ioctl_check_extension(struct vfio_container *container,
186 unsigned long arg)
187{
188 struct vfio_iommu_driver *driver;
189 long ret = 0;
190
191 down_read(sem: &container->group_lock);
192
193 driver = container->iommu_driver;
194
195 switch (arg) {
196 /* No base extensions yet */
197 default:
198 /*
199 * If no driver is set, poll all registered drivers for
200 * extensions and return the first positive result. If
201 * a driver is already set, further queries will be passed
202 * only to that driver.
203 */
204 if (!driver) {
205 mutex_lock(&vfio.iommu_drivers_lock);
206 list_for_each_entry(driver, &vfio.iommu_drivers_list,
207 vfio_next) {
208
209 if (!list_empty(head: &container->group_list) &&
210 !vfio_iommu_driver_allowed(container,
211 driver))
212 continue;
213 if (!try_module_get(module: driver->ops->owner))
214 continue;
215
216 ret = driver->ops->ioctl(NULL,
217 VFIO_CHECK_EXTENSION,
218 arg);
219 module_put(module: driver->ops->owner);
220 if (ret > 0)
221 break;
222 }
223 mutex_unlock(lock: &vfio.iommu_drivers_lock);
224 } else
225 ret = driver->ops->ioctl(container->iommu_data,
226 VFIO_CHECK_EXTENSION, arg);
227 }
228
229 up_read(sem: &container->group_lock);
230
231 return ret;
232}
233
234/* hold write lock on container->group_lock */
235static int __vfio_container_attach_groups(struct vfio_container *container,
236 struct vfio_iommu_driver *driver,
237 void *data)
238{
239 struct vfio_group *group;
240 int ret = -ENODEV;
241
242 list_for_each_entry(group, &container->group_list, container_next) {
243 ret = driver->ops->attach_group(data, group->iommu_group,
244 group->type);
245 if (ret)
246 goto unwind;
247 }
248
249 return ret;
250
251unwind:
252 list_for_each_entry_continue_reverse(group, &container->group_list,
253 container_next) {
254 driver->ops->detach_group(data, group->iommu_group);
255 }
256
257 return ret;
258}
259
260static long vfio_ioctl_set_iommu(struct vfio_container *container,
261 unsigned long arg)
262{
263 struct vfio_iommu_driver *driver;
264 long ret = -ENODEV;
265
266 down_write(sem: &container->group_lock);
267
268 /*
269 * The container is designed to be an unprivileged interface while
270 * the group can be assigned to specific users. Therefore, only by
271 * adding a group to a container does the user get the privilege of
272 * enabling the iommu, which may allocate finite resources. There
273 * is no unset_iommu, but by removing all the groups from a container,
274 * the container is deprivileged and returns to an unset state.
275 */
276 if (list_empty(head: &container->group_list) || container->iommu_driver) {
277 up_write(sem: &container->group_lock);
278 return -EINVAL;
279 }
280
281 mutex_lock(&vfio.iommu_drivers_lock);
282 list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
283 void *data;
284
285 if (!vfio_iommu_driver_allowed(container, driver))
286 continue;
287 if (!try_module_get(module: driver->ops->owner))
288 continue;
289
290 /*
291 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
292 * so test which iommu driver reported support for this
293 * extension and call open on them. We also pass them the
294 * magic, allowing a single driver to support multiple
295 * interfaces if they'd like.
296 */
297 if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
298 module_put(module: driver->ops->owner);
299 continue;
300 }
301
302 data = driver->ops->open(arg);
303 if (IS_ERR(ptr: data)) {
304 ret = PTR_ERR(ptr: data);
305 module_put(module: driver->ops->owner);
306 continue;
307 }
308
309 ret = __vfio_container_attach_groups(container, driver, data);
310 if (ret) {
311 driver->ops->release(data);
312 module_put(module: driver->ops->owner);
313 continue;
314 }
315
316 container->iommu_driver = driver;
317 container->iommu_data = data;
318 break;
319 }
320
321 mutex_unlock(lock: &vfio.iommu_drivers_lock);
322 up_write(sem: &container->group_lock);
323
324 return ret;
325}
326
327static long vfio_fops_unl_ioctl(struct file *filep,
328 unsigned int cmd, unsigned long arg)
329{
330 struct vfio_container *container = filep->private_data;
331 struct vfio_iommu_driver *driver;
332 void *data;
333 long ret = -EINVAL;
334
335 if (!container)
336 return ret;
337
338 switch (cmd) {
339 case VFIO_GET_API_VERSION:
340 ret = VFIO_API_VERSION;
341 break;
342 case VFIO_CHECK_EXTENSION:
343 ret = vfio_container_ioctl_check_extension(container, arg);
344 break;
345 case VFIO_SET_IOMMU:
346 ret = vfio_ioctl_set_iommu(container, arg);
347 break;
348 default:
349 driver = container->iommu_driver;
350 data = container->iommu_data;
351
352 if (driver) /* passthrough all unrecognized ioctls */
353 ret = driver->ops->ioctl(data, cmd, arg);
354 }
355
356 return ret;
357}
358
359static int vfio_fops_open(struct inode *inode, struct file *filep)
360{
361 struct vfio_container *container;
362
363 container = kzalloc(size: sizeof(*container), GFP_KERNEL_ACCOUNT);
364 if (!container)
365 return -ENOMEM;
366
367 INIT_LIST_HEAD(list: &container->group_list);
368 init_rwsem(&container->group_lock);
369 kref_init(kref: &container->kref);
370
371 filep->private_data = container;
372
373 return 0;
374}
375
376static int vfio_fops_release(struct inode *inode, struct file *filep)
377{
378 struct vfio_container *container = filep->private_data;
379
380 filep->private_data = NULL;
381
382 vfio_container_put(container);
383
384 return 0;
385}
386
/*
 * File operations of the container device. The address of this table is
 * also what vfio_container_from_file() uses to recognize a container fd.
 */
static const struct file_operations vfio_fops = {
	.owner = THIS_MODULE,
	.open = vfio_fops_open,
	.release = vfio_fops_release,
	.unlocked_ioctl = vfio_fops_unl_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
};
394
395struct vfio_container *vfio_container_from_file(struct file *file)
396{
397 struct vfio_container *container;
398
399 /* Sanity check, is this really our fd? */
400 if (file->f_op != &vfio_fops)
401 return NULL;
402
403 container = file->private_data;
404 WARN_ON(!container); /* fget ensures we don't race vfio_release */
405 return container;
406}
407
/*
 * Misc device providing the container interface, with node name
 * "vfio/vfio" and read/write permission bits for user, group and other.
 */
static struct miscdevice vfio_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &vfio_fops,
	.nodename = "vfio/vfio",
	.mode = S_IRUGO | S_IWUGO,
};
415
416int vfio_container_attach_group(struct vfio_container *container,
417 struct vfio_group *group)
418{
419 struct vfio_iommu_driver *driver;
420 int ret = 0;
421
422 lockdep_assert_held(&group->group_lock);
423
424 if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
425 return -EPERM;
426
427 down_write(sem: &container->group_lock);
428
429 /* Real groups and fake groups cannot mix */
430 if (!list_empty(head: &container->group_list) &&
431 container->noiommu != (group->type == VFIO_NO_IOMMU)) {
432 ret = -EPERM;
433 goto out_unlock_container;
434 }
435
436 if (group->type == VFIO_IOMMU) {
437 ret = iommu_group_claim_dma_owner(group: group->iommu_group, owner: group);
438 if (ret)
439 goto out_unlock_container;
440 }
441
442 driver = container->iommu_driver;
443 if (driver) {
444 ret = driver->ops->attach_group(container->iommu_data,
445 group->iommu_group,
446 group->type);
447 if (ret) {
448 if (group->type == VFIO_IOMMU)
449 iommu_group_release_dma_owner(
450 group: group->iommu_group);
451 goto out_unlock_container;
452 }
453 }
454
455 group->container = container;
456 group->container_users = 1;
457 container->noiommu = (group->type == VFIO_NO_IOMMU);
458 list_add(new: &group->container_next, head: &container->group_list);
459
460 /* Get a reference on the container and mark a user within the group */
461 vfio_container_get(container);
462
463out_unlock_container:
464 up_write(sem: &container->group_lock);
465 return ret;
466}
467
468void vfio_group_detach_container(struct vfio_group *group)
469{
470 struct vfio_container *container = group->container;
471 struct vfio_iommu_driver *driver;
472
473 lockdep_assert_held(&group->group_lock);
474 WARN_ON(group->container_users != 1);
475
476 down_write(sem: &container->group_lock);
477
478 driver = container->iommu_driver;
479 if (driver)
480 driver->ops->detach_group(container->iommu_data,
481 group->iommu_group);
482
483 if (group->type == VFIO_IOMMU)
484 iommu_group_release_dma_owner(group: group->iommu_group);
485
486 group->container = NULL;
487 group->container_users = 0;
488 list_del(entry: &group->container_next);
489
490 /* Detaching the last group deprivileges a container, remove iommu */
491 if (driver && list_empty(head: &container->group_list)) {
492 driver->ops->release(container->iommu_data);
493 module_put(module: driver->ops->owner);
494 container->iommu_driver = NULL;
495 container->iommu_data = NULL;
496 }
497
498 up_write(sem: &container->group_lock);
499
500 vfio_container_put(container);
501}
502
503int vfio_group_use_container(struct vfio_group *group)
504{
505 lockdep_assert_held(&group->group_lock);
506
507 /*
508 * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but
509 * VFIO_SET_IOMMU hasn't been done yet.
510 */
511 if (!group->container->iommu_driver)
512 return -EINVAL;
513
514 if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
515 return -EPERM;
516
517 get_file(f: group->opened_file);
518 group->container_users++;
519 return 0;
520}
521
522void vfio_group_unuse_container(struct vfio_group *group)
523{
524 lockdep_assert_held(&group->group_lock);
525
526 WARN_ON(group->container_users <= 1);
527 group->container_users--;
528 fput(group->opened_file);
529}
530
531int vfio_device_container_pin_pages(struct vfio_device *device,
532 dma_addr_t iova, int npage,
533 int prot, struct page **pages)
534{
535 struct vfio_container *container = device->group->container;
536 struct iommu_group *iommu_group = device->group->iommu_group;
537 struct vfio_iommu_driver *driver = container->iommu_driver;
538
539 if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
540 return -E2BIG;
541
542 if (unlikely(!driver || !driver->ops->pin_pages))
543 return -ENOTTY;
544 return driver->ops->pin_pages(container->iommu_data, iommu_group, iova,
545 npage, prot, pages);
546}
547
548void vfio_device_container_unpin_pages(struct vfio_device *device,
549 dma_addr_t iova, int npage)
550{
551 struct vfio_container *container = device->group->container;
552
553 if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
554 return;
555
556 container->iommu_driver->ops->unpin_pages(container->iommu_data, iova,
557 npage);
558}
559
560int vfio_device_container_dma_rw(struct vfio_device *device,
561 dma_addr_t iova, void *data,
562 size_t len, bool write)
563{
564 struct vfio_container *container = device->group->container;
565 struct vfio_iommu_driver *driver = container->iommu_driver;
566
567 if (unlikely(!driver || !driver->ops->dma_rw))
568 return -ENOTTY;
569 return driver->ops->dma_rw(container->iommu_data, iova, data, len,
570 write);
571}
572
573int __init vfio_container_init(void)
574{
575 int ret;
576
577 mutex_init(&vfio.iommu_drivers_lock);
578 INIT_LIST_HEAD(list: &vfio.iommu_drivers_list);
579
580 ret = misc_register(misc: &vfio_dev);
581 if (ret) {
582 pr_err("vfio: misc device register failed\n");
583 return ret;
584 }
585
586 if (IS_ENABLED(CONFIG_VFIO_NOIOMMU)) {
587 ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
588 if (ret)
589 goto err_misc;
590 }
591 return 0;
592
593err_misc:
594 misc_deregister(misc: &vfio_dev);
595 return ret;
596}
597
598void vfio_container_cleanup(void)
599{
600 if (IS_ENABLED(CONFIG_VFIO_NOIOMMU))
601 vfio_unregister_iommu_driver(&vfio_noiommu_ops);
602 misc_deregister(misc: &vfio_dev);
603 mutex_destroy(lock: &vfio.iommu_drivers_lock);
604}
605
/* Module aliases for the container device: by misc minor and by node name */
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");
608

/* source code of linux/drivers/vfio/container.c */