1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * VFIO core |
4 | * |
5 | * Copyright (C) 2012 Red Hat, Inc. All rights reserved. |
6 | * Author: Alex Williamson <alex.williamson@redhat.com> |
7 | * |
8 | * Derived from original vfio: |
9 | * Copyright 2010 Cisco Systems, Inc. All rights reserved. |
10 | * Author: Tom Lyon, pugs@cisco.com |
11 | */ |
12 | |
13 | #include <linux/vfio.h> |
14 | #include <linux/iommufd.h> |
15 | #include <linux/anon_inodes.h> |
16 | #include "vfio.h" |
17 | |
/* File-wide state shared by all VFIO group character devices. */
static struct vfio {
	struct class *class;		/* class assigned to each group's struct device */
	struct list_head group_list;	/* all groups, linked through vfio_group::vfio_next */
	struct mutex group_lock; /* locks group_list */
	struct ida group_ida;		/* hands out the minor number of each group chardev */
	dev_t group_devt;		/* base dev_t of the chardev region reserved at init */
} vfio;
25 | |
26 | static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group, |
27 | char *buf) |
28 | { |
29 | struct vfio_device *it, *device = ERR_PTR(error: -ENODEV); |
30 | |
31 | mutex_lock(&group->device_lock); |
32 | list_for_each_entry(it, &group->device_list, group_next) { |
33 | int ret; |
34 | |
35 | if (it->ops->match) { |
36 | ret = it->ops->match(it, buf); |
37 | if (ret < 0) { |
38 | device = ERR_PTR(error: ret); |
39 | break; |
40 | } |
41 | } else { |
42 | ret = !strcmp(dev_name(dev: it->dev), buf); |
43 | } |
44 | |
45 | if (ret && vfio_device_try_get_registration(device: it)) { |
46 | device = it; |
47 | break; |
48 | } |
49 | } |
50 | mutex_unlock(lock: &group->device_lock); |
51 | |
52 | return device; |
53 | } |
54 | |
55 | /* |
56 | * VFIO Group fd, /dev/vfio/$GROUP |
57 | */ |
58 | static bool vfio_group_has_iommu(struct vfio_group *group) |
59 | { |
60 | lockdep_assert_held(&group->group_lock); |
61 | /* |
62 | * There can only be users if there is a container, and if there is a |
63 | * container there must be users. |
64 | */ |
65 | WARN_ON(!group->container != !group->container_users); |
66 | |
67 | return group->container || group->iommufd; |
68 | } |
69 | |
70 | /* |
71 | * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or |
72 | * if there was no container to unset. Since the ioctl is called on |
73 | * the group, we know that still exists, therefore the only valid |
74 | * transition here is 1->0. |
75 | */ |
76 | static int vfio_group_ioctl_unset_container(struct vfio_group *group) |
77 | { |
78 | int ret = 0; |
79 | |
80 | mutex_lock(&group->group_lock); |
81 | if (!vfio_group_has_iommu(group)) { |
82 | ret = -EINVAL; |
83 | goto out_unlock; |
84 | } |
85 | if (group->container) { |
86 | if (group->container_users != 1) { |
87 | ret = -EBUSY; |
88 | goto out_unlock; |
89 | } |
90 | vfio_group_detach_container(group); |
91 | } |
92 | if (group->iommufd) { |
93 | iommufd_ctx_put(ictx: group->iommufd); |
94 | group->iommufd = NULL; |
95 | } |
96 | |
97 | out_unlock: |
98 | mutex_unlock(lock: &group->group_lock); |
99 | return ret; |
100 | } |
101 | |
102 | static int vfio_group_ioctl_set_container(struct vfio_group *group, |
103 | int __user *arg) |
104 | { |
105 | struct vfio_container *container; |
106 | struct iommufd_ctx *iommufd; |
107 | struct fd f; |
108 | int ret; |
109 | int fd; |
110 | |
111 | if (get_user(fd, arg)) |
112 | return -EFAULT; |
113 | |
114 | f = fdget(fd); |
115 | if (!f.file) |
116 | return -EBADF; |
117 | |
118 | mutex_lock(&group->group_lock); |
119 | if (vfio_group_has_iommu(group)) { |
120 | ret = -EINVAL; |
121 | goto out_unlock; |
122 | } |
123 | if (!group->iommu_group) { |
124 | ret = -ENODEV; |
125 | goto out_unlock; |
126 | } |
127 | |
128 | container = vfio_container_from_file(filep: f.file); |
129 | if (container) { |
130 | ret = vfio_container_attach_group(container, group); |
131 | goto out_unlock; |
132 | } |
133 | |
134 | iommufd = iommufd_ctx_from_file(file: f.file); |
135 | if (!IS_ERR(ptr: iommufd)) { |
136 | if (IS_ENABLED(CONFIG_VFIO_NOIOMMU) && |
137 | group->type == VFIO_NO_IOMMU) |
138 | ret = iommufd_vfio_compat_set_no_iommu(ictx: iommufd); |
139 | else |
140 | ret = iommufd_vfio_compat_ioas_create(ictx: iommufd); |
141 | |
142 | if (ret) { |
143 | iommufd_ctx_put(ictx: iommufd); |
144 | goto out_unlock; |
145 | } |
146 | |
147 | group->iommufd = iommufd; |
148 | goto out_unlock; |
149 | } |
150 | |
151 | /* The FD passed is not recognized. */ |
152 | ret = -EBADFD; |
153 | |
154 | out_unlock: |
155 | mutex_unlock(lock: &group->group_lock); |
156 | fdput(fd: f); |
157 | return ret; |
158 | } |
159 | |
160 | static void vfio_device_group_get_kvm_safe(struct vfio_device *device) |
161 | { |
162 | spin_lock(lock: &device->group->kvm_ref_lock); |
163 | vfio_device_get_kvm_safe(device, kvm: device->group->kvm); |
164 | spin_unlock(lock: &device->group->kvm_ref_lock); |
165 | } |
166 | |
167 | static int vfio_df_group_open(struct vfio_device_file *df) |
168 | { |
169 | struct vfio_device *device = df->device; |
170 | int ret; |
171 | |
172 | mutex_lock(&device->group->group_lock); |
173 | if (!vfio_group_has_iommu(group: device->group)) { |
174 | ret = -EINVAL; |
175 | goto out_unlock; |
176 | } |
177 | |
178 | mutex_lock(&device->dev_set->lock); |
179 | |
180 | /* |
181 | * Before the first device open, get the KVM pointer currently |
182 | * associated with the group (if there is one) and obtain a reference |
183 | * now that will be held until the open_count reaches 0 again. Save |
184 | * the pointer in the device for use by drivers. |
185 | */ |
186 | if (device->open_count == 0) |
187 | vfio_device_group_get_kvm_safe(device); |
188 | |
189 | df->iommufd = device->group->iommufd; |
190 | if (df->iommufd && vfio_device_is_noiommu(vdev: device) && device->open_count == 0) { |
191 | /* |
192 | * Require no compat ioas to be assigned to proceed. The basic |
193 | * statement is that the user cannot have done something that |
194 | * implies they expected translation to exist |
195 | */ |
196 | if (!capable(CAP_SYS_RAWIO) || |
197 | vfio_iommufd_device_has_compat_ioas(vdev: device, ictx: df->iommufd)) |
198 | ret = -EPERM; |
199 | else |
200 | ret = 0; |
201 | goto out_put_kvm; |
202 | } |
203 | |
204 | ret = vfio_df_open(df); |
205 | if (ret) |
206 | goto out_put_kvm; |
207 | |
208 | if (df->iommufd && device->open_count == 1) { |
209 | ret = vfio_iommufd_compat_attach_ioas(device, ictx: df->iommufd); |
210 | if (ret) |
211 | goto out_close_device; |
212 | } |
213 | |
214 | /* |
215 | * Paired with smp_load_acquire() in vfio_device_fops::ioctl/ |
216 | * read/write/mmap and vfio_file_has_device_access() |
217 | */ |
218 | smp_store_release(&df->access_granted, true); |
219 | |
220 | mutex_unlock(lock: &device->dev_set->lock); |
221 | mutex_unlock(lock: &device->group->group_lock); |
222 | return 0; |
223 | |
224 | out_close_device: |
225 | vfio_df_close(df); |
226 | out_put_kvm: |
227 | df->iommufd = NULL; |
228 | if (device->open_count == 0) |
229 | vfio_device_put_kvm(device); |
230 | mutex_unlock(lock: &device->dev_set->lock); |
231 | out_unlock: |
232 | mutex_unlock(lock: &device->group->group_lock); |
233 | return ret; |
234 | } |
235 | |
236 | void vfio_df_group_close(struct vfio_device_file *df) |
237 | { |
238 | struct vfio_device *device = df->device; |
239 | |
240 | mutex_lock(&device->group->group_lock); |
241 | mutex_lock(&device->dev_set->lock); |
242 | |
243 | vfio_df_close(df); |
244 | df->iommufd = NULL; |
245 | |
246 | if (device->open_count == 0) |
247 | vfio_device_put_kvm(device); |
248 | |
249 | mutex_unlock(lock: &device->dev_set->lock); |
250 | mutex_unlock(lock: &device->group->group_lock); |
251 | } |
252 | |
253 | static struct file *vfio_device_open_file(struct vfio_device *device) |
254 | { |
255 | struct vfio_device_file *df; |
256 | struct file *filep; |
257 | int ret; |
258 | |
259 | df = vfio_allocate_device_file(device); |
260 | if (IS_ERR(ptr: df)) { |
261 | ret = PTR_ERR(ptr: df); |
262 | goto err_out; |
263 | } |
264 | |
265 | df->group = device->group; |
266 | |
267 | ret = vfio_df_group_open(df); |
268 | if (ret) |
269 | goto err_free; |
270 | |
271 | /* |
272 | * We can't use anon_inode_getfd() because we need to modify |
273 | * the f_mode flags directly to allow more than just ioctls |
274 | */ |
275 | filep = anon_inode_getfile(name: "[vfio-device]" , fops: &vfio_device_fops, |
276 | priv: df, O_RDWR); |
277 | if (IS_ERR(ptr: filep)) { |
278 | ret = PTR_ERR(ptr: filep); |
279 | goto err_close_device; |
280 | } |
281 | |
282 | /* |
283 | * TODO: add an anon_inode interface to do this. |
284 | * Appears to be missing by lack of need rather than |
285 | * explicitly prevented. Now there's need. |
286 | */ |
287 | filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE); |
288 | |
289 | if (device->group->type == VFIO_NO_IOMMU) |
290 | dev_warn(device->dev, "vfio-noiommu device opened by user " |
291 | "(%s:%d)\n" , current->comm, task_pid_nr(current)); |
292 | /* |
293 | * On success the ref of device is moved to the file and |
294 | * put in vfio_device_fops_release() |
295 | */ |
296 | return filep; |
297 | |
298 | err_close_device: |
299 | vfio_df_group_close(df); |
300 | err_free: |
301 | kfree(objp: df); |
302 | err_out: |
303 | return ERR_PTR(error: ret); |
304 | } |
305 | |
306 | static int vfio_group_ioctl_get_device_fd(struct vfio_group *group, |
307 | char __user *arg) |
308 | { |
309 | struct vfio_device *device; |
310 | struct file *filep; |
311 | char *buf; |
312 | int fdno; |
313 | int ret; |
314 | |
315 | buf = strndup_user(arg, PAGE_SIZE); |
316 | if (IS_ERR(ptr: buf)) |
317 | return PTR_ERR(ptr: buf); |
318 | |
319 | device = vfio_device_get_from_name(group, buf); |
320 | kfree(objp: buf); |
321 | if (IS_ERR(ptr: device)) |
322 | return PTR_ERR(ptr: device); |
323 | |
324 | fdno = get_unused_fd_flags(O_CLOEXEC); |
325 | if (fdno < 0) { |
326 | ret = fdno; |
327 | goto err_put_device; |
328 | } |
329 | |
330 | filep = vfio_device_open_file(device); |
331 | if (IS_ERR(ptr: filep)) { |
332 | ret = PTR_ERR(ptr: filep); |
333 | goto err_put_fdno; |
334 | } |
335 | |
336 | fd_install(fd: fdno, file: filep); |
337 | return fdno; |
338 | |
339 | err_put_fdno: |
340 | put_unused_fd(fd: fdno); |
341 | err_put_device: |
342 | vfio_device_put_registration(device); |
343 | return ret; |
344 | } |
345 | |
346 | static int vfio_group_ioctl_get_status(struct vfio_group *group, |
347 | struct vfio_group_status __user *arg) |
348 | { |
349 | unsigned long minsz = offsetofend(struct vfio_group_status, flags); |
350 | struct vfio_group_status status; |
351 | |
352 | if (copy_from_user(to: &status, from: arg, n: minsz)) |
353 | return -EFAULT; |
354 | |
355 | if (status.argsz < minsz) |
356 | return -EINVAL; |
357 | |
358 | status.flags = 0; |
359 | |
360 | mutex_lock(&group->group_lock); |
361 | if (!group->iommu_group) { |
362 | mutex_unlock(lock: &group->group_lock); |
363 | return -ENODEV; |
364 | } |
365 | |
366 | /* |
367 | * With the container FD the iommu_group_claim_dma_owner() is done |
368 | * during SET_CONTAINER but for IOMMFD this is done during |
369 | * VFIO_GROUP_GET_DEVICE_FD. Meaning that with iommufd |
370 | * VFIO_GROUP_FLAGS_VIABLE could be set but GET_DEVICE_FD will fail due |
371 | * to viability. |
372 | */ |
373 | if (vfio_group_has_iommu(group)) |
374 | status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET | |
375 | VFIO_GROUP_FLAGS_VIABLE; |
376 | else if (!iommu_group_dma_owner_claimed(group: group->iommu_group)) |
377 | status.flags |= VFIO_GROUP_FLAGS_VIABLE; |
378 | mutex_unlock(lock: &group->group_lock); |
379 | |
380 | if (copy_to_user(to: arg, from: &status, n: minsz)) |
381 | return -EFAULT; |
382 | return 0; |
383 | } |
384 | |
385 | static long vfio_group_fops_unl_ioctl(struct file *filep, |
386 | unsigned int cmd, unsigned long arg) |
387 | { |
388 | struct vfio_group *group = filep->private_data; |
389 | void __user *uarg = (void __user *)arg; |
390 | |
391 | switch (cmd) { |
392 | case VFIO_GROUP_GET_DEVICE_FD: |
393 | return vfio_group_ioctl_get_device_fd(group, arg: uarg); |
394 | case VFIO_GROUP_GET_STATUS: |
395 | return vfio_group_ioctl_get_status(group, arg: uarg); |
396 | case VFIO_GROUP_SET_CONTAINER: |
397 | return vfio_group_ioctl_set_container(group, arg: uarg); |
398 | case VFIO_GROUP_UNSET_CONTAINER: |
399 | return vfio_group_ioctl_unset_container(group); |
400 | default: |
401 | return -ENOTTY; |
402 | } |
403 | } |
404 | |
405 | int vfio_device_block_group(struct vfio_device *device) |
406 | { |
407 | struct vfio_group *group = device->group; |
408 | int ret = 0; |
409 | |
410 | mutex_lock(&group->group_lock); |
411 | if (group->opened_file) { |
412 | ret = -EBUSY; |
413 | goto out_unlock; |
414 | } |
415 | |
416 | group->cdev_device_open_cnt++; |
417 | |
418 | out_unlock: |
419 | mutex_unlock(lock: &group->group_lock); |
420 | return ret; |
421 | } |
422 | |
423 | void vfio_device_unblock_group(struct vfio_device *device) |
424 | { |
425 | struct vfio_group *group = device->group; |
426 | |
427 | mutex_lock(&group->group_lock); |
428 | group->cdev_device_open_cnt--; |
429 | mutex_unlock(lock: &group->group_lock); |
430 | } |
431 | |
432 | static int vfio_group_fops_open(struct inode *inode, struct file *filep) |
433 | { |
434 | struct vfio_group *group = |
435 | container_of(inode->i_cdev, struct vfio_group, cdev); |
436 | int ret; |
437 | |
438 | mutex_lock(&group->group_lock); |
439 | |
440 | /* |
441 | * drivers can be zero if this races with vfio_device_remove_group(), it |
442 | * will be stable at 0 under the group rwsem |
443 | */ |
444 | if (refcount_read(r: &group->drivers) == 0) { |
445 | ret = -ENODEV; |
446 | goto out_unlock; |
447 | } |
448 | |
449 | if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) { |
450 | ret = -EPERM; |
451 | goto out_unlock; |
452 | } |
453 | |
454 | if (group->cdev_device_open_cnt) { |
455 | ret = -EBUSY; |
456 | goto out_unlock; |
457 | } |
458 | |
459 | /* |
460 | * Do we need multiple instances of the group open? Seems not. |
461 | */ |
462 | if (group->opened_file) { |
463 | ret = -EBUSY; |
464 | goto out_unlock; |
465 | } |
466 | group->opened_file = filep; |
467 | filep->private_data = group; |
468 | ret = 0; |
469 | out_unlock: |
470 | mutex_unlock(lock: &group->group_lock); |
471 | return ret; |
472 | } |
473 | |
474 | static int vfio_group_fops_release(struct inode *inode, struct file *filep) |
475 | { |
476 | struct vfio_group *group = filep->private_data; |
477 | |
478 | filep->private_data = NULL; |
479 | |
480 | mutex_lock(&group->group_lock); |
481 | /* |
482 | * Device FDs hold a group file reference, therefore the group release |
483 | * is only called when there are no open devices. |
484 | */ |
485 | WARN_ON(group->notifier.head); |
486 | if (group->container) |
487 | vfio_group_detach_container(group); |
488 | if (group->iommufd) { |
489 | iommufd_ctx_put(ictx: group->iommufd); |
490 | group->iommufd = NULL; |
491 | } |
492 | group->opened_file = NULL; |
493 | mutex_unlock(lock: &group->group_lock); |
494 | return 0; |
495 | } |
496 | |
497 | static const struct file_operations vfio_group_fops = { |
498 | .owner = THIS_MODULE, |
499 | .unlocked_ioctl = vfio_group_fops_unl_ioctl, |
500 | .compat_ioctl = compat_ptr_ioctl, |
501 | .open = vfio_group_fops_open, |
502 | .release = vfio_group_fops_release, |
503 | }; |
504 | |
505 | /* |
506 | * Group objects - create, release, get, put, search |
507 | */ |
508 | static struct vfio_group * |
509 | vfio_group_find_from_iommu(struct iommu_group *iommu_group) |
510 | { |
511 | struct vfio_group *group; |
512 | |
513 | lockdep_assert_held(&vfio.group_lock); |
514 | |
515 | /* |
516 | * group->iommu_group from the vfio.group_list cannot be NULL |
517 | * under the vfio.group_lock. |
518 | */ |
519 | list_for_each_entry(group, &vfio.group_list, vfio_next) { |
520 | if (group->iommu_group == iommu_group) |
521 | return group; |
522 | } |
523 | return NULL; |
524 | } |
525 | |
526 | static void vfio_group_release(struct device *dev) |
527 | { |
528 | struct vfio_group *group = container_of(dev, struct vfio_group, dev); |
529 | |
530 | mutex_destroy(lock: &group->device_lock); |
531 | mutex_destroy(lock: &group->group_lock); |
532 | WARN_ON(group->iommu_group); |
533 | WARN_ON(group->cdev_device_open_cnt); |
534 | ida_free(&vfio.group_ida, MINOR(group->dev.devt)); |
535 | kfree(objp: group); |
536 | } |
537 | |
538 | static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group, |
539 | enum vfio_group_type type) |
540 | { |
541 | struct vfio_group *group; |
542 | int minor; |
543 | |
544 | group = kzalloc(size: sizeof(*group), GFP_KERNEL); |
545 | if (!group) |
546 | return ERR_PTR(error: -ENOMEM); |
547 | |
548 | minor = ida_alloc_max(ida: &vfio.group_ida, MINORMASK, GFP_KERNEL); |
549 | if (minor < 0) { |
550 | kfree(objp: group); |
551 | return ERR_PTR(error: minor); |
552 | } |
553 | |
554 | device_initialize(dev: &group->dev); |
555 | group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor); |
556 | group->dev.class = vfio.class; |
557 | group->dev.release = vfio_group_release; |
558 | cdev_init(&group->cdev, &vfio_group_fops); |
559 | group->cdev.owner = THIS_MODULE; |
560 | |
561 | refcount_set(r: &group->drivers, n: 1); |
562 | mutex_init(&group->group_lock); |
563 | spin_lock_init(&group->kvm_ref_lock); |
564 | INIT_LIST_HEAD(list: &group->device_list); |
565 | mutex_init(&group->device_lock); |
566 | group->iommu_group = iommu_group; |
567 | /* put in vfio_group_release() */ |
568 | iommu_group_ref_get(group: iommu_group); |
569 | group->type = type; |
570 | BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); |
571 | |
572 | return group; |
573 | } |
574 | |
575 | static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group, |
576 | enum vfio_group_type type) |
577 | { |
578 | struct vfio_group *group; |
579 | struct vfio_group *ret; |
580 | int err; |
581 | |
582 | lockdep_assert_held(&vfio.group_lock); |
583 | |
584 | group = vfio_group_alloc(iommu_group, type); |
585 | if (IS_ERR(ptr: group)) |
586 | return group; |
587 | |
588 | err = dev_set_name(dev: &group->dev, name: "%s%d" , |
589 | group->type == VFIO_NO_IOMMU ? "noiommu-" : "" , |
590 | iommu_group_id(group: iommu_group)); |
591 | if (err) { |
592 | ret = ERR_PTR(error: err); |
593 | goto err_put; |
594 | } |
595 | |
596 | err = cdev_device_add(cdev: &group->cdev, dev: &group->dev); |
597 | if (err) { |
598 | ret = ERR_PTR(error: err); |
599 | goto err_put; |
600 | } |
601 | |
602 | list_add(new: &group->vfio_next, head: &vfio.group_list); |
603 | |
604 | return group; |
605 | |
606 | err_put: |
607 | put_device(dev: &group->dev); |
608 | return ret; |
609 | } |
610 | |
611 | static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev, |
612 | enum vfio_group_type type) |
613 | { |
614 | struct iommu_group *iommu_group; |
615 | struct vfio_group *group; |
616 | int ret; |
617 | |
618 | iommu_group = iommu_group_alloc(); |
619 | if (IS_ERR(ptr: iommu_group)) |
620 | return ERR_CAST(ptr: iommu_group); |
621 | |
622 | ret = iommu_group_set_name(group: iommu_group, name: "vfio-noiommu" ); |
623 | if (ret) |
624 | goto out_put_group; |
625 | ret = iommu_group_add_device(group: iommu_group, dev); |
626 | if (ret) |
627 | goto out_put_group; |
628 | |
629 | mutex_lock(&vfio.group_lock); |
630 | group = vfio_create_group(iommu_group, type); |
631 | mutex_unlock(lock: &vfio.group_lock); |
632 | if (IS_ERR(ptr: group)) { |
633 | ret = PTR_ERR(ptr: group); |
634 | goto out_remove_device; |
635 | } |
636 | iommu_group_put(group: iommu_group); |
637 | return group; |
638 | |
639 | out_remove_device: |
640 | iommu_group_remove_device(dev); |
641 | out_put_group: |
642 | iommu_group_put(group: iommu_group); |
643 | return ERR_PTR(error: ret); |
644 | } |
645 | |
646 | static bool vfio_group_has_device(struct vfio_group *group, struct device *dev) |
647 | { |
648 | struct vfio_device *device; |
649 | |
650 | mutex_lock(&group->device_lock); |
651 | list_for_each_entry(device, &group->device_list, group_next) { |
652 | if (device->dev == dev) { |
653 | mutex_unlock(lock: &group->device_lock); |
654 | return true; |
655 | } |
656 | } |
657 | mutex_unlock(lock: &group->device_lock); |
658 | return false; |
659 | } |
660 | |
661 | static struct vfio_group *vfio_group_find_or_alloc(struct device *dev) |
662 | { |
663 | struct iommu_group *iommu_group; |
664 | struct vfio_group *group; |
665 | |
666 | iommu_group = iommu_group_get(dev); |
667 | if (!iommu_group && vfio_noiommu) { |
668 | /* |
669 | * With noiommu enabled, create an IOMMU group for devices that |
670 | * don't already have one, implying no IOMMU hardware/driver |
671 | * exists. Taint the kernel because we're about to give a DMA |
672 | * capable device to a user without IOMMU protection. |
673 | */ |
674 | group = vfio_noiommu_group_alloc(dev, type: VFIO_NO_IOMMU); |
675 | if (!IS_ERR(ptr: group)) { |
676 | add_taint(TAINT_USER, LOCKDEP_STILL_OK); |
677 | dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n" ); |
678 | } |
679 | return group; |
680 | } |
681 | |
682 | if (!iommu_group) |
683 | return ERR_PTR(error: -EINVAL); |
684 | |
685 | mutex_lock(&vfio.group_lock); |
686 | group = vfio_group_find_from_iommu(iommu_group); |
687 | if (group) { |
688 | if (WARN_ON(vfio_group_has_device(group, dev))) |
689 | group = ERR_PTR(error: -EINVAL); |
690 | else |
691 | refcount_inc(r: &group->drivers); |
692 | } else { |
693 | group = vfio_create_group(iommu_group, type: VFIO_IOMMU); |
694 | } |
695 | mutex_unlock(lock: &vfio.group_lock); |
696 | |
697 | /* The vfio_group holds a reference to the iommu_group */ |
698 | iommu_group_put(group: iommu_group); |
699 | return group; |
700 | } |
701 | |
702 | int vfio_device_set_group(struct vfio_device *device, |
703 | enum vfio_group_type type) |
704 | { |
705 | struct vfio_group *group; |
706 | |
707 | if (type == VFIO_IOMMU) |
708 | group = vfio_group_find_or_alloc(dev: device->dev); |
709 | else |
710 | group = vfio_noiommu_group_alloc(dev: device->dev, type); |
711 | |
712 | if (IS_ERR(ptr: group)) |
713 | return PTR_ERR(ptr: group); |
714 | |
715 | /* Our reference on group is moved to the device */ |
716 | device->group = group; |
717 | return 0; |
718 | } |
719 | |
720 | void vfio_device_remove_group(struct vfio_device *device) |
721 | { |
722 | struct vfio_group *group = device->group; |
723 | struct iommu_group *iommu_group; |
724 | |
725 | if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU) |
726 | iommu_group_remove_device(dev: device->dev); |
727 | |
728 | /* Pairs with vfio_create_group() / vfio_group_get_from_iommu() */ |
729 | if (!refcount_dec_and_mutex_lock(r: &group->drivers, lock: &vfio.group_lock)) |
730 | return; |
731 | list_del(entry: &group->vfio_next); |
732 | |
733 | /* |
734 | * We could concurrently probe another driver in the group that might |
735 | * race vfio_device_remove_group() with vfio_get_group(), so we have to |
736 | * ensure that the sysfs is all cleaned up under lock otherwise the |
737 | * cdev_device_add() will fail due to the name aready existing. |
738 | */ |
739 | cdev_device_del(cdev: &group->cdev, dev: &group->dev); |
740 | |
741 | mutex_lock(&group->group_lock); |
742 | /* |
743 | * These data structures all have paired operations that can only be |
744 | * undone when the caller holds a live reference on the device. Since |
745 | * all pairs must be undone these WARN_ON's indicate some caller did not |
746 | * properly hold the group reference. |
747 | */ |
748 | WARN_ON(!list_empty(&group->device_list)); |
749 | WARN_ON(group->notifier.head); |
750 | |
751 | /* |
752 | * Revoke all users of group->iommu_group. At this point we know there |
753 | * are no devices active because we are unplugging the last one. Setting |
754 | * iommu_group to NULL blocks all new users. |
755 | */ |
756 | if (group->container) |
757 | vfio_group_detach_container(group); |
758 | iommu_group = group->iommu_group; |
759 | group->iommu_group = NULL; |
760 | mutex_unlock(lock: &group->group_lock); |
761 | mutex_unlock(lock: &vfio.group_lock); |
762 | |
763 | iommu_group_put(group: iommu_group); |
764 | put_device(dev: &group->dev); |
765 | } |
766 | |
767 | void vfio_device_group_register(struct vfio_device *device) |
768 | { |
769 | mutex_lock(&device->group->device_lock); |
770 | list_add(new: &device->group_next, head: &device->group->device_list); |
771 | mutex_unlock(lock: &device->group->device_lock); |
772 | } |
773 | |
774 | void vfio_device_group_unregister(struct vfio_device *device) |
775 | { |
776 | mutex_lock(&device->group->device_lock); |
777 | list_del(entry: &device->group_next); |
778 | mutex_unlock(lock: &device->group->device_lock); |
779 | } |
780 | |
781 | int vfio_device_group_use_iommu(struct vfio_device *device) |
782 | { |
783 | struct vfio_group *group = device->group; |
784 | int ret = 0; |
785 | |
786 | lockdep_assert_held(&group->group_lock); |
787 | |
788 | if (WARN_ON(!group->container)) |
789 | return -EINVAL; |
790 | |
791 | ret = vfio_group_use_container(group); |
792 | if (ret) |
793 | return ret; |
794 | vfio_device_container_register(device); |
795 | return 0; |
796 | } |
797 | |
798 | void vfio_device_group_unuse_iommu(struct vfio_device *device) |
799 | { |
800 | struct vfio_group *group = device->group; |
801 | |
802 | lockdep_assert_held(&group->group_lock); |
803 | |
804 | if (WARN_ON(!group->container)) |
805 | return; |
806 | |
807 | vfio_device_container_unregister(device); |
808 | vfio_group_unuse_container(group); |
809 | } |
810 | |
811 | bool vfio_device_has_container(struct vfio_device *device) |
812 | { |
813 | return device->group->container; |
814 | } |
815 | |
816 | struct vfio_group *vfio_group_from_file(struct file *file) |
817 | { |
818 | struct vfio_group *group = file->private_data; |
819 | |
820 | if (file->f_op != &vfio_group_fops) |
821 | return NULL; |
822 | return group; |
823 | } |
824 | |
825 | /** |
826 | * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file |
827 | * @file: VFIO group file |
828 | * |
829 | * The returned iommu_group is valid as long as a ref is held on the file. This |
830 | * returns a reference on the group. This function is deprecated, only the SPAPR |
831 | * path in kvm should call it. |
832 | */ |
833 | struct iommu_group *vfio_file_iommu_group(struct file *file) |
834 | { |
835 | struct vfio_group *group = vfio_group_from_file(file); |
836 | struct iommu_group *iommu_group = NULL; |
837 | |
838 | if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU)) |
839 | return NULL; |
840 | |
841 | if (!group) |
842 | return NULL; |
843 | |
844 | mutex_lock(&group->group_lock); |
845 | if (group->iommu_group) { |
846 | iommu_group = group->iommu_group; |
847 | iommu_group_ref_get(group: iommu_group); |
848 | } |
849 | mutex_unlock(lock: &group->group_lock); |
850 | return iommu_group; |
851 | } |
852 | EXPORT_SYMBOL_GPL(vfio_file_iommu_group); |
853 | |
854 | /** |
855 | * vfio_file_is_group - True if the file is a vfio group file |
856 | * @file: VFIO group file |
857 | */ |
858 | bool vfio_file_is_group(struct file *file) |
859 | { |
860 | return vfio_group_from_file(file); |
861 | } |
862 | EXPORT_SYMBOL_GPL(vfio_file_is_group); |
863 | |
864 | bool vfio_group_enforced_coherent(struct vfio_group *group) |
865 | { |
866 | struct vfio_device *device; |
867 | bool ret = true; |
868 | |
869 | /* |
870 | * If the device does not have IOMMU_CAP_ENFORCE_CACHE_COHERENCY then |
871 | * any domain later attached to it will also not support it. If the cap |
872 | * is set then the iommu_domain eventually attached to the device/group |
873 | * must use a domain with enforce_cache_coherency(). |
874 | */ |
875 | mutex_lock(&group->device_lock); |
876 | list_for_each_entry(device, &group->device_list, group_next) { |
877 | if (!device_iommu_capable(dev: device->dev, |
878 | cap: IOMMU_CAP_ENFORCE_CACHE_COHERENCY)) { |
879 | ret = false; |
880 | break; |
881 | } |
882 | } |
883 | mutex_unlock(lock: &group->device_lock); |
884 | return ret; |
885 | } |
886 | |
887 | void vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm) |
888 | { |
889 | spin_lock(lock: &group->kvm_ref_lock); |
890 | group->kvm = kvm; |
891 | spin_unlock(lock: &group->kvm_ref_lock); |
892 | } |
893 | |
894 | /** |
895 | * vfio_file_has_dev - True if the VFIO file is a handle for device |
896 | * @file: VFIO file to check |
897 | * @device: Device that must be part of the file |
898 | * |
899 | * Returns true if given file has permission to manipulate the given device. |
900 | */ |
901 | bool vfio_file_has_dev(struct file *file, struct vfio_device *device) |
902 | { |
903 | struct vfio_group *group = vfio_group_from_file(file); |
904 | |
905 | if (!group) |
906 | return false; |
907 | |
908 | return group == device->group; |
909 | } |
910 | EXPORT_SYMBOL_GPL(vfio_file_has_dev); |
911 | |
912 | static char *vfio_devnode(const struct device *dev, umode_t *mode) |
913 | { |
914 | return kasprintf(GFP_KERNEL, fmt: "vfio/%s" , dev_name(dev)); |
915 | } |
916 | |
917 | int __init vfio_group_init(void) |
918 | { |
919 | int ret; |
920 | |
921 | ida_init(ida: &vfio.group_ida); |
922 | mutex_init(&vfio.group_lock); |
923 | INIT_LIST_HEAD(list: &vfio.group_list); |
924 | |
925 | ret = vfio_container_init(); |
926 | if (ret) |
927 | return ret; |
928 | |
929 | /* /dev/vfio/$GROUP */ |
930 | vfio.class = class_create(name: "vfio" ); |
931 | if (IS_ERR(ptr: vfio.class)) { |
932 | ret = PTR_ERR(ptr: vfio.class); |
933 | goto err_group_class; |
934 | } |
935 | |
936 | vfio.class->devnode = vfio_devnode; |
937 | |
938 | ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio" ); |
939 | if (ret) |
940 | goto err_alloc_chrdev; |
941 | return 0; |
942 | |
943 | err_alloc_chrdev: |
944 | class_destroy(cls: vfio.class); |
945 | vfio.class = NULL; |
946 | err_group_class: |
947 | vfio_container_cleanup(); |
948 | return ret; |
949 | } |
950 | |
951 | void vfio_group_cleanup(void) |
952 | { |
953 | WARN_ON(!list_empty(&vfio.group_list)); |
954 | ida_destroy(ida: &vfio.group_ida); |
955 | unregister_chrdev_region(vfio.group_devt, MINORMASK + 1); |
956 | class_destroy(cls: vfio.class); |
957 | vfio.class = NULL; |
958 | vfio_container_cleanup(); |
959 | } |
960 | |