1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * VFIO-KVM bridge pseudo device |
4 | * |
5 | * Copyright (C) 2013 Red Hat, Inc. All rights reserved. |
6 | * Author: Alex Williamson <alex.williamson@redhat.com> |
7 | */ |
8 | |
9 | #include <linux/errno.h> |
10 | #include <linux/file.h> |
11 | #include <linux/kvm_host.h> |
12 | #include <linux/list.h> |
13 | #include <linux/module.h> |
14 | #include <linux/mutex.h> |
15 | #include <linux/slab.h> |
16 | #include <linux/uaccess.h> |
17 | #include <linux/vfio.h> |
18 | #include "vfio.h" |
19 | |
20 | #ifdef CONFIG_SPAPR_TCE_IOMMU |
21 | #include <asm/kvm_ppc.h> |
22 | #endif |
23 | |
24 | struct kvm_vfio_file { |
25 | struct list_head node; |
26 | struct file *file; |
27 | #ifdef CONFIG_SPAPR_TCE_IOMMU |
28 | struct iommu_group *iommu_group; |
29 | #endif |
30 | }; |
31 | |
32 | struct kvm_vfio { |
33 | struct list_head file_list; |
34 | struct mutex lock; |
35 | bool noncoherent; |
36 | }; |
37 | |
38 | static void kvm_vfio_file_set_kvm(struct file *file, struct kvm *kvm) |
39 | { |
40 | void (*fn)(struct file *file, struct kvm *kvm); |
41 | |
42 | fn = symbol_get(vfio_file_set_kvm); |
43 | if (!fn) |
44 | return; |
45 | |
46 | fn(file, kvm); |
47 | |
48 | symbol_put(vfio_file_set_kvm); |
49 | } |
50 | |
51 | static bool kvm_vfio_file_enforced_coherent(struct file *file) |
52 | { |
53 | bool (*fn)(struct file *file); |
54 | bool ret; |
55 | |
56 | fn = symbol_get(vfio_file_enforced_coherent); |
57 | if (!fn) |
58 | return false; |
59 | |
60 | ret = fn(file); |
61 | |
62 | symbol_put(vfio_file_enforced_coherent); |
63 | |
64 | return ret; |
65 | } |
66 | |
67 | static bool kvm_vfio_file_is_valid(struct file *file) |
68 | { |
69 | bool (*fn)(struct file *file); |
70 | bool ret; |
71 | |
72 | fn = symbol_get(vfio_file_is_valid); |
73 | if (!fn) |
74 | return false; |
75 | |
76 | ret = fn(file); |
77 | |
78 | symbol_put(vfio_file_is_valid); |
79 | |
80 | return ret; |
81 | } |
82 | |
83 | #ifdef CONFIG_SPAPR_TCE_IOMMU |
84 | static struct iommu_group *kvm_vfio_file_iommu_group(struct file *file) |
85 | { |
86 | struct iommu_group *(*fn)(struct file *file); |
87 | struct iommu_group *ret; |
88 | |
89 | fn = symbol_get(vfio_file_iommu_group); |
90 | if (!fn) |
91 | return NULL; |
92 | |
93 | ret = fn(file); |
94 | |
95 | symbol_put(vfio_file_iommu_group); |
96 | |
97 | return ret; |
98 | } |
99 | |
100 | static void kvm_spapr_tce_release_vfio_group(struct kvm *kvm, |
101 | struct kvm_vfio_file *kvf) |
102 | { |
103 | if (WARN_ON_ONCE(!kvf->iommu_group)) |
104 | return; |
105 | |
106 | kvm_spapr_tce_release_iommu_group(kvm, kvf->iommu_group); |
107 | iommu_group_put(kvf->iommu_group); |
108 | kvf->iommu_group = NULL; |
109 | } |
110 | #endif |
111 | |
112 | /* |
113 | * Groups/devices can use the same or different IOMMU domains. If the same |
114 | * then adding a new group/device may change the coherency of groups/devices |
115 | * we've previously been told about. We don't want to care about any of |
116 | * that so we retest each group/device and bail as soon as we find one that's |
117 | * noncoherent. This means we only ever [un]register_noncoherent_dma once |
118 | * for the whole device. |
119 | */ |
120 | static void kvm_vfio_update_coherency(struct kvm_device *dev) |
121 | { |
122 | struct kvm_vfio *kv = dev->private; |
123 | bool noncoherent = false; |
124 | struct kvm_vfio_file *kvf; |
125 | |
126 | list_for_each_entry(kvf, &kv->file_list, node) { |
127 | if (!kvm_vfio_file_enforced_coherent(file: kvf->file)) { |
128 | noncoherent = true; |
129 | break; |
130 | } |
131 | } |
132 | |
133 | if (noncoherent != kv->noncoherent) { |
134 | kv->noncoherent = noncoherent; |
135 | |
136 | if (kv->noncoherent) |
137 | kvm_arch_register_noncoherent_dma(kvm: dev->kvm); |
138 | else |
139 | kvm_arch_unregister_noncoherent_dma(kvm: dev->kvm); |
140 | } |
141 | } |
142 | |
143 | static int kvm_vfio_file_add(struct kvm_device *dev, unsigned int fd) |
144 | { |
145 | struct kvm_vfio *kv = dev->private; |
146 | struct kvm_vfio_file *kvf; |
147 | struct file *filp; |
148 | int ret = 0; |
149 | |
150 | filp = fget(fd); |
151 | if (!filp) |
152 | return -EBADF; |
153 | |
154 | /* Ensure the FD is a vfio FD. */ |
155 | if (!kvm_vfio_file_is_valid(file: filp)) { |
156 | ret = -EINVAL; |
157 | goto out_fput; |
158 | } |
159 | |
160 | mutex_lock(&kv->lock); |
161 | |
162 | list_for_each_entry(kvf, &kv->file_list, node) { |
163 | if (kvf->file == filp) { |
164 | ret = -EEXIST; |
165 | goto out_unlock; |
166 | } |
167 | } |
168 | |
169 | kvf = kzalloc(size: sizeof(*kvf), GFP_KERNEL_ACCOUNT); |
170 | if (!kvf) { |
171 | ret = -ENOMEM; |
172 | goto out_unlock; |
173 | } |
174 | |
175 | kvf->file = get_file(f: filp); |
176 | list_add_tail(new: &kvf->node, head: &kv->file_list); |
177 | |
178 | kvm_arch_start_assignment(kvm: dev->kvm); |
179 | kvm_vfio_file_set_kvm(file: kvf->file, kvm: dev->kvm); |
180 | kvm_vfio_update_coherency(dev); |
181 | |
182 | out_unlock: |
183 | mutex_unlock(lock: &kv->lock); |
184 | out_fput: |
185 | fput(filp); |
186 | return ret; |
187 | } |
188 | |
189 | static int kvm_vfio_file_del(struct kvm_device *dev, unsigned int fd) |
190 | { |
191 | struct kvm_vfio *kv = dev->private; |
192 | struct kvm_vfio_file *kvf; |
193 | struct fd f; |
194 | int ret; |
195 | |
196 | f = fdget(fd); |
197 | if (!f.file) |
198 | return -EBADF; |
199 | |
200 | ret = -ENOENT; |
201 | |
202 | mutex_lock(&kv->lock); |
203 | |
204 | list_for_each_entry(kvf, &kv->file_list, node) { |
205 | if (kvf->file != f.file) |
206 | continue; |
207 | |
208 | list_del(entry: &kvf->node); |
209 | kvm_arch_end_assignment(kvm: dev->kvm); |
210 | #ifdef CONFIG_SPAPR_TCE_IOMMU |
211 | kvm_spapr_tce_release_vfio_group(dev->kvm, kvf); |
212 | #endif |
213 | kvm_vfio_file_set_kvm(file: kvf->file, NULL); |
214 | fput(kvf->file); |
215 | kfree(objp: kvf); |
216 | ret = 0; |
217 | break; |
218 | } |
219 | |
220 | kvm_vfio_update_coherency(dev); |
221 | |
222 | mutex_unlock(lock: &kv->lock); |
223 | |
224 | fdput(fd: f); |
225 | |
226 | return ret; |
227 | } |
228 | |
229 | #ifdef CONFIG_SPAPR_TCE_IOMMU |
230 | static int kvm_vfio_file_set_spapr_tce(struct kvm_device *dev, |
231 | void __user *arg) |
232 | { |
233 | struct kvm_vfio_spapr_tce param; |
234 | struct kvm_vfio *kv = dev->private; |
235 | struct kvm_vfio_file *kvf; |
236 | struct fd f; |
237 | int ret; |
238 | |
239 | if (copy_from_user(¶m, arg, sizeof(struct kvm_vfio_spapr_tce))) |
240 | return -EFAULT; |
241 | |
242 | f = fdget(param.groupfd); |
243 | if (!f.file) |
244 | return -EBADF; |
245 | |
246 | ret = -ENOENT; |
247 | |
248 | mutex_lock(&kv->lock); |
249 | |
250 | list_for_each_entry(kvf, &kv->file_list, node) { |
251 | if (kvf->file != f.file) |
252 | continue; |
253 | |
254 | if (!kvf->iommu_group) { |
255 | kvf->iommu_group = kvm_vfio_file_iommu_group(kvf->file); |
256 | if (WARN_ON_ONCE(!kvf->iommu_group)) { |
257 | ret = -EIO; |
258 | goto err_fdput; |
259 | } |
260 | } |
261 | |
262 | ret = kvm_spapr_tce_attach_iommu_group(dev->kvm, param.tablefd, |
263 | kvf->iommu_group); |
264 | break; |
265 | } |
266 | |
267 | err_fdput: |
268 | mutex_unlock(&kv->lock); |
269 | fdput(f); |
270 | return ret; |
271 | } |
272 | #endif |
273 | |
274 | static int kvm_vfio_set_file(struct kvm_device *dev, long attr, |
275 | void __user *arg) |
276 | { |
277 | int32_t __user *argp = arg; |
278 | int32_t fd; |
279 | |
280 | switch (attr) { |
281 | case KVM_DEV_VFIO_FILE_ADD: |
282 | if (get_user(fd, argp)) |
283 | return -EFAULT; |
284 | return kvm_vfio_file_add(dev, fd); |
285 | |
286 | case KVM_DEV_VFIO_FILE_DEL: |
287 | if (get_user(fd, argp)) |
288 | return -EFAULT; |
289 | return kvm_vfio_file_del(dev, fd); |
290 | |
291 | #ifdef CONFIG_SPAPR_TCE_IOMMU |
292 | case KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: |
293 | return kvm_vfio_file_set_spapr_tce(dev, arg); |
294 | #endif |
295 | } |
296 | |
297 | return -ENXIO; |
298 | } |
299 | |
300 | static int kvm_vfio_set_attr(struct kvm_device *dev, |
301 | struct kvm_device_attr *attr) |
302 | { |
303 | switch (attr->group) { |
304 | case KVM_DEV_VFIO_FILE: |
305 | return kvm_vfio_set_file(dev, attr: attr->attr, |
306 | u64_to_user_ptr(attr->addr)); |
307 | } |
308 | |
309 | return -ENXIO; |
310 | } |
311 | |
312 | static int kvm_vfio_has_attr(struct kvm_device *dev, |
313 | struct kvm_device_attr *attr) |
314 | { |
315 | switch (attr->group) { |
316 | case KVM_DEV_VFIO_FILE: |
317 | switch (attr->attr) { |
318 | case KVM_DEV_VFIO_FILE_ADD: |
319 | case KVM_DEV_VFIO_FILE_DEL: |
320 | #ifdef CONFIG_SPAPR_TCE_IOMMU |
321 | case KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: |
322 | #endif |
323 | return 0; |
324 | } |
325 | |
326 | break; |
327 | } |
328 | |
329 | return -ENXIO; |
330 | } |
331 | |
332 | static void kvm_vfio_release(struct kvm_device *dev) |
333 | { |
334 | struct kvm_vfio *kv = dev->private; |
335 | struct kvm_vfio_file *kvf, *tmp; |
336 | |
337 | list_for_each_entry_safe(kvf, tmp, &kv->file_list, node) { |
338 | #ifdef CONFIG_SPAPR_TCE_IOMMU |
339 | kvm_spapr_tce_release_vfio_group(dev->kvm, kvf); |
340 | #endif |
341 | kvm_vfio_file_set_kvm(file: kvf->file, NULL); |
342 | fput(kvf->file); |
343 | list_del(entry: &kvf->node); |
344 | kfree(objp: kvf); |
345 | kvm_arch_end_assignment(kvm: dev->kvm); |
346 | } |
347 | |
348 | kvm_vfio_update_coherency(dev); |
349 | |
350 | kfree(objp: kv); |
351 | kfree(objp: dev); /* alloc by kvm_ioctl_create_device, free by .release */ |
352 | } |
353 | |
354 | static int kvm_vfio_create(struct kvm_device *dev, u32 type); |
355 | |
356 | static struct kvm_device_ops kvm_vfio_ops = { |
357 | .name = "kvm-vfio" , |
358 | .create = kvm_vfio_create, |
359 | .release = kvm_vfio_release, |
360 | .set_attr = kvm_vfio_set_attr, |
361 | .has_attr = kvm_vfio_has_attr, |
362 | }; |
363 | |
364 | static int kvm_vfio_create(struct kvm_device *dev, u32 type) |
365 | { |
366 | struct kvm_device *tmp; |
367 | struct kvm_vfio *kv; |
368 | |
369 | /* Only one VFIO "device" per VM */ |
370 | list_for_each_entry(tmp, &dev->kvm->devices, vm_node) |
371 | if (tmp->ops == &kvm_vfio_ops) |
372 | return -EBUSY; |
373 | |
374 | kv = kzalloc(size: sizeof(*kv), GFP_KERNEL_ACCOUNT); |
375 | if (!kv) |
376 | return -ENOMEM; |
377 | |
378 | INIT_LIST_HEAD(list: &kv->file_list); |
379 | mutex_init(&kv->lock); |
380 | |
381 | dev->private = kv; |
382 | |
383 | return 0; |
384 | } |
385 | |
386 | int kvm_vfio_ops_init(void) |
387 | { |
388 | return kvm_register_device_ops(ops: &kvm_vfio_ops, KVM_DEV_TYPE_VFIO); |
389 | } |
390 | |
391 | void kvm_vfio_ops_exit(void) |
392 | { |
393 | kvm_unregister_device_ops(KVM_DEV_TYPE_VFIO); |
394 | } |
395 | |