// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018-2020 Intel Corporation.
 * Copyright (C) 2020 Red Hat, Inc.
 *
 * Author: Tiwei Bie <tiwei.bie@intel.com>
 *         Jason Wang <jasowang@redhat.com>
 *
 * Thanks Michael S. Tsirkin for the valuable comments and
 * suggestions. And thanks to Cunming Liang and Zhihong Wang for all
 * their support.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/uuid.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vhost.h>

#include "vhost.h"

enum {
	VHOST_VDPA_BACKEND_FEATURES =
	(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
	(1ULL << VHOST_BACKEND_F_IOTLB_BATCH) |
	(1ULL << VHOST_BACKEND_F_IOTLB_ASID),
};

#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)

#define VHOST_VDPA_IOTLB_BUCKETS 16

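/*
 * One address space per ASID: each vhost_vdpa_as carries its own IOTLB
 * and lives in the per-device hash table below, keyed by its ASID.
 */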
struct vhost_vdpa_as {
	struct hlist_node hash_link;
	struct vhost_iotlb iotlb;
	u32 id;
};

struct vhost_vdpa {
	struct vhost_dev vdev;
	struct iommu_domain *domain;
	struct vhost_virtqueue *vqs;
	struct completion completion;
	struct vdpa_device *vdpa;
	struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS];
	struct device dev;
	struct cdev cdev;
	atomic_t opened;
	u32 nvqs;
	int virtio_id;
	int minor;
	struct eventfd_ctx *config_ctx;
	int in_batch;
	struct vdpa_iova_range range;
	u32 batch_asid;
	bool suspended;
};

static DEFINE_IDA(vhost_vdpa_ida);

static dev_t vhost_vdpa_major;

static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
				   struct vhost_iotlb *iotlb, u64 start,
				   u64 last, u32 asid);

static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb)
{
	struct vhost_vdpa_as *as = container_of(iotlb, struct
						vhost_vdpa_as, iotlb);
	return as->id;
}

static struct vhost_vdpa_as *asid_to_as(struct vhost_vdpa *v, u32 asid)
{
	struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
	struct vhost_vdpa_as *as;

	hlist_for_each_entry(as, head, hash_link)
		if (as->id == asid)
			return as;

	return NULL;
}

static struct vhost_iotlb *asid_to_iotlb(struct vhost_vdpa *v, u32 asid)
{
	struct vhost_vdpa_as *as = asid_to_as(v, asid);

	if (!as)
		return NULL;

	return &as->iotlb;
}

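/*
 * Allocate a new address space for @asid and add it to the hash table.
 * Returns NULL if the ASID already exists or is beyond the number of
 * address spaces the parent vDPA device advertises.
 */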
static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid)
{
	struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
	struct vhost_vdpa_as *as;

	if (asid_to_as(v, asid))
		return NULL;

	if (asid >= v->vdpa->nas)
		return NULL;

	as = kmalloc(sizeof(*as), GFP_KERNEL);
	if (!as)
		return NULL;

	vhost_iotlb_init(&as->iotlb, 0, 0);
	as->id = asid;
	hlist_add_head(&as->hash_link, head);

	return as;
}

static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v,
						      u32 asid)
{
	struct vhost_vdpa_as *as = asid_to_as(v, asid);

	if (as)
		return as;

	return vhost_vdpa_alloc_as(v, asid);
}

static void vhost_vdpa_reset_map(struct vhost_vdpa *v, u32 asid)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (ops->reset_map)
		ops->reset_map(vdpa, asid);
}

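/* Tear down an address space: unmap its whole IOVA range and free it. */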
static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
{
	struct vhost_vdpa_as *as = asid_to_as(v, asid);

	if (!as)
		return -EINVAL;

	hlist_del(&as->hash_link);
	vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid);
	/*
	 * Devices with a vendor-specific IOMMU may need to restore the
	 * iotlb to its initial or default state, which cannot be done
	 * by the full-range unmap call above. Give them a chance to
	 * clean up or reset the map to the desired state.
	 */
	vhost_vdpa_reset_map(v, asid);
	kfree(as);

	return 0;
}

static void handle_vq_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);
	struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
	const struct vdpa_config_ops *ops = v->vdpa->config;

	ops->kick_vq(v->vdpa, vq - v->vqs);
}

static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
{
	struct vhost_virtqueue *vq = private;
	struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;

	if (call_ctx)
		eventfd_signal(call_ctx);

	return IRQ_HANDLED;
}

static irqreturn_t vhost_vdpa_config_cb(void *private)
{
	struct vhost_vdpa *v = private;
	struct eventfd_ctx *config_ctx = v->config_ctx;

	if (config_ctx)
		eventfd_signal(config_ctx);

	return IRQ_HANDLED;
}

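/*
 * Try to post the vq interrupt directly through the irq bypass
 * mechanism. If the parent cannot report a per-vq irq, or there is no
 * call eventfd, interrupts keep going through the regular eventfd path.
 */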
static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vhost_virtqueue *vq = &v->vqs[qid];
	const struct vdpa_config_ops *ops = v->vdpa->config;
	struct vdpa_device *vdpa = v->vdpa;
	int ret, irq;

	if (!ops->get_vq_irq)
		return;

	irq = ops->get_vq_irq(vdpa, qid);
	if (irq < 0)
		return;

	irq_bypass_unregister_producer(&vq->call_ctx.producer);
	if (!vq->call_ctx.ctx)
		return;

	vq->call_ctx.producer.token = vq->call_ctx.ctx;
	vq->call_ctx.producer.irq = irq;
	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
	if (unlikely(ret))
		dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret = %d\n",
			 qid, vq->call_ctx.producer.token, ret);
}

static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vhost_virtqueue *vq = &v->vqs[qid];

	irq_bypass_unregister_producer(&vq->call_ctx.producer);
}

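/*
 * Reset the device while staying compatible with old userspace: unless
 * VHOST_BACKEND_F_IOTLB_PERSIST was negotiated, ask the parent to also
 * clean its mappings as part of the reset.
 */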
static int _compat_vdpa_reset(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	u32 flags = 0;

	v->suspended = false;

	if (v->vdev.vqs) {
		flags |= !vhost_backend_has_feature(v->vdev.vqs[0],
						    VHOST_BACKEND_F_IOTLB_PERSIST) ?
			 VDPA_RESET_F_CLEAN_MAP : 0;
	}

	return vdpa_reset(vdpa, flags);
}

static int vhost_vdpa_reset(struct vhost_vdpa *v)
{
	v->in_batch = 0;
	return _compat_vdpa_reset(v);
}

static long vhost_vdpa_bind_mm(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (!vdpa->use_va || !ops->bind_mm)
		return 0;

	return ops->bind_mm(vdpa, v->vdev.mm);
}

static void vhost_vdpa_unbind_mm(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (!vdpa->use_va || !ops->unbind_mm)
		return;

	ops->unbind_mm(vdpa);
}

static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 device_id;

	device_id = ops->get_device_id(vdpa);

	if (copy_to_user(argp, &device_id, sizeof(device_id)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status;

	status = ops->get_status(vdpa);

	if (copy_to_user(statusp, &status, sizeof(status)))
		return -EFAULT;

	return 0;
}

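/*
 * Status writes follow the virtio rules: bits may only be added, never
 * removed, except for writing 0 which resets the device. vq irqs are
 * torn down before DRIVER_OK goes away and set up once it appears.
 */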
static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status, status_old;
	u32 nvqs = v->nvqs;
	int ret;
	u16 i;

	if (copy_from_user(&status, statusp, sizeof(status)))
		return -EFAULT;

	status_old = ops->get_status(vdpa);

	/*
	 * Userspace shouldn't remove status bits unless it resets the
	 * status to 0.
	 */
	if (status != 0 && (status_old & ~status) != 0)
		return -EINVAL;

	if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_unsetup_vq_irq(v, i);

	if (status == 0) {
		ret = _compat_vdpa_reset(v);
		if (ret)
			return ret;
	} else
		vdpa_set_status(vdpa, status);

	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_setup_vq_irq(v, i);

	return 0;
}

static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
				      struct vhost_vdpa_config *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	size_t size = vdpa->config->get_config_size(vdpa);

	if (c->len == 0 || c->off > size)
		return -EINVAL;

	if (c->len > size - c->off)
		return -E2BIG;

	return 0;
}

static long vhost_vdpa_get_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct vhost_vdpa_config config;
	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
	u8 *buf;

	if (copy_from_user(&config, c, size))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;
	buf = kvzalloc(config.len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vdpa_get_config(vdpa, config.off, buf, config.len);

	if (copy_to_user(c->buf, buf, config.len)) {
		kvfree(buf);
		return -EFAULT;
	}

	kvfree(buf);
	return 0;
}

static long vhost_vdpa_set_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct vhost_vdpa_config config;
	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
	u8 *buf;

	if (copy_from_user(&config, c, size))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;

	buf = vmemdup_user(c->buf, config.len);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	vdpa_set_config(vdpa, config.off, buf, config.len);

	kvfree(buf);
	return 0;
}

static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	return ops->suspend;
}

static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	return ops->resume;
}

static bool vhost_vdpa_has_desc_group(const struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	return ops->get_vq_desc_group;
}

static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u64 features;

	features = ops->get_device_features(vdpa);

	if (copy_to_user(featurep, &features, sizeof(features)))
		return -EFAULT;

	return 0;
}

static u64 vhost_vdpa_get_backend_features(const struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (!ops->get_backend_features)
		return 0;
	else
		return ops->get_backend_features(vdpa);
}

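/*
 * Mappings survive a device reset when the parent either relies on the
 * platform IOMMU (no set_map/dma_map), implements reset_map, or
 * explicitly advertises VHOST_BACKEND_F_IOTLB_PERSIST.
 */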
static bool vhost_vdpa_has_persistent_map(const struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	return (!ops->set_map && !ops->dma_map) || ops->reset_map ||
	       vhost_vdpa_get_backend_features(v) & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST);
}

static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_dev *d = &v->vdev;
	u64 actual_features;
	u64 features;
	int i;

	/*
	 * It's not allowed to change the features after they have
	 * been negotiated.
	 */
	if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
		return -EBUSY;

	if (copy_from_user(&features, featurep, sizeof(features)))
		return -EFAULT;

	if (vdpa_set_features(vdpa, features))
		return -EINVAL;

	/* let the vqs know what has been configured */
	actual_features = ops->get_driver_features(vdpa);
	for (i = 0; i < d->nvqs; ++i) {
		struct vhost_virtqueue *vq = d->vqs[i];

		mutex_lock(&vq->mutex);
		vq->acked_features = actual_features;
		mutex_unlock(&vq->mutex);
	}

	return 0;
}

static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u16 num;

	num = ops->get_vq_num_max(vdpa);

	if (copy_to_user(argp, &num, sizeof(num)))
		return -EFAULT;

	return 0;
}

static void vhost_vdpa_config_put(struct vhost_vdpa *v)
{
	if (v->config_ctx) {
		eventfd_ctx_put(v->config_ctx);
		v->config_ctx = NULL;
	}
}

static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_callback cb;
	int fd;
	struct eventfd_ctx *ctx;

	cb.callback = vhost_vdpa_config_cb;
	cb.private = v;
	if (copy_from_user(&fd, argp, sizeof(fd)))
		return -EFAULT;

	ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
	swap(ctx, v->config_ctx);

	if (!IS_ERR_OR_NULL(ctx))
		eventfd_ctx_put(ctx);

	if (IS_ERR(v->config_ctx)) {
		long ret = PTR_ERR(v->config_ctx);

		v->config_ctx = NULL;
		return ret;
	}

	v->vdpa->config->set_config_cb(v->vdpa, &cb);

	return 0;
}

static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vhost_vdpa_iova_range range = {
		.first = v->range.first,
		.last = v->range.last,
	};

	if (copy_to_user(argp, &range, sizeof(range)))
		return -EFAULT;
	return 0;
}

static long vhost_vdpa_get_config_size(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 size;

	size = ops->get_config_size(vdpa);

	if (copy_to_user(argp, &size, sizeof(size)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;

	if (copy_to_user(argp, &vdpa->nvqs, sizeof(vdpa->nvqs)))
		return -EFAULT;

	return 0;
}

/* After a successful return of this ioctl the device must not process more
 * virtqueue descriptors. The device can answer reads or writes of config
 * fields as if it were not suspended. In particular, writing to "queue_enable"
 * with a value of 1 will not make the device start processing buffers.
 */
static long vhost_vdpa_suspend(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	int ret;

	if (!(ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK))
		return 0;

	if (!ops->suspend)
		return -EOPNOTSUPP;

	ret = ops->suspend(vdpa);
	if (!ret)
		v->suspended = true;

	return ret;
}

/* After a successful return of this ioctl the device resumes processing
 * virtqueue descriptors. The device becomes fully operational the same way it
 * was before it was suspended.
 */
static long vhost_vdpa_resume(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	int ret;

	if (!(ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK))
		return 0;

	if (!ops->resume)
		return -EOPNOTSUPP;

	ret = ops->resume(vdpa);
	if (!ret)
		v->suspended = false;

	return ret;
}

static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
				   void __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_vq_state vq_state;
	struct vdpa_callback cb;
	struct vhost_virtqueue *vq;
	struct vhost_vring_state s;
	u32 idx;
	long r;

	r = get_user(idx, (u32 __user *)argp);
	if (r < 0)
		return r;

	if (idx >= v->nvqs)
		return -ENOBUFS;

	idx = array_index_nospec(idx, v->nvqs);
	vq = &v->vqs[idx];

	switch (cmd) {
	case VHOST_VDPA_SET_VRING_ENABLE:
		if (copy_from_user(&s, argp, sizeof(s)))
			return -EFAULT;
		ops->set_vq_ready(vdpa, idx, s.num);
		return 0;
	case VHOST_VDPA_GET_VRING_GROUP:
		if (!ops->get_vq_group)
			return -EOPNOTSUPP;
		s.index = idx;
		s.num = ops->get_vq_group(vdpa, idx);
		if (s.num >= vdpa->ngroups)
			return -EIO;
		else if (copy_to_user(argp, &s, sizeof(s)))
			return -EFAULT;
		return 0;
	case VHOST_VDPA_GET_VRING_DESC_GROUP:
		if (!vhost_vdpa_has_desc_group(v))
			return -EOPNOTSUPP;
		s.index = idx;
		s.num = ops->get_vq_desc_group(vdpa, idx);
		if (s.num >= vdpa->ngroups)
			return -EIO;
		else if (copy_to_user(argp, &s, sizeof(s)))
			return -EFAULT;
		return 0;
	case VHOST_VDPA_SET_GROUP_ASID:
		if (copy_from_user(&s, argp, sizeof(s)))
			return -EFAULT;
		if (s.num >= vdpa->nas)
			return -EINVAL;
		if (!ops->set_group_asid)
			return -EOPNOTSUPP;
		return ops->set_group_asid(vdpa, idx, s.num);
	case VHOST_VDPA_GET_VRING_SIZE:
		if (!ops->get_vq_size)
			return -EOPNOTSUPP;
		s.index = idx;
		s.num = ops->get_vq_size(vdpa, idx);
		if (copy_to_user(argp, &s, sizeof(s)))
			return -EFAULT;
		return 0;
	case VHOST_GET_VRING_BASE:
		r = ops->get_vq_state(v->vdpa, idx, &vq_state);
		if (r)
			return r;

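		/*
		 * For packed rings the 15-bit last_avail/last_used index
		 * and the wrap counter share a single u16, with the
		 * counter stored in bit 15.
		 */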
		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
			vq->last_avail_idx = vq_state.packed.last_avail_idx |
					     (vq_state.packed.last_avail_counter << 15);
			vq->last_used_idx = vq_state.packed.last_used_idx |
					    (vq_state.packed.last_used_counter << 15);
		} else {
			vq->last_avail_idx = vq_state.split.avail_index;
		}
		break;
	}

	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
	if (r)
		return r;

	switch (cmd) {
	case VHOST_SET_VRING_ADDR:
		if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended)
			return -EINVAL;

		if (ops->set_vq_address(vdpa, idx,
					(u64)(uintptr_t)vq->desc,
					(u64)(uintptr_t)vq->avail,
					(u64)(uintptr_t)vq->used))
			r = -EINVAL;
		break;

	case VHOST_SET_VRING_BASE:
		if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended)
			return -EINVAL;

		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
			vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff;
			vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000);
			vq_state.packed.last_used_idx = vq->last_used_idx & 0x7fff;
			vq_state.packed.last_used_counter = !!(vq->last_used_idx & 0x8000);
		} else {
			vq_state.split.avail_index = vq->last_avail_idx;
		}
		r = ops->set_vq_state(vdpa, idx, &vq_state);
		break;

	case VHOST_SET_VRING_CALL:
		if (vq->call_ctx.ctx) {
			cb.callback = vhost_vdpa_virtqueue_cb;
			cb.private = vq;
			cb.trigger = vq->call_ctx.ctx;
		} else {
			cb.callback = NULL;
			cb.private = NULL;
			cb.trigger = NULL;
		}
		ops->set_vq_cb(vdpa, idx, &cb);
		vhost_vdpa_setup_vq_irq(v, idx);
		break;

	case VHOST_SET_VRING_NUM:
		ops->set_vq_num(vdpa, idx, vq->num);
		break;
	}

	return r;
}

static long vhost_vdpa_unlocked_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;
	void __user *argp = (void __user *)arg;
	u64 __user *featurep = argp;
	u64 features;
	long r = 0;

	if (cmd == VHOST_SET_BACKEND_FEATURES) {
		if (copy_from_user(&features, featurep, sizeof(features)))
			return -EFAULT;
		if (features & ~(VHOST_VDPA_BACKEND_FEATURES |
				 BIT_ULL(VHOST_BACKEND_F_DESC_ASID) |
				 BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST) |
				 BIT_ULL(VHOST_BACKEND_F_SUSPEND) |
				 BIT_ULL(VHOST_BACKEND_F_RESUME) |
				 BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK)))
			return -EOPNOTSUPP;
		if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) &&
		     !vhost_vdpa_can_suspend(v))
			return -EOPNOTSUPP;
		if ((features & BIT_ULL(VHOST_BACKEND_F_RESUME)) &&
		     !vhost_vdpa_can_resume(v))
			return -EOPNOTSUPP;
		if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) &&
		    !(features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)))
			return -EINVAL;
		if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) &&
		     !vhost_vdpa_has_desc_group(v))
			return -EOPNOTSUPP;
		if ((features & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST)) &&
		     !vhost_vdpa_has_persistent_map(v))
			return -EOPNOTSUPP;
		vhost_set_backend_features(&v->vdev, features);
		return 0;
	}

	mutex_lock(&d->mutex);

	switch (cmd) {
	case VHOST_VDPA_GET_DEVICE_ID:
		r = vhost_vdpa_get_device_id(v, argp);
		break;
	case VHOST_VDPA_GET_STATUS:
		r = vhost_vdpa_get_status(v, argp);
		break;
	case VHOST_VDPA_SET_STATUS:
		r = vhost_vdpa_set_status(v, argp);
		break;
	case VHOST_VDPA_GET_CONFIG:
		r = vhost_vdpa_get_config(v, argp);
		break;
	case VHOST_VDPA_SET_CONFIG:
		r = vhost_vdpa_set_config(v, argp);
		break;
	case VHOST_GET_FEATURES:
		r = vhost_vdpa_get_features(v, argp);
		break;
	case VHOST_SET_FEATURES:
		r = vhost_vdpa_set_features(v, argp);
		break;
	case VHOST_VDPA_GET_VRING_NUM:
		r = vhost_vdpa_get_vring_num(v, argp);
		break;
	case VHOST_VDPA_GET_GROUP_NUM:
		if (copy_to_user(argp, &v->vdpa->ngroups,
				 sizeof(v->vdpa->ngroups)))
			r = -EFAULT;
		break;
	case VHOST_VDPA_GET_AS_NUM:
		if (copy_to_user(argp, &v->vdpa->nas, sizeof(v->vdpa->nas)))
			r = -EFAULT;
		break;
	case VHOST_SET_LOG_BASE:
	case VHOST_SET_LOG_FD:
		r = -ENOIOCTLCMD;
		break;
	case VHOST_VDPA_SET_CONFIG_CALL:
		r = vhost_vdpa_set_config_call(v, argp);
		break;
	case VHOST_GET_BACKEND_FEATURES:
		features = VHOST_VDPA_BACKEND_FEATURES;
		if (vhost_vdpa_can_suspend(v))
			features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND);
		if (vhost_vdpa_can_resume(v))
			features |= BIT_ULL(VHOST_BACKEND_F_RESUME);
		if (vhost_vdpa_has_desc_group(v))
			features |= BIT_ULL(VHOST_BACKEND_F_DESC_ASID);
		if (vhost_vdpa_has_persistent_map(v))
			features |= BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST);
		features |= vhost_vdpa_get_backend_features(v);
		if (copy_to_user(featurep, &features, sizeof(features)))
			r = -EFAULT;
		break;
	case VHOST_VDPA_GET_IOVA_RANGE:
		r = vhost_vdpa_get_iova_range(v, argp);
		break;
	case VHOST_VDPA_GET_CONFIG_SIZE:
		r = vhost_vdpa_get_config_size(v, argp);
		break;
	case VHOST_VDPA_GET_VQS_COUNT:
		r = vhost_vdpa_get_vqs_count(v, argp);
		break;
	case VHOST_VDPA_SUSPEND:
		r = vhost_vdpa_suspend(v);
		break;
	case VHOST_VDPA_RESUME:
		r = vhost_vdpa_resume(v);
		break;
	default:
		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
		if (r == -ENOIOCTLCMD)
			r = vhost_vdpa_vring_ioctl(v, cmd, argp);
		break;
	}

	if (r)
		goto out;

	switch (cmd) {
	case VHOST_SET_OWNER:
		r = vhost_vdpa_bind_mm(v);
		if (r)
			vhost_dev_reset_owner(d, NULL);
		break;
	}
out:
	mutex_unlock(&d->mutex);
	return r;
}
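
/*
 * Undo a single mapping in the parent device: via its dma_unmap op when
 * it implements dma_map, or through the platform IOMMU domain when it
 * implements neither dma_map nor set_map. Parents using set_map are
 * instead refreshed with the updated IOTLB by the callers.
 */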
static void vhost_vdpa_general_unmap(struct vhost_vdpa *v,
				     struct vhost_iotlb_map *map, u32 asid)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (ops->dma_map) {
		ops->dma_unmap(vdpa, asid, map->start, map->size);
	} else if (ops->set_map == NULL) {
		iommu_unmap(v->domain, map->start, map->size);
	}
}

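/*
 * Unmap a range backed by pinned pages: mark pages dirty where the
 * device could have written, unpin them and drop the pinned_vm
 * accounting.
 */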
static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
				u64 start, u64 last, u32 asid)
{
	struct vhost_dev *dev = &v->vdev;
	struct vhost_iotlb_map *map;
	struct page *page;
	unsigned long pfn, pinned;

	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
		pinned = PFN_DOWN(map->size);
		for (pfn = PFN_DOWN(map->addr);
		     pinned > 0; pfn++, pinned--) {
			page = pfn_to_page(pfn);
			if (map->perm & VHOST_ACCESS_WO)
				set_page_dirty_lock(page);
			unpin_user_page(page);
		}
		atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
		vhost_vdpa_general_unmap(v, map, asid);
		vhost_iotlb_map_free(iotlb, map);
	}
}

static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
				u64 start, u64 last, u32 asid)
{
	struct vhost_iotlb_map *map;
	struct vdpa_map_file *map_file;

	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
		map_file = (struct vdpa_map_file *)map->opaque;
		fput(map_file->file);
		kfree(map_file);
		vhost_vdpa_general_unmap(v, map, asid);
		vhost_iotlb_map_free(iotlb, map);
	}
}

static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
				   struct vhost_iotlb *iotlb, u64 start,
				   u64 last, u32 asid)
{
	struct vdpa_device *vdpa = v->vdpa;

	if (vdpa->use_va)
		return vhost_vdpa_va_unmap(v, iotlb, start, last, asid);

	return vhost_vdpa_pa_unmap(v, iotlb, start, last, asid);
}

static int perm_to_iommu_flags(u32 perm)
{
	int flags = 0;

	switch (perm) {
	case VHOST_ACCESS_WO:
		flags |= IOMMU_WRITE;
		break;
	case VHOST_ACCESS_RO:
		flags |= IOMMU_READ;
		break;
	case VHOST_ACCESS_RW:
		flags |= (IOMMU_WRITE | IOMMU_READ);
		break;
	default:
		WARN(1, "invalid vhost IOTLB permission\n");
		break;
	}

	return flags | IOMMU_CACHE;
}

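/*
 * Record a mapping in the IOTLB and propagate it to the parent via
 * dma_map, set_map (deferred while batching) or the platform IOMMU.
 */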
static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
			  u64 iova, u64 size, u64 pa, u32 perm, void *opaque)
{
	struct vhost_dev *dev = &v->vdev;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 asid = iotlb_to_asid(iotlb);
	int r = 0;

	r = vhost_iotlb_add_range_ctx(iotlb, iova, iova + size - 1,
				      pa, perm, opaque);
	if (r)
		return r;

	if (ops->dma_map) {
		r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
	} else if (ops->set_map) {
		if (!v->in_batch)
			r = ops->set_map(vdpa, asid, iotlb);
	} else {
		r = iommu_map(v->domain, iova, pa, size,
			      perm_to_iommu_flags(perm),
			      GFP_KERNEL_ACCOUNT);
	}
	if (r) {
		vhost_iotlb_del_range(iotlb, iova, iova + size - 1);
		return r;
	}

	if (!vdpa->use_va)
		atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);

	return 0;
}

static void vhost_vdpa_unmap(struct vhost_vdpa *v,
			     struct vhost_iotlb *iotlb,
			     u64 iova, u64 size)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 asid = iotlb_to_asid(iotlb);

	vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1, asid);

	if (ops->set_map) {
		if (!v->in_batch)
			ops->set_map(vdpa, asid, iotlb);
	}
}

static int vhost_vdpa_va_map(struct vhost_vdpa *v,
			     struct vhost_iotlb *iotlb,
			     u64 iova, u64 size, u64 uaddr, u32 perm)
{
	struct vhost_dev *dev = &v->vdev;
	u64 offset, map_size, map_iova = iova;
	struct vdpa_map_file *map_file;
	struct vm_area_struct *vma;
	int ret = 0;

	mmap_read_lock(dev->mm);

	while (size) {
		vma = find_vma(dev->mm, uaddr);
		if (!vma) {
			ret = -EINVAL;
			break;
		}
		map_size = min(size, vma->vm_end - uaddr);
		if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) &&
		      !(vma->vm_flags & (VM_IO | VM_PFNMAP))))
			goto next;

		map_file = kzalloc(sizeof(*map_file), GFP_KERNEL);
		if (!map_file) {
			ret = -ENOMEM;
			break;
		}
		offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start;
		map_file->offset = offset;
		map_file->file = get_file(vma->vm_file);
		ret = vhost_vdpa_map(v, iotlb, map_iova, map_size, uaddr,
				     perm, map_file);
		if (ret) {
			fput(map_file->file);
			kfree(map_file);
			break;
		}
next:
		size -= map_size;
		uaddr += map_size;
		map_iova += map_size;
	}
	if (ret)
		vhost_vdpa_unmap(v, iotlb, iova, map_iova - iova);

	mmap_read_unlock(dev->mm);

	return ret;
}

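/*
 * Pin userspace pages and map them physically contiguous chunk by
 * chunk, bounded by RLIMIT_MEMLOCK; on failure every page pinned so far
 * is unpinned, either here or through vhost_vdpa_unmap().
 */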
static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
			     struct vhost_iotlb *iotlb,
			     u64 iova, u64 size, u64 uaddr, u32 perm)
{
	struct vhost_dev *dev = &v->vdev;
	struct page **page_list;
	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
	unsigned int gup_flags = FOLL_LONGTERM;
	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
	unsigned long lock_limit, sz2pin, nchunks, i;
	u64 start = iova;
	long pinned;
	int ret = 0;

	/* Limit the use of memory for bookkeeping */
	page_list = (struct page **) __get_free_page(GFP_KERNEL);
	if (!page_list)
		return -ENOMEM;

	if (perm & VHOST_ACCESS_WO)
		gup_flags |= FOLL_WRITE;

	npages = PFN_UP(size + (iova & ~PAGE_MASK));
	if (!npages) {
		ret = -EINVAL;
		goto free;
	}

	mmap_read_lock(dev->mm);

	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
		ret = -ENOMEM;
		goto unlock;
	}

	cur_base = uaddr & PAGE_MASK;
	iova &= PAGE_MASK;
	nchunks = 0;

	while (npages) {
		sz2pin = min_t(unsigned long, npages, list_size);
		pinned = pin_user_pages(cur_base, sz2pin,
					gup_flags, page_list);
		if (sz2pin != pinned) {
			if (pinned < 0) {
				ret = pinned;
			} else {
				unpin_user_pages(page_list, pinned);
				ret = -ENOMEM;
			}
			goto out;
		}
		nchunks++;

		if (!last_pfn)
			map_pfn = page_to_pfn(page_list[0]);

		for (i = 0; i < pinned; i++) {
			unsigned long this_pfn = page_to_pfn(page_list[i]);
			u64 csize;

			if (last_pfn && (this_pfn != last_pfn + 1)) {
				/* Map a contiguous chunk of memory */
				csize = PFN_PHYS(last_pfn - map_pfn + 1);
				ret = vhost_vdpa_map(v, iotlb, iova, csize,
						     PFN_PHYS(map_pfn),
						     perm, NULL);
				if (ret) {
					/*
					 * Unpin the pages that are left unmapped
					 * from this point on in the current
					 * page_list. The remaining outstanding
					 * ones which may stride across several
					 * chunks will be covered in the common
					 * error path subsequently.
					 */
					unpin_user_pages(&page_list[i],
							 pinned - i);
					goto out;
				}

				map_pfn = this_pfn;
				iova += csize;
				nchunks = 0;
			}

			last_pfn = this_pfn;
		}

		cur_base += PFN_PHYS(pinned);
		npages -= pinned;
	}

	/* Map the remaining chunk */
	ret = vhost_vdpa_map(v, iotlb, iova, PFN_PHYS(last_pfn - map_pfn + 1),
			     PFN_PHYS(map_pfn), perm, NULL);
out:
	if (ret) {
		if (nchunks) {
			unsigned long pfn;

			/*
			 * Unpin the outstanding pages which are yet to be
			 * mapped but haven't due to vdpa_map() or
			 * pin_user_pages() failure.
			 *
			 * Mapped pages are accounted in vdpa_map(), hence
			 * the corresponding unpinning will be handled by
			 * vdpa_unmap().
			 */
			WARN_ON(!last_pfn);
			for (pfn = map_pfn; pfn <= last_pfn; pfn++)
				unpin_user_page(pfn_to_page(pfn));
		}
		vhost_vdpa_unmap(v, iotlb, start, size);
	}
unlock:
	mmap_read_unlock(dev->mm);
free:
	free_page((unsigned long)page_list);
	return ret;
}

static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
					   struct vhost_iotlb *iotlb,
					   struct vhost_iotlb_msg *msg)
{
	struct vdpa_device *vdpa = v->vdpa;

	if (msg->iova < v->range.first || !msg->size ||
	    msg->iova > U64_MAX - msg->size + 1 ||
	    msg->iova + msg->size - 1 > v->range.last)
		return -EINVAL;

	if (vhost_iotlb_itree_first(iotlb, msg->iova,
				    msg->iova + msg->size - 1))
		return -EEXIST;

	if (vdpa->use_va)
		return vhost_vdpa_va_map(v, iotlb, msg->iova, msg->size,
					 msg->uaddr, msg->perm);

	return vhost_vdpa_pa_map(v, iotlb, msg->iova, msg->size, msg->uaddr,
				 msg->perm);
}

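/*
 * Entry point for userspace IOTLB messages: look up (or create, for
 * updates and batch begins) the address space for @asid, then dispatch
 * the update, invalidate or batching request.
 */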
static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
					struct vhost_iotlb_msg *msg)
{
	struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_iotlb *iotlb = NULL;
	struct vhost_vdpa_as *as = NULL;
	int r = 0;

	mutex_lock(&dev->mutex);

	r = vhost_dev_check_owner(dev);
	if (r)
		goto unlock;

	if (msg->type == VHOST_IOTLB_UPDATE ||
	    msg->type == VHOST_IOTLB_BATCH_BEGIN) {
		as = vhost_vdpa_find_alloc_as(v, asid);
		if (!as) {
			dev_err(&v->dev, "can't find and alloc asid %d\n",
				asid);
			r = -EINVAL;
			goto unlock;
		}
		iotlb = &as->iotlb;
	} else
		iotlb = asid_to_iotlb(v, asid);

	if ((v->in_batch && v->batch_asid != asid) || !iotlb) {
		if (v->in_batch && v->batch_asid != asid) {
			dev_info(&v->dev, "batch id %d asid %d\n",
				 v->batch_asid, asid);
		}
		if (!iotlb)
			dev_err(&v->dev, "no iotlb for asid %d\n", asid);
		r = -EINVAL;
		goto unlock;
	}

	switch (msg->type) {
	case VHOST_IOTLB_UPDATE:
		r = vhost_vdpa_process_iotlb_update(v, iotlb, msg);
		break;
	case VHOST_IOTLB_INVALIDATE:
		vhost_vdpa_unmap(v, iotlb, msg->iova, msg->size);
		break;
	case VHOST_IOTLB_BATCH_BEGIN:
		v->batch_asid = asid;
		v->in_batch = true;
		break;
	case VHOST_IOTLB_BATCH_END:
		if (v->in_batch && ops->set_map)
			ops->set_map(vdpa, asid, iotlb);
		v->in_batch = false;
		break;
	default:
		r = -EINVAL;
		break;
	}
unlock:
	mutex_unlock(&dev->mutex);

	return r;
}

static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
					 struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct vhost_vdpa *v = file->private_data;
	struct vhost_dev *dev = &v->vdev;

	return vhost_chr_write_iter(dev, from);
}

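/*
 * When the parent relies on the platform IOMMU (it provides neither
 * set_map nor dma_map), allocate an IOMMU domain and attach the DMA
 * device to it so vhost can manage the mappings itself.
 */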
static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
	const struct bus_type *bus;
	int ret;

	/* Device wants to do DMA by itself */
	if (ops->set_map || ops->dma_map)
		return 0;

	bus = dma_dev->bus;
	if (!bus)
		return -EFAULT;

	if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY)) {
		dev_warn_once(&v->dev,
			      "Failed to allocate domain, device is not IOMMU cache coherent capable\n");
		return -ENOTSUPP;
	}

	v->domain = iommu_domain_alloc(bus);
	if (!v->domain)
		return -EIO;

	ret = iommu_attach_device(v->domain, dma_dev);
	if (ret)
		goto err_attach;

	return 0;

err_attach:
	iommu_domain_free(v->domain);
	v->domain = NULL;
	return ret;
}

static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);

	if (v->domain) {
		iommu_detach_device(v->domain, dma_dev);
		iommu_domain_free(v->domain);
	}

	v->domain = NULL;
}

static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
{
	struct vdpa_iova_range *range = &v->range;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (ops->get_iova_range) {
		*range = ops->get_iova_range(vdpa);
	} else if (v->domain && v->domain->geometry.force_aperture) {
		range->first = v->domain->geometry.aperture_start;
		range->last = v->domain->geometry.aperture_end;
	} else {
		range->first = 0;
		range->last = ULLONG_MAX;
	}
}

static void vhost_vdpa_cleanup(struct vhost_vdpa *v)
{
	struct vhost_vdpa_as *as;
	u32 asid;

	for (asid = 0; asid < v->vdpa->nas; asid++) {
		as = asid_to_as(v, asid);
		if (as)
			vhost_vdpa_remove_as(v, asid);
	}

	vhost_vdpa_free_domain(v);
	vhost_dev_cleanup(&v->vdev);
	kfree(v->vdev.vqs);
	v->vdev.vqs = NULL;
}

static int vhost_vdpa_open(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v;
	struct vhost_dev *dev;
	struct vhost_virtqueue **vqs;
	int r, opened;
	u32 i, nvqs;

	v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);

	opened = atomic_cmpxchg(&v->opened, 0, 1);
	if (opened)
		return -EBUSY;

	nvqs = v->nvqs;
	r = vhost_vdpa_reset(v);
	if (r)
		goto err;

	vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
	if (!vqs) {
		r = -ENOMEM;
		goto err;
	}

	dev = &v->vdev;
	for (i = 0; i < nvqs; i++) {
		vqs[i] = &v->vqs[i];
		vqs[i]->handle_kick = handle_vq_kick;
	}
	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
		       vhost_vdpa_process_iotlb_msg);

	r = vhost_vdpa_alloc_domain(v);
	if (r)
		goto err_alloc_domain;

	vhost_vdpa_set_iova_range(v);

	filep->private_data = v;

	return 0;

err_alloc_domain:
	vhost_vdpa_cleanup(v);
err:
	atomic_dec(&v->opened);
	return r;
}

static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
{
	u32 i;

	for (i = 0; i < v->nvqs; i++)
		vhost_vdpa_unsetup_vq_irq(v, i);
}

static int vhost_vdpa_release(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;

	mutex_lock(&d->mutex);
	filep->private_data = NULL;
	vhost_vdpa_clean_irq(v);
	vhost_vdpa_reset(v);
	vhost_dev_stop(&v->vdev);
	vhost_vdpa_unbind_mm(v);
	vhost_vdpa_config_put(v);
	vhost_vdpa_cleanup(v);
	mutex_unlock(&d->mutex);

	atomic_dec(&v->opened);
	complete(&v->completion);

	return 0;
}

#ifdef CONFIG_MMU
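/*
 * Lazily map the doorbell page of a virtqueue on first access; the vq
 * index comes from the mmap offset (vm_pgoff).
 */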
static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
{
	struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	struct vm_area_struct *vma = vmf->vma;
	u16 index = vma->vm_pgoff;

	notify = ops->get_vq_notification(vdpa, index);

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
			    PFN_DOWN(notify.addr), PAGE_SIZE,
			    vma->vm_page_prot))
		return VM_FAULT_SIGBUS;

	return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct vhost_vdpa_vm_ops = {
	.fault = vhost_vdpa_fault,
};

static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct vhost_vdpa *v = vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	unsigned long index = vma->vm_pgoff;

	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;
	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;
	if (vma->vm_flags & VM_READ)
		return -EINVAL;
	if (index > 65535)
		return -EINVAL;
	if (!ops->get_vq_notification)
		return -ENOTSUPP;

	/* To be safe and easily modelled by userspace, we only
	 * support the doorbell which sits on the page boundary and
	 * does not share the page with other registers.
	 */
	notify = ops->get_vq_notification(vdpa, index);
	if (notify.addr & (PAGE_SIZE - 1))
		return -EINVAL;
	if (vma->vm_end - vma->vm_start != notify.size)
		return -ENOTSUPP;

	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
	vma->vm_ops = &vhost_vdpa_vm_ops;
	return 0;
}
#endif /* CONFIG_MMU */

static const struct file_operations vhost_vdpa_fops = {
	.owner		= THIS_MODULE,
	.open		= vhost_vdpa_open,
	.release	= vhost_vdpa_release,
	.write_iter	= vhost_vdpa_chr_write_iter,
	.unlocked_ioctl	= vhost_vdpa_unlocked_ioctl,
#ifdef CONFIG_MMU
	.mmap		= vhost_vdpa_mmap,
#endif /* CONFIG_MMU */
	.compat_ioctl	= compat_ptr_ioctl,
};

static void vhost_vdpa_release_dev(struct device *device)
{
	struct vhost_vdpa *v =
	       container_of(device, struct vhost_vdpa, dev);

	ida_simple_remove(&vhost_vdpa_ida, v->minor);
	kfree(v->vqs);
	kfree(v);
}

static int vhost_vdpa_probe(struct vdpa_device *vdpa)
{
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_vdpa *v;
	int minor;
	int i, r;

	/*
	 * We can't support a platform IOMMU device with more than one
	 * group or address space.
	 */
	if (!ops->set_map && !ops->dma_map &&
	    (vdpa->ngroups > 1 || vdpa->nas > 1))
		return -EOPNOTSUPP;

	v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	if (!v)
		return -ENOMEM;

	minor = ida_simple_get(&vhost_vdpa_ida, 0,
			       VHOST_VDPA_DEV_MAX, GFP_KERNEL);
	if (minor < 0) {
		kfree(v);
		return minor;
	}

	atomic_set(&v->opened, 0);
	v->minor = minor;
	v->vdpa = vdpa;
	v->nvqs = vdpa->nvqs;
	v->virtio_id = ops->get_device_id(vdpa);

	device_initialize(&v->dev);
	v->dev.release = vhost_vdpa_release_dev;
	v->dev.parent = &vdpa->dev;
	v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
	v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
			       GFP_KERNEL);
	if (!v->vqs) {
		r = -ENOMEM;
		goto err;
	}

	r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
	if (r)
		goto err;

	cdev_init(&v->cdev, &vhost_vdpa_fops);
	v->cdev.owner = THIS_MODULE;

	r = cdev_device_add(&v->cdev, &v->dev);
	if (r)
		goto err;

	init_completion(&v->completion);
	vdpa_set_drvdata(vdpa, v);

	for (i = 0; i < VHOST_VDPA_IOTLB_BUCKETS; i++)
		INIT_HLIST_HEAD(&v->as[i]);

	return 0;

err:
	put_device(&v->dev);
	return r;
}

static void vhost_vdpa_remove(struct vdpa_device *vdpa)
{
	struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
	int opened;

	cdev_device_del(&v->cdev, &v->dev);

	do {
		opened = atomic_cmpxchg(&v->opened, 0, 1);
		if (!opened)
			break;
		wait_for_completion(&v->completion);
	} while (1);

	put_device(&v->dev);
}

static struct vdpa_driver vhost_vdpa_driver = {
	.driver = {
		.name	= "vhost_vdpa",
	},
	.probe	= vhost_vdpa_probe,
	.remove	= vhost_vdpa_remove,
};

static int __init vhost_vdpa_init(void)
{
	int r;

	r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
				"vhost-vdpa");
	if (r)
		goto err_alloc_chrdev;

	r = vdpa_register_driver(&vhost_vdpa_driver);
	if (r)
		goto err_vdpa_register_driver;

	return 0;

err_vdpa_register_driver:
	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
err_alloc_chrdev:
	return r;
}
module_init(vhost_vdpa_init);

static void __exit vhost_vdpa_exit(void)
{
	vdpa_unregister_driver(&vhost_vdpa_driver);
	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
}
module_exit(vhost_vdpa_exit);

MODULE_VERSION("0.0.1");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");