1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * virtio-fs: Virtio Filesystem |
4 | * Copyright (C) 2018 Red Hat, Inc. |
5 | */ |
6 | |
7 | #include <linux/fs.h> |
8 | #include <linux/dax.h> |
9 | #include <linux/pci.h> |
10 | #include <linux/pfn_t.h> |
11 | #include <linux/memremap.h> |
12 | #include <linux/module.h> |
13 | #include <linux/virtio.h> |
14 | #include <linux/virtio_fs.h> |
15 | #include <linux/delay.h> |
16 | #include <linux/fs_context.h> |
17 | #include <linux/fs_parser.h> |
18 | #include <linux/highmem.h> |
19 | #include <linux/cleanup.h> |
20 | #include <linux/uio.h> |
21 | #include "fuse_i.h" |
22 | |
/* Used to help calculate the FUSE connection's max_pages limit for a request's
 * size. Parts of the struct fuse_req are sliced into scatter-gather lists in
 * addition to the pages used, so this can help account for that overhead.
 */
#define FUSE_HEADER_OVERHEAD    4
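/* Concretely, sg_count_fuse_req() counts one scatter-gather element each for
 * the fuse_in_header, the packed in-args, the fuse_out_header and the packed
 * out-args, on top of any data pages.
 */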
28 | |
29 | /* List of virtio-fs device instances and a lock for the list. Also provides |
30 | * mutual exclusion in device removal and mounting path |
31 | */ |
32 | static DEFINE_MUTEX(virtio_fs_mutex); |
33 | static LIST_HEAD(virtio_fs_instances); |
34 | |
35 | /* The /sys/fs/virtio_fs/ kset */ |
36 | static struct kset *virtio_fs_kset; |
37 | |
38 | enum { |
39 | VQ_HIPRIO, |
40 | VQ_REQUEST |
41 | }; |
42 | |
43 | #define VQ_NAME_LEN 24 |
44 | |
45 | /* Per-virtqueue state */ |
46 | struct virtio_fs_vq { |
47 | spinlock_t lock; |
48 | struct virtqueue *vq; /* protected by ->lock */ |
49 | struct work_struct done_work; |
50 | struct list_head queued_reqs; |
51 | struct list_head end_reqs; /* End these requests */ |
52 | struct delayed_work dispatch_work; |
53 | struct fuse_dev *fud; |
54 | bool connected; |
55 | long in_flight; |
56 | struct completion in_flight_zero; /* No inflight requests */ |
57 | char name[VQ_NAME_LEN]; |
58 | } ____cacheline_aligned_in_smp; |
59 | |
60 | /* A virtio-fs device instance */ |
61 | struct virtio_fs { |
62 | struct kobject kobj; |
63 | struct list_head list; /* on virtio_fs_instances */ |
64 | char *tag; |
65 | struct virtio_fs_vq *vqs; |
66 | unsigned int nvqs; /* number of virtqueues */ |
67 | unsigned int num_request_queues; /* number of request queues */ |
68 | struct dax_device *dax_dev; |
69 | |
70 | /* DAX memory window where file contents are mapped */ |
71 | void *window_kaddr; |
72 | phys_addr_t window_phys_addr; |
73 | size_t window_len; |
74 | }; |
75 | |
76 | struct virtio_fs_forget_req { |
77 | struct fuse_in_header ih; |
78 | struct fuse_forget_in arg; |
79 | }; |
80 | |
81 | struct virtio_fs_forget { |
	/* This request can be temporarily queued on the virtqueue */
83 | struct list_head list; |
84 | struct virtio_fs_forget_req req; |
85 | }; |
86 | |
87 | struct virtio_fs_req_work { |
88 | struct fuse_req *req; |
89 | struct virtio_fs_vq *fsvq; |
90 | struct work_struct done_work; |
91 | }; |
92 | |
93 | static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, |
94 | struct fuse_req *req, bool in_flight); |
95 | |
static const struct constant_table dax_param_enums[] = {
	{"always",	FUSE_DAX_ALWAYS },
	{"never",	FUSE_DAX_NEVER },
	{"inode",	FUSE_DAX_INODE_USER },
100 | {} |
101 | }; |
102 | |
103 | enum { |
104 | OPT_DAX, |
105 | OPT_DAX_ENUM, |
106 | }; |
107 | |
108 | static const struct fs_parameter_spec virtio_fs_parameters[] = { |
	fsparam_flag("dax", OPT_DAX),
	fsparam_enum("dax", OPT_DAX_ENUM, dax_param_enums),
111 | {} |
112 | }; |
113 | |
114 | static int virtio_fs_parse_param(struct fs_context *fsc, |
115 | struct fs_parameter *param) |
116 | { |
117 | struct fs_parse_result result; |
118 | struct fuse_fs_context *ctx = fsc->fs_private; |
119 | int opt; |
120 | |
	opt = fs_parse(fsc, virtio_fs_parameters, param, &result);
122 | if (opt < 0) |
123 | return opt; |
124 | |
125 | switch (opt) { |
126 | case OPT_DAX: |
127 | ctx->dax_mode = FUSE_DAX_ALWAYS; |
128 | break; |
129 | case OPT_DAX_ENUM: |
130 | ctx->dax_mode = result.uint_32; |
131 | break; |
132 | default: |
133 | return -EINVAL; |
134 | } |
135 | |
136 | return 0; |
137 | } |
138 | |
139 | static void virtio_fs_free_fsc(struct fs_context *fsc) |
140 | { |
141 | struct fuse_fs_context *ctx = fsc->fs_private; |
142 | |
	kfree(ctx);
144 | } |
145 | |
146 | static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq) |
147 | { |
148 | struct virtio_fs *fs = vq->vdev->priv; |
149 | |
150 | return &fs->vqs[vq->index]; |
151 | } |
152 | |
153 | /* Should be called with fsvq->lock held. */ |
154 | static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq) |
155 | { |
156 | fsvq->in_flight++; |
157 | } |
158 | |
159 | /* Should be called with fsvq->lock held. */ |
160 | static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq) |
161 | { |
162 | WARN_ON(fsvq->in_flight <= 0); |
163 | fsvq->in_flight--; |
164 | if (!fsvq->in_flight) |
165 | complete(&fsvq->in_flight_zero); |
166 | } |
167 | |
168 | static ssize_t tag_show(struct kobject *kobj, |
169 | struct kobj_attribute *attr, char *buf) |
170 | { |
171 | struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj); |
172 | |
	return sysfs_emit(buf, "%s\n", fs->tag);
174 | } |
175 | |
176 | static struct kobj_attribute virtio_fs_tag_attr = __ATTR_RO(tag); |
177 | |
178 | static struct attribute *virtio_fs_attrs[] = { |
179 | &virtio_fs_tag_attr.attr, |
180 | NULL |
181 | }; |
182 | ATTRIBUTE_GROUPS(virtio_fs); |
183 | |
184 | static void virtio_fs_ktype_release(struct kobject *kobj) |
185 | { |
186 | struct virtio_fs *vfs = container_of(kobj, struct virtio_fs, kobj); |
187 | |
	kfree(vfs->vqs);
	kfree(vfs);
190 | } |
191 | |
192 | static const struct kobj_type virtio_fs_ktype = { |
193 | .release = virtio_fs_ktype_release, |
194 | .sysfs_ops = &kobj_sysfs_ops, |
195 | .default_groups = virtio_fs_groups, |
196 | }; |
197 | |
/* Make sure virtio_fs_mutex is held */
static void virtio_fs_put(struct virtio_fs *fs)
{
	kobject_put(&fs->kobj);
202 | } |
203 | |
204 | static void virtio_fs_fiq_release(struct fuse_iqueue *fiq) |
205 | { |
206 | struct virtio_fs *vfs = fiq->priv; |
207 | |
208 | mutex_lock(&virtio_fs_mutex); |
	virtio_fs_put(vfs);
	mutex_unlock(&virtio_fs_mutex);
211 | } |
212 | |
213 | static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq) |
214 | { |
215 | WARN_ON(fsvq->in_flight < 0); |
216 | |
	/* Wait for in flight requests to finish. */
	spin_lock(&fsvq->lock);
	if (fsvq->in_flight) {
		/* We are holding virtio_fs_mutex. There should not be any
		 * waiters waiting for completion.
		 */
		reinit_completion(&fsvq->in_flight_zero);
		spin_unlock(&fsvq->lock);
		wait_for_completion(&fsvq->in_flight_zero);
	} else {
		spin_unlock(&fsvq->lock);
	}

	flush_work(&fsvq->done_work);
	flush_delayed_work(&fsvq->dispatch_work);
232 | } |
233 | |
234 | static void virtio_fs_drain_all_queues_locked(struct virtio_fs *fs) |
235 | { |
236 | struct virtio_fs_vq *fsvq; |
237 | int i; |
238 | |
239 | for (i = 0; i < fs->nvqs; i++) { |
240 | fsvq = &fs->vqs[i]; |
241 | virtio_fs_drain_queue(fsvq); |
242 | } |
243 | } |
244 | |
245 | static void virtio_fs_drain_all_queues(struct virtio_fs *fs) |
246 | { |
247 | /* Provides mutual exclusion between ->remove and ->kill_sb |
248 | * paths. We don't want both of these draining queue at the |
249 | * same time. Current completion logic reinits completion |
250 | * and that means there should not be any other thread |
251 | * doing reinit or waiting for completion already. |
252 | */ |
253 | mutex_lock(&virtio_fs_mutex); |
254 | virtio_fs_drain_all_queues_locked(fs); |
	mutex_unlock(&virtio_fs_mutex);
256 | } |
257 | |
258 | static void virtio_fs_start_all_queues(struct virtio_fs *fs) |
259 | { |
260 | struct virtio_fs_vq *fsvq; |
261 | int i; |
262 | |
263 | for (i = 0; i < fs->nvqs; i++) { |
264 | fsvq = &fs->vqs[i]; |
		spin_lock(&fsvq->lock);
		fsvq->connected = true;
		spin_unlock(&fsvq->lock);
268 | } |
269 | } |
270 | |
/* Add a new instance to the list or return -EEXIST if tag name exists */
272 | static int virtio_fs_add_instance(struct virtio_device *vdev, |
273 | struct virtio_fs *fs) |
274 | { |
275 | struct virtio_fs *fs2; |
276 | int ret; |
277 | |
278 | mutex_lock(&virtio_fs_mutex); |
279 | |
280 | list_for_each_entry(fs2, &virtio_fs_instances, list) { |
281 | if (strcmp(fs->tag, fs2->tag) == 0) { |
			mutex_unlock(&virtio_fs_mutex);
			return -EEXIST;
		}
	}

	/* Use the virtio_device's index as a unique identifier, there is no
	 * need to allocate our own identifiers because the virtio_fs instance
	 * is only visible to userspace as long as the underlying virtio_device
	 * exists.
	 */
	fs->kobj.kset = virtio_fs_kset;
	ret = kobject_add(&fs->kobj, NULL, "%d", vdev->index);
	if (ret < 0) {
		mutex_unlock(&virtio_fs_mutex);
		return ret;
	}

	ret = sysfs_create_link(&fs->kobj, &vdev->dev.kobj, "device");
	if (ret < 0) {
		kobject_del(&fs->kobj);
		mutex_unlock(&virtio_fs_mutex);
		return ret;
	}

	list_add_tail(&fs->list, &virtio_fs_instances);

	mutex_unlock(&virtio_fs_mutex);

	kobject_uevent(&fs->kobj, KOBJ_ADD);
311 | |
312 | return 0; |
313 | } |
314 | |
315 | /* Return the virtio_fs with a given tag, or NULL */ |
316 | static struct virtio_fs *virtio_fs_find_instance(const char *tag) |
317 | { |
318 | struct virtio_fs *fs; |
319 | |
320 | mutex_lock(&virtio_fs_mutex); |
321 | |
322 | list_for_each_entry(fs, &virtio_fs_instances, list) { |
323 | if (strcmp(fs->tag, tag) == 0) { |
			kobject_get(&fs->kobj);
325 | goto found; |
326 | } |
327 | } |
328 | |
329 | fs = NULL; /* not found */ |
330 | |
331 | found: |
	mutex_unlock(&virtio_fs_mutex);
333 | |
334 | return fs; |
335 | } |
336 | |
337 | static void virtio_fs_free_devs(struct virtio_fs *fs) |
338 | { |
339 | unsigned int i; |
340 | |
341 | for (i = 0; i < fs->nvqs; i++) { |
342 | struct virtio_fs_vq *fsvq = &fs->vqs[i]; |
343 | |
344 | if (!fsvq->fud) |
345 | continue; |
346 | |
		fuse_dev_free(fsvq->fud);
348 | fsvq->fud = NULL; |
349 | } |
350 | } |
351 | |
/* Read filesystem name from virtio config into fs->tag (devm-allocated). */
353 | static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs) |
354 | { |
355 | char tag_buf[sizeof_field(struct virtio_fs_config, tag)]; |
356 | char *end; |
357 | size_t len; |
358 | |
	virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag),
			   &tag_buf, sizeof(tag_buf));
	end = memchr(tag_buf, '\0', sizeof(tag_buf));
362 | if (end == tag_buf) |
363 | return -EINVAL; /* empty tag */ |
364 | if (!end) |
365 | end = &tag_buf[sizeof(tag_buf)]; |
366 | |
367 | len = end - tag_buf; |
	fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL);
369 | if (!fs->tag) |
370 | return -ENOMEM; |
371 | memcpy(fs->tag, tag_buf, len); |
372 | fs->tag[len] = '\0'; |
373 | |
374 | /* While the VIRTIO specification allows any character, newlines are |
375 | * awkward on mount(8) command-lines and cause problems in the sysfs |
376 | * "tag" attr and uevent TAG= properties. Forbid them. |
377 | */ |
378 | if (strchr(fs->tag, '\n')) { |
		dev_dbg(&vdev->dev, "refusing virtiofs tag with newline character\n");
380 | return -EINVAL; |
381 | } |
382 | |
383 | return 0; |
384 | } |
385 | |
386 | /* Work function for hiprio completion */ |
387 | static void virtio_fs_hiprio_done_work(struct work_struct *work) |
388 | { |
389 | struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, |
390 | done_work); |
391 | struct virtqueue *vq = fsvq->vq; |
392 | |
393 | /* Free completed FUSE_FORGET requests */ |
	spin_lock(&fsvq->lock);
	do {
		unsigned int len;
		void *req;

		virtqueue_disable_cb(vq);

		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
			kfree(req);
			dec_in_flight_req(fsvq);
		}
	} while (!virtqueue_enable_cb(vq));
	spin_unlock(&fsvq->lock);
407 | } |
408 | |
409 | static void virtio_fs_request_dispatch_work(struct work_struct *work) |
410 | { |
411 | struct fuse_req *req; |
412 | struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, |
413 | dispatch_work.work); |
414 | int ret; |
415 | |
	pr_debug("virtio-fs: worker %s called.\n", __func__);
	while (1) {
		spin_lock(&fsvq->lock);
		req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req,
					       list);
		if (!req) {
			spin_unlock(&fsvq->lock);
			break;
		}

		list_del_init(&req->list);
		spin_unlock(&fsvq->lock);
428 | fuse_request_end(req); |
429 | } |
430 | |
431 | /* Dispatch pending requests */ |
432 | while (1) { |
		spin_lock(&fsvq->lock);
		req = list_first_entry_or_null(&fsvq->queued_reqs,
					       struct fuse_req, list);
		if (!req) {
			spin_unlock(&fsvq->lock);
			return;
		}
		list_del_init(&req->list);
		spin_unlock(&fsvq->lock);

		ret = virtio_fs_enqueue_req(fsvq, req, true);
		if (ret < 0) {
			if (ret == -ENOMEM || ret == -ENOSPC) {
				spin_lock(&fsvq->lock);
				list_add_tail(&req->list, &fsvq->queued_reqs);
				schedule_delayed_work(&fsvq->dispatch_work,
						      msecs_to_jiffies(1));
				spin_unlock(&fsvq->lock);
				return;
			}
			req->out.h.error = ret;
			spin_lock(&fsvq->lock);
			dec_in_flight_req(fsvq);
			spin_unlock(&fsvq->lock);
			pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n",
			       ret);
459 | fuse_request_end(req); |
460 | } |
461 | } |
462 | } |
463 | |
464 | /* |
465 | * Returns 1 if queue is full and sender should wait a bit before sending |
466 | * next request, 0 otherwise. |
467 | */ |
468 | static int send_forget_request(struct virtio_fs_vq *fsvq, |
469 | struct virtio_fs_forget *forget, |
470 | bool in_flight) |
471 | { |
472 | struct scatterlist sg; |
473 | struct virtqueue *vq; |
474 | int ret = 0; |
475 | bool notify; |
476 | struct virtio_fs_forget_req *req = &forget->req; |
477 | |
	spin_lock(&fsvq->lock);
	if (!fsvq->connected) {
		if (in_flight)
			dec_in_flight_req(fsvq);
		kfree(forget);
483 | goto out; |
484 | } |
485 | |
486 | sg_init_one(&sg, req, sizeof(*req)); |
487 | vq = fsvq->vq; |
	dev_dbg(&vq->vdev->dev, "%s\n", __func__);

	ret = virtqueue_add_outbuf(vq, &sg, 1, forget, GFP_ATOMIC);
	if (ret < 0) {
		if (ret == -ENOMEM || ret == -ENOSPC) {
			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n",
				 ret);
			list_add_tail(&forget->list, &fsvq->queued_reqs);
			schedule_delayed_work(&fsvq->dispatch_work,
					      msecs_to_jiffies(1));
			if (!in_flight)
				inc_in_flight_req(fsvq);
			/* Queue is full */
			ret = 1;
		} else {
			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
				 ret);
			kfree(forget);
			if (in_flight)
				dec_in_flight_req(fsvq);
		}
		goto out;
	}

	if (!in_flight)
		inc_in_flight_req(fsvq);
	notify = virtqueue_kick_prepare(vq);
	spin_unlock(&fsvq->lock);

	if (notify)
		virtqueue_notify(vq);
	return ret;
out:
	spin_unlock(&fsvq->lock);
522 | return ret; |
523 | } |
524 | |
525 | static void virtio_fs_hiprio_dispatch_work(struct work_struct *work) |
526 | { |
527 | struct virtio_fs_forget *forget; |
528 | struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, |
529 | dispatch_work.work); |
	pr_debug("virtio-fs: worker %s called.\n", __func__);
	while (1) {
		spin_lock(&fsvq->lock);
		forget = list_first_entry_or_null(&fsvq->queued_reqs,
						  struct virtio_fs_forget, list);
		if (!forget) {
			spin_unlock(&fsvq->lock);
			return;
		}

		list_del(&forget->list);
		spin_unlock(&fsvq->lock);
		if (send_forget_request(fsvq, forget, true))
543 | return; |
544 | } |
545 | } |
546 | |
547 | /* Allocate and copy args into req->argbuf */ |
548 | static int copy_args_to_argbuf(struct fuse_req *req) |
549 | { |
550 | struct fuse_args *args = req->args; |
551 | unsigned int offset = 0; |
552 | unsigned int num_in; |
553 | unsigned int num_out; |
554 | unsigned int len; |
555 | unsigned int i; |
556 | |
557 | num_in = args->in_numargs - args->in_pages; |
558 | num_out = args->out_numargs - args->out_pages; |
	len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
	      fuse_len_args(num_out, args->out_args);

	req->argbuf = kmalloc(len, GFP_ATOMIC);
563 | if (!req->argbuf) |
564 | return -ENOMEM; |
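	/* Only the in-args are serialized into the buffer here; the tail is
	 * left for the device to fill in the out-args, which
	 * copy_args_from_argbuf() copies back out after completion.
	 */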
565 | |
566 | for (i = 0; i < num_in; i++) { |
567 | memcpy(req->argbuf + offset, |
568 | args->in_args[i].value, |
569 | args->in_args[i].size); |
570 | offset += args->in_args[i].size; |
571 | } |
572 | |
573 | return 0; |
574 | } |
575 | |
576 | /* Copy args out of and free req->argbuf */ |
577 | static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req) |
578 | { |
579 | unsigned int remaining; |
580 | unsigned int offset; |
581 | unsigned int num_in; |
582 | unsigned int num_out; |
583 | unsigned int i; |
584 | |
585 | remaining = req->out.h.len - sizeof(req->out.h); |
586 | num_in = args->in_numargs - args->in_pages; |
587 | num_out = args->out_numargs - args->out_pages; |
	offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args);
589 | |
590 | for (i = 0; i < num_out; i++) { |
591 | unsigned int argsize = args->out_args[i].size; |
592 | |
593 | if (args->out_argvar && |
594 | i == args->out_numargs - 1 && |
595 | argsize > remaining) { |
596 | argsize = remaining; |
597 | } |
598 | |
599 | memcpy(args->out_args[i].value, req->argbuf + offset, argsize); |
600 | offset += argsize; |
601 | |
602 | if (i != args->out_numargs - 1) |
603 | remaining -= argsize; |
604 | } |
605 | |
606 | /* Store the actual size of the variable-length arg */ |
607 | if (args->out_argvar) |
608 | args->out_args[args->out_numargs - 1].size = remaining; |
609 | |
	kfree(req->argbuf);
611 | req->argbuf = NULL; |
612 | } |
613 | |
614 | /* Work function for request completion */ |
615 | static void virtio_fs_request_complete(struct fuse_req *req, |
616 | struct virtio_fs_vq *fsvq) |
617 | { |
618 | struct fuse_pqueue *fpq = &fsvq->fud->pq; |
619 | struct fuse_args *args; |
620 | struct fuse_args_pages *ap; |
621 | unsigned int len, i, thislen; |
622 | struct page *page; |
623 | |
624 | /* |
625 | * TODO verify that server properly follows FUSE protocol |
626 | * (oh.uniq, oh.len) |
627 | */ |
628 | args = req->args; |
629 | copy_args_from_argbuf(args, req); |
630 | |
631 | if (args->out_pages && args->page_zeroing) { |
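		/* Zero the tail of the pages past the length actually
		 * returned by the server (short read).
		 */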
632 | len = args->out_args[args->out_numargs - 1].size; |
633 | ap = container_of(args, typeof(*ap), args); |
634 | for (i = 0; i < ap->num_pages; i++) { |
635 | thislen = ap->descs[i].length; |
636 | if (len < thislen) { |
637 | WARN_ON(ap->descs[i].offset); |
638 | page = ap->pages[i]; |
				zero_user_segment(page, len, thislen);
640 | len = 0; |
641 | } else { |
642 | len -= thislen; |
643 | } |
644 | } |
645 | } |
646 | |
	spin_lock(&fpq->lock);
	clear_bit(FR_SENT, &req->flags);
	spin_unlock(&fpq->lock);
650 | |
651 | fuse_request_end(req); |
	spin_lock(&fsvq->lock);
	dec_in_flight_req(fsvq);
	spin_unlock(&fsvq->lock);
655 | } |
656 | |
657 | static void virtio_fs_complete_req_work(struct work_struct *work) |
658 | { |
659 | struct virtio_fs_req_work *w = |
660 | container_of(work, typeof(*w), done_work); |
661 | |
	virtio_fs_request_complete(w->req, w->fsvq);
	kfree(w);
664 | } |
665 | |
666 | static void virtio_fs_requests_done_work(struct work_struct *work) |
667 | { |
668 | struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, |
669 | done_work); |
670 | struct fuse_pqueue *fpq = &fsvq->fud->pq; |
671 | struct virtqueue *vq = fsvq->vq; |
672 | struct fuse_req *req; |
673 | struct fuse_req *next; |
674 | unsigned int len; |
675 | LIST_HEAD(reqs); |
676 | |
677 | /* Collect completed requests off the virtqueue */ |
	spin_lock(&fsvq->lock);
	do {
		virtqueue_disable_cb(vq);

		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
			spin_lock(&fpq->lock);
			list_move_tail(&req->list, &reqs);
			spin_unlock(&fpq->lock);
		}
	} while (!virtqueue_enable_cb(vq));
	spin_unlock(&fsvq->lock);
689 | |
690 | /* End requests */ |
691 | list_for_each_entry_safe(req, next, &reqs, list) { |
		list_del_init(&req->list);

		/* blocking async request completes in a worker context */
		if (req->args->may_block) {
			struct virtio_fs_req_work *w;

			w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL);
			INIT_WORK(&w->done_work, virtio_fs_complete_req_work);
			w->fsvq = fsvq;
			w->req = req;
			schedule_work(&w->done_work);
		} else {
			virtio_fs_request_complete(req, fsvq);
705 | } |
706 | } |
707 | } |
708 | |
709 | /* Virtqueue interrupt handler */ |
710 | static void virtio_fs_vq_done(struct virtqueue *vq) |
711 | { |
712 | struct virtio_fs_vq *fsvq = vq_to_fsvq(vq); |
713 | |
	dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name);

	schedule_work(&fsvq->done_work);
717 | } |
718 | |
719 | static void virtio_fs_init_vq(struct virtio_fs_vq *fsvq, char *name, |
720 | int vq_type) |
721 | { |
722 | strscpy(fsvq->name, name, VQ_NAME_LEN); |
723 | spin_lock_init(&fsvq->lock); |
	INIT_LIST_HEAD(&fsvq->queued_reqs);
	INIT_LIST_HEAD(&fsvq->end_reqs);
	init_completion(&fsvq->in_flight_zero);
727 | |
728 | if (vq_type == VQ_REQUEST) { |
729 | INIT_WORK(&fsvq->done_work, virtio_fs_requests_done_work); |
730 | INIT_DELAYED_WORK(&fsvq->dispatch_work, |
731 | virtio_fs_request_dispatch_work); |
732 | } else { |
733 | INIT_WORK(&fsvq->done_work, virtio_fs_hiprio_done_work); |
734 | INIT_DELAYED_WORK(&fsvq->dispatch_work, |
735 | virtio_fs_hiprio_dispatch_work); |
736 | } |
737 | } |
738 | |
739 | /* Initialize virtqueues */ |
740 | static int virtio_fs_setup_vqs(struct virtio_device *vdev, |
741 | struct virtio_fs *fs) |
742 | { |
743 | struct virtqueue **vqs; |
744 | vq_callback_t **callbacks; |
745 | const char **names; |
746 | unsigned int i; |
747 | int ret = 0; |
748 | |
749 | virtio_cread_le(vdev, struct virtio_fs_config, num_request_queues, |
750 | &fs->num_request_queues); |
751 | if (fs->num_request_queues == 0) |
752 | return -EINVAL; |
753 | |
754 | fs->nvqs = VQ_REQUEST + fs->num_request_queues; |
	fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
756 | if (!fs->vqs) |
757 | return -ENOMEM; |
758 | |
	vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL);
	callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]),
				  GFP_KERNEL);
	names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL);
763 | if (!vqs || !callbacks || !names) { |
764 | ret = -ENOMEM; |
765 | goto out; |
766 | } |
767 | |
768 | /* Initialize the hiprio/forget request virtqueue */ |
769 | callbacks[VQ_HIPRIO] = virtio_fs_vq_done; |
	virtio_fs_init_vq(&fs->vqs[VQ_HIPRIO], "hiprio", VQ_HIPRIO);
771 | names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name; |
772 | |
773 | /* Initialize the requests virtqueues */ |
774 | for (i = VQ_REQUEST; i < fs->nvqs; i++) { |
775 | char vq_name[VQ_NAME_LEN]; |
776 | |
		snprintf(vq_name, VQ_NAME_LEN, "requests.%u", i - VQ_REQUEST);
		virtio_fs_init_vq(&fs->vqs[i], vq_name, VQ_REQUEST);
779 | callbacks[i] = virtio_fs_vq_done; |
780 | names[i] = fs->vqs[i].name; |
781 | } |
782 | |
	ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL);
784 | if (ret < 0) |
785 | goto out; |
786 | |
787 | for (i = 0; i < fs->nvqs; i++) |
788 | fs->vqs[i].vq = vqs[i]; |
789 | |
790 | virtio_fs_start_all_queues(fs); |
791 | out: |
	kfree(names);
	kfree(callbacks);
	kfree(vqs);
	if (ret)
		kfree(fs->vqs);
797 | return ret; |
798 | } |
799 | |
800 | /* Free virtqueues (device must already be reset) */ |
801 | static void virtio_fs_cleanup_vqs(struct virtio_device *vdev) |
802 | { |
803 | vdev->config->del_vqs(vdev); |
804 | } |
805 | |
806 | /* Map a window offset to a page frame number. The window offset will have |
807 | * been produced by .iomap_begin(), which maps a file offset to a window |
808 | * offset. |
809 | */ |
810 | static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, |
811 | long nr_pages, enum dax_access_mode mode, |
812 | void **kaddr, pfn_t *pfn) |
813 | { |
814 | struct virtio_fs *fs = dax_get_private(dax_dev); |
815 | phys_addr_t offset = PFN_PHYS(pgoff); |
816 | size_t max_nr_pages = fs->window_len / PAGE_SIZE - pgoff; |
817 | |
818 | if (kaddr) |
819 | *kaddr = fs->window_kaddr + offset; |
820 | if (pfn) |
		*pfn = phys_to_pfn_t(fs->window_phys_addr + offset,
				     PFN_DEV | PFN_MAP);
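	/* Clamp the count so the mapping cannot run past the end of the
	 * DAX window.
	 */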
823 | return nr_pages > max_nr_pages ? max_nr_pages : nr_pages; |
824 | } |
825 | |
826 | static int virtio_fs_zero_page_range(struct dax_device *dax_dev, |
827 | pgoff_t pgoff, size_t nr_pages) |
828 | { |
829 | long rc; |
830 | void *kaddr; |
831 | |
	rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS, &kaddr,
			       NULL);
	if (rc < 0)
		return dax_mem2blk_err(rc);

	memset(kaddr, 0, nr_pages << PAGE_SHIFT);
	dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
839 | return 0; |
840 | } |
841 | |
842 | static const struct dax_operations virtio_fs_dax_ops = { |
843 | .direct_access = virtio_fs_direct_access, |
844 | .zero_page_range = virtio_fs_zero_page_range, |
845 | }; |
846 | |
847 | static void virtio_fs_cleanup_dax(void *data) |
848 | { |
849 | struct dax_device *dax_dev = data; |
850 | |
851 | kill_dax(dax_dev); |
852 | put_dax(dax_dev); |
853 | } |
854 | |
DEFINE_FREE(cleanup_dax, struct dax_device *, if (!IS_ERR_OR_NULL(_T)) virtio_fs_cleanup_dax(_T))
856 | |
857 | static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs) |
858 | { |
859 | struct dax_device *dax_dev __free(cleanup_dax) = NULL; |
860 | struct virtio_shm_region cache_reg; |
861 | struct dev_pagemap *pgmap; |
862 | bool have_cache; |
863 | |
864 | if (!IS_ENABLED(CONFIG_FUSE_DAX)) |
865 | return 0; |
866 | |
	dax_dev = alloc_dax(fs, &virtio_fs_dax_ops);
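	/* -EOPNOTSUPP (DAX support not compiled in) is treated as "no DAX
	 * device" rather than as a probe failure.
	 */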
	if (IS_ERR(dax_dev)) {
		int rc = PTR_ERR(dax_dev);
		return rc == -EOPNOTSUPP ? 0 : rc;
871 | } |
872 | |
873 | /* Get cache region */ |
	have_cache = virtio_get_shm_region(vdev, &cache_reg,
					   (u8)VIRTIO_FS_SHMCAP_ID_CACHE);
	if (!have_cache) {
		dev_notice(&vdev->dev, "%s: No cache capability\n", __func__);
878 | return 0; |
879 | } |
880 | |
881 | if (!devm_request_mem_region(&vdev->dev, cache_reg.addr, cache_reg.len, |
882 | dev_name(&vdev->dev))) { |
		dev_warn(&vdev->dev, "could not reserve region addr=0x%llx len=0x%llx\n",
			 cache_reg.addr, cache_reg.len);
885 | return -EBUSY; |
886 | } |
887 | |
	dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", cache_reg.len,
889 | cache_reg.addr); |
890 | |
	pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL);
892 | if (!pgmap) |
893 | return -ENOMEM; |
894 | |
895 | pgmap->type = MEMORY_DEVICE_FS_DAX; |
896 | |
897 | /* Ideally we would directly use the PCI BAR resource but |
898 | * devm_memremap_pages() wants its own copy in pgmap. So |
899 | * initialize a struct resource from scratch (only the start |
900 | * and end fields will be used). |
901 | */ |
902 | pgmap->range = (struct range) { |
903 | .start = (phys_addr_t) cache_reg.addr, |
904 | .end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1, |
905 | }; |
906 | pgmap->nr_range = 1; |
907 | |
	fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap);
	if (IS_ERR(fs->window_kaddr))
		return PTR_ERR(fs->window_kaddr);
911 | |
912 | fs->window_phys_addr = (phys_addr_t) cache_reg.addr; |
913 | fs->window_len = (phys_addr_t) cache_reg.len; |
914 | |
	dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n",
916 | __func__, fs->window_kaddr, cache_reg.addr, cache_reg.len); |
917 | |
918 | fs->dax_dev = no_free_ptr(dax_dev); |
919 | return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax, |
920 | fs->dax_dev); |
921 | } |
922 | |
923 | static int virtio_fs_probe(struct virtio_device *vdev) |
924 | { |
925 | struct virtio_fs *fs; |
926 | int ret; |
927 | |
	fs = kzalloc(sizeof(*fs), GFP_KERNEL);
929 | if (!fs) |
930 | return -ENOMEM; |
	kobject_init(&fs->kobj, &virtio_fs_ktype);
932 | vdev->priv = fs; |
933 | |
934 | ret = virtio_fs_read_tag(vdev, fs); |
935 | if (ret < 0) |
936 | goto out; |
937 | |
938 | ret = virtio_fs_setup_vqs(vdev, fs); |
939 | if (ret < 0) |
940 | goto out; |
941 | |
942 | /* TODO vq affinity */ |
943 | |
944 | ret = virtio_fs_setup_dax(vdev, fs); |
945 | if (ret < 0) |
946 | goto out_vqs; |
947 | |
948 | /* Bring the device online in case the filesystem is mounted and |
949 | * requests need to be sent before we return. |
950 | */ |
	virtio_device_ready(vdev);
952 | |
953 | ret = virtio_fs_add_instance(vdev, fs); |
954 | if (ret < 0) |
955 | goto out_vqs; |
956 | |
957 | return 0; |
958 | |
959 | out_vqs: |
	virtio_reset_device(vdev);
961 | virtio_fs_cleanup_vqs(vdev); |
962 | |
963 | out: |
964 | vdev->priv = NULL; |
	kobject_put(&fs->kobj);
966 | return ret; |
967 | } |
968 | |
969 | static void virtio_fs_stop_all_queues(struct virtio_fs *fs) |
970 | { |
971 | struct virtio_fs_vq *fsvq; |
972 | int i; |
973 | |
974 | for (i = 0; i < fs->nvqs; i++) { |
975 | fsvq = &fs->vqs[i]; |
		spin_lock(&fsvq->lock);
		fsvq->connected = false;
		spin_unlock(&fsvq->lock);
979 | } |
980 | } |
981 | |
982 | static void virtio_fs_remove(struct virtio_device *vdev) |
983 | { |
984 | struct virtio_fs *fs = vdev->priv; |
985 | |
986 | mutex_lock(&virtio_fs_mutex); |
987 | /* This device is going away. No one should get new reference */ |
	list_del_init(&fs->list);
	sysfs_remove_link(&fs->kobj, "device");
	kobject_del(&fs->kobj);
991 | virtio_fs_stop_all_queues(fs); |
992 | virtio_fs_drain_all_queues_locked(fs); |
	virtio_reset_device(vdev);
994 | virtio_fs_cleanup_vqs(vdev); |
995 | |
996 | vdev->priv = NULL; |
997 | /* Put device reference on virtio_fs object */ |
998 | virtio_fs_put(fs); |
	mutex_unlock(&virtio_fs_mutex);
1000 | } |
1001 | |
1002 | #ifdef CONFIG_PM_SLEEP |
1003 | static int virtio_fs_freeze(struct virtio_device *vdev) |
1004 | { |
1005 | /* TODO need to save state here */ |
	pr_warn("virtio-fs: suspend/resume not yet supported\n");
1007 | return -EOPNOTSUPP; |
1008 | } |
1009 | |
1010 | static int virtio_fs_restore(struct virtio_device *vdev) |
1011 | { |
1012 | /* TODO need to restore state here */ |
1013 | return 0; |
1014 | } |
1015 | #endif /* CONFIG_PM_SLEEP */ |
1016 | |
1017 | static const struct virtio_device_id id_table[] = { |
1018 | { VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID }, |
1019 | {}, |
1020 | }; |
1021 | |
1022 | static const unsigned int feature_table[] = {}; |
1023 | |
1024 | static struct virtio_driver virtio_fs_driver = { |
1025 | .driver.name = KBUILD_MODNAME, |
1026 | .driver.owner = THIS_MODULE, |
1027 | .id_table = id_table, |
1028 | .feature_table = feature_table, |
1029 | .feature_table_size = ARRAY_SIZE(feature_table), |
1030 | .probe = virtio_fs_probe, |
1031 | .remove = virtio_fs_remove, |
1032 | #ifdef CONFIG_PM_SLEEP |
1033 | .freeze = virtio_fs_freeze, |
1034 | .restore = virtio_fs_restore, |
1035 | #endif |
1036 | }; |
1037 | |
1038 | static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq) |
1039 | __releases(fiq->lock) |
1040 | { |
1041 | struct fuse_forget_link *link; |
1042 | struct virtio_fs_forget *forget; |
1043 | struct virtio_fs_forget_req *req; |
1044 | struct virtio_fs *fs; |
1045 | struct virtio_fs_vq *fsvq; |
1046 | u64 unique; |
1047 | |
	link = fuse_dequeue_forget(fiq, 1, NULL);
1049 | unique = fuse_get_unique(fiq); |
1050 | |
1051 | fs = fiq->priv; |
1052 | fsvq = &fs->vqs[VQ_HIPRIO]; |
	spin_unlock(&fiq->lock);
1054 | |
1055 | /* Allocate a buffer for the request */ |
	forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);
1057 | req = &forget->req; |
1058 | |
1059 | req->ih = (struct fuse_in_header){ |
1060 | .opcode = FUSE_FORGET, |
1061 | .nodeid = link->forget_one.nodeid, |
1062 | .unique = unique, |
1063 | .len = sizeof(*req), |
1064 | }; |
1065 | req->arg = (struct fuse_forget_in){ |
1066 | .nlookup = link->forget_one.nlookup, |
1067 | }; |
1068 | |
	send_forget_request(fsvq, forget, false);
	kfree(link);
1071 | } |
1072 | |
1073 | static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq) |
1074 | __releases(fiq->lock) |
1075 | { |
1076 | /* |
1077 | * TODO interrupts. |
1078 | * |
1079 | * Normal fs operations on a local filesystems aren't interruptible. |
1080 | * Exceptions are blocking lock operations; for example fcntl(F_SETLKW) |
1081 | * with shared lock between host and guest. |
1082 | */ |
	spin_unlock(&fiq->lock);
1084 | } |
1085 | |
1086 | /* Count number of scatter-gather elements required */ |
1087 | static unsigned int sg_count_fuse_pages(struct fuse_page_desc *page_descs, |
1088 | unsigned int num_pages, |
1089 | unsigned int total_len) |
1090 | { |
1091 | unsigned int i; |
1092 | unsigned int this_len; |
1093 | |
1094 | for (i = 0; i < num_pages && total_len; i++) { |
1095 | this_len = min(page_descs[i].length, total_len); |
1096 | total_len -= this_len; |
1097 | } |
1098 | |
1099 | return i; |
1100 | } |
1101 | |
1102 | /* Return the number of scatter-gather list elements required */ |
1103 | static unsigned int sg_count_fuse_req(struct fuse_req *req) |
1104 | { |
1105 | struct fuse_args *args = req->args; |
1106 | struct fuse_args_pages *ap = container_of(args, typeof(*ap), args); |
1107 | unsigned int size, total_sgs = 1 /* fuse_in_header */; |
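	/* Plus one element for the packed in-args (if any), one per in-arg
	 * data page, and the same again on the out side when a reply is
	 * expected.
	 */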
1108 | |
1109 | if (args->in_numargs - args->in_pages) |
1110 | total_sgs += 1; |
1111 | |
1112 | if (args->in_pages) { |
1113 | size = args->in_args[args->in_numargs - 1].size; |
		total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
						 size);
1116 | } |
1117 | |
1118 | if (!test_bit(FR_ISREPLY, &req->flags)) |
1119 | return total_sgs; |
1120 | |
1121 | total_sgs += 1 /* fuse_out_header */; |
1122 | |
1123 | if (args->out_numargs - args->out_pages) |
1124 | total_sgs += 1; |
1125 | |
1126 | if (args->out_pages) { |
1127 | size = args->out_args[args->out_numargs - 1].size; |
		total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
						 size);
1130 | } |
1131 | |
1132 | return total_sgs; |
1133 | } |
1134 | |
1135 | /* Add pages to scatter-gather list and return number of elements used */ |
1136 | static unsigned int sg_init_fuse_pages(struct scatterlist *sg, |
1137 | struct page **pages, |
1138 | struct fuse_page_desc *page_descs, |
1139 | unsigned int num_pages, |
1140 | unsigned int total_len) |
1141 | { |
1142 | unsigned int i; |
1143 | unsigned int this_len; |
1144 | |
1145 | for (i = 0; i < num_pages && total_len; i++) { |
1146 | sg_init_table(&sg[i], 1); |
1147 | this_len = min(page_descs[i].length, total_len); |
		sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset);
1149 | total_len -= this_len; |
1150 | } |
1151 | |
1152 | return i; |
1153 | } |
1154 | |
1155 | /* Add args to scatter-gather list and return number of elements used */ |
1156 | static unsigned int sg_init_fuse_args(struct scatterlist *sg, |
1157 | struct fuse_req *req, |
1158 | struct fuse_arg *args, |
1159 | unsigned int numargs, |
1160 | bool argpages, |
1161 | void *argbuf, |
1162 | unsigned int *len_used) |
1163 | { |
1164 | struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args); |
1165 | unsigned int total_sgs = 0; |
1166 | unsigned int len; |
1167 | |
	len = fuse_len_args(numargs - argpages, args);
1169 | if (len) |
1170 | sg_init_one(&sg[total_sgs++], argbuf, len); |
1171 | |
1172 | if (argpages) |
		total_sgs += sg_init_fuse_pages(&sg[total_sgs],
						ap->pages, ap->descs,
						ap->num_pages,
						args[numargs - 1].size);
1177 | |
1178 | if (len_used) |
1179 | *len_used = len; |
1180 | |
1181 | return total_sgs; |
1182 | } |
1183 | |
1184 | /* Add a request to a virtqueue and kick the device */ |
1185 | static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, |
1186 | struct fuse_req *req, bool in_flight) |
1187 | { |
1188 | /* requests need at least 4 elements */ |
1189 | struct scatterlist *stack_sgs[6]; |
1190 | struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)]; |
1191 | struct scatterlist **sgs = stack_sgs; |
1192 | struct scatterlist *sg = stack_sg; |
1193 | struct virtqueue *vq; |
1194 | struct fuse_args *args = req->args; |
1195 | unsigned int argbuf_used = 0; |
1196 | unsigned int out_sgs = 0; |
1197 | unsigned int in_sgs = 0; |
1198 | unsigned int total_sgs; |
1199 | unsigned int i; |
1200 | int ret; |
1201 | bool notify; |
1202 | struct fuse_pqueue *fpq; |
1203 | |
1204 | /* Does the sglist fit on the stack? */ |
1205 | total_sgs = sg_count_fuse_req(req); |
1206 | if (total_sgs > ARRAY_SIZE(stack_sgs)) { |
		sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
		sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
1209 | if (!sgs || !sg) { |
1210 | ret = -ENOMEM; |
1211 | goto out; |
1212 | } |
1213 | } |
1214 | |
1215 | /* Use a bounce buffer since stack args cannot be mapped */ |
1216 | ret = copy_args_to_argbuf(req); |
1217 | if (ret < 0) |
1218 | goto out; |
1219 | |
1220 | /* Request elements */ |
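	/* Device-readable part: fuse_in_header, then the serialized in-args
	 * from the argbuf bounce buffer, then any in-arg data pages.
	 */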
1221 | sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h)); |
	out_sgs += sg_init_fuse_args(&sg[out_sgs], req,
				     (struct fuse_arg *)args->in_args,
				     args->in_numargs, args->in_pages,
				     req->argbuf, &argbuf_used);
1226 | |
1227 | /* Reply elements */ |
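	/* Device-writable part, only for requests expecting a reply:
	 * fuse_out_header, then buffers for the out-args and out-arg pages.
	 */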
1228 | if (test_bit(FR_ISREPLY, &req->flags)) { |
1229 | sg_init_one(&sg[out_sgs + in_sgs++], |
1230 | &req->out.h, sizeof(req->out.h)); |
		in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req,
					    args->out_args, args->out_numargs,
					    args->out_pages,
					    req->argbuf + argbuf_used, NULL);
1235 | } |
1236 | |
1237 | WARN_ON(out_sgs + in_sgs != total_sgs); |
1238 | |
1239 | for (i = 0; i < total_sgs; i++) |
1240 | sgs[i] = &sg[i]; |
1241 | |
	spin_lock(&fsvq->lock);

	if (!fsvq->connected) {
		spin_unlock(&fsvq->lock);
		ret = -ENOTCONN;
		goto out;
	}

	vq = fsvq->vq;
	ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
	if (ret < 0) {
		spin_unlock(&fsvq->lock);
		goto out;
	}

	/* Request successfully sent. */
	fpq = &fsvq->fud->pq;
	spin_lock(&fpq->lock);
	list_add_tail(&req->list, fpq->processing);
	spin_unlock(&fpq->lock);
	set_bit(FR_SENT, &req->flags);
1263 | /* matches barrier in request_wait_answer() */ |
1264 | smp_mb__after_atomic(); |
1265 | |
1266 | if (!in_flight) |
1267 | inc_in_flight_req(fsvq); |
1268 | notify = virtqueue_kick_prepare(vq); |
1269 | |
	spin_unlock(&fsvq->lock);
1271 | |
1272 | if (notify) |
1273 | virtqueue_notify(vq); |
1274 | |
1275 | out: |
1276 | if (ret < 0 && req->argbuf) { |
		kfree(req->argbuf);
1278 | req->argbuf = NULL; |
1279 | } |
1280 | if (sgs != stack_sgs) { |
		kfree(sgs);
		kfree(sg);
1283 | } |
1284 | |
1285 | return ret; |
1286 | } |
1287 | |
1288 | static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq) |
1289 | __releases(fiq->lock) |
1290 | { |
1291 | unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */ |
1292 | struct virtio_fs *fs; |
1293 | struct fuse_req *req; |
1294 | struct virtio_fs_vq *fsvq; |
1295 | int ret; |
1296 | |
1297 | WARN_ON(list_empty(&fiq->pending)); |
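	/* Exactly one pending request is expected here: dequeue it and
	 * verify that the list is then empty.
	 */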
1298 | req = list_last_entry(&fiq->pending, struct fuse_req, list); |
	clear_bit(FR_PENDING, &req->flags);
	list_del_init(&req->list);
	WARN_ON(!list_empty(&fiq->pending));
	spin_unlock(&fiq->lock);
1303 | |
1304 | fs = fiq->priv; |
1305 | |
	pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n",
1307 | __func__, req->in.h.opcode, req->in.h.unique, |
1308 | req->in.h.nodeid, req->in.h.len, |
1309 | fuse_len_args(req->args->out_numargs, req->args->out_args)); |
1310 | |
1311 | fsvq = &fs->vqs[queue_id]; |
	ret = virtio_fs_enqueue_req(fsvq, req, false);
1313 | if (ret < 0) { |
1314 | if (ret == -ENOMEM || ret == -ENOSPC) { |
1315 | /* |
1316 | * Virtqueue full. Retry submission from worker |
1317 | * context as we might be holding fc->bg_lock. |
1318 | */ |
			spin_lock(&fsvq->lock);
			list_add_tail(&req->list, &fsvq->queued_reqs);
			inc_in_flight_req(fsvq);
			schedule_delayed_work(&fsvq->dispatch_work,
					      msecs_to_jiffies(1));
			spin_unlock(&fsvq->lock);
1325 | return; |
1326 | } |
1327 | req->out.h.error = ret; |
		pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);
1329 | |
1330 | /* Can't end request in submission context. Use a worker */ |
		spin_lock(&fsvq->lock);
		list_add_tail(&req->list, &fsvq->end_reqs);
		schedule_delayed_work(&fsvq->dispatch_work, 0);
		spin_unlock(&fsvq->lock);
1335 | return; |
1336 | } |
1337 | } |
1338 | |
1339 | static const struct fuse_iqueue_ops virtio_fs_fiq_ops = { |
1340 | .wake_forget_and_unlock = virtio_fs_wake_forget_and_unlock, |
1341 | .wake_interrupt_and_unlock = virtio_fs_wake_interrupt_and_unlock, |
1342 | .wake_pending_and_unlock = virtio_fs_wake_pending_and_unlock, |
1343 | .release = virtio_fs_fiq_release, |
1344 | }; |
1345 | |
1346 | static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx) |
1347 | { |
1348 | ctx->rootmode = S_IFDIR; |
1349 | ctx->default_permissions = 1; |
1350 | ctx->allow_other = 1; |
1351 | ctx->max_read = UINT_MAX; |
1352 | ctx->blksize = 512; |
1353 | ctx->destroy = true; |
1354 | ctx->no_control = true; |
1355 | ctx->no_force_umount = true; |
1356 | } |
1357 | |
1358 | static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc) |
1359 | { |
1360 | struct fuse_mount *fm = get_fuse_mount_super(sb); |
1361 | struct fuse_conn *fc = fm->fc; |
1362 | struct virtio_fs *fs = fc->iq.priv; |
1363 | struct fuse_fs_context *ctx = fsc->fs_private; |
1364 | unsigned int i; |
1365 | int err; |
1366 | |
1367 | virtio_fs_ctx_set_defaults(ctx); |
1368 | mutex_lock(&virtio_fs_mutex); |
1369 | |
	/* After holding mutex, make sure virtiofs device is still there.
	 * Though we are holding a reference to it, driver ->remove might
	 * still have cleaned up virtual queues. In that case bail out.
	 */
	err = -EINVAL;
	if (list_empty(&fs->list)) {
		pr_info("virtio-fs: tag <%s> not found\n", fs->tag);
1377 | goto err; |
1378 | } |
1379 | |
1380 | err = -ENOMEM; |
	/* Allocate a fuse_dev for each queue (hiprio and request queues) */
1382 | for (i = 0; i < fs->nvqs; i++) { |
1383 | struct virtio_fs_vq *fsvq = &fs->vqs[i]; |
1384 | |
1385 | fsvq->fud = fuse_dev_alloc(); |
1386 | if (!fsvq->fud) |
1387 | goto err_free_fuse_devs; |
1388 | } |
1389 | |
1390 | /* virtiofs allocates and installs its own fuse devices */ |
1391 | ctx->fudptr = NULL; |
1392 | if (ctx->dax_mode != FUSE_DAX_NEVER) { |
1393 | if (ctx->dax_mode == FUSE_DAX_ALWAYS && !fs->dax_dev) { |
1394 | err = -EINVAL; |
			pr_err("virtio-fs: dax can't be enabled as filesystem"
			       " device does not support it.\n");
1397 | goto err_free_fuse_devs; |
1398 | } |
1399 | ctx->dax_dev = fs->dax_dev; |
1400 | } |
1401 | err = fuse_fill_super_common(sb, ctx); |
1402 | if (err < 0) |
1403 | goto err_free_fuse_devs; |
1404 | |
1405 | for (i = 0; i < fs->nvqs; i++) { |
1406 | struct virtio_fs_vq *fsvq = &fs->vqs[i]; |
1407 | |
		fuse_dev_install(fsvq->fud, fc);
1409 | } |
1410 | |
1411 | /* Previous unmount will stop all queues. Start these again */ |
1412 | virtio_fs_start_all_queues(fs); |
1413 | fuse_send_init(fm); |
	mutex_unlock(&virtio_fs_mutex);
1415 | return 0; |
1416 | |
1417 | err_free_fuse_devs: |
1418 | virtio_fs_free_devs(fs); |
1419 | err: |
	mutex_unlock(&virtio_fs_mutex);
1421 | return err; |
1422 | } |
1423 | |
1424 | static void virtio_fs_conn_destroy(struct fuse_mount *fm) |
1425 | { |
1426 | struct fuse_conn *fc = fm->fc; |
1427 | struct virtio_fs *vfs = fc->iq.priv; |
1428 | struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_HIPRIO]; |
1429 | |
1430 | /* Stop dax worker. Soon evict_inodes() will be called which |
1431 | * will free all memory ranges belonging to all inodes. |
1432 | */ |
1433 | if (IS_ENABLED(CONFIG_FUSE_DAX)) |
1434 | fuse_dax_cancel_work(fc); |
1435 | |
1436 | /* Stop forget queue. Soon destroy will be sent */ |
	spin_lock(&fsvq->lock);
	fsvq->connected = false;
	spin_unlock(&fsvq->lock);
	virtio_fs_drain_all_queues(vfs);
1441 | |
1442 | fuse_conn_destroy(fm); |
1443 | |
1444 | /* fuse_conn_destroy() must have sent destroy. Stop all queues |
1445 | * and drain one more time and free fuse devices. Freeing fuse |
1446 | * devices will drop their reference on fuse_conn and that in |
1447 | * turn will drop its reference on virtio_fs object. |
1448 | */ |
	virtio_fs_stop_all_queues(vfs);
	virtio_fs_drain_all_queues(vfs);
	virtio_fs_free_devs(vfs);
1452 | } |
1453 | |
1454 | static void virtio_kill_sb(struct super_block *sb) |
1455 | { |
1456 | struct fuse_mount *fm = get_fuse_mount_super(sb); |
1457 | bool last; |
1458 | |
1459 | /* If mount failed, we can still be called without any fc */ |
1460 | if (sb->s_root) { |
1461 | last = fuse_mount_remove(fm); |
1462 | if (last) |
1463 | virtio_fs_conn_destroy(fm); |
1464 | } |
1465 | kill_anon_super(sb); |
1466 | fuse_mount_destroy(fm); |
1467 | } |
1468 | |
1469 | static int virtio_fs_test_super(struct super_block *sb, |
1470 | struct fs_context *fsc) |
1471 | { |
1472 | struct fuse_mount *fsc_fm = fsc->s_fs_info; |
1473 | struct fuse_mount *sb_fm = get_fuse_mount_super(sb); |
1474 | |
1475 | return fsc_fm->fc->iq.priv == sb_fm->fc->iq.priv; |
1476 | } |
1477 | |
1478 | static int virtio_fs_get_tree(struct fs_context *fsc) |
1479 | { |
1480 | struct virtio_fs *fs; |
1481 | struct super_block *sb; |
1482 | struct fuse_conn *fc = NULL; |
1483 | struct fuse_mount *fm; |
1484 | unsigned int virtqueue_size; |
1485 | int err = -EIO; |
1486 | |
1487 | /* This gets a reference on virtio_fs object. This ptr gets installed |
1488 | * in fc->iq->priv. Once fuse_conn is going away, it calls ->put() |
1489 | * to drop the reference to this object. |
1490 | */ |
	fs = virtio_fs_find_instance(fsc->source);
	if (!fs) {
		pr_info("virtio-fs: tag <%s> not found\n", fsc->source);
1494 | return -EINVAL; |
1495 | } |
1496 | |
	virtqueue_size = virtqueue_get_vring_size(fs->vqs[VQ_REQUEST].vq);
1498 | if (WARN_ON(virtqueue_size <= FUSE_HEADER_OVERHEAD)) |
1499 | goto out_err; |
1500 | |
1501 | err = -ENOMEM; |
	fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
1503 | if (!fc) |
1504 | goto out_err; |
1505 | |
	fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
1507 | if (!fm) |
1508 | goto out_err; |
1509 | |
	fuse_conn_init(fc, fm, fsc->user_ns, &virtio_fs_fiq_ops, fs);
1511 | fc->release = fuse_free_conn; |
1512 | fc->delete_stale = true; |
1513 | fc->auto_submounts = true; |
1514 | fc->sync_fs = true; |
1515 | |
1516 | /* Tell FUSE to split requests that exceed the virtqueue's size */ |
1517 | fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit, |
1518 | virtqueue_size - FUSE_HEADER_OVERHEAD); |
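	/* With FUSE_HEADER_OVERHEAD elements reserved for the headers and
	 * packed args, a request carrying max_pages data pages is guaranteed
	 * to fit in the virtqueue's descriptor ring.
	 */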
1519 | |
1520 | fsc->s_fs_info = fm; |
	sb = sget_fc(fsc, virtio_fs_test_super, set_anon_super_fc);
1522 | if (fsc->s_fs_info) |
1523 | fuse_mount_destroy(fm); |
	if (IS_ERR(sb))
		return PTR_ERR(sb);
1526 | |
1527 | if (!sb->s_root) { |
1528 | err = virtio_fs_fill_super(sb, fsc); |
1529 | if (err) { |
1530 | deactivate_locked_super(sb); |
1531 | return err; |
1532 | } |
1533 | |
1534 | sb->s_flags |= SB_ACTIVE; |
1535 | } |
1536 | |
1537 | WARN_ON(fsc->root); |
	fsc->root = dget(sb->s_root);
1539 | return 0; |
1540 | |
1541 | out_err: |
	kfree(fc);
	mutex_lock(&virtio_fs_mutex);
	virtio_fs_put(fs);
	mutex_unlock(&virtio_fs_mutex);
1546 | return err; |
1547 | } |
1548 | |
1549 | static const struct fs_context_operations virtio_fs_context_ops = { |
1550 | .free = virtio_fs_free_fsc, |
1551 | .parse_param = virtio_fs_parse_param, |
1552 | .get_tree = virtio_fs_get_tree, |
1553 | }; |
1554 | |
1555 | static int virtio_fs_init_fs_context(struct fs_context *fsc) |
1556 | { |
1557 | struct fuse_fs_context *ctx; |
1558 | |
1559 | if (fsc->purpose == FS_CONTEXT_FOR_SUBMOUNT) |
1560 | return fuse_init_fs_context_submount(fsc); |
1561 | |
	ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
1563 | if (!ctx) |
1564 | return -ENOMEM; |
1565 | fsc->fs_private = ctx; |
1566 | fsc->ops = &virtio_fs_context_ops; |
1567 | return 0; |
1568 | } |
1569 | |
1570 | static struct file_system_type virtio_fs_type = { |
1571 | .owner = THIS_MODULE, |
	.name = "virtiofs",
1573 | .init_fs_context = virtio_fs_init_fs_context, |
1574 | .kill_sb = virtio_kill_sb, |
1575 | }; |
1576 | |
1577 | static int virtio_fs_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) |
1578 | { |
1579 | const struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj); |
1580 | |
	add_uevent_var(env, "TAG=%s", fs->tag);
1582 | return 0; |
1583 | } |
1584 | |
1585 | static const struct kset_uevent_ops virtio_fs_uevent_ops = { |
1586 | .uevent = virtio_fs_uevent, |
1587 | }; |
1588 | |
1589 | static int __init virtio_fs_sysfs_init(void) |
1590 | { |
	virtio_fs_kset = kset_create_and_add("virtiofs", &virtio_fs_uevent_ops,
					     fs_kobj);
1593 | if (!virtio_fs_kset) |
1594 | return -ENOMEM; |
1595 | return 0; |
1596 | } |
1597 | |
1598 | static void virtio_fs_sysfs_exit(void) |
1599 | { |
	kset_unregister(virtio_fs_kset);
1601 | virtio_fs_kset = NULL; |
1602 | } |
1603 | |
1604 | static int __init virtio_fs_init(void) |
1605 | { |
1606 | int ret; |
1607 | |
1608 | ret = virtio_fs_sysfs_init(); |
1609 | if (ret < 0) |
1610 | return ret; |
1611 | |
1612 | ret = register_virtio_driver(&virtio_fs_driver); |
1613 | if (ret < 0) |
1614 | goto sysfs_exit; |
1615 | |
1616 | ret = register_filesystem(&virtio_fs_type); |
1617 | if (ret < 0) |
1618 | goto unregister_virtio_driver; |
1619 | |
1620 | return 0; |
1621 | |
1622 | unregister_virtio_driver: |
	unregister_virtio_driver(&virtio_fs_driver);
1624 | sysfs_exit: |
1625 | virtio_fs_sysfs_exit(); |
1626 | return ret; |
1627 | } |
1628 | module_init(virtio_fs_init); |
1629 | |
1630 | static void __exit virtio_fs_exit(void) |
1631 | { |
1632 | unregister_filesystem(&virtio_fs_type); |
	unregister_virtio_driver(&virtio_fs_driver);
1634 | virtio_fs_sysfs_exit(); |
1635 | } |
1636 | module_exit(virtio_fs_exit); |
1637 | |
MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>");
MODULE_DESCRIPTION("Virtio Filesystem");
MODULE_LICENSE("GPL");
1641 | MODULE_ALIAS_FS(KBUILD_MODNAME); |
1642 | MODULE_DEVICE_TABLE(virtio, id_table); |
1643 | |