1 | /* |
2 | FUSE: Filesystem in Userspace |
3 | Copyright (C) 2001-2018 Miklos Szeredi <miklos@szeredi.hu> |
4 | |
5 | This program can be distributed under the terms of the GNU GPL. |
6 | See the file COPYING. |
7 | */ |
8 | |
9 | |
10 | #include "fuse_i.h" |
11 | #include <linux/iversion.h> |
12 | #include <linux/posix_acl.h> |
13 | #include <linux/pagemap.h> |
14 | #include <linux/highmem.h> |
15 | |
16 | static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx) |
17 | { |
18 | struct fuse_conn *fc = get_fuse_conn(inode: dir); |
19 | struct fuse_inode *fi = get_fuse_inode(inode: dir); |
20 | |
21 | if (!fc->do_readdirplus) |
22 | return false; |
23 | if (!fc->readdirplus_auto) |
24 | return true; |
25 | if (test_and_clear_bit(nr: FUSE_I_ADVISE_RDPLUS, addr: &fi->state)) |
26 | return true; |
27 | if (ctx->pos == 0) |
28 | return true; |
29 | return false; |
30 | } |
31 | |
32 | static void fuse_add_dirent_to_cache(struct file *file, |
33 | struct fuse_dirent *dirent, loff_t pos) |
34 | { |
35 | struct fuse_inode *fi = get_fuse_inode(inode: file_inode(f: file)); |
36 | size_t reclen = FUSE_DIRENT_SIZE(dirent); |
37 | pgoff_t index; |
38 | struct page *page; |
39 | loff_t size; |
40 | u64 version; |
41 | unsigned int offset; |
42 | void *addr; |
43 | |
44 | spin_lock(lock: &fi->rdc.lock); |
45 | /* |
46 | * Is cache already completed? Or this entry does not go at the end of |
47 | * cache? |
48 | */ |
49 | if (fi->rdc.cached || pos != fi->rdc.pos) { |
50 | spin_unlock(lock: &fi->rdc.lock); |
51 | return; |
52 | } |
53 | version = fi->rdc.version; |
54 | size = fi->rdc.size; |
55 | offset = size & ~PAGE_MASK; |
56 | index = size >> PAGE_SHIFT; |
57 | /* Dirent doesn't fit in current page? Jump to next page. */ |
58 | if (offset + reclen > PAGE_SIZE) { |
59 | index++; |
60 | offset = 0; |
61 | } |
62 | spin_unlock(lock: &fi->rdc.lock); |
63 | |
64 | if (offset) { |
65 | page = find_lock_page(mapping: file->f_mapping, index); |
66 | } else { |
67 | page = find_or_create_page(mapping: file->f_mapping, index, |
68 | gfp_mask: mapping_gfp_mask(mapping: file->f_mapping)); |
69 | } |
70 | if (!page) |
71 | return; |
72 | |
73 | spin_lock(lock: &fi->rdc.lock); |
74 | /* Raced with another readdir */ |
75 | if (fi->rdc.version != version || fi->rdc.size != size || |
76 | WARN_ON(fi->rdc.pos != pos)) |
77 | goto unlock; |
78 | |
79 | addr = kmap_local_page(page); |
80 | if (!offset) { |
81 | clear_page(page: addr); |
82 | SetPageUptodate(page); |
83 | } |
84 | memcpy(addr + offset, dirent, reclen); |
85 | kunmap_local(addr); |
86 | fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen; |
87 | fi->rdc.pos = dirent->off; |
88 | unlock: |
89 | spin_unlock(lock: &fi->rdc.lock); |
90 | unlock_page(page); |
91 | put_page(page); |
92 | } |
93 | |
94 | static void fuse_readdir_cache_end(struct file *file, loff_t pos) |
95 | { |
96 | struct fuse_inode *fi = get_fuse_inode(inode: file_inode(f: file)); |
97 | loff_t end; |
98 | |
99 | spin_lock(lock: &fi->rdc.lock); |
100 | /* does cache end position match current position? */ |
101 | if (fi->rdc.pos != pos) { |
102 | spin_unlock(lock: &fi->rdc.lock); |
103 | return; |
104 | } |
105 | |
106 | fi->rdc.cached = true; |
107 | end = ALIGN(fi->rdc.size, PAGE_SIZE); |
108 | spin_unlock(lock: &fi->rdc.lock); |
109 | |
110 | /* truncate unused tail of cache */ |
111 | truncate_inode_pages(file->f_mapping, end); |
112 | } |
113 | |
114 | static bool fuse_emit(struct file *file, struct dir_context *ctx, |
115 | struct fuse_dirent *dirent) |
116 | { |
117 | struct fuse_file *ff = file->private_data; |
118 | |
119 | if (ff->open_flags & FOPEN_CACHE_DIR) |
120 | fuse_add_dirent_to_cache(file, dirent, pos: ctx->pos); |
121 | |
122 | return dir_emit(ctx, name: dirent->name, namelen: dirent->namelen, ino: dirent->ino, |
123 | type: dirent->type); |
124 | } |
125 | |
126 | static int parse_dirfile(char *buf, size_t nbytes, struct file *file, |
127 | struct dir_context *ctx) |
128 | { |
129 | while (nbytes >= FUSE_NAME_OFFSET) { |
130 | struct fuse_dirent *dirent = (struct fuse_dirent *) buf; |
131 | size_t reclen = FUSE_DIRENT_SIZE(dirent); |
132 | if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX) |
133 | return -EIO; |
134 | if (reclen > nbytes) |
135 | break; |
136 | if (memchr(p: dirent->name, c: '/', size: dirent->namelen) != NULL) |
137 | return -EIO; |
138 | |
139 | if (!fuse_emit(file, ctx, dirent)) |
140 | break; |
141 | |
142 | buf += reclen; |
143 | nbytes -= reclen; |
144 | ctx->pos = dirent->off; |
145 | } |
146 | |
147 | return 0; |
148 | } |
149 | |
150 | static int fuse_direntplus_link(struct file *file, |
151 | struct fuse_direntplus *direntplus, |
152 | u64 attr_version) |
153 | { |
154 | struct fuse_entry_out *o = &direntplus->entry_out; |
155 | struct fuse_dirent *dirent = &direntplus->dirent; |
156 | struct dentry *parent = file->f_path.dentry; |
157 | struct qstr name = QSTR_INIT(dirent->name, dirent->namelen); |
158 | struct dentry *dentry; |
159 | struct dentry *alias; |
160 | struct inode *dir = d_inode(dentry: parent); |
161 | struct fuse_conn *fc; |
162 | struct inode *inode; |
163 | DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); |
164 | |
165 | if (!o->nodeid) { |
166 | /* |
167 | * Unlike in the case of fuse_lookup, zero nodeid does not mean |
168 | * ENOENT. Instead, it only means the userspace filesystem did |
169 | * not want to return attributes/handle for this entry. |
170 | * |
171 | * So do nothing. |
172 | */ |
173 | return 0; |
174 | } |
175 | |
176 | if (name.name[0] == '.') { |
177 | /* |
178 | * We could potentially refresh the attributes of the directory |
179 | * and its parent? |
180 | */ |
181 | if (name.len == 1) |
182 | return 0; |
183 | if (name.name[1] == '.' && name.len == 2) |
184 | return 0; |
185 | } |
186 | |
187 | if (invalid_nodeid(nodeid: o->nodeid)) |
188 | return -EIO; |
189 | if (fuse_invalid_attr(attr: &o->attr)) |
190 | return -EIO; |
191 | |
192 | fc = get_fuse_conn(inode: dir); |
193 | |
194 | name.hash = full_name_hash(salt: parent, name.name, name.len); |
195 | dentry = d_lookup(parent, &name); |
196 | if (!dentry) { |
197 | retry: |
198 | dentry = d_alloc_parallel(parent, &name, &wq); |
199 | if (IS_ERR(ptr: dentry)) |
200 | return PTR_ERR(ptr: dentry); |
201 | } |
202 | if (!d_in_lookup(dentry)) { |
203 | struct fuse_inode *fi; |
204 | inode = d_inode(dentry); |
205 | if (inode && get_node_id(inode) != o->nodeid) |
206 | inode = NULL; |
207 | if (!inode || |
208 | fuse_stale_inode(inode, generation: o->generation, attr: &o->attr)) { |
209 | if (inode) |
210 | fuse_make_bad(inode); |
211 | d_invalidate(dentry); |
212 | dput(dentry); |
213 | goto retry; |
214 | } |
215 | if (fuse_is_bad(inode)) { |
216 | dput(dentry); |
217 | return -EIO; |
218 | } |
219 | |
220 | fi = get_fuse_inode(inode); |
221 | spin_lock(lock: &fi->lock); |
222 | fi->nlookup++; |
223 | spin_unlock(lock: &fi->lock); |
224 | |
225 | forget_all_cached_acls(inode); |
226 | fuse_change_attributes(inode, attr: &o->attr, NULL, |
227 | ATTR_TIMEOUT(o), |
228 | attr_version); |
229 | /* |
230 | * The other branch comes via fuse_iget() |
231 | * which bumps nlookup inside |
232 | */ |
233 | } else { |
234 | inode = fuse_iget(sb: dir->i_sb, nodeid: o->nodeid, generation: o->generation, |
235 | attr: &o->attr, ATTR_TIMEOUT(o), |
236 | attr_version); |
237 | if (!inode) |
238 | inode = ERR_PTR(error: -ENOMEM); |
239 | |
240 | alias = d_splice_alias(inode, dentry); |
241 | d_lookup_done(dentry); |
242 | if (alias) { |
243 | dput(dentry); |
244 | dentry = alias; |
245 | } |
246 | if (IS_ERR(ptr: dentry)) { |
247 | if (!IS_ERR(ptr: inode)) { |
248 | struct fuse_inode *fi = get_fuse_inode(inode); |
249 | |
250 | spin_lock(lock: &fi->lock); |
251 | fi->nlookup--; |
252 | spin_unlock(lock: &fi->lock); |
253 | } |
254 | return PTR_ERR(ptr: dentry); |
255 | } |
256 | } |
257 | if (fc->readdirplus_auto) |
258 | set_bit(nr: FUSE_I_INIT_RDPLUS, addr: &get_fuse_inode(inode)->state); |
259 | fuse_change_entry_timeout(entry: dentry, o); |
260 | |
261 | dput(dentry); |
262 | return 0; |
263 | } |
264 | |
265 | static void fuse_force_forget(struct file *file, u64 nodeid) |
266 | { |
267 | struct inode *inode = file_inode(f: file); |
268 | struct fuse_mount *fm = get_fuse_mount(inode); |
269 | struct fuse_forget_in inarg; |
270 | FUSE_ARGS(args); |
271 | |
272 | memset(&inarg, 0, sizeof(inarg)); |
273 | inarg.nlookup = 1; |
274 | args.opcode = FUSE_FORGET; |
275 | args.nodeid = nodeid; |
276 | args.in_numargs = 1; |
277 | args.in_args[0].size = sizeof(inarg); |
278 | args.in_args[0].value = &inarg; |
279 | args.force = true; |
280 | args.noreply = true; |
281 | |
282 | fuse_simple_request(fm, args: &args); |
283 | /* ignore errors */ |
284 | } |
285 | |
286 | static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, |
287 | struct dir_context *ctx, u64 attr_version) |
288 | { |
289 | struct fuse_direntplus *direntplus; |
290 | struct fuse_dirent *dirent; |
291 | size_t reclen; |
292 | int over = 0; |
293 | int ret; |
294 | |
295 | while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) { |
296 | direntplus = (struct fuse_direntplus *) buf; |
297 | dirent = &direntplus->dirent; |
298 | reclen = FUSE_DIRENTPLUS_SIZE(direntplus); |
299 | |
300 | if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX) |
301 | return -EIO; |
302 | if (reclen > nbytes) |
303 | break; |
304 | if (memchr(p: dirent->name, c: '/', size: dirent->namelen) != NULL) |
305 | return -EIO; |
306 | |
307 | if (!over) { |
308 | /* We fill entries into dstbuf only as much as |
309 | it can hold. But we still continue iterating |
310 | over remaining entries to link them. If not, |
311 | we need to send a FORGET for each of those |
312 | which we did not link. |
313 | */ |
314 | over = !fuse_emit(file, ctx, dirent); |
315 | if (!over) |
316 | ctx->pos = dirent->off; |
317 | } |
318 | |
319 | buf += reclen; |
320 | nbytes -= reclen; |
321 | |
322 | ret = fuse_direntplus_link(file, direntplus, attr_version); |
323 | if (ret) |
324 | fuse_force_forget(file, nodeid: direntplus->entry_out.nodeid); |
325 | } |
326 | |
327 | return 0; |
328 | } |
329 | |
330 | static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx) |
331 | { |
332 | int plus; |
333 | ssize_t res; |
334 | struct page *page; |
335 | struct inode *inode = file_inode(f: file); |
336 | struct fuse_mount *fm = get_fuse_mount(inode); |
337 | struct fuse_io_args ia = {}; |
338 | struct fuse_args_pages *ap = &ia.ap; |
339 | struct fuse_page_desc desc = { .length = PAGE_SIZE }; |
340 | u64 attr_version = 0; |
341 | bool locked; |
342 | |
343 | page = alloc_page(GFP_KERNEL); |
344 | if (!page) |
345 | return -ENOMEM; |
346 | |
347 | plus = fuse_use_readdirplus(dir: inode, ctx); |
348 | ap->args.out_pages = true; |
349 | ap->num_pages = 1; |
350 | ap->pages = &page; |
351 | ap->descs = &desc; |
352 | if (plus) { |
353 | attr_version = fuse_get_attr_version(fc: fm->fc); |
354 | fuse_read_args_fill(ia: &ia, file, pos: ctx->pos, PAGE_SIZE, |
355 | opcode: FUSE_READDIRPLUS); |
356 | } else { |
357 | fuse_read_args_fill(ia: &ia, file, pos: ctx->pos, PAGE_SIZE, |
358 | opcode: FUSE_READDIR); |
359 | } |
360 | locked = fuse_lock_inode(inode); |
361 | res = fuse_simple_request(fm, args: &ap->args); |
362 | fuse_unlock_inode(inode, locked); |
363 | if (res >= 0) { |
364 | if (!res) { |
365 | struct fuse_file *ff = file->private_data; |
366 | |
367 | if (ff->open_flags & FOPEN_CACHE_DIR) |
368 | fuse_readdir_cache_end(file, pos: ctx->pos); |
369 | } else if (plus) { |
370 | res = parse_dirplusfile(page_address(page), nbytes: res, |
371 | file, ctx, attr_version); |
372 | } else { |
373 | res = parse_dirfile(page_address(page), nbytes: res, file, |
374 | ctx); |
375 | } |
376 | } |
377 | |
378 | __free_page(page); |
379 | fuse_invalidate_atime(inode); |
380 | return res; |
381 | } |
382 | |
/* Outcome of parsing one page of the readdir cache (see fuse_parse_cache()). */
enum fuse_parse_result {
	/* A cached entry failed validation. */
	FOUND_ERR = -1,
	/* No entry at the requested position was seen. */
	FOUND_NONE = 0,
	/* The requested position was reached and emitting was attempted. */
	FOUND_SOME = 1,
	/* dir_emit() reported that no more entries can be taken. */
	FOUND_ALL = 2,
};
389 | |
390 | static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff, |
391 | void *addr, unsigned int size, |
392 | struct dir_context *ctx) |
393 | { |
394 | unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK; |
395 | enum fuse_parse_result res = FOUND_NONE; |
396 | |
397 | WARN_ON(offset >= size); |
398 | |
399 | for (;;) { |
400 | struct fuse_dirent *dirent = addr + offset; |
401 | unsigned int nbytes = size - offset; |
402 | size_t reclen; |
403 | |
404 | if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen) |
405 | break; |
406 | |
407 | reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */ |
408 | |
409 | if (WARN_ON(dirent->namelen > FUSE_NAME_MAX)) |
410 | return FOUND_ERR; |
411 | if (WARN_ON(reclen > nbytes)) |
412 | return FOUND_ERR; |
413 | if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL)) |
414 | return FOUND_ERR; |
415 | |
416 | if (ff->readdir.pos == ctx->pos) { |
417 | res = FOUND_SOME; |
418 | if (!dir_emit(ctx, name: dirent->name, namelen: dirent->namelen, |
419 | ino: dirent->ino, type: dirent->type)) |
420 | return FOUND_ALL; |
421 | ctx->pos = dirent->off; |
422 | } |
423 | ff->readdir.pos = dirent->off; |
424 | ff->readdir.cache_off += reclen; |
425 | |
426 | offset += reclen; |
427 | } |
428 | |
429 | return res; |
430 | } |
431 | |
432 | static void fuse_rdc_reset(struct inode *inode) |
433 | { |
434 | struct fuse_inode *fi = get_fuse_inode(inode); |
435 | |
436 | fi->rdc.cached = false; |
437 | fi->rdc.version++; |
438 | fi->rdc.size = 0; |
439 | fi->rdc.pos = 0; |
440 | } |
441 | |
442 | #define UNCACHED 1 |
443 | |
444 | static int fuse_readdir_cached(struct file *file, struct dir_context *ctx) |
445 | { |
446 | struct fuse_file *ff = file->private_data; |
447 | struct inode *inode = file_inode(f: file); |
448 | struct fuse_conn *fc = get_fuse_conn(inode); |
449 | struct fuse_inode *fi = get_fuse_inode(inode); |
450 | enum fuse_parse_result res; |
451 | pgoff_t index; |
452 | unsigned int size; |
453 | struct page *page; |
454 | void *addr; |
455 | |
456 | /* Seeked? If so, reset the cache stream */ |
457 | if (ff->readdir.pos != ctx->pos) { |
458 | ff->readdir.pos = 0; |
459 | ff->readdir.cache_off = 0; |
460 | } |
461 | |
462 | /* |
463 | * We're just about to start reading into the cache or reading the |
464 | * cache; both cases require an up-to-date mtime value. |
465 | */ |
466 | if (!ctx->pos && fc->auto_inval_data) { |
467 | int err = fuse_update_attributes(inode, file, STATX_MTIME); |
468 | |
469 | if (err) |
470 | return err; |
471 | } |
472 | |
473 | retry: |
474 | spin_lock(lock: &fi->rdc.lock); |
475 | retry_locked: |
476 | if (!fi->rdc.cached) { |
477 | /* Starting cache? Set cache mtime. */ |
478 | if (!ctx->pos && !fi->rdc.size) { |
479 | fi->rdc.mtime = inode_get_mtime(inode); |
480 | fi->rdc.iversion = inode_query_iversion(inode); |
481 | } |
482 | spin_unlock(lock: &fi->rdc.lock); |
483 | return UNCACHED; |
484 | } |
485 | /* |
486 | * When at the beginning of the directory (i.e. just after opendir(3) or |
487 | * rewinddir(3)), then need to check whether directory contents have |
488 | * changed, and reset the cache if so. |
489 | */ |
490 | if (!ctx->pos) { |
491 | struct timespec64 mtime = inode_get_mtime(inode); |
492 | |
493 | if (inode_peek_iversion(inode) != fi->rdc.iversion || |
494 | !timespec64_equal(a: &fi->rdc.mtime, b: &mtime)) { |
495 | fuse_rdc_reset(inode); |
496 | goto retry_locked; |
497 | } |
498 | } |
499 | |
500 | /* |
501 | * If cache version changed since the last getdents() call, then reset |
502 | * the cache stream. |
503 | */ |
504 | if (ff->readdir.version != fi->rdc.version) { |
505 | ff->readdir.pos = 0; |
506 | ff->readdir.cache_off = 0; |
507 | } |
508 | /* |
509 | * If at the beginning of the cache, than reset version to |
510 | * current. |
511 | */ |
512 | if (ff->readdir.pos == 0) |
513 | ff->readdir.version = fi->rdc.version; |
514 | |
515 | WARN_ON(fi->rdc.size < ff->readdir.cache_off); |
516 | |
517 | index = ff->readdir.cache_off >> PAGE_SHIFT; |
518 | |
519 | if (index == (fi->rdc.size >> PAGE_SHIFT)) |
520 | size = fi->rdc.size & ~PAGE_MASK; |
521 | else |
522 | size = PAGE_SIZE; |
523 | spin_unlock(lock: &fi->rdc.lock); |
524 | |
525 | /* EOF? */ |
526 | if ((ff->readdir.cache_off & ~PAGE_MASK) == size) |
527 | return 0; |
528 | |
529 | page = find_get_page_flags(mapping: file->f_mapping, offset: index, |
530 | FGP_ACCESSED | FGP_LOCK); |
531 | /* Page gone missing, then re-added to cache, but not initialized? */ |
532 | if (page && !PageUptodate(page)) { |
533 | unlock_page(page); |
534 | put_page(page); |
535 | page = NULL; |
536 | } |
537 | spin_lock(lock: &fi->rdc.lock); |
538 | if (!page) { |
539 | /* |
540 | * Uh-oh: page gone missing, cache is useless |
541 | */ |
542 | if (fi->rdc.version == ff->readdir.version) |
543 | fuse_rdc_reset(inode); |
544 | goto retry_locked; |
545 | } |
546 | |
547 | /* Make sure it's still the same version after getting the page. */ |
548 | if (ff->readdir.version != fi->rdc.version) { |
549 | spin_unlock(lock: &fi->rdc.lock); |
550 | unlock_page(page); |
551 | put_page(page); |
552 | goto retry; |
553 | } |
554 | spin_unlock(lock: &fi->rdc.lock); |
555 | |
556 | /* |
557 | * Contents of the page are now protected against changing by holding |
558 | * the page lock. |
559 | */ |
560 | addr = kmap_local_page(page); |
561 | res = fuse_parse_cache(ff, addr, size, ctx); |
562 | kunmap_local(addr); |
563 | unlock_page(page); |
564 | put_page(page); |
565 | |
566 | if (res == FOUND_ERR) |
567 | return -EIO; |
568 | |
569 | if (res == FOUND_ALL) |
570 | return 0; |
571 | |
572 | if (size == PAGE_SIZE) { |
573 | /* We hit end of page: skip to next page. */ |
574 | ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE); |
575 | goto retry; |
576 | } |
577 | |
578 | /* |
579 | * End of cache reached. If found position, then we are done, otherwise |
580 | * need to fall back to uncached, since the position we were looking for |
581 | * wasn't in the cache. |
582 | */ |
583 | return res == FOUND_SOME ? 0 : UNCACHED; |
584 | } |
585 | |
586 | int fuse_readdir(struct file *file, struct dir_context *ctx) |
587 | { |
588 | struct fuse_file *ff = file->private_data; |
589 | struct inode *inode = file_inode(f: file); |
590 | int err; |
591 | |
592 | if (fuse_is_bad(inode)) |
593 | return -EIO; |
594 | |
595 | err = UNCACHED; |
596 | if (ff->open_flags & FOPEN_CACHE_DIR) |
597 | err = fuse_readdir_cached(file, ctx); |
598 | if (err == UNCACHED) |
599 | err = fuse_readdir_uncached(file, ctx); |
600 | |
601 | return err; |
602 | } |
603 | |