1// SPDX-License-Identifier: GPL-2.0-or-later
2/* kiocb-using read/write
3 *
4 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
5 * Written by David Howells (dhowells@redhat.com)
6 */
7
8#include <linux/mount.h>
9#include <linux/slab.h>
10#include <linux/file.h>
11#include <linux/uio.h>
12#include <linux/falloc.h>
13#include <linux/sched/mm.h>
14#include <trace/events/fscache.h>
15#include "internal.h"
16
17struct cachefiles_kiocb {
18 struct kiocb iocb;
19 refcount_t ki_refcnt;
20 loff_t start;
21 union {
22 size_t skipped;
23 size_t len;
24 };
25 struct cachefiles_object *object;
26 netfs_io_terminated_t term_func;
27 void *term_func_priv;
28 bool was_async;
29 unsigned int inval_counter; /* Copy of cookie->inval_counter */
30 u64 b_writing;
31};
32
33static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki)
34{
35 if (refcount_dec_and_test(r: &ki->ki_refcnt)) {
36 cachefiles_put_object(object: ki->object, why: cachefiles_obj_put_ioreq);
37 fput(ki->iocb.ki_filp);
38 kfree(objp: ki);
39 }
40}
41
42/*
43 * Handle completion of a read from the cache.
44 */
45static void cachefiles_read_complete(struct kiocb *iocb, long ret)
46{
47 struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
48 struct inode *inode = file_inode(f: ki->iocb.ki_filp);
49
50 _enter("%ld", ret);
51
52 if (ret < 0)
53 trace_cachefiles_io_error(obj: ki->object, backer: inode, error: ret,
54 where: cachefiles_trace_read_error);
55
56 if (ki->term_func) {
57 if (ret >= 0) {
58 if (ki->object->cookie->inval_counter == ki->inval_counter)
59 ki->skipped += ret;
60 else
61 ret = -ESTALE;
62 }
63
64 ki->term_func(ki->term_func_priv, ret, ki->was_async);
65 }
66
67 cachefiles_put_kiocb(ki);
68}
69
70/*
71 * Initiate a read from the cache.
72 */
73static int cachefiles_read(struct netfs_cache_resources *cres,
74 loff_t start_pos,
75 struct iov_iter *iter,
76 enum netfs_read_from_hole read_hole,
77 netfs_io_terminated_t term_func,
78 void *term_func_priv)
79{
80 struct cachefiles_object *object;
81 struct cachefiles_kiocb *ki;
82 struct file *file;
83 unsigned int old_nofs;
84 ssize_t ret = -ENOBUFS;
85 size_t len = iov_iter_count(i: iter), skipped = 0;
86
87 if (!fscache_wait_for_operation(cred: cres, state: FSCACHE_WANT_READ))
88 goto presubmission_error;
89
90 fscache_count_read();
91 object = cachefiles_cres_object(cres);
92 file = cachefiles_cres_file(cres);
93
94 _enter("%pD,%li,%llx,%zx/%llx",
95 file, file_inode(file)->i_ino, start_pos, len,
96 i_size_read(file_inode(file)));
97
98 /* If the caller asked us to seek for data before doing the read, then
99 * we should do that now. If we find a gap, we fill it with zeros.
100 */
101 if (read_hole != NETFS_READ_HOLE_IGNORE) {
102 loff_t off = start_pos, off2;
103
104 off2 = cachefiles_inject_read_error();
105 if (off2 == 0)
106 off2 = vfs_llseek(file, offset: off, SEEK_DATA);
107 if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO && off2 != -ENXIO) {
108 skipped = 0;
109 ret = off2;
110 goto presubmission_error;
111 }
112
113 if (off2 == -ENXIO || off2 >= start_pos + len) {
114 /* The region is beyond the EOF or there's no more data
115 * in the region, so clear the rest of the buffer and
116 * return success.
117 */
118 ret = -ENODATA;
119 if (read_hole == NETFS_READ_HOLE_FAIL)
120 goto presubmission_error;
121
122 iov_iter_zero(bytes: len, iter);
123 skipped = len;
124 ret = 0;
125 goto presubmission_error;
126 }
127
128 skipped = off2 - off;
129 iov_iter_zero(bytes: skipped, iter);
130 }
131
132 ret = -ENOMEM;
133 ki = kzalloc(size: sizeof(struct cachefiles_kiocb), GFP_KERNEL);
134 if (!ki)
135 goto presubmission_error;
136
137 refcount_set(r: &ki->ki_refcnt, n: 2);
138 ki->iocb.ki_filp = file;
139 ki->iocb.ki_pos = start_pos + skipped;
140 ki->iocb.ki_flags = IOCB_DIRECT;
141 ki->iocb.ki_ioprio = get_current_ioprio();
142 ki->skipped = skipped;
143 ki->object = object;
144 ki->inval_counter = cres->inval_counter;
145 ki->term_func = term_func;
146 ki->term_func_priv = term_func_priv;
147 ki->was_async = true;
148
149 if (ki->term_func)
150 ki->iocb.ki_complete = cachefiles_read_complete;
151
152 get_file(f: ki->iocb.ki_filp);
153 cachefiles_grab_object(object, why: cachefiles_obj_get_ioreq);
154
155 trace_cachefiles_read(obj: object, backer: file_inode(f: file), start: ki->iocb.ki_pos, len: len - skipped);
156 old_nofs = memalloc_nofs_save();
157 ret = cachefiles_inject_read_error();
158 if (ret == 0)
159 ret = vfs_iocb_iter_read(file, iocb: &ki->iocb, iter);
160 memalloc_nofs_restore(flags: old_nofs);
161 switch (ret) {
162 case -EIOCBQUEUED:
163 goto in_progress;
164
165 case -ERESTARTSYS:
166 case -ERESTARTNOINTR:
167 case -ERESTARTNOHAND:
168 case -ERESTART_RESTARTBLOCK:
169 /* There's no easy way to restart the syscall since other AIO's
170 * may be already running. Just fail this IO with EINTR.
171 */
172 ret = -EINTR;
173 fallthrough;
174 default:
175 ki->was_async = false;
176 cachefiles_read_complete(iocb: &ki->iocb, ret);
177 if (ret > 0)
178 ret = 0;
179 break;
180 }
181
182in_progress:
183 cachefiles_put_kiocb(ki);
184 _leave(" = %zd", ret);
185 return ret;
186
187presubmission_error:
188 if (term_func)
189 term_func(term_func_priv, ret < 0 ? ret : skipped, false);
190 return ret;
191}
192
193/*
194 * Query the occupancy of the cache in a region, returning where the next chunk
195 * of data starts and how long it is.
196 */
197static int cachefiles_query_occupancy(struct netfs_cache_resources *cres,
198 loff_t start, size_t len, size_t granularity,
199 loff_t *_data_start, size_t *_data_len)
200{
201 struct cachefiles_object *object;
202 struct file *file;
203 loff_t off, off2;
204
205 *_data_start = -1;
206 *_data_len = 0;
207
208 if (!fscache_wait_for_operation(cred: cres, state: FSCACHE_WANT_READ))
209 return -ENOBUFS;
210
211 object = cachefiles_cres_object(cres);
212 file = cachefiles_cres_file(cres);
213 granularity = max_t(size_t, object->volume->cache->bsize, granularity);
214
215 _enter("%pD,%li,%llx,%zx/%llx",
216 file, file_inode(file)->i_ino, start, len,
217 i_size_read(file_inode(file)));
218
219 off = cachefiles_inject_read_error();
220 if (off == 0)
221 off = vfs_llseek(file, offset: start, SEEK_DATA);
222 if (off == -ENXIO)
223 return -ENODATA; /* Beyond EOF */
224 if (off < 0 && off >= (loff_t)-MAX_ERRNO)
225 return -ENOBUFS; /* Error. */
226 if (round_up(off, granularity) >= start + len)
227 return -ENODATA; /* No data in range */
228
229 off2 = cachefiles_inject_read_error();
230 if (off2 == 0)
231 off2 = vfs_llseek(file, offset: off, SEEK_HOLE);
232 if (off2 == -ENXIO)
233 return -ENODATA; /* Beyond EOF */
234 if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO)
235 return -ENOBUFS; /* Error. */
236
237 /* Round away partial blocks */
238 off = round_up(off, granularity);
239 off2 = round_down(off2, granularity);
240 if (off2 <= off)
241 return -ENODATA;
242
243 *_data_start = off;
244 if (off2 > start + len)
245 *_data_len = len;
246 else
247 *_data_len = off2 - off;
248 return 0;
249}
250
251/*
252 * Handle completion of a write to the cache.
253 */
254static void cachefiles_write_complete(struct kiocb *iocb, long ret)
255{
256 struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
257 struct cachefiles_object *object = ki->object;
258 struct inode *inode = file_inode(f: ki->iocb.ki_filp);
259
260 _enter("%ld", ret);
261
262 if (ki->was_async)
263 kiocb_end_write(iocb);
264
265 if (ret < 0)
266 trace_cachefiles_io_error(obj: object, backer: inode, error: ret,
267 where: cachefiles_trace_write_error);
268
269 atomic_long_sub(i: ki->b_writing, v: &object->volume->cache->b_writing);
270 set_bit(FSCACHE_COOKIE_HAVE_DATA, addr: &object->cookie->flags);
271 if (ki->term_func)
272 ki->term_func(ki->term_func_priv, ret, ki->was_async);
273 cachefiles_put_kiocb(ki);
274}
275
276/*
277 * Initiate a write to the cache.
278 */
279int __cachefiles_write(struct cachefiles_object *object,
280 struct file *file,
281 loff_t start_pos,
282 struct iov_iter *iter,
283 netfs_io_terminated_t term_func,
284 void *term_func_priv)
285{
286 struct cachefiles_cache *cache;
287 struct cachefiles_kiocb *ki;
288 unsigned int old_nofs;
289 ssize_t ret;
290 size_t len = iov_iter_count(i: iter);
291
292 fscache_count_write();
293 cache = object->volume->cache;
294
295 _enter("%pD,%li,%llx,%zx/%llx",
296 file, file_inode(file)->i_ino, start_pos, len,
297 i_size_read(file_inode(file)));
298
299 ki = kzalloc(size: sizeof(struct cachefiles_kiocb), GFP_KERNEL);
300 if (!ki) {
301 if (term_func)
302 term_func(term_func_priv, -ENOMEM, false);
303 return -ENOMEM;
304 }
305
306 refcount_set(r: &ki->ki_refcnt, n: 2);
307 ki->iocb.ki_filp = file;
308 ki->iocb.ki_pos = start_pos;
309 ki->iocb.ki_flags = IOCB_DIRECT | IOCB_WRITE;
310 ki->iocb.ki_ioprio = get_current_ioprio();
311 ki->object = object;
312 ki->start = start_pos;
313 ki->len = len;
314 ki->term_func = term_func;
315 ki->term_func_priv = term_func_priv;
316 ki->was_async = true;
317 ki->b_writing = (len + (1 << cache->bshift) - 1) >> cache->bshift;
318
319 if (ki->term_func)
320 ki->iocb.ki_complete = cachefiles_write_complete;
321 atomic_long_add(i: ki->b_writing, v: &cache->b_writing);
322
323 get_file(f: ki->iocb.ki_filp);
324 cachefiles_grab_object(object, why: cachefiles_obj_get_ioreq);
325
326 trace_cachefiles_write(obj: object, backer: file_inode(f: file), start: ki->iocb.ki_pos, len);
327 old_nofs = memalloc_nofs_save();
328 ret = cachefiles_inject_write_error();
329 if (ret == 0)
330 ret = vfs_iocb_iter_write(file, iocb: &ki->iocb, iter);
331 memalloc_nofs_restore(flags: old_nofs);
332 switch (ret) {
333 case -EIOCBQUEUED:
334 goto in_progress;
335
336 case -ERESTARTSYS:
337 case -ERESTARTNOINTR:
338 case -ERESTARTNOHAND:
339 case -ERESTART_RESTARTBLOCK:
340 /* There's no easy way to restart the syscall since other AIO's
341 * may be already running. Just fail this IO with EINTR.
342 */
343 ret = -EINTR;
344 fallthrough;
345 default:
346 ki->was_async = false;
347 cachefiles_write_complete(iocb: &ki->iocb, ret);
348 if (ret > 0)
349 ret = 0;
350 break;
351 }
352
353in_progress:
354 cachefiles_put_kiocb(ki);
355 _leave(" = %zd", ret);
356 return ret;
357}
358
359static int cachefiles_write(struct netfs_cache_resources *cres,
360 loff_t start_pos,
361 struct iov_iter *iter,
362 netfs_io_terminated_t term_func,
363 void *term_func_priv)
364{
365 if (!fscache_wait_for_operation(cred: cres, state: FSCACHE_WANT_WRITE)) {
366 if (term_func)
367 term_func(term_func_priv, -ENOBUFS, false);
368 return -ENOBUFS;
369 }
370
371 return __cachefiles_write(object: cachefiles_cres_object(cres),
372 file: cachefiles_cres_file(cres),
373 start_pos, iter,
374 term_func, term_func_priv);
375}
376
377static inline enum netfs_io_source
378cachefiles_do_prepare_read(struct netfs_cache_resources *cres,
379 loff_t start, size_t *_len, loff_t i_size,
380 unsigned long *_flags, ino_t netfs_ino)
381{
382 enum cachefiles_prepare_read_trace why;
383 struct cachefiles_object *object = NULL;
384 struct cachefiles_cache *cache;
385 struct fscache_cookie *cookie = fscache_cres_cookie(cres);
386 const struct cred *saved_cred;
387 struct file *file = cachefiles_cres_file(cres);
388 enum netfs_io_source ret = NETFS_DOWNLOAD_FROM_SERVER;
389 size_t len = *_len;
390 loff_t off, to;
391 ino_t ino = file ? file_inode(f: file)->i_ino : 0;
392 int rc;
393
394 _enter("%zx @%llx/%llx", len, start, i_size);
395
396 if (start >= i_size) {
397 ret = NETFS_FILL_WITH_ZEROES;
398 why = cachefiles_trace_read_after_eof;
399 goto out_no_object;
400 }
401
402 if (test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags)) {
403 __set_bit(NETFS_SREQ_COPY_TO_CACHE, _flags);
404 why = cachefiles_trace_read_no_data;
405 if (!test_bit(NETFS_SREQ_ONDEMAND, _flags))
406 goto out_no_object;
407 }
408
409 /* The object and the file may be being created in the background. */
410 if (!file) {
411 why = cachefiles_trace_read_no_file;
412 if (!fscache_wait_for_operation(cred: cres, state: FSCACHE_WANT_READ))
413 goto out_no_object;
414 file = cachefiles_cres_file(cres);
415 if (!file)
416 goto out_no_object;
417 ino = file_inode(f: file)->i_ino;
418 }
419
420 object = cachefiles_cres_object(cres);
421 cache = object->volume->cache;
422 cachefiles_begin_secure(cache, saved_cred: &saved_cred);
423retry:
424 off = cachefiles_inject_read_error();
425 if (off == 0)
426 off = vfs_llseek(file, offset: start, SEEK_DATA);
427 if (off < 0 && off >= (loff_t)-MAX_ERRNO) {
428 if (off == (loff_t)-ENXIO) {
429 why = cachefiles_trace_read_seek_nxio;
430 goto download_and_store;
431 }
432 trace_cachefiles_io_error(obj: object, backer: file_inode(f: file), error: off,
433 where: cachefiles_trace_seek_error);
434 why = cachefiles_trace_read_seek_error;
435 goto out;
436 }
437
438 if (off >= start + len) {
439 why = cachefiles_trace_read_found_hole;
440 goto download_and_store;
441 }
442
443 if (off > start) {
444 off = round_up(off, cache->bsize);
445 len = off - start;
446 *_len = len;
447 why = cachefiles_trace_read_found_part;
448 goto download_and_store;
449 }
450
451 to = cachefiles_inject_read_error();
452 if (to == 0)
453 to = vfs_llseek(file, offset: start, SEEK_HOLE);
454 if (to < 0 && to >= (loff_t)-MAX_ERRNO) {
455 trace_cachefiles_io_error(obj: object, backer: file_inode(f: file), error: to,
456 where: cachefiles_trace_seek_error);
457 why = cachefiles_trace_read_seek_error;
458 goto out;
459 }
460
461 if (to < start + len) {
462 if (start + len >= i_size)
463 to = round_up(to, cache->bsize);
464 else
465 to = round_down(to, cache->bsize);
466 len = to - start;
467 *_len = len;
468 }
469
470 why = cachefiles_trace_read_have_data;
471 ret = NETFS_READ_FROM_CACHE;
472 goto out;
473
474download_and_store:
475 __set_bit(NETFS_SREQ_COPY_TO_CACHE, _flags);
476 if (test_bit(NETFS_SREQ_ONDEMAND, _flags)) {
477 rc = cachefiles_ondemand_read(object, pos: start, len);
478 if (!rc) {
479 __clear_bit(NETFS_SREQ_ONDEMAND, _flags);
480 goto retry;
481 }
482 ret = NETFS_INVALID_READ;
483 }
484out:
485 cachefiles_end_secure(cache, saved_cred);
486out_no_object:
487 trace_cachefiles_prep_read(obj: object, start, len, flags: *_flags, source: ret, why, cache_inode: ino, netfs_inode: netfs_ino);
488 return ret;
489}
490
491/*
492 * Prepare a read operation, shortening it to a cached/uncached
493 * boundary as appropriate.
494 */
495static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest *subreq,
496 loff_t i_size)
497{
498 return cachefiles_do_prepare_read(cres: &subreq->rreq->cache_resources,
499 start: subreq->start, len: &subreq->len, i_size,
500 flags: &subreq->flags, netfs_ino: subreq->rreq->inode->i_ino);
501}
502
503/*
504 * Prepare an on-demand read operation, shortening it to a cached/uncached
505 * boundary as appropriate.
506 */
507static enum netfs_io_source
508cachefiles_prepare_ondemand_read(struct netfs_cache_resources *cres,
509 loff_t start, size_t *_len, loff_t i_size,
510 unsigned long *_flags, ino_t ino)
511{
512 return cachefiles_do_prepare_read(cres, start, _len, i_size, _flags, netfs_ino: ino);
513}
514
515/*
516 * Prepare for a write to occur.
517 */
518int __cachefiles_prepare_write(struct cachefiles_object *object,
519 struct file *file,
520 loff_t *_start, size_t *_len, size_t upper_len,
521 bool no_space_allocated_yet)
522{
523 struct cachefiles_cache *cache = object->volume->cache;
524 loff_t start = *_start, pos;
525 size_t len = *_len;
526 int ret;
527
528 /* Round to DIO size */
529 start = round_down(*_start, PAGE_SIZE);
530 if (start != *_start || *_len > upper_len) {
531 /* Probably asked to cache a streaming write written into the
532 * pagecache when the cookie was temporarily out of service to
533 * culling.
534 */
535 fscache_count_dio_misfit();
536 return -ENOBUFS;
537 }
538
539 *_len = round_up(len, PAGE_SIZE);
540
541 /* We need to work out whether there's sufficient disk space to perform
542 * the write - but we can skip that check if we have space already
543 * allocated.
544 */
545 if (no_space_allocated_yet)
546 goto check_space;
547
548 pos = cachefiles_inject_read_error();
549 if (pos == 0)
550 pos = vfs_llseek(file, offset: start, SEEK_DATA);
551 if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
552 if (pos == -ENXIO)
553 goto check_space; /* Unallocated tail */
554 trace_cachefiles_io_error(obj: object, backer: file_inode(f: file), error: pos,
555 where: cachefiles_trace_seek_error);
556 return pos;
557 }
558 if ((u64)pos >= (u64)start + *_len)
559 goto check_space; /* Unallocated region */
560
561 /* We have a block that's at least partially filled - if we're low on
562 * space, we need to see if it's fully allocated. If it's not, we may
563 * want to cull it.
564 */
565 if (cachefiles_has_space(cache, fnr: 0, bnr: *_len / PAGE_SIZE,
566 reason: cachefiles_has_space_check) == 0)
567 return 0; /* Enough space to simply overwrite the whole block */
568
569 pos = cachefiles_inject_read_error();
570 if (pos == 0)
571 pos = vfs_llseek(file, offset: start, SEEK_HOLE);
572 if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
573 trace_cachefiles_io_error(obj: object, backer: file_inode(f: file), error: pos,
574 where: cachefiles_trace_seek_error);
575 return pos;
576 }
577 if ((u64)pos >= (u64)start + *_len)
578 return 0; /* Fully allocated */
579
580 /* Partially allocated, but insufficient space: cull. */
581 fscache_count_no_write_space();
582 ret = cachefiles_inject_remove_error();
583 if (ret == 0)
584 ret = vfs_fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
585 offset: start, len: *_len);
586 if (ret < 0) {
587 trace_cachefiles_io_error(obj: object, backer: file_inode(f: file), error: ret,
588 where: cachefiles_trace_fallocate_error);
589 cachefiles_io_error_obj(object,
590 "CacheFiles: fallocate failed (%d)\n", ret);
591 ret = -EIO;
592 }
593
594 return ret;
595
596check_space:
597 return cachefiles_has_space(cache, fnr: 0, bnr: *_len / PAGE_SIZE,
598 reason: cachefiles_has_space_for_write);
599}
600
601static int cachefiles_prepare_write(struct netfs_cache_resources *cres,
602 loff_t *_start, size_t *_len, size_t upper_len,
603 loff_t i_size, bool no_space_allocated_yet)
604{
605 struct cachefiles_object *object = cachefiles_cres_object(cres);
606 struct cachefiles_cache *cache = object->volume->cache;
607 const struct cred *saved_cred;
608 int ret;
609
610 if (!cachefiles_cres_file(cres)) {
611 if (!fscache_wait_for_operation(cred: cres, state: FSCACHE_WANT_WRITE))
612 return -ENOBUFS;
613 if (!cachefiles_cres_file(cres))
614 return -ENOBUFS;
615 }
616
617 cachefiles_begin_secure(cache, saved_cred: &saved_cred);
618 ret = __cachefiles_prepare_write(object, file: cachefiles_cres_file(cres),
619 _start, _len, upper_len,
620 no_space_allocated_yet);
621 cachefiles_end_secure(cache, saved_cred);
622 return ret;
623}
624
625/*
626 * Clean up an operation.
627 */
628static void cachefiles_end_operation(struct netfs_cache_resources *cres)
629{
630 struct file *file = cachefiles_cres_file(cres);
631
632 if (file)
633 fput(file);
634 fscache_end_cookie_access(cookie: fscache_cres_cookie(cres), why: fscache_access_io_end);
635}
636
637static const struct netfs_cache_ops cachefiles_netfs_cache_ops = {
638 .end_operation = cachefiles_end_operation,
639 .read = cachefiles_read,
640 .write = cachefiles_write,
641 .prepare_read = cachefiles_prepare_read,
642 .prepare_write = cachefiles_prepare_write,
643 .prepare_ondemand_read = cachefiles_prepare_ondemand_read,
644 .query_occupancy = cachefiles_query_occupancy,
645};
646
647/*
648 * Open the cache file when beginning a cache operation.
649 */
650bool cachefiles_begin_operation(struct netfs_cache_resources *cres,
651 enum fscache_want_state want_state)
652{
653 struct cachefiles_object *object = cachefiles_cres_object(cres);
654
655 if (!cachefiles_cres_file(cres)) {
656 cres->ops = &cachefiles_netfs_cache_ops;
657 if (object->file) {
658 spin_lock(lock: &object->lock);
659 if (!cres->cache_priv2 && object->file)
660 cres->cache_priv2 = get_file(f: object->file);
661 spin_unlock(lock: &object->lock);
662 }
663 }
664
665 if (!cachefiles_cres_file(cres) && want_state != FSCACHE_WANT_PARAMS) {
666 pr_err("failed to get cres->file\n");
667 return false;
668 }
669
670 return true;
671}
672

source code of linux/fs/cachefiles/io.c