1 | // SPDX-License-Identifier: GPL-2.0 |
2 | #ifndef NO_BCACHEFS_FS |
3 | |
4 | #include "bcachefs.h" |
5 | #include "alloc_foreground.h" |
6 | #include "fs.h" |
7 | #include "fs-io.h" |
8 | #include "fs-io-direct.h" |
9 | #include "fs-io-pagecache.h" |
10 | #include "io_read.h" |
11 | #include "io_write.h" |
12 | |
13 | #include <linux/kthread.h> |
14 | #include <linux/pagemap.h> |
15 | #include <linux/prefetch.h> |
16 | #include <linux/task_io_accounting_ops.h> |
17 | |
18 | /* O_DIRECT reads */ |
19 | |
20 | struct dio_read { |
21 | struct closure cl; |
22 | struct kiocb *req; |
23 | long ret; |
24 | bool should_dirty; |
25 | struct bch_read_bio rbio; |
26 | }; |
27 | |
28 | static void bio_check_or_release(struct bio *bio, bool check_dirty) |
29 | { |
30 | if (check_dirty) { |
31 | bio_check_pages_dirty(bio); |
32 | } else { |
33 | bio_release_pages(bio, mark_dirty: false); |
34 | bio_put(bio); |
35 | } |
36 | } |
37 | |
38 | static CLOSURE_CALLBACK(bch2_dio_read_complete) |
39 | { |
40 | closure_type(dio, struct dio_read, cl); |
41 | |
42 | dio->req->ki_complete(dio->req, dio->ret); |
43 | bio_check_or_release(bio: &dio->rbio.bio, check_dirty: dio->should_dirty); |
44 | } |
45 | |
46 | static void bch2_direct_IO_read_endio(struct bio *bio) |
47 | { |
48 | struct dio_read *dio = bio->bi_private; |
49 | |
50 | if (bio->bi_status) |
51 | dio->ret = blk_status_to_errno(status: bio->bi_status); |
52 | |
53 | closure_put(cl: &dio->cl); |
54 | } |
55 | |
56 | static void bch2_direct_IO_read_split_endio(struct bio *bio) |
57 | { |
58 | struct dio_read *dio = bio->bi_private; |
59 | bool should_dirty = dio->should_dirty; |
60 | |
61 | bch2_direct_IO_read_endio(bio); |
62 | bio_check_or_release(bio, check_dirty: should_dirty); |
63 | } |
64 | |
65 | static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) |
66 | { |
67 | struct file *file = req->ki_filp; |
68 | struct bch_inode_info *inode = file_bch_inode(file); |
69 | struct bch_fs *c = inode->v.i_sb->s_fs_info; |
70 | struct bch_io_opts opts; |
71 | struct dio_read *dio; |
72 | struct bio *bio; |
73 | loff_t offset = req->ki_pos; |
74 | bool sync = is_sync_kiocb(kiocb: req); |
75 | size_t shorten; |
76 | ssize_t ret; |
77 | |
78 | bch2_inode_opts_get(&opts, c, &inode->ei_inode); |
79 | |
80 | /* bios must be 512 byte aligned: */ |
81 | if ((offset|iter->count) & (SECTOR_SIZE - 1)) |
82 | return -EINVAL; |
83 | |
84 | ret = min_t(loff_t, iter->count, |
85 | max_t(loff_t, 0, i_size_read(&inode->v) - offset)); |
86 | |
87 | if (!ret) |
88 | return ret; |
89 | |
90 | shorten = iov_iter_count(i: iter) - round_up(ret, block_bytes(c)); |
91 | if (shorten >= iter->count) |
92 | shorten = 0; |
93 | iter->count -= shorten; |
94 | |
95 | bio = bio_alloc_bioset(NULL, |
96 | nr_vecs: bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), |
97 | opf: REQ_OP_READ, |
98 | GFP_KERNEL, |
99 | bs: &c->dio_read_bioset); |
100 | |
101 | bio->bi_end_io = bch2_direct_IO_read_endio; |
102 | |
103 | dio = container_of(bio, struct dio_read, rbio.bio); |
104 | closure_init(cl: &dio->cl, NULL); |
105 | |
106 | /* |
107 | * this is a _really_ horrible hack just to avoid an atomic sub at the |
108 | * end: |
109 | */ |
110 | if (!sync) { |
111 | set_closure_fn(cl: &dio->cl, fn: bch2_dio_read_complete, NULL); |
112 | atomic_set(v: &dio->cl.remaining, |
113 | CLOSURE_REMAINING_INITIALIZER - |
114 | CLOSURE_RUNNING + |
115 | CLOSURE_DESTRUCTOR); |
116 | } else { |
117 | atomic_set(v: &dio->cl.remaining, |
118 | CLOSURE_REMAINING_INITIALIZER + 1); |
119 | dio->cl.closure_get_happened = true; |
120 | } |
121 | |
122 | dio->req = req; |
123 | dio->ret = ret; |
124 | /* |
125 | * This is one of the sketchier things I've encountered: we have to skip |
126 | * the dirtying of requests that are internal from the kernel (i.e. from |
127 | * loopback), because we'll deadlock on page_lock. |
128 | */ |
129 | dio->should_dirty = iter_is_iovec(i: iter); |
130 | |
131 | goto start; |
132 | while (iter->count) { |
133 | bio = bio_alloc_bioset(NULL, |
134 | nr_vecs: bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), |
135 | opf: REQ_OP_READ, |
136 | GFP_KERNEL, |
137 | bs: &c->bio_read); |
138 | bio->bi_end_io = bch2_direct_IO_read_split_endio; |
139 | start: |
140 | bio->bi_opf = REQ_OP_READ|REQ_SYNC; |
141 | bio->bi_iter.bi_sector = offset >> 9; |
142 | bio->bi_private = dio; |
143 | |
144 | ret = bio_iov_iter_get_pages(bio, iter); |
145 | if (ret < 0) { |
146 | /* XXX: fault inject this path */ |
147 | bio->bi_status = BLK_STS_RESOURCE; |
148 | bio_endio(bio); |
149 | break; |
150 | } |
151 | |
152 | offset += bio->bi_iter.bi_size; |
153 | |
154 | if (dio->should_dirty) |
155 | bio_set_pages_dirty(bio); |
156 | |
157 | if (iter->count) |
158 | closure_get(cl: &dio->cl); |
159 | |
160 | bch2_read(c, rbio: rbio_init(bio, opts), inum: inode_inum(inode)); |
161 | } |
162 | |
163 | iter->count += shorten; |
164 | |
165 | if (sync) { |
166 | closure_sync(cl: &dio->cl); |
167 | closure_debug_destroy(cl: &dio->cl); |
168 | ret = dio->ret; |
169 | bio_check_or_release(bio: &dio->rbio.bio, check_dirty: dio->should_dirty); |
170 | return ret; |
171 | } else { |
172 | return -EIOCBQUEUED; |
173 | } |
174 | } |
175 | |
176 | ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter) |
177 | { |
178 | struct file *file = iocb->ki_filp; |
179 | struct bch_inode_info *inode = file_bch_inode(file); |
180 | struct address_space *mapping = file->f_mapping; |
181 | size_t count = iov_iter_count(i: iter); |
182 | ssize_t ret; |
183 | |
184 | if (!count) |
185 | return 0; /* skip atime */ |
186 | |
187 | if (iocb->ki_flags & IOCB_DIRECT) { |
188 | struct blk_plug plug; |
189 | |
190 | if (unlikely(mapping->nrpages)) { |
191 | ret = filemap_write_and_wait_range(mapping, |
192 | lstart: iocb->ki_pos, |
193 | lend: iocb->ki_pos + count - 1); |
194 | if (ret < 0) |
195 | goto out; |
196 | } |
197 | |
198 | file_accessed(file); |
199 | |
200 | blk_start_plug(&plug); |
201 | ret = bch2_direct_IO_read(req: iocb, iter); |
202 | blk_finish_plug(&plug); |
203 | |
204 | if (ret >= 0) |
205 | iocb->ki_pos += ret; |
206 | } else { |
207 | bch2_pagecache_add_get(inode); |
208 | ret = generic_file_read_iter(iocb, iter); |
209 | bch2_pagecache_add_put(inode); |
210 | } |
211 | out: |
212 | return bch2_err_class(err: ret); |
213 | } |
214 | |
215 | /* O_DIRECT writes */ |
216 | |
217 | struct dio_write { |
218 | struct kiocb *req; |
219 | struct address_space *mapping; |
220 | struct bch_inode_info *inode; |
221 | struct mm_struct *mm; |
222 | const struct iovec *iov; |
223 | unsigned loop:1, |
224 | extending:1, |
225 | sync:1, |
226 | flush:1; |
227 | struct quota_res quota_res; |
228 | u64 written; |
229 | |
230 | struct iov_iter iter; |
231 | struct iovec inline_vecs[2]; |
232 | |
233 | /* must be last: */ |
234 | struct bch_write_op op; |
235 | }; |
236 | |
237 | static bool bch2_check_range_allocated(struct bch_fs *c, subvol_inum inum, |
238 | u64 offset, u64 size, |
239 | unsigned nr_replicas, bool compressed) |
240 | { |
241 | struct btree_trans *trans = bch2_trans_get(c); |
242 | struct btree_iter iter; |
243 | struct bkey_s_c k; |
244 | u64 end = offset + size; |
245 | u32 snapshot; |
246 | bool ret = true; |
247 | int err; |
248 | retry: |
249 | bch2_trans_begin(trans); |
250 | |
251 | err = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); |
252 | if (err) |
253 | goto err; |
254 | |
255 | for_each_btree_key_norestart(trans, iter, BTREE_ID_extents, |
256 | SPOS(inum.inum, offset, snapshot), |
257 | BTREE_ITER_SLOTS, k, err) { |
258 | if (bkey_ge(l: bkey_start_pos(k: k.k), POS(inum.inum, end))) |
259 | break; |
260 | |
261 | if (k.k->p.snapshot != snapshot || |
262 | nr_replicas > bch2_bkey_replicas(c, k) || |
263 | (!compressed && bch2_bkey_sectors_compressed(k))) { |
264 | ret = false; |
265 | break; |
266 | } |
267 | } |
268 | |
269 | offset = iter.pos.offset; |
270 | bch2_trans_iter_exit(trans, &iter); |
271 | err: |
272 | if (bch2_err_matches(err, BCH_ERR_transaction_restart)) |
273 | goto retry; |
274 | bch2_trans_put(trans); |
275 | |
276 | return err ? false : ret; |
277 | } |
278 | |
279 | static noinline bool bch2_dio_write_check_allocated(struct dio_write *dio) |
280 | { |
281 | struct bch_fs *c = dio->op.c; |
282 | struct bch_inode_info *inode = dio->inode; |
283 | struct bio *bio = &dio->op.wbio.bio; |
284 | |
285 | return bch2_check_range_allocated(c, inum: inode_inum(inode), |
286 | offset: dio->op.pos.offset, bio_sectors(bio), |
287 | nr_replicas: dio->op.opts.data_replicas, |
288 | compressed: dio->op.opts.compression != 0); |
289 | } |
290 | |
291 | static void bch2_dio_write_loop_async(struct bch_write_op *); |
292 | static __always_inline long bch2_dio_write_done(struct dio_write *dio); |
293 | |
294 | /* |
295 | * We're going to return -EIOCBQUEUED, but we haven't finished consuming the |
296 | * iov_iter yet, so we need to stash a copy of the iovec: it might be on the |
297 | * caller's stack, we're not guaranteed that it will live for the duration of |
298 | * the IO: |
299 | */ |
300 | static noinline int bch2_dio_write_copy_iov(struct dio_write *dio) |
301 | { |
302 | struct iovec *iov = dio->inline_vecs; |
303 | |
304 | /* |
305 | * iov_iter has a single embedded iovec - nothing to do: |
306 | */ |
307 | if (iter_is_ubuf(i: &dio->iter)) |
308 | return 0; |
309 | |
310 | /* |
311 | * We don't currently handle non-iovec iov_iters here - return an error, |
312 | * and we'll fall back to doing the IO synchronously: |
313 | */ |
314 | if (!iter_is_iovec(i: &dio->iter)) |
315 | return -1; |
316 | |
317 | if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) { |
318 | dio->iov = iov = kmalloc_array(n: dio->iter.nr_segs, size: sizeof(*iov), |
319 | GFP_KERNEL); |
320 | if (unlikely(!iov)) |
321 | return -ENOMEM; |
322 | } |
323 | |
324 | memcpy(iov, dio->iter.__iov, dio->iter.nr_segs * sizeof(*iov)); |
325 | dio->iter.__iov = iov; |
326 | return 0; |
327 | } |
328 | |
329 | static CLOSURE_CALLBACK(bch2_dio_write_flush_done) |
330 | { |
331 | closure_type(dio, struct dio_write, op.cl); |
332 | struct bch_fs *c = dio->op.c; |
333 | |
334 | closure_debug_destroy(cl); |
335 | |
336 | dio->op.error = bch2_journal_error(j: &c->journal); |
337 | |
338 | bch2_dio_write_done(dio); |
339 | } |
340 | |
341 | static noinline void bch2_dio_write_flush(struct dio_write *dio) |
342 | { |
343 | struct bch_fs *c = dio->op.c; |
344 | struct bch_inode_unpacked inode; |
345 | int ret; |
346 | |
347 | dio->flush = 0; |
348 | |
349 | closure_init(cl: &dio->op.cl, NULL); |
350 | |
351 | if (!dio->op.error) { |
352 | ret = bch2_inode_find_by_inum(c, inode_inum(inode: dio->inode), &inode); |
353 | if (ret) { |
354 | dio->op.error = ret; |
355 | } else { |
356 | bch2_journal_flush_seq_async(&c->journal, inode.bi_journal_seq, |
357 | &dio->op.cl); |
358 | bch2_inode_flush_nocow_writes_async(c, dio->inode, &dio->op.cl); |
359 | } |
360 | } |
361 | |
362 | if (dio->sync) { |
363 | closure_sync(cl: &dio->op.cl); |
364 | closure_debug_destroy(cl: &dio->op.cl); |
365 | } else { |
366 | continue_at(&dio->op.cl, bch2_dio_write_flush_done, NULL); |
367 | } |
368 | } |
369 | |
370 | static __always_inline long bch2_dio_write_done(struct dio_write *dio) |
371 | { |
372 | struct kiocb *req = dio->req; |
373 | struct bch_inode_info *inode = dio->inode; |
374 | bool sync = dio->sync; |
375 | long ret; |
376 | |
377 | if (unlikely(dio->flush)) { |
378 | bch2_dio_write_flush(dio); |
379 | if (!sync) |
380 | return -EIOCBQUEUED; |
381 | } |
382 | |
383 | bch2_pagecache_block_put(inode); |
384 | |
385 | kfree(objp: dio->iov); |
386 | |
387 | ret = dio->op.error ?: ((long) dio->written << 9); |
388 | bio_put(&dio->op.wbio.bio); |
389 | |
390 | bch2_write_ref_put(c: dio->op.c, ref: BCH_WRITE_REF_dio_write); |
391 | |
392 | /* inode->i_dio_count is our ref on inode and thus bch_fs */ |
393 | inode_dio_end(inode: &inode->v); |
394 | |
395 | if (ret < 0) |
396 | ret = bch2_err_class(err: ret); |
397 | |
398 | if (!sync) { |
399 | req->ki_complete(req, ret); |
400 | ret = -EIOCBQUEUED; |
401 | } |
402 | return ret; |
403 | } |
404 | |
405 | static __always_inline void bch2_dio_write_end(struct dio_write *dio) |
406 | { |
407 | struct bch_fs *c = dio->op.c; |
408 | struct kiocb *req = dio->req; |
409 | struct bch_inode_info *inode = dio->inode; |
410 | struct bio *bio = &dio->op.wbio.bio; |
411 | |
412 | req->ki_pos += (u64) dio->op.written << 9; |
413 | dio->written += dio->op.written; |
414 | |
415 | if (dio->extending) { |
416 | spin_lock(lock: &inode->v.i_lock); |
417 | if (req->ki_pos > inode->v.i_size) |
418 | i_size_write(inode: &inode->v, i_size: req->ki_pos); |
419 | spin_unlock(lock: &inode->v.i_lock); |
420 | } |
421 | |
422 | if (dio->op.i_sectors_delta || dio->quota_res.sectors) { |
423 | mutex_lock(&inode->ei_quota_lock); |
424 | __bch2_i_sectors_acct(c, inode, &dio->quota_res, dio->op.i_sectors_delta); |
425 | __bch2_quota_reservation_put(c, inode, res: &dio->quota_res); |
426 | mutex_unlock(lock: &inode->ei_quota_lock); |
427 | } |
428 | |
429 | bio_release_pages(bio, mark_dirty: false); |
430 | |
431 | if (unlikely(dio->op.error)) |
432 | set_bit(EI_INODE_ERROR, addr: &inode->ei_flags); |
433 | } |
434 | |
435 | static __always_inline long bch2_dio_write_loop(struct dio_write *dio) |
436 | { |
437 | struct bch_fs *c = dio->op.c; |
438 | struct kiocb *req = dio->req; |
439 | struct address_space *mapping = dio->mapping; |
440 | struct bch_inode_info *inode = dio->inode; |
441 | struct bch_io_opts opts; |
442 | struct bio *bio = &dio->op.wbio.bio; |
443 | unsigned unaligned, iter_count; |
444 | bool sync = dio->sync, dropped_locks; |
445 | long ret; |
446 | |
447 | bch2_inode_opts_get(&opts, c, &inode->ei_inode); |
448 | |
449 | while (1) { |
450 | iter_count = dio->iter.count; |
451 | |
452 | EBUG_ON(current->faults_disabled_mapping); |
453 | current->faults_disabled_mapping = mapping; |
454 | |
455 | ret = bio_iov_iter_get_pages(bio, iter: &dio->iter); |
456 | |
457 | dropped_locks = fdm_dropped_locks(); |
458 | |
459 | current->faults_disabled_mapping = NULL; |
460 | |
461 | /* |
462 | * If the fault handler returned an error but also signalled |
463 | * that it dropped & retook ei_pagecache_lock, we just need to |
464 | * re-shoot down the page cache and retry: |
465 | */ |
466 | if (dropped_locks && ret) |
467 | ret = 0; |
468 | |
469 | if (unlikely(ret < 0)) |
470 | goto err; |
471 | |
472 | if (unlikely(dropped_locks)) { |
473 | ret = bch2_write_invalidate_inode_pages_range(mapping, |
474 | req->ki_pos, |
475 | req->ki_pos + iter_count - 1); |
476 | if (unlikely(ret)) |
477 | goto err; |
478 | |
479 | if (!bio->bi_iter.bi_size) |
480 | continue; |
481 | } |
482 | |
483 | unaligned = bio->bi_iter.bi_size & (block_bytes(c) - 1); |
484 | bio->bi_iter.bi_size -= unaligned; |
485 | iov_iter_revert(i: &dio->iter, bytes: unaligned); |
486 | |
487 | if (!bio->bi_iter.bi_size) { |
488 | /* |
489 | * bio_iov_iter_get_pages was only able to get < |
490 | * blocksize worth of pages: |
491 | */ |
492 | ret = -EFAULT; |
493 | goto err; |
494 | } |
495 | |
496 | bch2_write_op_init(op: &dio->op, c, opts); |
497 | dio->op.end_io = sync |
498 | ? NULL |
499 | : bch2_dio_write_loop_async; |
500 | dio->op.target = dio->op.opts.foreground_target; |
501 | dio->op.write_point = writepoint_hashed(v: (unsigned long) current); |
502 | dio->op.nr_replicas = dio->op.opts.data_replicas; |
503 | dio->op.subvol = inode->ei_subvol; |
504 | dio->op.pos = POS(inode->v.i_ino, (u64) req->ki_pos >> 9); |
505 | dio->op.devs_need_flush = &inode->ei_devs_need_flush; |
506 | |
507 | if (sync) |
508 | dio->op.flags |= BCH_WRITE_SYNC; |
509 | dio->op.flags |= BCH_WRITE_CHECK_ENOSPC; |
510 | |
511 | ret = bch2_quota_reservation_add(c, inode, res: &dio->quota_res, |
512 | bio_sectors(bio), check_enospc: true); |
513 | if (unlikely(ret)) |
514 | goto err; |
515 | |
516 | ret = bch2_disk_reservation_get(c, res: &dio->op.res, bio_sectors(bio), |
517 | nr_replicas: dio->op.opts.data_replicas, flags: 0); |
518 | if (unlikely(ret) && |
519 | !bch2_dio_write_check_allocated(dio)) |
520 | goto err; |
521 | |
522 | task_io_account_write(bytes: bio->bi_iter.bi_size); |
523 | |
524 | if (unlikely(dio->iter.count) && |
525 | !dio->sync && |
526 | !dio->loop && |
527 | bch2_dio_write_copy_iov(dio)) |
528 | dio->sync = sync = true; |
529 | |
530 | dio->loop = true; |
531 | closure_call(cl: &dio->op.cl, fn: bch2_write, NULL, NULL); |
532 | |
533 | if (!sync) |
534 | return -EIOCBQUEUED; |
535 | |
536 | bch2_dio_write_end(dio); |
537 | |
538 | if (likely(!dio->iter.count) || dio->op.error) |
539 | break; |
540 | |
541 | bio_reset(bio, NULL, opf: REQ_OP_WRITE | REQ_SYNC | REQ_IDLE); |
542 | } |
543 | out: |
544 | return bch2_dio_write_done(dio); |
545 | err: |
546 | dio->op.error = ret; |
547 | |
548 | bio_release_pages(bio, mark_dirty: false); |
549 | |
550 | bch2_quota_reservation_put(c, inode, res: &dio->quota_res); |
551 | goto out; |
552 | } |
553 | |
554 | static noinline __cold void bch2_dio_write_continue(struct dio_write *dio) |
555 | { |
556 | struct mm_struct *mm = dio->mm; |
557 | |
558 | bio_reset(bio: &dio->op.wbio.bio, NULL, opf: REQ_OP_WRITE); |
559 | |
560 | if (mm) |
561 | kthread_use_mm(mm); |
562 | bch2_dio_write_loop(dio); |
563 | if (mm) |
564 | kthread_unuse_mm(mm); |
565 | } |
566 | |
567 | static void bch2_dio_write_loop_async(struct bch_write_op *op) |
568 | { |
569 | struct dio_write *dio = container_of(op, struct dio_write, op); |
570 | |
571 | bch2_dio_write_end(dio); |
572 | |
573 | if (likely(!dio->iter.count) || dio->op.error) |
574 | bch2_dio_write_done(dio); |
575 | else |
576 | bch2_dio_write_continue(dio); |
577 | } |
578 | |
579 | ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) |
580 | { |
581 | struct file *file = req->ki_filp; |
582 | struct address_space *mapping = file->f_mapping; |
583 | struct bch_inode_info *inode = file_bch_inode(file); |
584 | struct bch_fs *c = inode->v.i_sb->s_fs_info; |
585 | struct dio_write *dio; |
586 | struct bio *bio; |
587 | bool locked = true, extending; |
588 | ssize_t ret; |
589 | |
590 | prefetch(&c->opts); |
591 | prefetch((void *) &c->opts + 64); |
592 | prefetch(&inode->ei_inode); |
593 | prefetch((void *) &inode->ei_inode + 64); |
594 | |
595 | if (!bch2_write_ref_tryget(c, ref: BCH_WRITE_REF_dio_write)) |
596 | return -EROFS; |
597 | |
598 | inode_lock(inode: &inode->v); |
599 | |
600 | ret = generic_write_checks(req, iter); |
601 | if (unlikely(ret <= 0)) |
602 | goto err_put_write_ref; |
603 | |
604 | ret = file_remove_privs(file); |
605 | if (unlikely(ret)) |
606 | goto err_put_write_ref; |
607 | |
608 | ret = file_update_time(file); |
609 | if (unlikely(ret)) |
610 | goto err_put_write_ref; |
611 | |
612 | if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1))) |
613 | goto err_put_write_ref; |
614 | |
615 | inode_dio_begin(inode: &inode->v); |
616 | bch2_pagecache_block_get(inode); |
617 | |
618 | extending = req->ki_pos + iter->count > inode->v.i_size; |
619 | if (!extending) { |
620 | inode_unlock(inode: &inode->v); |
621 | locked = false; |
622 | } |
623 | |
624 | bio = bio_alloc_bioset(NULL, |
625 | nr_vecs: bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), |
626 | opf: REQ_OP_WRITE | REQ_SYNC | REQ_IDLE, |
627 | GFP_KERNEL, |
628 | bs: &c->dio_write_bioset); |
629 | dio = container_of(bio, struct dio_write, op.wbio.bio); |
630 | dio->req = req; |
631 | dio->mapping = mapping; |
632 | dio->inode = inode; |
633 | dio->mm = current->mm; |
634 | dio->iov = NULL; |
635 | dio->loop = false; |
636 | dio->extending = extending; |
637 | dio->sync = is_sync_kiocb(kiocb: req) || extending; |
638 | dio->flush = iocb_is_dsync(iocb: req) && !c->opts.journal_flush_disabled; |
639 | dio->quota_res.sectors = 0; |
640 | dio->written = 0; |
641 | dio->iter = *iter; |
642 | dio->op.c = c; |
643 | |
644 | if (unlikely(mapping->nrpages)) { |
645 | ret = bch2_write_invalidate_inode_pages_range(mapping, |
646 | req->ki_pos, |
647 | req->ki_pos + iter->count - 1); |
648 | if (unlikely(ret)) |
649 | goto err_put_bio; |
650 | } |
651 | |
652 | ret = bch2_dio_write_loop(dio); |
653 | out: |
654 | if (locked) |
655 | inode_unlock(inode: &inode->v); |
656 | return ret; |
657 | err_put_bio: |
658 | bch2_pagecache_block_put(inode); |
659 | bio_put(bio); |
660 | inode_dio_end(inode: &inode->v); |
661 | err_put_write_ref: |
662 | bch2_write_ref_put(c, ref: BCH_WRITE_REF_dio_write); |
663 | goto out; |
664 | } |
665 | |
666 | void bch2_fs_fs_io_direct_exit(struct bch_fs *c) |
667 | { |
668 | bioset_exit(&c->dio_write_bioset); |
669 | bioset_exit(&c->dio_read_bioset); |
670 | } |
671 | |
672 | int bch2_fs_fs_io_direct_init(struct bch_fs *c) |
673 | { |
674 | if (bioset_init(&c->dio_read_bioset, |
675 | 4, offsetof(struct dio_read, rbio.bio), |
676 | flags: BIOSET_NEED_BVECS)) |
677 | return -BCH_ERR_ENOMEM_dio_read_bioset_init; |
678 | |
679 | if (bioset_init(&c->dio_write_bioset, |
680 | 4, offsetof(struct dio_write, op.wbio.bio), |
681 | flags: BIOSET_NEED_BVECS)) |
682 | return -BCH_ERR_ENOMEM_dio_write_bioset_init; |
683 | |
684 | return 0; |
685 | } |
686 | |
687 | #endif /* NO_BCACHEFS_FS */ |
688 | |