// SPDX-License-Identifier: GPL-2.0
#ifndef NO_BCACHEFS_FS

#include "bcachefs.h"
#include "btree_iter.h"
#include "extents.h"
#include "fs-io.h"
#include "fs-io-pagecache.h"
#include "subvolume.h"

#include <linux/pagevec.h>
#include <linux/writeback.h>

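/*
 * Get a contiguous run of folios covering [start, end), pushing them onto
 * @fs. FGP_CREAT is dropped once we're more than 1MB past @start,
 * presumably to bound how much new pagecache a single call can allocate.
 * Returns 0 if at least one folio was obtained, even on partial success:
 */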
int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
				     loff_t start, u64 end,
				     fgf_t fgp_flags, gfp_t gfp,
				     folios *fs)
{
	struct folio *f;
	u64 pos = start;
	int ret = 0;

	while (pos < end) {
		if ((u64) pos >= (u64) start + (1ULL << 20))
			fgp_flags &= ~FGP_CREAT;

		ret = darray_make_room_gfp(fs, 1, gfp & GFP_KERNEL);
		if (ret)
			break;

		f = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp_flags, gfp);
		if (IS_ERR_OR_NULL(f))
			break;

		BUG_ON(fs->nr && folio_pos(f) != pos);

		pos = folio_end_pos(f);
		darray_push(fs, f);
	}

	if (!fs->nr && !ret && (fgp_flags & FGP_CREAT))
		ret = -ENOMEM;

	return fs->nr ? 0 : ret;
}

/* pagecache_block must be held */
int bch2_write_invalidate_inode_pages_range(struct address_space *mapping,
					    loff_t start, loff_t end)
{
	int ret;

	/*
	 * XXX: the way this is currently implemented, we can spin if a process
	 * is continually redirtying a specific page
	 */
	do {
		if (!mapping->nrpages)
			return 0;

		ret = filemap_write_and_wait_range(mapping, start, end);
		if (ret)
			break;

		if (!mapping->nrpages)
			return 0;

		ret = invalidate_inode_pages2_range(mapping,
				start >> PAGE_SHIFT,
				end >> PAGE_SHIFT);
	} while (ret == -EBUSY);

	return ret;
}

#if 0
/* Useful for debug tracing: */
static const char * const bch2_folio_sector_states[] = {
#define x(n)	#n,
	BCH_FOLIO_SECTOR_STATE()
#undef x
	NULL
};
#endif

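/*
 * State transition helpers for per-sector pagecache state: dirtying adds
 * the dirty bit (unallocated -> dirty, reserved -> dirty_reserved),
 * undirtying removes it, and reserving upgrades a sector to its reserved
 * variant. States not listed are returned unchanged:
 */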
static inline enum bch_folio_sector_state
folio_sector_dirty(enum bch_folio_sector_state state)
{
	switch (state) {
	case SECTOR_unallocated:
		return SECTOR_dirty;
	case SECTOR_reserved:
		return SECTOR_dirty_reserved;
	default:
		return state;
	}
}

static inline enum bch_folio_sector_state
folio_sector_undirty(enum bch_folio_sector_state state)
{
	switch (state) {
	case SECTOR_dirty:
		return SECTOR_unallocated;
	case SECTOR_dirty_reserved:
		return SECTOR_reserved;
	default:
		return state;
	}
}

static inline enum bch_folio_sector_state
folio_sector_reserve(enum bch_folio_sector_state state)
{
	switch (state) {
	case SECTOR_unallocated:
		return SECTOR_reserved;
	case SECTOR_dirty:
		return SECTOR_dirty_reserved;
	default:
		return state;
	}
}

/* for newly allocated folios: */
struct bch_folio *__bch2_folio_create(struct folio *folio, gfp_t gfp)
{
	struct bch_folio *s;

	s = kzalloc(sizeof(*s) +
		    sizeof(struct bch_folio_sector) *
		    folio_sectors(folio), gfp);
	if (!s)
		return NULL;

	spin_lock_init(&s->lock);
	folio_attach_private(folio, s);
	return s;
}

struct bch_folio *bch2_folio_create(struct folio *folio, gfp_t gfp)
{
	return bch2_folio(folio) ?: __bch2_folio_create(folio, gfp);
}

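/* Map an extent key to the pagecache sector state it implies: */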
static unsigned bkey_to_sector_state(struct bkey_s_c k)
{
	if (bkey_extent_is_reservation(k))
		return SECTOR_reserved;
	if (bkey_extent_is_allocation(k.k))
		return SECTOR_allocated;
	return SECTOR_unallocated;
}

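/*
 * Set state and replica count for a range of sectors within a folio; also
 * marks the whole folio uptodate if the range runs to the last sector:
 */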
static void __bch2_folio_set(struct folio *folio,
			     unsigned pg_offset, unsigned pg_len,
			     unsigned nr_ptrs, unsigned state)
{
	struct bch_folio *s = bch2_folio(folio);
	unsigned i, sectors = folio_sectors(folio);

	BUG_ON(pg_offset >= sectors);
	BUG_ON(pg_offset + pg_len > sectors);

	spin_lock(&s->lock);

	for (i = pg_offset; i < pg_offset + pg_len; i++) {
		s->s[i].nr_replicas = nr_ptrs;
		bch2_folio_sector_set(folio, s, i, state);
	}

	if (i == sectors)
		s->uptodate = true;

	spin_unlock(&s->lock);
}

/*
 * Initialize bch_folio state (allocated/unallocated, nr_replicas) from the
 * extents btree:
 */
int bch2_folio_set(struct bch_fs *c, subvol_inum inum,
		   struct folio **fs, unsigned nr_folios)
{
	struct btree_trans *trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bch_folio *s;
	u64 offset = folio_sector(fs[0]);
	unsigned folio_idx;
	u32 snapshot;
	bool need_set = false;
	int ret;

	for (folio_idx = 0; folio_idx < nr_folios; folio_idx++) {
		s = bch2_folio_create(fs[folio_idx], GFP_KERNEL);
		if (!s)
			return -ENOMEM;

		need_set |= !s->uptodate;
	}

	if (!need_set)
		return 0;

	folio_idx = 0;
	trans = bch2_trans_get(c);
retry:
	bch2_trans_begin(trans);

	ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
	if (ret)
		goto err;

	for_each_btree_key_norestart(trans, iter, BTREE_ID_extents,
				     SPOS(inum.inum, offset, snapshot),
				     BTREE_ITER_SLOTS, k, ret) {
		unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k);
		unsigned state = bkey_to_sector_state(k);

		while (folio_idx < nr_folios) {
			struct folio *folio = fs[folio_idx];
			u64 folio_start = folio_sector(folio);
			u64 folio_end = folio_end_sector(folio);
			unsigned folio_offset = max(bkey_start_offset(k.k), folio_start) -
				folio_start;
			unsigned folio_len = min(k.k->p.offset, folio_end) -
				folio_offset - folio_start;

			BUG_ON(k.k->p.offset < folio_start);
			BUG_ON(bkey_start_offset(k.k) > folio_end);

			if (!bch2_folio(folio)->uptodate)
				__bch2_folio_set(folio, folio_offset, folio_len, nr_ptrs, state);

			if (k.k->p.offset < folio_end)
				break;
			folio_idx++;
		}

		if (folio_idx == nr_folios)
			break;
	}

	offset = iter.pos.offset;
	bch2_trans_iter_exit(trans, &iter);
err:
	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
		goto retry;
	bch2_trans_put(trans);

	return ret;
}

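/*
 * Apply the sector state implied by @k to every folio covered by @bio;
 * reflink_v keys conservatively get an nr_ptrs of 0:
 */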
void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k)
{
	struct bvec_iter iter;
	struct folio_vec fv;
	unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v
		? 0 : bch2_bkey_nr_ptrs_fully_allocated(k);
	unsigned state = bkey_to_sector_state(k);

	bio_for_each_folio(fv, bio, iter)
		__bch2_folio_set(fv.fv_folio,
				 fv.fv_offset >> 9,
				 fv.fv_len >> 9,
				 nr_ptrs, state);
}

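/*
 * Zero the cached nr_replicas for every sector in [start, end), so the
 * pagecache no longer claims those sectors are replicated on disk:
 */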
void bch2_mark_pagecache_unallocated(struct bch_inode_info *inode,
				     u64 start, u64 end)
{
	pgoff_t index = start >> PAGE_SECTORS_SHIFT;
	pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
	struct folio_batch fbatch;
	unsigned i, j;

	if (end <= start)
		return;

	folio_batch_init(&fbatch);

	while (filemap_get_folios(inode->v.i_mapping,
				  &index, end_index, &fbatch)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];
			u64 folio_start = folio_sector(folio);
			u64 folio_end = folio_end_sector(folio);
			unsigned folio_offset = max(start, folio_start) - folio_start;
			unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
			struct bch_folio *s;

			BUG_ON(end <= folio_start);

			folio_lock(folio);
			s = bch2_folio(folio);

			if (s) {
				spin_lock(&s->lock);
				for (j = folio_offset; j < folio_offset + folio_len; j++)
					s->s[j].nr_replicas = 0;
				spin_unlock(&s->lock);
			}

			folio_unlock(folio);
		}
		folio_batch_release(&fbatch);
		cond_resched();
	}
}

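/*
 * Move sectors in [*start, end) to their reserved state, with i_sectors
 * accounting adjusted for previously-dirty sectors. *start is advanced as
 * folios are processed, so a nonblocking caller that gets -EAGAIN can
 * resume where it left off:
 */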
int bch2_mark_pagecache_reserved(struct bch_inode_info *inode,
				 u64 *start, u64 end,
				 bool nonblocking)
{
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	pgoff_t index = *start >> PAGE_SECTORS_SHIFT;
	pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
	struct folio_batch fbatch;
	s64 i_sectors_delta = 0;
	int ret = 0;

	if (end <= *start)
		return 0;

	folio_batch_init(&fbatch);

	while (filemap_get_folios(inode->v.i_mapping,
				  &index, end_index, &fbatch)) {
		for (unsigned i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];

			if (!nonblocking)
				folio_lock(folio);
			else if (!folio_trylock(folio)) {
				folio_batch_release(&fbatch);
				ret = -EAGAIN;
				break;
			}

			u64 folio_start = folio_sector(folio);
			u64 folio_end = folio_end_sector(folio);

			BUG_ON(end <= folio_start);

			*start = min(end, folio_end);

			struct bch_folio *s = bch2_folio(folio);
			if (s) {
				unsigned folio_offset = max(*start, folio_start) - folio_start;
				unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;

				spin_lock(&s->lock);
				for (unsigned j = folio_offset; j < folio_offset + folio_len; j++) {
					i_sectors_delta -= s->s[j].state == SECTOR_dirty;
					bch2_folio_sector_set(folio, s, j,
							      folio_sector_reserve(s->s[j].state));
				}
				spin_unlock(&s->lock);
			}

			folio_unlock(folio);
		}
		folio_batch_release(&fbatch);
		cond_resched();
	}

	bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
	return ret;
}

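/* How many additional replicas this sector still needs reserved to hit @nr_replicas: */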
static inline unsigned sectors_to_reserve(struct bch_folio_sector *s,
					  unsigned nr_replicas)
{
	return max(0, (int) nr_replicas -
		   s->nr_replicas -
		   s->replicas_reserved);
}

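/*
 * Take a disk reservation covering every sector in @folio that isn't yet
 * reserved or allocated to the inode's replication level; with check_enospc
 * false, the reservation is taken with BCH_DISK_RESERVATION_NOFAIL:
 */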
int bch2_get_folio_disk_reservation(struct bch_fs *c,
				struct bch_inode_info *inode,
				struct folio *folio, bool check_enospc)
{
	struct bch_folio *s = bch2_folio_create(folio, 0);
	unsigned nr_replicas = inode_nr_replicas(c, inode);
	struct disk_reservation disk_res = { 0 };
	unsigned i, sectors = folio_sectors(folio), disk_res_sectors = 0;
	int ret;

	if (!s)
		return -ENOMEM;

	for (i = 0; i < sectors; i++)
		disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas);

	if (!disk_res_sectors)
		return 0;

	ret = bch2_disk_reservation_get(c, &disk_res,
					disk_res_sectors, 1,
					!check_enospc
					? BCH_DISK_RESERVATION_NOFAIL
					: 0);
	if (unlikely(ret))
		return ret;

	for (i = 0; i < sectors; i++)
		s->s[i].replicas_reserved +=
			sectors_to_reserve(&s->s[i], nr_replicas);

	return 0;
}

void bch2_folio_reservation_put(struct bch_fs *c,
				struct bch_inode_info *inode,
				struct bch2_folio_reservation *res)
{
	bch2_disk_reservation_put(c, &res->disk);
	bch2_quota_reservation_put(c, inode, &res->quota);
}

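/*
 * Reserve disk space and quota for a write to [offset, offset + len) within
 * @folio. The range is widened to block granularity; quota is only needed
 * for sectors that are currently unallocated, and on quota failure the disk
 * reservation just added is released again:
 */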
int bch2_folio_reservation_get(struct bch_fs *c,
			struct bch_inode_info *inode,
			struct folio *folio,
			struct bch2_folio_reservation *res,
			unsigned offset, unsigned len)
{
	struct bch_folio *s = bch2_folio_create(folio, 0);
	unsigned i, disk_sectors = 0, quota_sectors = 0;
	int ret;

	if (!s)
		return -ENOMEM;

	BUG_ON(!s->uptodate);

	for (i = round_down(offset, block_bytes(c)) >> 9;
	     i < round_up(offset + len, block_bytes(c)) >> 9;
	     i++) {
		disk_sectors += sectors_to_reserve(&s->s[i],
						   res->disk.nr_replicas);
		quota_sectors += s->s[i].state == SECTOR_unallocated;
	}

	if (disk_sectors) {
		ret = bch2_disk_reservation_add(c, &res->disk, disk_sectors, 0);
		if (unlikely(ret))
			return ret;
	}

	if (quota_sectors) {
		ret = bch2_quota_reservation_add(c, inode, &res->quota,
						 quota_sectors, true);
		if (unlikely(ret)) {
			struct disk_reservation tmp = {
				.sectors = disk_sectors
			};

			bch2_disk_reservation_put(c, &tmp);
			res->disk.sectors -= disk_sectors;
			return ret;
		}
	}

	return 0;
}

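/*
 * Tear down bch_folio state when a folio leaves the pagecache: return any
 * unused replica reservations, undirty the sectors (adjusting i_sectors for
 * sectors that were dirty), and free the private data:
 */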
static void bch2_clear_folio_bits(struct folio *folio)
{
	struct bch_inode_info *inode = to_bch_ei(folio->mapping->host);
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct bch_folio *s = bch2_folio(folio);
	struct disk_reservation disk_res = { 0 };
	int i, sectors = folio_sectors(folio), dirty_sectors = 0;

	if (!s)
		return;

	EBUG_ON(!folio_test_locked(folio));
	EBUG_ON(folio_test_writeback(folio));

	for (i = 0; i < sectors; i++) {
		disk_res.sectors += s->s[i].replicas_reserved;
		s->s[i].replicas_reserved = 0;

		dirty_sectors -= s->s[i].state == SECTOR_dirty;
		bch2_folio_sector_set(folio, s, i, folio_sector_undirty(s->s[i].state));
	}

	bch2_disk_reservation_put(c, &disk_res);

	bch2_i_sectors_acct(c, inode, NULL, dirty_sectors);

	bch2_folio_release(folio);
}

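/*
 * Transfer reservations from @res into per-sector state and mark the
 * sectors (and the folio) dirty. Newly-dirtied unallocated sectors bump
 * i_sectors, accounting for space that writeback will allocate:
 */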
void bch2_set_folio_dirty(struct bch_fs *c,
			struct bch_inode_info *inode,
			struct folio *folio,
			struct bch2_folio_reservation *res,
			unsigned offset, unsigned len)
{
	struct bch_folio *s = bch2_folio(folio);
	unsigned i, dirty_sectors = 0;

	WARN_ON((u64) folio_pos(folio) + offset + len >
		round_up((u64) i_size_read(&inode->v), block_bytes(c)));

	BUG_ON(!s->uptodate);

	spin_lock(&s->lock);

	for (i = round_down(offset, block_bytes(c)) >> 9;
	     i < round_up(offset + len, block_bytes(c)) >> 9;
	     i++) {
		unsigned sectors = sectors_to_reserve(&s->s[i],
						      res->disk.nr_replicas);

		/*
		 * This can happen if we race with the error path in
		 * bch2_writepage_io_done():
		 */
		sectors = min_t(unsigned, sectors, res->disk.sectors);

		s->s[i].replicas_reserved += sectors;
		res->disk.sectors -= sectors;

		dirty_sectors += s->s[i].state == SECTOR_unallocated;

		bch2_folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state));
	}

	spin_unlock(&s->lock);

	bch2_i_sectors_acct(c, inode, &res->quota, dirty_sectors);

	if (!folio_test_dirty(folio))
		filemap_dirty_folio(inode->v.i_mapping, folio);
}

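/*
 * Page fault handler. faults_disabled_mapping() is set while a dio write to
 * that file holds its pagecache_block lock: faulting the same file's pages
 * back in here would deadlock, hence SIGBUS. When the fault is against a
 * *different* file, locks are taken in address_space pointer order to avoid
 * deadlock between the two pagecache locks; if that means dropping the
 * dio's pagecache_block lock, we signal it via set_fdm_dropped_locks() and
 * return SIGBUS so the dio path can retry:
 */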
vm_fault_t bch2_page_fault(struct vm_fault *vmf)
{
	struct file *file = vmf->vma->vm_file;
	struct address_space *mapping = file->f_mapping;
	struct address_space *fdm = faults_disabled_mapping();
	struct bch_inode_info *inode = file_bch_inode(file);
	vm_fault_t ret;

	if (fdm == mapping)
		return VM_FAULT_SIGBUS;

	/* Lock ordering: */
	if (fdm > mapping) {
		struct bch_inode_info *fdm_host = to_bch_ei(fdm->host);

		if (bch2_pagecache_add_tryget(inode))
			goto got_lock;

		bch2_pagecache_block_put(fdm_host);

		bch2_pagecache_add_get(inode);
		bch2_pagecache_add_put(inode);

		bch2_pagecache_block_get(fdm_host);

		/* Signal that lock has been dropped: */
		set_fdm_dropped_locks();
		return VM_FAULT_SIGBUS;
	}

	bch2_pagecache_add_get(inode);
got_lock:
	ret = filemap_fault(vmf);
	bch2_pagecache_add_put(inode);

	return ret;
}

vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
{
	struct folio *folio = page_folio(vmf->page);
	struct file *file = vmf->vma->vm_file;
	struct bch_inode_info *inode = file_bch_inode(file);
	struct address_space *mapping = file->f_mapping;
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct bch2_folio_reservation res;
	unsigned len;
	loff_t isize;
	vm_fault_t ret;

	bch2_folio_reservation_init(c, inode, &res);

	sb_start_pagefault(inode->v.i_sb);
	file_update_time(file);

	/*
	 * Not strictly necessary, but helps avoid dio writes livelocking in
	 * bch2_write_invalidate_inode_pages_range() - can drop this if/when we get
	 * a bch2_write_invalidate_inode_pages_range() that works without dropping
	 * page lock before invalidating page
	 */
	bch2_pagecache_add_get(inode);

	folio_lock(folio);
	isize = i_size_read(&inode->v);

	if (folio->mapping != mapping || folio_pos(folio) >= isize) {
		folio_unlock(folio);
		ret = VM_FAULT_NOPAGE;
		goto out;
	}

	len = min_t(loff_t, folio_size(folio), isize - folio_pos(folio));

	if (bch2_folio_set(c, inode_inum(inode), &folio, 1) ?:
	    bch2_folio_reservation_get(c, inode, folio, &res, 0, len)) {
		folio_unlock(folio);
		ret = VM_FAULT_SIGBUS;
		goto out;
	}

	bch2_set_folio_dirty(c, inode, folio, &res, 0, len);
	bch2_folio_reservation_put(c, inode, &res);

	folio_wait_stable(folio);
	ret = VM_FAULT_LOCKED;
out:
	bch2_pagecache_add_put(inode);
	sb_end_pagefault(inode->v.i_sb);

	return ret;
}

void bch2_invalidate_folio(struct folio *folio, size_t offset, size_t length)
{
	if (offset || length < folio_size(folio))
		return;

	bch2_clear_folio_bits(folio);
}

bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask)
{
	if (folio_test_dirty(folio) || folio_test_writeback(folio))
		return false;

	bch2_clear_folio_bits(folio);
	return true;
}

/* fseek: */

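/*
 * Return the byte offset (relative to the folio) of the first sector at or
 * after @pos that holds data (state >= SECTOR_dirty) with at least
 * @min_replicas replicas cached or reserved, or -1 if there is none:
 */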
static int folio_data_offset(struct folio *folio, loff_t pos,
			     unsigned min_replicas)
{
	struct bch_folio *s = bch2_folio(folio);
	unsigned i, sectors = folio_sectors(folio);

	if (s)
		for (i = folio_pos_to_s(folio, pos); i < sectors; i++)
			if (s->s[i].state >= SECTOR_dirty &&
			    s->s[i].nr_replicas + s->s[i].replicas_reserved >= min_replicas)
				return i << SECTOR_SHIFT;

	return -1;
}

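/*
 * SEEK_DATA within the pagecache: scan folios in [start_offset, end_offset)
 * for sectors holding data, returning the first such offset clamped to the
 * range, or end_offset if none is found. Returns -EAGAIN if @nonblock and a
 * folio lock couldn't be taken:
 */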
loff_t bch2_seek_pagecache_data(struct inode *vinode,
				loff_t start_offset,
				loff_t end_offset,
				unsigned min_replicas,
				bool nonblock)
{
	struct folio_batch fbatch;
	pgoff_t start_index = start_offset >> PAGE_SHIFT;
	pgoff_t end_index = end_offset >> PAGE_SHIFT;
	pgoff_t index = start_index;
	unsigned i;
	loff_t ret;
	int offset;

	folio_batch_init(&fbatch);

	while (filemap_get_folios(vinode->i_mapping,
				  &index, end_index, &fbatch)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];

			if (!nonblock) {
				folio_lock(folio);
			} else if (!folio_trylock(folio)) {
				folio_batch_release(&fbatch);
				return -EAGAIN;
			}

			offset = folio_data_offset(folio,
					max(folio_pos(folio), start_offset),
					min_replicas);
			if (offset >= 0) {
				ret = clamp(folio_pos(folio) + offset,
					    start_offset, end_offset);
				folio_unlock(folio);
				folio_batch_release(&fbatch);
				return ret;
			}
			folio_unlock(folio);
		}
		folio_batch_release(&fbatch);
		cond_resched();
	}

	return end_offset;
}

/*
 * Search for a hole in a folio.
 *
 * The filemap layer returns -ENOENT if no folio exists, so reuse the same
 * error code to indicate a pagecache hole exists at the returned offset.
 * Otherwise return 0 if the folio is filled with data, or an error code.
 * This function can return -EAGAIN if nonblock is specified.
 */
static int folio_hole_offset(struct address_space *mapping, loff_t *offset,
			     unsigned min_replicas, bool nonblock)
{
	struct folio *folio;
	struct bch_folio *s;
	unsigned i, sectors;
	int ret = -ENOENT;

	folio = __filemap_get_folio(mapping, *offset >> PAGE_SHIFT,
				    FGP_LOCK|(nonblock ? FGP_NOWAIT : 0), 0);
	if (IS_ERR(folio))
		return PTR_ERR(folio);

	s = bch2_folio(folio);
	if (!s)
		goto unlock;

	sectors = folio_sectors(folio);
	for (i = folio_pos_to_s(folio, *offset); i < sectors; i++)
		if (s->s[i].state < SECTOR_dirty ||
		    s->s[i].nr_replicas + s->s[i].replicas_reserved < min_replicas) {
			*offset = max(*offset,
				      folio_pos(folio) + (i << SECTOR_SHIFT));
			goto unlock;
		}

	*offset = folio_end_pos(folio);
	ret = 0;
unlock:
	folio_unlock(folio);
	folio_put(folio);
	return ret;
}

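/*
 * SEEK_HOLE within the pagecache: walk forward a folio at a time until
 * folio_hole_offset() reports a hole (-ENOENT, which also covers the
 * no-folio case) or we pass end_offset:
 */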
loff_t bch2_seek_pagecache_hole(struct inode *vinode,
				loff_t start_offset,
				loff_t end_offset,
				unsigned min_replicas,
				bool nonblock)
{
	struct address_space *mapping = vinode->i_mapping;
	loff_t offset = start_offset;
	loff_t ret = 0;

	while (!ret && offset < end_offset)
		ret = folio_hole_offset(mapping, &offset, min_replicas, nonblock);

	if (ret && ret != -ENOENT)
		return ret;
	return min(offset, end_offset);
}

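/*
 * Shrink a hole found in the extents btree (in sectors) so that it doesn't
 * overlap data that only exists in the pagecache: the start is pushed past
 * any cached data, and the end is pulled back to where cached data resumes:
 */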
int bch2_clamp_data_hole(struct inode *inode,
			 u64 *hole_start,
			 u64 *hole_end,
			 unsigned min_replicas,
			 bool nonblock)
{
	loff_t ret;

	ret = bch2_seek_pagecache_hole(inode,
		*hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
	if (ret < 0)
		return ret;

	*hole_start = ret;

	if (*hole_start == *hole_end)
		return 0;

	ret = bch2_seek_pagecache_data(inode,
		*hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
	if (ret < 0)
		return ret;

	*hole_end = ret;
	return 0;
}

#endif /* NO_BCACHEFS_FS */