// SPDX-License-Identifier: GPL-2.0
#ifndef NO_BCACHEFS_FS

#include "bcachefs.h"
#include "btree_iter.h"
#include "extents.h"
#include "fs-io.h"
#include "fs-io-pagecache.h"
#include "subvolume.h"

#include <linux/pagevec.h>
#include <linux/writeback.h>

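/*
 * Get a contiguous run of folios covering [start, end), pushing them onto
 * @fs. FGP_CREAT is dropped once we're more than 1MB past @start,
 * presumably to bound how much new pagecache a single call can allocate.
 * Returns 0 if at least one folio was obtained, even on partial success:
 */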
int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
				     loff_t start, u64 end,
				     fgf_t fgp_flags, gfp_t gfp,
				     folios *fs)
{
	struct folio *f;
	u64 pos = start;
	int ret = 0;

	while (pos < end) {
		if ((u64) pos >= (u64) start + (1ULL << 20))
			fgp_flags &= ~FGP_CREAT;

		ret = darray_make_room_gfp(fs, 1, gfp & GFP_KERNEL);
		if (ret)
			break;

		f = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp_flags, gfp);
		if (IS_ERR_OR_NULL(f))
			break;

		BUG_ON(fs->nr && folio_pos(f) != pos);

		pos = folio_end_pos(f);
		darray_push(fs, f);
	}

	if (!fs->nr && !ret && (fgp_flags & FGP_CREAT))
		ret = -ENOMEM;

	return fs->nr ? 0 : ret;
}

/* pagecache_block must be held */
int bch2_write_invalidate_inode_pages_range(struct address_space *mapping,
					    loff_t start, loff_t end)
{
	int ret;

	/*
	 * XXX: the way this is currently implemented, we can spin if a process
	 * is continually redirtying a specific page
	 */
	do {
		if (!mapping->nrpages)
			return 0;

		ret = filemap_write_and_wait_range(mapping, start, end);
		if (ret)
			break;

		if (!mapping->nrpages)
			return 0;

		ret = invalidate_inode_pages2_range(mapping,
				start >> PAGE_SHIFT,
				end >> PAGE_SHIFT);
	} while (ret == -EBUSY);

	return ret;
}

#if 0
/* Useful for debug tracing: */
static const char * const bch2_folio_sector_states[] = {
#define x(n)	#n,
	BCH_FOLIO_SECTOR_STATE()
#undef x
	NULL
};
#endif

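/*
 * State transition helpers for per-sector pagecache state: dirtying adds
 * the dirty bit (unallocated -> dirty, reserved -> dirty_reserved),
 * undirtying removes it, and reserving upgrades a sector to its reserved
 * variant. States not listed are returned unchanged:
 */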
static inline enum bch_folio_sector_state
folio_sector_dirty(enum bch_folio_sector_state state)
{
	switch (state) {
	case SECTOR_unallocated:
		return SECTOR_dirty;
	case SECTOR_reserved:
		return SECTOR_dirty_reserved;
	default:
		return state;
	}
}

static inline enum bch_folio_sector_state
folio_sector_undirty(enum bch_folio_sector_state state)
{
	switch (state) {
	case SECTOR_dirty:
		return SECTOR_unallocated;
	case SECTOR_dirty_reserved:
		return SECTOR_reserved;
	default:
		return state;
	}
}

static inline enum bch_folio_sector_state
folio_sector_reserve(enum bch_folio_sector_state state)
{
	switch (state) {
	case SECTOR_unallocated:
		return SECTOR_reserved;
	case SECTOR_dirty:
		return SECTOR_dirty_reserved;
	default:
		return state;
	}
}

/* for newly allocated folios: */
struct bch_folio *__bch2_folio_create(struct folio *folio, gfp_t gfp)
{
	struct bch_folio *s;

	s = kzalloc(sizeof(*s) +
		    sizeof(struct bch_folio_sector) *
		    folio_sectors(folio), gfp);
	if (!s)
		return NULL;

	spin_lock_init(&s->lock);
	folio_attach_private(folio, s);
	return s;
}

struct bch_folio *bch2_folio_create(struct folio *folio, gfp_t gfp)
{
	return bch2_folio(folio) ?: __bch2_folio_create(folio, gfp);
}

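/* Map an extent key to the pagecache sector state it implies: */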
static unsigned bkey_to_sector_state(struct bkey_s_c k)
{
	if (bkey_extent_is_reservation(k))
		return SECTOR_reserved;
	if (bkey_extent_is_allocation(k.k))
		return SECTOR_allocated;
	return SECTOR_unallocated;
}

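/*
 * Set state and replica count for a range of sectors within a folio; also
 * marks the whole folio uptodate if the range runs to the last sector:
 */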
static void __bch2_folio_set(struct folio *folio,
			     unsigned pg_offset, unsigned pg_len,
			     unsigned nr_ptrs, unsigned state)
{
	struct bch_folio *s = bch2_folio(folio);
	unsigned i, sectors = folio_sectors(folio);

	BUG_ON(pg_offset >= sectors);
	BUG_ON(pg_offset + pg_len > sectors);

	spin_lock(&s->lock);

	for (i = pg_offset; i < pg_offset + pg_len; i++) {
		s->s[i].nr_replicas = nr_ptrs;
		bch2_folio_sector_set(folio, s, i, state);
	}

	if (i == sectors)
		s->uptodate = true;

	spin_unlock(&s->lock);
}

/*
 * Initialize bch_folio state (allocated/unallocated, nr_replicas) from the
 * extents btree:
 */
int bch2_folio_set(struct bch_fs *c, subvol_inum inum,
		   struct folio **fs, unsigned nr_folios)
{
	struct btree_trans *trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bch_folio *s;
	u64 offset = folio_sector(fs[0]);
	unsigned folio_idx;
	u32 snapshot;
	bool need_set = false;
	int ret;

	for (folio_idx = 0; folio_idx < nr_folios; folio_idx++) {
		s = bch2_folio_create(fs[folio_idx], GFP_KERNEL);
		if (!s)
			return -ENOMEM;

		need_set |= !s->uptodate;
	}

	if (!need_set)
		return 0;

	folio_idx = 0;
	trans = bch2_trans_get(c);
retry:
	bch2_trans_begin(trans);

	ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
	if (ret)
		goto err;

	for_each_btree_key_norestart(trans, iter, BTREE_ID_extents,
				     SPOS(inum.inum, offset, snapshot),
				     BTREE_ITER_SLOTS, k, ret) {
		unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k);
		unsigned state = bkey_to_sector_state(k);

		while (folio_idx < nr_folios) {
			struct folio *folio = fs[folio_idx];
			u64 folio_start = folio_sector(folio);
			u64 folio_end = folio_end_sector(folio);
			unsigned folio_offset = max(bkey_start_offset(k.k), folio_start) -
				folio_start;
			unsigned folio_len = min(k.k->p.offset, folio_end) -
				folio_offset - folio_start;

			BUG_ON(k.k->p.offset < folio_start);
			BUG_ON(bkey_start_offset(k.k) > folio_end);

			if (!bch2_folio(folio)->uptodate)
				__bch2_folio_set(folio, folio_offset, folio_len, nr_ptrs, state);

			if (k.k->p.offset < folio_end)
				break;
			folio_idx++;
		}

		if (folio_idx == nr_folios)
			break;
	}

	offset = iter.pos.offset;
	bch2_trans_iter_exit(trans, &iter);
err:
	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
		goto retry;
	bch2_trans_put(trans);

	return ret;
}

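/*
 * Apply the sector state implied by @k to every folio covered by @bio;
 * reflink_v keys conservatively get an nr_ptrs of 0:
 */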
void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k)
{
	struct bvec_iter iter;
	struct folio_vec fv;
	unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v
		? 0 : bch2_bkey_nr_ptrs_fully_allocated(k);
	unsigned state = bkey_to_sector_state(k);

	bio_for_each_folio(fv, bio, iter)
		__bch2_folio_set(fv.fv_folio,
				 fv.fv_offset >> 9,
				 fv.fv_len >> 9,
				 nr_ptrs, state);
}

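/*
 * Zero the cached nr_replicas for every sector in [start, end), so the
 * pagecache no longer claims those sectors are replicated on disk:
 */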
void bch2_mark_pagecache_unallocated(struct bch_inode_info *inode,
				     u64 start, u64 end)
{
	pgoff_t index = start >> PAGE_SECTORS_SHIFT;
	pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
	struct folio_batch fbatch;
	unsigned i, j;

	if (end <= start)
		return;

	folio_batch_init(&fbatch);

	while (filemap_get_folios(inode->v.i_mapping,
				  &index, end_index, &fbatch)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];
			u64 folio_start = folio_sector(folio);
			u64 folio_end = folio_end_sector(folio);
			unsigned folio_offset = max(start, folio_start) - folio_start;
			unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
			struct bch_folio *s;

			BUG_ON(end <= folio_start);

			folio_lock(folio);
			s = bch2_folio(folio);

			if (s) {
				spin_lock(&s->lock);
				for (j = folio_offset; j < folio_offset + folio_len; j++)
					s->s[j].nr_replicas = 0;
				spin_unlock(&s->lock);
			}

			folio_unlock(folio);
		}
		folio_batch_release(&fbatch);
		cond_resched();
	}
}

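/*
 * Move sectors in [*start, end) to their reserved state, with i_sectors
 * accounting adjusted for previously-dirty sectors. *start is advanced as
 * folios are processed, so a nonblocking caller that gets -EAGAIN can
 * resume where it left off:
 */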
int bch2_mark_pagecache_reserved(struct bch_inode_info *inode,
				 u64 *start, u64 end,
				 bool nonblocking)
{
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	pgoff_t index = *start >> PAGE_SECTORS_SHIFT;
	pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
	struct folio_batch fbatch;
	s64 i_sectors_delta = 0;
	int ret = 0;

	if (end <= *start)
		return 0;

	folio_batch_init(&fbatch);

	while (filemap_get_folios(inode->v.i_mapping,
				  &index, end_index, &fbatch)) {
		for (unsigned i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];

			if (!nonblocking)
				folio_lock(folio);
			else if (!folio_trylock(folio)) {
				folio_batch_release(&fbatch);
				ret = -EAGAIN;
				break;
			}

			u64 folio_start = folio_sector(folio);
			u64 folio_end = folio_end_sector(folio);

			BUG_ON(end <= folio_start);

			*start = min(end, folio_end);

			struct bch_folio *s = bch2_folio(folio);
			if (s) {
				unsigned folio_offset = max(*start, folio_start) - folio_start;
				unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;

				spin_lock(&s->lock);
				for (unsigned j = folio_offset; j < folio_offset + folio_len; j++) {
					i_sectors_delta -= s->s[j].state == SECTOR_dirty;
					bch2_folio_sector_set(folio, s, j,
							      folio_sector_reserve(s->s[j].state));
				}
				spin_unlock(&s->lock);
			}

			folio_unlock(folio);
		}
		folio_batch_release(&fbatch);
		cond_resched();
	}

	bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
	return ret;
}

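/* How many additional replicas this sector still needs reserved to hit @nr_replicas: */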
static inline unsigned sectors_to_reserve(struct bch_folio_sector *s,
					  unsigned nr_replicas)
{
	return max(0, (int) nr_replicas -
		   s->nr_replicas -
		   s->replicas_reserved);
}

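/*
 * Take a disk reservation covering every sector in @folio that isn't yet
 * reserved or allocated to the inode's replication level; with check_enospc
 * false, the reservation is taken with BCH_DISK_RESERVATION_NOFAIL:
 */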
int bch2_get_folio_disk_reservation(struct bch_fs *c,
				struct bch_inode_info *inode,
				struct folio *folio, bool check_enospc)
{
	struct bch_folio *s = bch2_folio_create(folio, 0);
	unsigned nr_replicas = inode_nr_replicas(c, inode);
	struct disk_reservation disk_res = { 0 };
	unsigned i, sectors = folio_sectors(folio), disk_res_sectors = 0;
	int ret;

	if (!s)
		return -ENOMEM;

	for (i = 0; i < sectors; i++)
		disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas);

	if (!disk_res_sectors)
		return 0;

	ret = bch2_disk_reservation_get(c, &disk_res,
					disk_res_sectors, 1,
					!check_enospc
					? BCH_DISK_RESERVATION_NOFAIL
					: 0);
	if (unlikely(ret))
		return ret;

	for (i = 0; i < sectors; i++)
		s->s[i].replicas_reserved +=
			sectors_to_reserve(&s->s[i], nr_replicas);

	return 0;
}

void bch2_folio_reservation_put(struct bch_fs *c,
				struct bch_inode_info *inode,
				struct bch2_folio_reservation *res)
{
	bch2_disk_reservation_put(c, &res->disk);
	bch2_quota_reservation_put(c, inode, &res->quota);
}

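/*
 * Reserve disk space and quota for a write to [offset, offset + len) within
 * @folio. The range is widened to block granularity; quota is only needed
 * for sectors that are currently unallocated, and on quota failure the disk
 * reservation just added is released again:
 */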
int bch2_folio_reservation_get(struct bch_fs *c,
			struct bch_inode_info *inode,
			struct folio *folio,
			struct bch2_folio_reservation *res,
			unsigned offset, unsigned len)
{
	struct bch_folio *s = bch2_folio_create(folio, 0);
	unsigned i, disk_sectors = 0, quota_sectors = 0;
	int ret;

	if (!s)
		return -ENOMEM;

	BUG_ON(!s->uptodate);

	for (i = round_down(offset, block_bytes(c)) >> 9;
	     i < round_up(offset + len, block_bytes(c)) >> 9;
	     i++) {
		disk_sectors += sectors_to_reserve(&s->s[i],
						   res->disk.nr_replicas);
		quota_sectors += s->s[i].state == SECTOR_unallocated;
	}

	if (disk_sectors) {
		ret = bch2_disk_reservation_add(c, &res->disk, disk_sectors, 0);
		if (unlikely(ret))
			return ret;
	}

	if (quota_sectors) {
		ret = bch2_quota_reservation_add(c, inode, &res->quota,
						 quota_sectors, true);
		if (unlikely(ret)) {
			struct disk_reservation tmp = {
				.sectors = disk_sectors
			};

			bch2_disk_reservation_put(c, &tmp);
			res->disk.sectors -= disk_sectors;
			return ret;
		}
	}

	return 0;
}

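/*
 * Tear down bch_folio state when a folio leaves the pagecache: return any
 * unused replica reservations, undirty the sectors (adjusting i_sectors for
 * sectors that were dirty), and free the private data:
 */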
static void bch2_clear_folio_bits(struct folio *folio)
{
	struct bch_inode_info *inode = to_bch_ei(folio->mapping->host);
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct bch_folio *s = bch2_folio(folio);
	struct disk_reservation disk_res = { 0 };
	int i, sectors = folio_sectors(folio), dirty_sectors = 0;

	if (!s)
		return;

	EBUG_ON(!folio_test_locked(folio));
	EBUG_ON(folio_test_writeback(folio));

	for (i = 0; i < sectors; i++) {
		disk_res.sectors += s->s[i].replicas_reserved;
		s->s[i].replicas_reserved = 0;

		dirty_sectors -= s->s[i].state == SECTOR_dirty;
		bch2_folio_sector_set(folio, s, i, folio_sector_undirty(s->s[i].state));
	}

	bch2_disk_reservation_put(c, &disk_res);

	bch2_i_sectors_acct(c, inode, NULL, dirty_sectors);

	bch2_folio_release(folio);
}

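/*
 * Transfer reservations from @res into per-sector state and mark the
 * sectors (and the folio) dirty. Newly-dirtied unallocated sectors bump
 * i_sectors, accounting for space that writeback will allocate:
 */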
void bch2_set_folio_dirty(struct bch_fs *c,
			struct bch_inode_info *inode,
			struct folio *folio,
			struct bch2_folio_reservation *res,
			unsigned offset, unsigned len)
{
	struct bch_folio *s = bch2_folio(folio);
	unsigned i, dirty_sectors = 0;

	WARN_ON((u64) folio_pos(folio) + offset + len >
		round_up((u64) i_size_read(&inode->v), block_bytes(c)));

	BUG_ON(!s->uptodate);

	spin_lock(&s->lock);

	for (i = round_down(offset, block_bytes(c)) >> 9;
	     i < round_up(offset + len, block_bytes(c)) >> 9;
	     i++) {
		unsigned sectors = sectors_to_reserve(&s->s[i],
						      res->disk.nr_replicas);

		/*
		 * This can happen if we race with the error path in
		 * bch2_writepage_io_done():
		 */
		sectors = min_t(unsigned, sectors, res->disk.sectors);

		s->s[i].replicas_reserved += sectors;
		res->disk.sectors -= sectors;

		dirty_sectors += s->s[i].state == SECTOR_unallocated;

		bch2_folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state));
	}

	spin_unlock(&s->lock);

	bch2_i_sectors_acct(c, inode, &res->quota, dirty_sectors);

	if (!folio_test_dirty(folio))
		filemap_dirty_folio(inode->v.i_mapping, folio);
}

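/*
 * Page fault handler. faults_disabled_mapping() is set while a dio write to
 * that file holds its pagecache_block lock: faulting the same file's pages
 * back in here would deadlock, hence SIGBUS. When the fault is against a
 * *different* file, locks are taken in address_space pointer order to avoid
 * deadlock between the two pagecache locks; if that means dropping the
 * dio's pagecache_block lock, we signal it via set_fdm_dropped_locks() and
 * return SIGBUS so the dio path can retry:
 */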
vm_fault_t bch2_page_fault(struct vm_fault *vmf)
{
	struct file *file = vmf->vma->vm_file;
	struct address_space *mapping = file->f_mapping;
	struct address_space *fdm = faults_disabled_mapping();
	struct bch_inode_info *inode = file_bch_inode(file);
	vm_fault_t ret;

	if (fdm == mapping)
		return VM_FAULT_SIGBUS;

	/* Lock ordering: */
	if (fdm > mapping) {
		struct bch_inode_info *fdm_host = to_bch_ei(fdm->host);

		if (bch2_pagecache_add_tryget(inode))
			goto got_lock;

		bch2_pagecache_block_put(fdm_host);

		bch2_pagecache_add_get(inode);
		bch2_pagecache_add_put(inode);

		bch2_pagecache_block_get(fdm_host);

		/* Signal that lock has been dropped: */
		set_fdm_dropped_locks();
		return VM_FAULT_SIGBUS;
	}

	bch2_pagecache_add_get(inode);
got_lock:
	ret = filemap_fault(vmf);
	bch2_pagecache_add_put(inode);

	return ret;
}

vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
{
	struct folio *folio = page_folio(vmf->page);
	struct file *file = vmf->vma->vm_file;
	struct bch_inode_info *inode = file_bch_inode(file);
	struct address_space *mapping = file->f_mapping;
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct bch2_folio_reservation res;
	unsigned len;
	loff_t isize;
	vm_fault_t ret;

	bch2_folio_reservation_init(c, inode, &res);

	sb_start_pagefault(inode->v.i_sb);
	file_update_time(file);

	/*
	 * Not strictly necessary, but helps avoid dio writes livelocking in
	 * bch2_write_invalidate_inode_pages_range() - can drop this if/when we get
	 * a bch2_write_invalidate_inode_pages_range() that works without dropping
	 * page lock before invalidating page
	 */
	bch2_pagecache_add_get(inode);

	folio_lock(folio);
	isize = i_size_read(&inode->v);

	if (folio->mapping != mapping || folio_pos(folio) >= isize) {
		folio_unlock(folio);
		ret = VM_FAULT_NOPAGE;
		goto out;
	}

	len = min_t(loff_t, folio_size(folio), isize - folio_pos(folio));

	if (bch2_folio_set(c, inode_inum(inode), &folio, 1) ?:
	    bch2_folio_reservation_get(c, inode, folio, &res, 0, len)) {
		folio_unlock(folio);
		ret = VM_FAULT_SIGBUS;
		goto out;
	}

	bch2_set_folio_dirty(c, inode, folio, &res, 0, len);
	bch2_folio_reservation_put(c, inode, &res);

	folio_wait_stable(folio);
	ret = VM_FAULT_LOCKED;
out:
	bch2_pagecache_add_put(inode);
	sb_end_pagefault(inode->v.i_sb);

	return ret;
}

void bch2_invalidate_folio(struct folio *folio, size_t offset, size_t length)
{
	if (offset || length < folio_size(folio))
		return;

	bch2_clear_folio_bits(folio);
}

bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask)
{
	if (folio_test_dirty(folio) || folio_test_writeback(folio))
		return false;

	bch2_clear_folio_bits(folio);
	return true;
}

/* fseek: */

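/*
 * Return the byte offset (relative to the folio) of the first sector at or
 * after @pos that holds data (state >= SECTOR_dirty) with at least
 * @min_replicas replicas cached or reserved, or -1 if there is none:
 */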
static int folio_data_offset(struct folio *folio, loff_t pos,
			     unsigned min_replicas)
{
	struct bch_folio *s = bch2_folio(folio);
	unsigned i, sectors = folio_sectors(folio);

	if (s)
		for (i = folio_pos_to_s(folio, pos); i < sectors; i++)
			if (s->s[i].state >= SECTOR_dirty &&
			    s->s[i].nr_replicas + s->s[i].replicas_reserved >= min_replicas)
				return i << SECTOR_SHIFT;

	return -1;
}

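/*
 * SEEK_DATA within the pagecache: scan folios in [start_offset, end_offset)
 * for sectors holding data, returning the first such offset clamped to the
 * range, or end_offset if none is found. Returns -EAGAIN if @nonblock and a
 * folio lock couldn't be taken:
 */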
loff_t bch2_seek_pagecache_data(struct inode *vinode,
				loff_t start_offset,
				loff_t end_offset,
				unsigned min_replicas,
				bool nonblock)
{
	struct folio_batch fbatch;
	pgoff_t start_index = start_offset >> PAGE_SHIFT;
	pgoff_t end_index = end_offset >> PAGE_SHIFT;
	pgoff_t index = start_index;
	unsigned i;
	loff_t ret;
	int offset;

	folio_batch_init(&fbatch);

	while (filemap_get_folios(vinode->i_mapping,
				  &index, end_index, &fbatch)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];

			if (!nonblock) {
				folio_lock(folio);
			} else if (!folio_trylock(folio)) {
				folio_batch_release(&fbatch);
				return -EAGAIN;
			}

			offset = folio_data_offset(folio,
					max(folio_pos(folio), start_offset),
					min_replicas);
			if (offset >= 0) {
				ret = clamp(folio_pos(folio) + offset,
					    start_offset, end_offset);
				folio_unlock(folio);
				folio_batch_release(&fbatch);
				return ret;
			}
			folio_unlock(folio);
		}
		folio_batch_release(&fbatch);
		cond_resched();
	}

	return end_offset;
}

/*
 * Search for a hole in a folio.
 *
 * The filemap layer returns -ENOENT if no folio exists, so reuse the same
 * error code to indicate a pagecache hole exists at the returned offset.
 * Otherwise return 0 if the folio is filled with data, or an error code.
 * This function can return -EAGAIN if nonblock is specified.
 */
static int folio_hole_offset(struct address_space *mapping, loff_t *offset,
			     unsigned min_replicas, bool nonblock)
{
	struct folio *folio;
	struct bch_folio *s;
	unsigned i, sectors;
	int ret = -ENOENT;

	folio = __filemap_get_folio(mapping, *offset >> PAGE_SHIFT,
				    FGP_LOCK|(nonblock ? FGP_NOWAIT : 0), 0);
	if (IS_ERR(folio))
		return PTR_ERR(folio);

	s = bch2_folio(folio);
	if (!s)
		goto unlock;

	sectors = folio_sectors(folio);
	for (i = folio_pos_to_s(folio, *offset); i < sectors; i++)
		if (s->s[i].state < SECTOR_dirty ||
		    s->s[i].nr_replicas + s->s[i].replicas_reserved < min_replicas) {
			*offset = max(*offset,
				      folio_pos(folio) + (i << SECTOR_SHIFT));
			goto unlock;
		}

	*offset = folio_end_pos(folio);
	ret = 0;
unlock:
	folio_unlock(folio);
	folio_put(folio);
	return ret;
}

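/*
 * SEEK_HOLE within the pagecache: walk forward a folio at a time until
 * folio_hole_offset() reports a hole (-ENOENT, which also covers the
 * no-folio case) or we pass end_offset:
 */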
loff_t bch2_seek_pagecache_hole(struct inode *vinode,
				loff_t start_offset,
				loff_t end_offset,
				unsigned min_replicas,
				bool nonblock)
{
	struct address_space *mapping = vinode->i_mapping;
	loff_t offset = start_offset;
	loff_t ret = 0;

	while (!ret && offset < end_offset)
		ret = folio_hole_offset(mapping, &offset, min_replicas, nonblock);

	if (ret && ret != -ENOENT)
		return ret;
	return min(offset, end_offset);
}

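/*
 * Shrink a hole found in the extents btree (in sectors) so that it doesn't
 * overlap data that only exists in the pagecache: the start is pushed past
 * any cached data, and the end is pulled back to where cached data resumes:
 */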
int bch2_clamp_data_hole(struct inode *inode,
			 u64 *hole_start,
			 u64 *hole_end,
			 unsigned min_replicas,
			 bool nonblock)
{
	loff_t ret;

	ret = bch2_seek_pagecache_hole(inode,
		*hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
	if (ret < 0)
		return ret;

	*hole_start = ret;

	if (*hole_start == *hole_end)
		return 0;

	ret = bch2_seek_pagecache_data(inode,
		*hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
	if (ret < 0)
		return ret;

	*hole_end = ret;
	return 0;
}

#endif /* NO_BCACHEFS_FS */