// SPDX-License-Identifier: GPL-2.0
/*
 * fs/f2fs/data.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/sched/mm.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/blk-crypto.h>
#include <linux/swap.h>
#include <linux/prefetch.h>
#include <linux/uio.h>
#include <linux/sched/signal.h>
#include <linux/fiemap.h>
#include <linux/iomap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "iostat.h"
#include <trace/events/f2fs.h>

#define NUM_PREALLOC_POST_READ_CTXS	128

static struct kmem_cache *bio_post_read_ctx_cache;
static struct kmem_cache *bio_entry_slab;
static mempool_t *bio_post_read_ctx_pool;
static struct bio_set f2fs_bioset;

#define F2FS_BIO_POOL_SIZE	NR_CURSEG_TYPE

int __init f2fs_init_bioset(void)
{
	return bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE,
					0, BIOSET_NEED_BVECS);
}

void f2fs_destroy_bioset(void)
{
	bioset_exit(&f2fs_bioset);
}

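/*
 * Return true if the page's writeback must be guaranteed to complete before
 * the next checkpoint, i.e. it is accounted as F2FS_WB_CP_DATA rather than
 * F2FS_WB_DATA (see WB_DATA_TYPE()).
 */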
static bool __is_cp_guaranteed(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode;
	struct f2fs_sb_info *sbi;

	if (!mapping)
		return false;

	inode = mapping->host;
	sbi = F2FS_I_SB(inode);

	if (inode->i_ino == F2FS_META_INO(sbi) ||
			inode->i_ino == F2FS_NODE_INO(sbi) ||
			S_ISDIR(inode->i_mode))
		return true;

	if (f2fs_is_compressed_page(page))
		return false;
	if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) ||
			page_private_gcing(page))
		return true;
	return false;
}

static enum count_type __read_io_type(struct page *page)
{
	struct address_space *mapping = page_file_mapping(page);

	if (mapping) {
		struct inode *inode = mapping->host;
		struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

		if (inode->i_ino == F2FS_META_INO(sbi))
			return F2FS_RD_META;

		if (inode->i_ino == F2FS_NODE_INO(sbi))
			return F2FS_RD_NODE;
	}
	return F2FS_RD_DATA;
}

/* postprocessing steps for read bios */
enum bio_post_read_step {
#ifdef CONFIG_FS_ENCRYPTION
	STEP_DECRYPT	= BIT(0),
#else
	STEP_DECRYPT	= 0,	/* compile out the decryption-related code */
#endif
#ifdef CONFIG_F2FS_FS_COMPRESSION
	STEP_DECOMPRESS	= BIT(1),
#else
	STEP_DECOMPRESS	= 0,	/* compile out the decompression-related code */
#endif
#ifdef CONFIG_FS_VERITY
	STEP_VERITY	= BIT(2),
#else
	STEP_VERITY	= 0,	/* compile out the verity-related code */
#endif
};

struct bio_post_read_ctx {
	struct bio *bio;
	struct f2fs_sb_info *sbi;
	struct work_struct work;
	unsigned int enabled_steps;
	/*
	 * decompression_attempted keeps track of whether
	 * f2fs_end_read_compressed_page() has been called yet on the pages in
	 * the bio that belong to a compressed cluster.
	 */
	bool decompression_attempted;
	block_t fs_blkaddr;
};

/*
 * Update and unlock a bio's pages, and free the bio.
 *
 * This marks pages up-to-date only if there was no error in the bio (I/O error,
 * decryption error, or verity error), as indicated by bio->bi_status.
 *
 * "Compressed pages" (pagecache pages backed by a compressed cluster on-disk)
 * aren't marked up-to-date here, as decompression is done on a per-compression-
 * cluster basis rather than a per-bio basis.  Instead, we only need to do two
 * things for each compressed page here: call f2fs_end_read_compressed_page()
 * with failed=true if an error occurred before it would have normally gotten
 * called (i.e., I/O error or decryption error, but *not* verity error), and
 * release the bio's reference to the decompress_io_ctx of the page's cluster.
 */
static void f2fs_finish_read_bio(struct bio *bio, bool in_task)
{
	struct bio_vec *bv;
	struct bvec_iter_all iter_all;
	struct bio_post_read_ctx *ctx = bio->bi_private;

	bio_for_each_segment_all(bv, bio, iter_all) {
		struct page *page = bv->bv_page;

		if (f2fs_is_compressed_page(page)) {
			if (ctx && !ctx->decompression_attempted)
				f2fs_end_read_compressed_page(page, true, 0,
							in_task);
			f2fs_put_page_dic(page, in_task);
			continue;
		}

		if (bio->bi_status)
			ClearPageUptodate(page);
		else
			SetPageUptodate(page);
		dec_page_count(F2FS_P_SB(page), __read_io_type(page));
		unlock_page(page);
	}

	if (ctx)
		mempool_free(ctx, bio_post_read_ctx_pool);
	bio_put(bio);
}

static void f2fs_verify_bio(struct work_struct *work)
{
	struct bio_post_read_ctx *ctx =
		container_of(work, struct bio_post_read_ctx, work);
	struct bio *bio = ctx->bio;
	bool may_have_compressed_pages = (ctx->enabled_steps & STEP_DECOMPRESS);

	/*
	 * fsverity_verify_bio() may call readahead() again, and while verity
	 * will be disabled for this, decryption and/or decompression may still
	 * be needed, resulting in another bio_post_read_ctx being allocated.
	 * So to prevent deadlocks we need to release the current ctx to the
	 * mempool first.  This assumes that verity is the last post-read step.
	 */
	mempool_free(ctx, bio_post_read_ctx_pool);
	bio->bi_private = NULL;

	/*
	 * Verify the bio's pages with fs-verity.  Exclude compressed pages,
	 * as those were handled separately by f2fs_end_read_compressed_page().
	 */
	if (may_have_compressed_pages) {
		struct bio_vec *bv;
		struct bvec_iter_all iter_all;

		bio_for_each_segment_all(bv, bio, iter_all) {
			struct page *page = bv->bv_page;

			if (!f2fs_is_compressed_page(page) &&
			    !fsverity_verify_page(page)) {
				bio->bi_status = BLK_STS_IOERR;
				break;
			}
		}
	} else {
		fsverity_verify_bio(bio);
	}

	f2fs_finish_read_bio(bio, true);
}

/*
 * If the bio's data needs to be verified with fs-verity, then enqueue the
 * verity work for the bio.  Otherwise finish the bio now.
 *
 * Note that to avoid deadlocks, the verity work can't be done on the
 * decryption/decompression workqueue.  This is because verifying the data
 * pages can involve reading verity metadata pages from the file, and these
 * verity metadata pages may be encrypted and/or compressed.
 */
static void f2fs_verify_and_finish_bio(struct bio *bio, bool in_task)
{
	struct bio_post_read_ctx *ctx = bio->bi_private;

	if (ctx && (ctx->enabled_steps & STEP_VERITY)) {
		INIT_WORK(&ctx->work, f2fs_verify_bio);
		fsverity_enqueue_verify_work(&ctx->work);
	} else {
		f2fs_finish_read_bio(bio, in_task);
	}
}

/*
 * Handle STEP_DECOMPRESS by decompressing any compressed clusters whose last
 * remaining page was read by @ctx->bio.
 *
 * Note that a bio may span clusters (even a mix of compressed and uncompressed
 * clusters) or be for just part of a cluster.  STEP_DECOMPRESS just indicates
 * that the bio includes at least one compressed page.  The actual decompression
 * is done on a per-cluster basis, not a per-bio basis.
 */
static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx,
		bool in_task)
{
	struct bio_vec *bv;
	struct bvec_iter_all iter_all;
	bool all_compressed = true;
	block_t blkaddr = ctx->fs_blkaddr;

	bio_for_each_segment_all(bv, ctx->bio, iter_all) {
		struct page *page = bv->bv_page;

		if (f2fs_is_compressed_page(page))
			f2fs_end_read_compressed_page(page, false, blkaddr,
						in_task);
		else
			all_compressed = false;

		blkaddr++;
	}

	ctx->decompression_attempted = true;

	/*
	 * Optimization: if all the bio's pages are compressed, then scheduling
	 * the per-bio verity work is unnecessary, as verity will be fully
	 * handled at the compression cluster level.
	 */
	if (all_compressed)
		ctx->enabled_steps &= ~STEP_VERITY;
}

static void f2fs_post_read_work(struct work_struct *work)
{
	struct bio_post_read_ctx *ctx =
		container_of(work, struct bio_post_read_ctx, work);
	struct bio *bio = ctx->bio;

	if ((ctx->enabled_steps & STEP_DECRYPT) && !fscrypt_decrypt_bio(bio)) {
		f2fs_finish_read_bio(bio, true);
		return;
	}

	if (ctx->enabled_steps & STEP_DECOMPRESS)
		f2fs_handle_step_decompress(ctx, true);

	f2fs_verify_and_finish_bio(bio, true);
}

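/*
 * Completion callback for read bios.  Decryption and (in low-memory mode)
 * decompression are deferred to the per-sb post_read_wq; decompression alone
 * can be handled directly in the completion context.
 */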
static void f2fs_read_end_io(struct bio *bio)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
	struct bio_post_read_ctx *ctx;
	bool intask = in_task();

	iostat_update_and_unbind_ctx(bio);
	ctx = bio->bi_private;

	if (time_to_inject(sbi, FAULT_READ_IO))
		bio->bi_status = BLK_STS_IOERR;

	if (bio->bi_status) {
		f2fs_finish_read_bio(bio, intask);
		return;
	}

	if (ctx) {
		unsigned int enabled_steps = ctx->enabled_steps &
					(STEP_DECRYPT | STEP_DECOMPRESS);

		/*
		 * If decompression is the only enabled post-read step, handle
		 * it here directly rather than on the post-read workqueue.
		 */
		if (enabled_steps == STEP_DECOMPRESS &&
				!f2fs_low_mem_mode(sbi)) {
			f2fs_handle_step_decompress(ctx, intask);
		} else if (enabled_steps) {
			INIT_WORK(&ctx->work, f2fs_post_read_work);
			queue_work(ctx->sbi->post_read_wq, &ctx->work);
			return;
		}
	}

	f2fs_verify_and_finish_bio(bio, intask);
}

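/*
 * Completion callback for write bios.  Dummy pages used for I/O alignment are
 * returned to their mempool, encryption bounce pages are freed, and a failed
 * checkpointed write stops checkpointing with STOP_CP_REASON_WRITE_FAIL.
 */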
static void f2fs_write_end_io(struct bio *bio)
{
	struct f2fs_sb_info *sbi;
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	iostat_update_and_unbind_ctx(bio);
	sbi = bio->bi_private;

	if (time_to_inject(sbi, FAULT_WRITE_IO))
		bio->bi_status = BLK_STS_IOERR;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		struct page *page = bvec->bv_page;
		enum count_type type = WB_DATA_TYPE(page);

		if (page_private_dummy(page)) {
			clear_page_private_dummy(page);
			unlock_page(page);
			mempool_free(page, sbi->write_io_dummy);

			if (unlikely(bio->bi_status))
				f2fs_stop_checkpoint(sbi, true,
						STOP_CP_REASON_WRITE_FAIL);
			continue;
		}

		fscrypt_finalize_bounce_page(&page);

#ifdef CONFIG_F2FS_FS_COMPRESSION
		if (f2fs_is_compressed_page(page)) {
			f2fs_compress_write_end_io(bio, page);
			continue;
		}
#endif

		if (unlikely(bio->bi_status)) {
			mapping_set_error(page->mapping, -EIO);
			if (type == F2FS_WB_CP_DATA)
				f2fs_stop_checkpoint(sbi, true,
						STOP_CP_REASON_WRITE_FAIL);
		}

		f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
					page->index != nid_of_node(page));

		dec_page_count(sbi, type);
		if (f2fs_in_warm_node_list(sbi, page))
			f2fs_del_fsync_node_entry(sbi, page);
		clear_page_private_gcing(page);
		end_page_writeback(page);
	}
	if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
				wq_has_sleeper(&sbi->cp_wait))
		wake_up(&sbi->cp_wait);

	bio_put(bio);
}

#ifdef CONFIG_BLK_DEV_ZONED
static void f2fs_zone_write_end_io(struct bio *bio)
{
	struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;

	bio->bi_private = io->bi_private;
	complete(&io->zone_wait);
	f2fs_write_end_io(bio);
}
#endif

struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
				block_t blk_addr, sector_t *sector)
{
	struct block_device *bdev = sbi->sb->s_bdev;
	int i;

	if (f2fs_is_multi_device(sbi)) {
		for (i = 0; i < sbi->s_ndevs; i++) {
			if (FDEV(i).start_blk <= blk_addr &&
			    FDEV(i).end_blk >= blk_addr) {
				blk_addr -= FDEV(i).start_blk;
				bdev = FDEV(i).bdev;
				break;
			}
		}
	}

	if (sector)
		*sector = SECTOR_FROM_BLOCK(blk_addr);
	return bdev;
}

int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	int i;

	if (!f2fs_is_multi_device(sbi))
		return 0;

	for (i = 0; i < sbi->s_ndevs; i++)
		if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
			return i;
	return 0;
}

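/*
 * Map the sysfs-tunable data_io_flag/node_io_flag bitmasks onto extra REQ_*
 * flags for this write, keyed by the page's temperature.  As an illustrative
 * example (assuming NR_TEMP_TYPE == 3, i.e. hot/warm/cold): io_flag = 0x09
 * sets bit 0 of the FUA mask and bit 0 of the META mask, so hot pages get
 * REQ_FUA | REQ_META while warm and cold pages get neither.
 */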
static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio)
{
	unsigned int temp_mask = GENMASK(NR_TEMP_TYPE - 1, 0);
	unsigned int fua_flag, meta_flag, io_flag;
	blk_opf_t op_flags = 0;

	if (fio->op != REQ_OP_WRITE)
		return 0;
	if (fio->type == DATA)
		io_flag = fio->sbi->data_io_flag;
	else if (fio->type == NODE)
		io_flag = fio->sbi->node_io_flag;
	else
		return 0;

	fua_flag = io_flag & temp_mask;
	meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;

	/*
	 * data/node io flag bits per temp:
	 *      REQ_META     |      REQ_FUA      |
	 *    5 |    4 |   3 |    2 |    1 |   0 |
	 * Cold | Warm | Hot | Cold | Warm | Hot |
	 */
	if (BIT(fio->temp) & meta_flag)
		op_flags |= REQ_META;
	if (BIT(fio->temp) & fua_flag)
		op_flags |= REQ_FUA;
	return op_flags;
}

static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	struct block_device *bdev;
	sector_t sector;
	struct bio *bio;

	bdev = f2fs_target_device(sbi, fio->new_blkaddr, &sector);
	bio = bio_alloc_bioset(bdev, npages,
				fio->op | fio->op_flags | f2fs_io_flags(fio),
				GFP_NOIO, &f2fs_bioset);
	bio->bi_iter.bi_sector = sector;
	if (is_read_io(fio->op)) {
		bio->bi_end_io = f2fs_read_end_io;
		bio->bi_private = NULL;
	} else {
		bio->bi_end_io = f2fs_write_end_io;
		bio->bi_private = sbi;
	}
	iostat_alloc_and_bind_ctx(sbi, bio, NULL);

	if (fio->io_wbc)
		wbc_init_bio(fio->io_wbc, bio);

	return bio;
}

static void f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode,
				  pgoff_t first_idx,
				  const struct f2fs_io_info *fio,
				  gfp_t gfp_mask)
{
	/*
	 * The f2fs garbage collector sets ->encrypted_page when it wants to
	 * read/write raw data without encryption.
	 */
	if (!fio || !fio->encrypted_page)
		fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask);
}

static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode,
				    pgoff_t next_idx,
				    const struct f2fs_io_info *fio)
{
	/*
	 * The f2fs garbage collector sets ->encrypted_page when it wants to
	 * read/write raw data without encryption.
	 */
	if (fio && fio->encrypted_page)
		return !bio_has_crypt_ctx(bio);

	return fscrypt_mergeable_bio(bio, inode, next_idx);
}

void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
				 enum page_type type)
{
	WARN_ON_ONCE(!is_read_io(bio_op(bio)));
	trace_f2fs_submit_read_bio(sbi->sb, type, bio);

	iostat_update_submit_ctx(bio, type);
	submit_bio(bio);
}

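/*
 * Pad a partially filled write bio with zeroed dummy pages so that its size
 * becomes a multiple of F2FS_IO_SIZE(sbi); the dummy pages are recognized
 * and freed again in f2fs_write_end_io().
 */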
static void f2fs_align_write_bio(struct f2fs_sb_info *sbi, struct bio *bio)
{
	unsigned int start =
		(bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS) % F2FS_IO_SIZE(sbi);

	if (start == 0)
		return;

	/* fill dummy pages */
	for (; start < F2FS_IO_SIZE(sbi); start++) {
		struct page *page =
			mempool_alloc(sbi->write_io_dummy,
				      GFP_NOIO | __GFP_NOFAIL);
		f2fs_bug_on(sbi, !page);

		lock_page(page);

		zero_user_segment(page, 0, PAGE_SIZE);
		set_page_private_dummy(page);

		if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
			f2fs_bug_on(sbi, 1);
	}
}

static void f2fs_submit_write_bio(struct f2fs_sb_info *sbi, struct bio *bio,
				  enum page_type type)
{
	WARN_ON_ONCE(is_read_io(bio_op(bio)));

	if (type == DATA || type == NODE) {
		if (f2fs_lfs_mode(sbi) && current->plug)
			blk_finish_plug(current->plug);

		if (F2FS_IO_ALIGNED(sbi)) {
			f2fs_align_write_bio(sbi, bio);
			/*
			 * In the NODE case, we lose next block address chain.
			 * So, we need to do checkpoint in f2fs_sync_file.
			 */
			if (type == NODE)
				set_sbi_flag(sbi, SBI_NEED_CP);
		}
	}

	trace_f2fs_submit_write_bio(sbi->sb, type, bio);
	iostat_update_submit_ctx(bio, type);
	submit_bio(bio);
}

static void __submit_merged_bio(struct f2fs_bio_info *io)
{
	struct f2fs_io_info *fio = &io->fio;

	if (!io->bio)
		return;

	if (is_read_io(fio->op)) {
		trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
		f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
	} else {
		trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);
		f2fs_submit_write_bio(io->sbi, io->bio, fio->type);
	}
	io->bio = NULL;
}

static bool __has_merged_page(struct bio *bio, struct inode *inode,
						struct page *page, nid_t ino)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	if (!bio)
		return false;

	if (!inode && !page && !ino)
		return true;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		struct page *target = bvec->bv_page;

		if (fscrypt_is_bounce_page(target)) {
			target = fscrypt_pagecache_page(target);
			if (IS_ERR(target))
				continue;
		}
		if (f2fs_is_compressed_page(target)) {
			target = f2fs_compress_control_page(target);
			if (IS_ERR(target))
				continue;
		}

		if (inode && inode == target->mapping->host)
			return true;
		if (page && page == target)
			return true;
		if (ino && ino == ino_of_node(target))
			return true;
	}

	return false;
}

int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
{
	int i;

	for (i = 0; i < NR_PAGE_TYPE; i++) {
		int n = (i == META) ? 1 : NR_TEMP_TYPE;
		int j;

		sbi->write_io[i] = f2fs_kmalloc(sbi,
				array_size(n, sizeof(struct f2fs_bio_info)),
				GFP_KERNEL);
		if (!sbi->write_io[i])
			return -ENOMEM;

		for (j = HOT; j < n; j++) {
			init_f2fs_rwsem(&sbi->write_io[i][j].io_rwsem);
			sbi->write_io[i][j].sbi = sbi;
			sbi->write_io[i][j].bio = NULL;
			spin_lock_init(&sbi->write_io[i][j].io_lock);
			INIT_LIST_HEAD(&sbi->write_io[i][j].io_list);
			INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
			init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
#ifdef CONFIG_BLK_DEV_ZONED
			init_completion(&sbi->write_io[i][j].zone_wait);
			sbi->write_io[i][j].zone_pending_bio = NULL;
			sbi->write_io[i][j].bi_private = NULL;
#endif
		}
	}

	return 0;
}

static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
				enum page_type type, enum temp_type temp)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);
	struct f2fs_bio_info *io = sbi->write_io[btype] + temp;

	f2fs_down_write(&io->io_rwsem);

	if (!io->bio)
		goto unlock_out;

	/* change META to META_FLUSH in the checkpoint procedure */
	if (type >= META_FLUSH) {
		io->fio.type = META_FLUSH;
		io->bio->bi_opf |= REQ_META | REQ_PRIO | REQ_SYNC;
		if (!test_opt(sbi, NOBARRIER))
			io->bio->bi_opf |= REQ_PREFLUSH | REQ_FUA;
	}
	__submit_merged_bio(io);
unlock_out:
	f2fs_up_write(&io->io_rwsem);
}

static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
				struct inode *inode, struct page *page,
				nid_t ino, enum page_type type, bool force)
{
	enum temp_type temp;
	bool ret = true;

	for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
		if (!force) {
			enum page_type btype = PAGE_TYPE_OF_BIO(type);
			struct f2fs_bio_info *io = sbi->write_io[btype] + temp;

			f2fs_down_read(&io->io_rwsem);
			ret = __has_merged_page(io->bio, inode, page, ino);
			f2fs_up_read(&io->io_rwsem);
		}
		if (ret)
			__f2fs_submit_merged_write(sbi, type, temp);

		/* TODO: use HOT temp only for meta pages now. */
		if (type >= META)
			break;
	}
}

void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
{
	__submit_merged_write_cond(sbi, NULL, NULL, 0, type, true);
}

void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
				struct inode *inode, struct page *page,
				nid_t ino, enum page_type type)
{
	__submit_merged_write_cond(sbi, inode, page, ino, type, false);
}

void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
{
	f2fs_submit_merged_write(sbi, DATA);
	f2fs_submit_merged_write(sbi, NODE);
	f2fs_submit_merged_write(sbi, META);
}

/*
 * Fill the locked page with data located in the block address.
 * A caller needs to unlock the page on failure.
 */
int f2fs_submit_page_bio(struct f2fs_io_info *fio)
{
	struct bio *bio;
	struct page *page = fio->encrypted_page ?
			fio->encrypted_page : fio->page;

	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
			fio->is_por ? META_POR : (__is_meta_io(fio) ?
			META_GENERIC : DATA_GENERIC_ENHANCE))) {
		f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR);
		return -EFSCORRUPTED;
	}

	trace_f2fs_submit_page_bio(page, fio);

	/* Allocate a new bio */
	bio = __bio_alloc(fio, 1);

	f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
			       fio->page->index, fio, GFP_NOIO);

	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
		bio_put(bio);
		return -EFAULT;
	}

	if (fio->io_wbc && !is_read_io(fio->op))
		wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);

	inc_page_count(fio->sbi, is_read_io(fio->op) ?
			__read_io_type(page) : WB_DATA_TYPE(fio->page));

	if (is_read_io(bio_op(bio)))
		f2fs_submit_read_bio(fio->sbi, bio, fio->type);
	else
		f2fs_submit_write_bio(fio->sbi, bio, fio->type);
	return 0;
}

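/*
 * Two pages can share a bio only if they are physically contiguous, map to
 * the same target device, and the bio stays under the optional max_io_bytes
 * cap.
 */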
static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
				block_t last_blkaddr, block_t cur_blkaddr)
{
	if (unlikely(sbi->max_io_bytes &&
			bio->bi_iter.bi_size >= sbi->max_io_bytes))
		return false;
	if (last_blkaddr + 1 != cur_blkaddr)
		return false;
	return bio->bi_bdev == f2fs_target_device(sbi, cur_blkaddr, NULL);
}

static bool io_type_is_mergeable(struct f2fs_bio_info *io,
						struct f2fs_io_info *fio)
{
	if (io->fio.op != fio->op)
		return false;
	return io->fio.op_flags == fio->op_flags;
}

static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
					struct f2fs_bio_info *io,
					struct f2fs_io_info *fio,
					block_t last_blkaddr,
					block_t cur_blkaddr)
{
	if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE)) {
		unsigned int filled_blocks =
				F2FS_BYTES_TO_BLK(bio->bi_iter.bi_size);
		unsigned int io_size = F2FS_IO_SIZE(sbi);
		unsigned int left_vecs = bio->bi_max_vecs - bio->bi_vcnt;

		/* the bio is aligned to the I/O size, but not enough bio
		 * vectors are left for another I/O unit */
		if (!(filled_blocks % io_size) && left_vecs < io_size)
			return false;
	}
	if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr))
		return false;
	return io_type_is_mergeable(io, fio);
}

static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio,
			  struct page *page, enum temp_type temp)
{
	struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
	struct bio_entry *be;

	be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS, true, NULL);
	be->bio = bio;
	bio_get(bio);

	if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE)
		f2fs_bug_on(sbi, 1);

	f2fs_down_write(&io->bio_list_lock);
	list_add_tail(&be->list, &io->bio_list);
	f2fs_up_write(&io->bio_list_lock);
}

static void del_bio_entry(struct bio_entry *be)
{
	list_del(&be->list);
	kmem_cache_free(bio_entry_slab, be);
}

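/*
 * Try to merge @page into the tracked in-place-update bio *bio.  Returns 0 on
 * success; otherwise the bio is submitted (or simply dropped if it wasn't
 * being tracked), *bio is cleared, and -EAGAIN tells the caller to allocate
 * a new bio.
 */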
static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
							struct page *page)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	enum temp_type temp;
	bool found = false;
	int ret = -EAGAIN;

	for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
		struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
		struct list_head *head = &io->bio_list;
		struct bio_entry *be;

		f2fs_down_write(&io->bio_list_lock);
		list_for_each_entry(be, head, list) {
			if (be->bio != *bio)
				continue;

			found = true;

			f2fs_bug_on(sbi, !page_is_mergeable(sbi, *bio,
							    *fio->last_block,
							    fio->new_blkaddr));
			if (f2fs_crypt_mergeable_bio(*bio,
					fio->page->mapping->host,
					fio->page->index, fio) &&
			    bio_add_page(*bio, page, PAGE_SIZE, 0) ==
					PAGE_SIZE) {
				ret = 0;
				break;
			}

			/* page can't be merged into bio; submit the bio */
			del_bio_entry(be);
			f2fs_submit_write_bio(sbi, *bio, DATA);
			break;
		}
		f2fs_up_write(&io->bio_list_lock);
	}

	if (ret) {
		bio_put(*bio);
		*bio = NULL;
	}

	return ret;
}

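/*
 * Submit the tracked IPU bio identified either by *@bio or by containing
 * @page, then drop the caller's reference to *@bio if one was passed in.
 */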
void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
					struct bio **bio, struct page *page)
{
	enum temp_type temp;
	bool found = false;
	struct bio *target = bio ? *bio : NULL;

	f2fs_bug_on(sbi, !target && !page);

	for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
		struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
		struct list_head *head = &io->bio_list;
		struct bio_entry *be;

		if (list_empty(head))
			continue;

		f2fs_down_read(&io->bio_list_lock);
		list_for_each_entry(be, head, list) {
			if (target)
				found = (target == be->bio);
			else
				found = __has_merged_page(be->bio, NULL,
								page, 0);
			if (found)
				break;
		}
		f2fs_up_read(&io->bio_list_lock);

		if (!found)
			continue;

		found = false;

		f2fs_down_write(&io->bio_list_lock);
		list_for_each_entry(be, head, list) {
			if (target)
				found = (target == be->bio);
			else
				found = __has_merged_page(be->bio, NULL,
								page, 0);
			if (found) {
				target = be->bio;
				del_bio_entry(be);
				break;
			}
		}
		f2fs_up_write(&io->bio_list_lock);
	}

	if (found)
		f2fs_submit_write_bio(sbi, target, DATA);
	if (bio && *bio) {
		bio_put(*bio);
		*bio = NULL;
	}
}

int f2fs_merge_page_bio(struct f2fs_io_info *fio)
{
	struct bio *bio = *fio->bio;
	struct page *page = fio->encrypted_page ?
			fio->encrypted_page : fio->page;

	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
			__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) {
		f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR);
		return -EFSCORRUPTED;
	}

	trace_f2fs_submit_page_bio(page, fio);

	if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block,
						fio->new_blkaddr))
		f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL);
alloc_new:
	if (!bio) {
		bio = __bio_alloc(fio, BIO_MAX_VECS);
		f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
				       fio->page->index, fio, GFP_NOIO);

		add_bio_entry(fio->sbi, bio, page, fio->temp);
	} else {
		if (add_ipu_page(fio, &bio, page))
			goto alloc_new;
	}

	if (fio->io_wbc)
		wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);

	inc_page_count(fio->sbi, WB_DATA_TYPE(page));

	*fio->last_block = fio->new_blkaddr;
	*fio->bio = bio;

	return 0;
}

#ifdef CONFIG_BLK_DEV_ZONED
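/*
 * Return true if @blkaddr is the last block of its sequential zone, i.e. the
 * pending bio must be submitted and completed before a write to the next
 * zone can be issued.
 */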
static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	int devi = 0;

	if (f2fs_is_multi_device(sbi)) {
		devi = f2fs_target_device_index(sbi, blkaddr);
		if (blkaddr < FDEV(devi).start_blk ||
		    blkaddr > FDEV(devi).end_blk) {
			f2fs_err(sbi, "Invalid block %x", blkaddr);
			return false;
		}
		blkaddr -= FDEV(devi).start_blk;
	}
	return bdev_zoned_model(FDEV(devi).bdev) == BLK_ZONED_HM &&
		f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
		(blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
}
#endif

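/*
 * Merge the page into the per-(type, temp) write bio if possible, otherwise
 * submit the existing bio and start a new one.  With fio->in_list set, pages
 * queued on io->io_list are drained in the same pass.
 */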
void f2fs_submit_page_write(struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
	struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
	struct page *bio_page;

	f2fs_bug_on(sbi, is_read_io(fio->op));

	f2fs_down_write(&io->io_rwsem);

#ifdef CONFIG_BLK_DEV_ZONED
	if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
		wait_for_completion_io(&io->zone_wait);
		bio_put(io->zone_pending_bio);
		io->zone_pending_bio = NULL;
		io->bi_private = NULL;
	}
#endif

next:
	if (fio->in_list) {
		spin_lock(&io->io_lock);
		if (list_empty(&io->io_list)) {
			spin_unlock(&io->io_lock);
			goto out;
		}
		fio = list_first_entry(&io->io_list,
						struct f2fs_io_info, list);
		list_del(&fio->list);
		spin_unlock(&io->io_lock);
	}

	verify_fio_blkaddr(fio);

	if (fio->encrypted_page)
		bio_page = fio->encrypted_page;
	else if (fio->compressed_page)
		bio_page = fio->compressed_page;
	else
		bio_page = fio->page;

	/* set submitted = true as a return value */
	fio->submitted = 1;

	inc_page_count(sbi, WB_DATA_TYPE(bio_page));

	if (io->bio &&
	    (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
			      fio->new_blkaddr) ||
	     !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host,
				       bio_page->index, fio)))
		__submit_merged_bio(io);
alloc_new:
	if (io->bio == NULL) {
		if (F2FS_IO_ALIGNED(sbi) &&
				(fio->type == DATA || fio->type == NODE) &&
				fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
			dec_page_count(sbi, WB_DATA_TYPE(bio_page));
			fio->retry = 1;
			goto skip;
		}
		io->bio = __bio_alloc(fio, BIO_MAX_VECS);
		f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host,
				       bio_page->index, fio, GFP_NOIO);
		io->fio = *fio;
	}

	if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
		__submit_merged_bio(io);
		goto alloc_new;
	}

	if (fio->io_wbc)
		wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);

	io->last_block_in_bio = fio->new_blkaddr;

	trace_f2fs_submit_page_write(fio->page, fio);
skip:
	if (fio->in_list)
		goto next;
out:
#ifdef CONFIG_BLK_DEV_ZONED
	if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
			is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
		bio_get(io->bio);
		reinit_completion(&io->zone_wait);
		io->bi_private = io->bio->bi_private;
		io->bio->bi_private = io;
		io->bio->bi_end_io = f2fs_zone_write_end_io;
		io->zone_pending_bio = io->bio;
		__submit_merged_bio(io);
	}
#endif
	if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
				!f2fs_is_checkpoint_ready(sbi))
		__submit_merged_bio(io);
	f2fs_up_write(&io->io_rwsem);
}

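/*
 * Allocate a read bio targeting @blkaddr, and attach a bio_post_read_ctx
 * whenever post-read processing (decryption, verity, or possibly
 * decompression) may be required.
 */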
static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
				      unsigned nr_pages, blk_opf_t op_flag,
				      pgoff_t first_idx, bool for_write)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct bio *bio;
	struct bio_post_read_ctx *ctx = NULL;
	unsigned int post_read_steps = 0;
	sector_t sector;
	struct block_device *bdev = f2fs_target_device(sbi, blkaddr, &sector);

	bio = bio_alloc_bioset(bdev, bio_max_segs(nr_pages),
			       REQ_OP_READ | op_flag,
			       for_write ? GFP_NOIO : GFP_KERNEL, &f2fs_bioset);
	if (!bio)
		return ERR_PTR(-ENOMEM);
	bio->bi_iter.bi_sector = sector;
	f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS);
	bio->bi_end_io = f2fs_read_end_io;

	if (fscrypt_inode_uses_fs_layer_crypto(inode))
		post_read_steps |= STEP_DECRYPT;

	if (f2fs_need_verity(inode, first_idx))
		post_read_steps |= STEP_VERITY;

	/*
	 * STEP_DECOMPRESS is handled specially, since a compressed file might
	 * contain both compressed and uncompressed clusters.  We'll allocate a
	 * bio_post_read_ctx if the file is compressed, but the caller is
	 * responsible for enabling STEP_DECOMPRESS if it's actually needed.
	 */

	if (post_read_steps || f2fs_compressed_file(inode)) {
		/* Due to the mempool, this never fails. */
		ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
		ctx->bio = bio;
		ctx->sbi = sbi;
		ctx->enabled_steps = post_read_steps;
		ctx->fs_blkaddr = blkaddr;
		ctx->decompression_attempted = false;
		bio->bi_private = ctx;
	}
	iostat_alloc_and_bind_ctx(sbi, bio, ctx);

	return bio;
}

/* This can handle encrypted data */
static int f2fs_submit_page_read(struct inode *inode, struct page *page,
				 block_t blkaddr, blk_opf_t op_flags,
				 bool for_write)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct bio *bio;

	bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
					page->index, for_write);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	/* wait for GCed page writeback via META_MAPPING */
	f2fs_wait_on_block_writeback(inode, blkaddr);

	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
		iostat_update_and_unbind_ctx(bio);
		if (bio->bi_private)
			mempool_free(bio->bi_private, bio_post_read_ctx_pool);
		bio_put(bio);
		return -EFAULT;
	}
	inc_page_count(sbi, F2FS_RD_DATA);
	f2fs_update_iostat(sbi, NULL, FS_DATA_READ_IO, F2FS_BLKSIZE);
	f2fs_submit_read_bio(sbi, bio, DATA);
	return 0;
}

static void __set_data_blkaddr(struct dnode_of_data *dn)
{
	struct f2fs_node *rn = F2FS_NODE(dn->node_page);
	__le32 *addr_array;
	int base = 0;

	if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
		base = get_extra_isize(dn->inode);

	/* Get physical address of data block */
	addr_array = blkaddr_in_node(rn);
	addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
}

/*
 * Lock ordering for the change of data block address:
 * ->data_page
 *  ->node_page
 *    update block addresses in the node page
 */
void f2fs_set_data_blkaddr(struct dnode_of_data *dn)
{
	f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
	__set_data_blkaddr(dn);
	if (set_page_dirty(dn->node_page))
		dn->node_changed = true;
}

void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
{
	dn->data_blkaddr = blkaddr;
	f2fs_set_data_blkaddr(dn);
	f2fs_update_read_extent_cache(dn);
}

/* dn->ofs_in_node will be returned with up-to-date last block pointer */
int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	int err;

	if (!count)
		return 0;

	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
		return -EPERM;
	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
		return err;

	trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
						dn->ofs_in_node, count);

	f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);

	for (; count > 0; dn->ofs_in_node++) {
		block_t blkaddr = f2fs_data_blkaddr(dn);

		if (blkaddr == NULL_ADDR) {
			dn->data_blkaddr = NEW_ADDR;
			__set_data_blkaddr(dn);
			count--;
		}
	}

	if (set_page_dirty(dn->node_page))
		dn->node_changed = true;
	return 0;
}

/* Should keep dn->ofs_in_node unchanged */
int f2fs_reserve_new_block(struct dnode_of_data *dn)
{
	unsigned int ofs_in_node = dn->ofs_in_node;
	int ret;

	ret = f2fs_reserve_new_blocks(dn, 1);
	dn->ofs_in_node = ofs_in_node;
	return ret;
}

int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
{
	bool need_put = dn->inode_page ? false : true;
	int err;

	err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE);
	if (err)
		return err;

	if (dn->data_blkaddr == NULL_ADDR)
		err = f2fs_reserve_new_block(dn);
	if (err || need_put)
		f2fs_put_dnode(dn);
	return err;
}

struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
				     blk_opf_t op_flags, bool for_write,
				     pgoff_t *next_pgofs)
{
	struct address_space *mapping = inode->i_mapping;
	struct dnode_of_data dn;
	struct page *page;
	int err;

	page = f2fs_grab_cache_page(mapping, index, for_write);
	if (!page)
		return ERR_PTR(-ENOMEM);

	if (f2fs_lookup_read_extent_cache_block(inode, index,
						&dn.data_blkaddr)) {
		if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
						DATA_GENERIC_ENHANCE_READ)) {
			err = -EFSCORRUPTED;
			f2fs_handle_error(F2FS_I_SB(inode),
						ERROR_INVALID_BLKADDR);
			goto put_err;
		}
		goto got_it;
	}

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
	if (err) {
		if (err == -ENOENT && next_pgofs)
			*next_pgofs = f2fs_get_next_page_offset(&dn, index);
		goto put_err;
	}
	f2fs_put_dnode(&dn);

	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
		err = -ENOENT;
		if (next_pgofs)
			*next_pgofs = index + 1;
		goto put_err;
	}
	if (dn.data_blkaddr != NEW_ADDR &&
			!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
						dn.data_blkaddr,
						DATA_GENERIC_ENHANCE)) {
		err = -EFSCORRUPTED;
		f2fs_handle_error(F2FS_I_SB(inode),
					ERROR_INVALID_BLKADDR);
		goto put_err;
	}
got_it:
	if (PageUptodate(page)) {
		unlock_page(page);
		return page;
	}

	/*
	 * A new dentry page is allocated but not able to be written, since its
	 * new inode page couldn't be allocated due to -ENOSPC.
	 * In that case, its blkaddr may remain NEW_ADDR.
	 * See f2fs_add_link -> f2fs_get_new_data_page ->
	 * f2fs_init_inode_metadata.
	 */
	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		if (!PageUptodate(page))
			SetPageUptodate(page);
		unlock_page(page);
		return page;
	}

	err = f2fs_submit_page_read(inode, page, dn.data_blkaddr,
						op_flags, for_write);
	if (err)
		goto put_err;
	return page;

put_err:
	f2fs_put_page(page, 1);
	return ERR_PTR(err);
}

struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index,
					pgoff_t *next_pgofs)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;

	page = find_get_page(mapping, index);
	if (page && PageUptodate(page))
		return page;
	f2fs_put_page(page, 0);

	page = f2fs_get_read_data_page(inode, index, 0, false, next_pgofs);
	if (IS_ERR(page))
		return page;

	if (PageUptodate(page))
		return page;

	wait_on_page_locked(page);
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 0);
		return ERR_PTR(-EIO);
	}
	return page;
}

/*
 * If it tries to access a hole, return an error.  The callers (functions in
 * dir.c and GC) need to be able to tell whether this page exists or not.
 */
struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
							bool for_write)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;

	page = f2fs_get_read_data_page(inode, index, 0, for_write, NULL);
	if (IS_ERR(page))
		return page;

	/* wait for read completion */
	lock_page(page);
	if (unlikely(page->mapping != mapping || !PageUptodate(page))) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
	}
	return page;
}

/*
 * Caller ensures that this data page is never allocated.
 * A new zero-filled data page is allocated in the page cache.
 *
 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op().
 * Note that ipage is set only by make_empty_dir, and if any error occurs,
 * ipage should be released by this function.
 */
struct page *f2fs_get_new_data_page(struct inode *inode,
		struct page *ipage, pgoff_t index, bool new_i_size)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	struct dnode_of_data dn;
	int err;

	page = f2fs_grab_cache_page(mapping, index, true);
	if (!page) {
		/*
		 * before exiting, we should make sure ipage will be released
		 * if any error occurs.
		 */
		f2fs_put_page(ipage, 1);
		return ERR_PTR(-ENOMEM);
	}

	set_new_dnode(&dn, inode, ipage, NULL, 0);
	err = f2fs_reserve_block(&dn, index);
	if (err) {
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	}
	if (!ipage)
		f2fs_put_dnode(&dn);

	if (PageUptodate(page))
		goto got_it;

	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		if (!PageUptodate(page))
			SetPageUptodate(page);
	} else {
		f2fs_put_page(page, 1);

		/* if ipage exists, blkaddr should be NEW_ADDR */
		f2fs_bug_on(F2FS_I_SB(inode), ipage);
		page = f2fs_get_lock_data_page(inode, index, true);
		if (IS_ERR(page))
			return page;
	}
got_it:
	if (new_i_size && i_size_read(inode) <
				((loff_t)(index + 1) << PAGE_SHIFT))
		f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
	return page;
}

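/*
 * Allocate a brand-new data block for @dn (out-of-place), record the summary
 * for it, invalidate any stale meta/compress cache entries for the old block
 * address, and update the extent cache.
 */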
static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct f2fs_summary sum;
	struct node_info ni;
	block_t old_blkaddr;
	blkcnt_t count = 1;
	int err;

	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
		return -EPERM;

	err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
	if (err)
		return err;

	dn->data_blkaddr = f2fs_data_blkaddr(dn);
	if (dn->data_blkaddr == NULL_ADDR) {
		err = inc_valid_block_count(sbi, dn->inode, &count);
		if (unlikely(err))
			return err;
	}

	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
	old_blkaddr = dn->data_blkaddr;
	f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
				&sum, seg_type, NULL);
	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
		invalidate_mapping_pages(META_MAPPING(sbi),
					old_blkaddr, old_blkaddr);
		f2fs_invalidate_compress_page(sbi, old_blkaddr);
	}
	f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
	return 0;
}

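/*
 * Take the lock that serializes block mapping against checkpoint: a read
 * lock on sbi->node_change for F2FS_GET_BLOCK_PRE_AIO, and f2fs_lock_op()
 * for every other flag.
 */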
static void f2fs_map_lock(struct f2fs_sb_info *sbi, int flag)
{
	if (flag == F2FS_GET_BLOCK_PRE_AIO)
		f2fs_down_read(&sbi->node_change);
	else
		f2fs_lock_op(sbi);
}

static void f2fs_map_unlock(struct f2fs_sb_info *sbi, int flag)
{
	if (flag == F2FS_GET_BLOCK_PRE_AIO)
		f2fs_up_read(&sbi->node_change);
	else
		f2fs_unlock_op(sbi);
}

int f2fs_get_block_locked(struct dnode_of_data *dn, pgoff_t index)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	int err = 0;

	f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO);
	if (!f2fs_lookup_read_extent_cache_block(dn->inode, index,
						&dn->data_blkaddr))
		err = f2fs_reserve_block(dn, index);
	f2fs_map_unlock(sbi, F2FS_GET_BLOCK_PRE_AIO);

	return err;
}

static int f2fs_map_no_dnode(struct inode *inode,
		struct f2fs_map_blocks *map, struct dnode_of_data *dn,
		pgoff_t pgoff)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	/*
	 * There is one exceptional case in which read_node_page() may return
	 * -ENOENT: when the filesystem has been shut down or has hit a
	 * checkpoint error.  Return -EIO in that case.
	 */
	if (map->m_may_create &&
	    (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || f2fs_cp_error(sbi)))
		return -EIO;

	if (map->m_next_pgofs)
		*map->m_next_pgofs = f2fs_get_next_page_offset(dn, pgoff);
	if (map->m_next_extent)
		*map->m_next_extent = f2fs_get_next_page_offset(dn, pgoff);
	return 0;
}

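/*
 * Fast path for f2fs_map_blocks(): serve the mapping straight from the read
 * extent cache, clamping the length to the cached extent and, for direct I/O
 * on a multi-device setup, translating to the target device's block range.
 */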
static bool f2fs_map_blocks_cached(struct inode *inode,
		struct f2fs_map_blocks *map, int flag)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	unsigned int maxblocks = map->m_len;
	pgoff_t pgoff = (pgoff_t)map->m_lblk;
	struct extent_info ei = {};

	if (!f2fs_lookup_read_extent_cache(inode, pgoff, &ei))
		return false;

	map->m_pblk = ei.blk + pgoff - ei.fofs;
	map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgoff);
	map->m_flags = F2FS_MAP_MAPPED;
	if (map->m_next_extent)
		*map->m_next_extent = pgoff + map->m_len;

	/* for hardware encryption, but to avoid potential issue in future */
	if (flag == F2FS_GET_BLOCK_DIO)
		f2fs_wait_on_block_writeback_range(inode,
					map->m_pblk, map->m_len);

	if (f2fs_allow_multi_device_dio(sbi, flag)) {
		int bidx = f2fs_target_device_index(sbi, map->m_pblk);
		struct f2fs_dev_info *dev = &sbi->devs[bidx];

		map->m_bdev = dev->bdev;
		map->m_pblk -= dev->start_blk;
		map->m_len = min(map->m_len, dev->end_blk + 1 - map->m_pblk);
	} else {
		map->m_bdev = inode->i_sb->s_bdev;
	}
	return true;
}

/*
 * f2fs_map_blocks() tries to find or build a mapping from continuous logical
 * blocks to physical blocks, and returns such info via the f2fs_map_blocks
 * structure.
 */
1596 | int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) |
1597 | { |
1598 | unsigned int maxblocks = map->m_len; |
1599 | struct dnode_of_data dn; |
1600 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
1601 | int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE; |
1602 | pgoff_t pgofs, end_offset, end; |
1603 | int err = 0, ofs = 1; |
1604 | unsigned int ofs_in_node, last_ofs_in_node; |
1605 | blkcnt_t prealloc; |
1606 | block_t blkaddr; |
1607 | unsigned int start_pgofs; |
1608 | int bidx = 0; |
1609 | bool is_hole; |
1610 | |
1611 | if (!maxblocks) |
1612 | return 0; |
1613 | |
1614 | if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag)) |
1615 | goto out; |
1616 | |
1617 | map->m_bdev = inode->i_sb->s_bdev; |
1618 | map->m_multidev_dio = |
1619 | f2fs_allow_multi_device_dio(sbi: F2FS_I_SB(inode), flag); |
1620 | |
1621 | map->m_len = 0; |
1622 | map->m_flags = 0; |
1623 | |
1624 | /* it only supports block size == page size */ |
1625 | pgofs = (pgoff_t)map->m_lblk; |
1626 | end = pgofs + maxblocks; |
1627 | |
1628 | next_dnode: |
1629 | if (map->m_may_create) |
1630 | f2fs_map_lock(sbi, flag); |
1631 | |
1632 | /* When reading holes, we need its node page */ |
1633 | set_new_dnode(dn: &dn, inode, NULL, NULL, nid: 0); |
1634 | err = f2fs_get_dnode_of_data(dn: &dn, index: pgofs, mode); |
1635 | if (err) { |
1636 | if (flag == F2FS_GET_BLOCK_BMAP) |
1637 | map->m_pblk = 0; |
1638 | if (err == -ENOENT) |
1639 | err = f2fs_map_no_dnode(inode, map, dn: &dn, pgoff: pgofs); |
1640 | goto unlock_out; |
1641 | } |
1642 | |
1643 | start_pgofs = pgofs; |
1644 | prealloc = 0; |
1645 | last_ofs_in_node = ofs_in_node = dn.ofs_in_node; |
1646 | end_offset = ADDRS_PER_PAGE(dn.node_page, inode); |
1647 | |
1648 | next_block: |
1649 | blkaddr = f2fs_data_blkaddr(dn: &dn); |
1650 | is_hole = !__is_valid_data_blkaddr(blkaddr); |
1651 | if (!is_hole && |
1652 | !f2fs_is_valid_blkaddr(sbi, blkaddr, type: DATA_GENERIC_ENHANCE)) { |
1653 | err = -EFSCORRUPTED; |
1654 | f2fs_handle_error(sbi, error: ERROR_INVALID_BLKADDR); |
1655 | goto sync_out; |
1656 | } |
1657 | |
1658 | /* use out-place-update for direct IO under LFS mode */ |
1659 | if (map->m_may_create && |
1660 | (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO))) { |
1661 | if (unlikely(f2fs_cp_error(sbi))) { |
1662 | err = -EIO; |
1663 | goto sync_out; |
1664 | } |
1665 | |
1666 | switch (flag) { |
1667 | case F2FS_GET_BLOCK_PRE_AIO: |
1668 | if (blkaddr == NULL_ADDR) { |
1669 | prealloc++; |
1670 | last_ofs_in_node = dn.ofs_in_node; |
1671 | } |
1672 | break; |
1673 | case F2FS_GET_BLOCK_PRE_DIO: |
1674 | case F2FS_GET_BLOCK_DIO: |
1675 | err = __allocate_data_block(dn: &dn, seg_type: map->m_seg_type); |
1676 | if (err) |
1677 | goto sync_out; |
1678 | if (flag == F2FS_GET_BLOCK_PRE_DIO) |
1679 | file_need_truncate(inode); |
1680 | set_inode_flag(inode, flag: FI_APPEND_WRITE); |
1681 | break; |
1682 | default: |
1683 | WARN_ON_ONCE(1); |
1684 | err = -EIO; |
1685 | goto sync_out; |
1686 | } |
1687 | |
1688 | blkaddr = dn.data_blkaddr; |
1689 | if (is_hole) |
1690 | map->m_flags |= F2FS_MAP_NEW; |
1691 | } else if (is_hole) { |
1692 | if (f2fs_compressed_file(inode) && |
		    f2fs_sanity_check_cluster(&dn)) {
			err = -EFSCORRUPTED;
			f2fs_handle_error(sbi, ERROR_CORRUPTED_CLUSTER);
1697 | goto sync_out; |
1698 | } |
1699 | |
1700 | switch (flag) { |
1701 | case F2FS_GET_BLOCK_PRECACHE: |
1702 | goto sync_out; |
1703 | case F2FS_GET_BLOCK_BMAP: |
1704 | map->m_pblk = 0; |
1705 | goto sync_out; |
1706 | case F2FS_GET_BLOCK_FIEMAP: |
1707 | if (blkaddr == NULL_ADDR) { |
1708 | if (map->m_next_pgofs) |
1709 | *map->m_next_pgofs = pgofs + 1; |
1710 | goto sync_out; |
1711 | } |
1712 | break; |
1713 | default: |
1714 | /* for defragment case */ |
1715 | if (map->m_next_pgofs) |
1716 | *map->m_next_pgofs = pgofs + 1; |
1717 | goto sync_out; |
1718 | } |
1719 | } |
1720 | |
1721 | if (flag == F2FS_GET_BLOCK_PRE_AIO) |
1722 | goto skip; |
1723 | |
1724 | if (map->m_multidev_dio) |
1725 | bidx = f2fs_target_device_index(sbi, blkaddr); |
1726 | |
1727 | if (map->m_len == 0) { |
1728 | /* reserved delalloc block should be mapped for fiemap. */ |
1729 | if (blkaddr == NEW_ADDR) |
1730 | map->m_flags |= F2FS_MAP_DELALLOC; |
1731 | map->m_flags |= F2FS_MAP_MAPPED; |
1732 | |
1733 | map->m_pblk = blkaddr; |
1734 | map->m_len = 1; |
1735 | |
1736 | if (map->m_multidev_dio) |
1737 | map->m_bdev = FDEV(bidx).bdev; |
1738 | } else if ((map->m_pblk != NEW_ADDR && |
1739 | blkaddr == (map->m_pblk + ofs)) || |
1740 | (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) || |
1741 | flag == F2FS_GET_BLOCK_PRE_DIO) { |
1742 | if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev) |
1743 | goto sync_out; |
1744 | ofs++; |
1745 | map->m_len++; |
1746 | } else { |
1747 | goto sync_out; |
1748 | } |
1749 | |
1750 | skip: |
1751 | dn.ofs_in_node++; |
1752 | pgofs++; |
1753 | |
1754 | /* preallocate blocks in batch for one dnode page */ |
1755 | if (flag == F2FS_GET_BLOCK_PRE_AIO && |
1756 | (pgofs == end || dn.ofs_in_node == end_offset)) { |
1757 | |
1758 | dn.ofs_in_node = ofs_in_node; |
		err = f2fs_reserve_new_blocks(&dn, prealloc);
1760 | if (err) |
1761 | goto sync_out; |
1762 | |
1763 | map->m_len += dn.ofs_in_node - ofs_in_node; |
1764 | if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) { |
1765 | err = -ENOSPC; |
1766 | goto sync_out; |
1767 | } |
1768 | dn.ofs_in_node = end_offset; |
1769 | } |
1770 | |
1771 | if (pgofs >= end) |
1772 | goto sync_out; |
1773 | else if (dn.ofs_in_node < end_offset) |
1774 | goto next_block; |
1775 | |
1776 | if (flag == F2FS_GET_BLOCK_PRECACHE) { |
1777 | if (map->m_flags & F2FS_MAP_MAPPED) { |
1778 | unsigned int ofs = start_pgofs - map->m_lblk; |
1779 | |
			f2fs_update_read_extent_cache_range(&dn,
				start_pgofs, map->m_pblk + ofs,
				map->m_len - ofs);
1783 | } |
1784 | } |
1785 | |
	f2fs_put_dnode(&dn);
1787 | |
1788 | if (map->m_may_create) { |
1789 | f2fs_map_unlock(sbi, flag); |
		f2fs_balance_fs(sbi, dn.node_changed);
1791 | } |
1792 | goto next_dnode; |
1793 | |
1794 | sync_out: |
1795 | |
1796 | if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) { |
1797 | /* |
1798 | * for hardware encryption, but to avoid potential issue |
1799 | * in future |
1800 | */ |
		f2fs_wait_on_block_writeback_range(inode,
					map->m_pblk, map->m_len);
1803 | |
1804 | if (map->m_multidev_dio) { |
1805 | block_t blk_addr = map->m_pblk; |
1806 | |
			bidx = f2fs_target_device_index(sbi, map->m_pblk);
1808 | |
1809 | map->m_bdev = FDEV(bidx).bdev; |
1810 | map->m_pblk -= FDEV(bidx).start_blk; |
1811 | |
1812 | if (map->m_may_create) |
				f2fs_update_device_state(sbi, inode->i_ino,
						blk_addr, map->m_len);
1815 | |
1816 | f2fs_bug_on(sbi, blk_addr + map->m_len > |
1817 | FDEV(bidx).end_blk + 1); |
1818 | } |
1819 | } |
1820 | |
1821 | if (flag == F2FS_GET_BLOCK_PRECACHE) { |
1822 | if (map->m_flags & F2FS_MAP_MAPPED) { |
1823 | unsigned int ofs = start_pgofs - map->m_lblk; |
1824 | |
			f2fs_update_read_extent_cache_range(&dn,
				start_pgofs, map->m_pblk + ofs,
				map->m_len - ofs);
1828 | } |
1829 | if (map->m_next_extent) |
1830 | *map->m_next_extent = pgofs + 1; |
1831 | } |
	f2fs_put_dnode(&dn);
1833 | unlock_out: |
1834 | if (map->m_may_create) { |
1835 | f2fs_map_unlock(sbi, flag); |
		f2fs_balance_fs(sbi, dn.node_changed);
1837 | } |
1838 | out: |
	trace_f2fs_map_blocks(inode, map, flag, err);
1840 | return err; |
1841 | } |
1842 | |
1843 | bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len) |
1844 | { |
1845 | struct f2fs_map_blocks map; |
1846 | block_t last_lblk; |
1847 | int err; |
1848 | |
1849 | if (pos + len > i_size_read(inode)) |
1850 | return false; |
1851 | |
1852 | map.m_lblk = F2FS_BYTES_TO_BLK(pos); |
1853 | map.m_next_pgofs = NULL; |
1854 | map.m_next_extent = NULL; |
1855 | map.m_seg_type = NO_CHECK_TYPE; |
1856 | map.m_may_create = false; |
1857 | last_lblk = F2FS_BLK_ALIGN(pos + len); |
1858 | |
1859 | while (map.m_lblk < last_lblk) { |
1860 | map.m_len = last_lblk - map.m_lblk; |
		err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
1862 | if (err || map.m_len == 0) |
1863 | return false; |
1864 | map.m_lblk += map.m_len; |
1865 | } |
1866 | return true; |
1867 | } |
1868 | |
1869 | static inline u64 bytes_to_blks(struct inode *inode, u64 bytes) |
1870 | { |
1871 | return (bytes >> inode->i_blkbits); |
1872 | } |
1873 | |
1874 | static inline u64 blks_to_bytes(struct inode *inode, u64 blks) |
1875 | { |
1876 | return (blks << inode->i_blkbits); |
1877 | } |
1878 | |
1879 | static int f2fs_xattr_fiemap(struct inode *inode, |
1880 | struct fiemap_extent_info *fieinfo) |
1881 | { |
1882 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
1883 | struct page *page; |
1884 | struct node_info ni; |
1885 | __u64 phys = 0, len; |
1886 | __u32 flags; |
1887 | nid_t xnid = F2FS_I(inode)->i_xattr_nid; |
1888 | int err = 0; |
1889 | |
1890 | if (f2fs_has_inline_xattr(inode)) { |
1891 | int offset; |
1892 | |
		page = f2fs_grab_cache_page(NODE_MAPPING(sbi),
						inode->i_ino, false);
1895 | if (!page) |
1896 | return -ENOMEM; |
1897 | |
		err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
1899 | if (err) { |
			f2fs_put_page(page, 1);
1901 | return err; |
1902 | } |
1903 | |
		phys = blks_to_bytes(inode, ni.blk_addr);
1905 | offset = offsetof(struct f2fs_inode, i_addr) + |
1906 | sizeof(__le32) * (DEF_ADDRS_PER_INODE - |
1907 | get_inline_xattr_addrs(inode)); |
1908 | |
1909 | phys += offset; |
1910 | len = inline_xattr_size(inode); |
1911 | |
		f2fs_put_page(page, 1);
1913 | |
1914 | flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED; |
1915 | |
1916 | if (!xnid) |
1917 | flags |= FIEMAP_EXTENT_LAST; |
1918 | |
		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
		trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
1921 | if (err) |
1922 | return err; |
1923 | } |
1924 | |
1925 | if (xnid) { |
		page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false);
1927 | if (!page) |
1928 | return -ENOMEM; |
1929 | |
		err = f2fs_get_node_info(sbi, xnid, &ni, false);
1931 | if (err) { |
			f2fs_put_page(page, 1);
1933 | return err; |
1934 | } |
1935 | |
		phys = blks_to_bytes(inode, ni.blk_addr);
1937 | len = inode->i_sb->s_blocksize; |
1938 | |
		f2fs_put_page(page, 1);
1940 | |
1941 | flags = FIEMAP_EXTENT_LAST; |
1942 | } |
1943 | |
1944 | if (phys) { |
		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
		trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
1947 | } |
1948 | |
1949 | return (err < 0 ? err : 0); |
1950 | } |
1951 | |
1952 | static loff_t max_inode_blocks(struct inode *inode) |
1953 | { |
1954 | loff_t result = ADDRS_PER_INODE(inode); |
1955 | loff_t leaf_count = ADDRS_PER_BLOCK(inode); |
1956 | |
1957 | /* two direct node blocks */ |
1958 | result += (leaf_count * 2); |
1959 | |
1960 | /* two indirect node blocks */ |
1961 | leaf_count *= NIDS_PER_BLOCK; |
1962 | result += (leaf_count * 2); |
1963 | |
1964 | /* one double indirect node block */ |
1965 | leaf_count *= NIDS_PER_BLOCK; |
1966 | result += leaf_count; |
1967 | |
1968 | return result; |
1969 | } |
1970 | |
1971 | int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
1972 | u64 start, u64 len) |
1973 | { |
1974 | struct f2fs_map_blocks map; |
1975 | sector_t start_blk, last_blk; |
1976 | pgoff_t next_pgofs; |
1977 | u64 logical = 0, phys = 0, size = 0; |
1978 | u32 flags = 0; |
1979 | int ret = 0; |
1980 | bool compr_cluster = false, compr_appended; |
1981 | unsigned int cluster_size = F2FS_I(inode)->i_cluster_size; |
1982 | unsigned int count_in_cluster = 0; |
1983 | loff_t maxbytes; |
1984 | |
1985 | if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) { |
1986 | ret = f2fs_precache_extents(inode); |
1987 | if (ret) |
1988 | return ret; |
1989 | } |
1990 | |
	ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_XATTR);
1992 | if (ret) |
1993 | return ret; |
1994 | |
1995 | inode_lock(inode); |
1996 | |
1997 | maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS; |
1998 | if (start > maxbytes) { |
1999 | ret = -EFBIG; |
2000 | goto out; |
2001 | } |
2002 | |
2003 | if (len > maxbytes || (maxbytes - len) < start) |
2004 | len = maxbytes - start; |
2005 | |
2006 | if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { |
2007 | ret = f2fs_xattr_fiemap(inode, fieinfo); |
2008 | goto out; |
2009 | } |
2010 | |
2011 | if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) { |
2012 | ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len); |
2013 | if (ret != -EAGAIN) |
2014 | goto out; |
2015 | } |
2016 | |
	if (bytes_to_blks(inode, len) == 0)
		len = blks_to_bytes(inode, 1);
2019 | |
	start_blk = bytes_to_blks(inode, start);
	last_blk = bytes_to_blks(inode, start + len - 1);
2022 | |
2023 | next: |
2024 | memset(&map, 0, sizeof(map)); |
2025 | map.m_lblk = start_blk; |
	map.m_len = bytes_to_blks(inode, len);
2027 | map.m_next_pgofs = &next_pgofs; |
2028 | map.m_seg_type = NO_CHECK_TYPE; |
2029 | |
2030 | if (compr_cluster) { |
2031 | map.m_lblk += 1; |
2032 | map.m_len = cluster_size - count_in_cluster; |
2033 | } |
2034 | |
	ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_FIEMAP);
2036 | if (ret) |
2037 | goto out; |
2038 | |
2039 | /* HOLE */ |
2040 | if (!compr_cluster && !(map.m_flags & F2FS_MAP_FLAGS)) { |
2041 | start_blk = next_pgofs; |
2042 | |
		if (blks_to_bytes(inode, start_blk) < blks_to_bytes(inode,
					max_inode_blocks(inode)))
2045 | goto prep_next; |
2046 | |
2047 | flags |= FIEMAP_EXTENT_LAST; |
2048 | } |
2049 | |
2050 | compr_appended = false; |
2051 | /* In a case of compressed cluster, append this to the last extent */ |
2052 | if (compr_cluster && ((map.m_flags & F2FS_MAP_DELALLOC) || |
2053 | !(map.m_flags & F2FS_MAP_FLAGS))) { |
2054 | compr_appended = true; |
2055 | goto skip_fill; |
2056 | } |
2057 | |
2058 | if (size) { |
2059 | flags |= FIEMAP_EXTENT_MERGED; |
2060 | if (IS_ENCRYPTED(inode)) |
2061 | flags |= FIEMAP_EXTENT_DATA_ENCRYPTED; |
2062 | |
		ret = fiemap_fill_next_extent(fieinfo, logical,
				phys, size, flags);
		trace_f2fs_fiemap(inode, logical, phys, size, flags, ret);
2066 | if (ret) |
2067 | goto out; |
2068 | size = 0; |
2069 | } |
2070 | |
2071 | if (start_blk > last_blk) |
2072 | goto out; |
2073 | |
2074 | skip_fill: |
2075 | if (map.m_pblk == COMPRESS_ADDR) { |
2076 | compr_cluster = true; |
2077 | count_in_cluster = 1; |
2078 | } else if (compr_appended) { |
2079 | unsigned int appended_blks = cluster_size - |
2080 | count_in_cluster + 1; |
		size += blks_to_bytes(inode, appended_blks);
2082 | start_blk += appended_blks; |
2083 | compr_cluster = false; |
2084 | } else { |
		logical = blks_to_bytes(inode, start_blk);
		phys = __is_valid_data_blkaddr(map.m_pblk) ?
			blks_to_bytes(inode, map.m_pblk) : 0;
		size = blks_to_bytes(inode, map.m_len);
2089 | flags = 0; |
2090 | |
2091 | if (compr_cluster) { |
2092 | flags = FIEMAP_EXTENT_ENCODED; |
2093 | count_in_cluster += map.m_len; |
2094 | if (count_in_cluster == cluster_size) { |
2095 | compr_cluster = false; |
				size += blks_to_bytes(inode, 1);
2097 | } |
2098 | } else if (map.m_flags & F2FS_MAP_DELALLOC) { |
2099 | flags = FIEMAP_EXTENT_UNWRITTEN; |
2100 | } |
2101 | |
		start_blk += bytes_to_blks(inode, size);
2103 | } |
2104 | |
2105 | prep_next: |
2106 | cond_resched(); |
2107 | if (fatal_signal_pending(current)) |
2108 | ret = -EINTR; |
2109 | else |
2110 | goto next; |
2111 | out: |
2112 | if (ret == 1) |
2113 | ret = 0; |
2114 | |
2115 | inode_unlock(inode); |
2116 | return ret; |
2117 | } |
2118 | |
2119 | static inline loff_t f2fs_readpage_limit(struct inode *inode) |
2120 | { |
2121 | if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode)) |
2122 | return inode->i_sb->s_maxbytes; |
2123 | |
2124 | return i_size_read(inode); |
2125 | } |
2126 | |
2127 | static int f2fs_read_single_page(struct inode *inode, struct page *page, |
2128 | unsigned nr_pages, |
2129 | struct f2fs_map_blocks *map, |
2130 | struct bio **bio_ret, |
2131 | sector_t *last_block_in_bio, |
2132 | bool is_readahead) |
2133 | { |
2134 | struct bio *bio = *bio_ret; |
	const unsigned blocksize = blks_to_bytes(inode, 1);
2136 | sector_t block_in_file; |
2137 | sector_t last_block; |
2138 | sector_t last_block_in_file; |
2139 | sector_t block_nr; |
2140 | int ret = 0; |
2141 | |
2142 | block_in_file = (sector_t)page_index(page); |
2143 | last_block = block_in_file + nr_pages; |
	last_block_in_file = bytes_to_blks(inode,
			f2fs_readpage_limit(inode) + blocksize - 1);
2146 | if (last_block > last_block_in_file) |
2147 | last_block = last_block_in_file; |
2148 | |
2149 | /* just zeroing out page which is beyond EOF */ |
2150 | if (block_in_file >= last_block) |
2151 | goto zero_out; |
2152 | /* |
2153 | * Map blocks using the previous result first. |
2154 | */ |
2155 | if ((map->m_flags & F2FS_MAP_MAPPED) && |
2156 | block_in_file > map->m_lblk && |
2157 | block_in_file < (map->m_lblk + map->m_len)) |
2158 | goto got_it; |
2159 | |
2160 | /* |
2161 | * Then do more f2fs_map_blocks() calls until we are |
2162 | * done with this page. |
2163 | */ |
2164 | map->m_lblk = block_in_file; |
2165 | map->m_len = last_block - block_in_file; |
2166 | |
	ret = f2fs_map_blocks(inode, map, F2FS_GET_BLOCK_DEFAULT);
2168 | if (ret) |
2169 | goto out; |
2170 | got_it: |
2171 | if ((map->m_flags & F2FS_MAP_MAPPED)) { |
2172 | block_nr = map->m_pblk + block_in_file - map->m_lblk; |
2173 | SetPageMappedToDisk(page); |
2174 | |
		if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
						DATA_GENERIC_ENHANCE_READ)) {
			ret = -EFSCORRUPTED;
			f2fs_handle_error(F2FS_I_SB(inode),
						ERROR_INVALID_BLKADDR);
2180 | goto out; |
2181 | } |
2182 | } else { |
2183 | zero_out: |
		zero_user_segment(page, 0, PAGE_SIZE);
		if (f2fs_need_verity(inode, page->index) &&
2186 | !fsverity_verify_page(page)) { |
2187 | ret = -EIO; |
2188 | goto out; |
2189 | } |
2190 | if (!PageUptodate(page)) |
2191 | SetPageUptodate(page); |
2192 | unlock_page(page); |
2193 | goto out; |
2194 | } |
2195 | |
2196 | /* |
2197 | * This page will go to BIO. Do we need to send this |
2198 | * BIO off first? |
2199 | */ |
	if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio,
				       *last_block_in_bio, block_nr) ||
		    !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
submit_and_realloc:
		f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
		bio = NULL;
	}
	if (bio == NULL) {
		bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
				is_readahead ? REQ_RAHEAD : 0, page->index,
				false);
		if (IS_ERR(bio)) {
			ret = PTR_ERR(bio);
2213 | bio = NULL; |
2214 | goto out; |
2215 | } |
2216 | } |
2217 | |
2218 | /* |
2219 | * If the page is under writeback, we need to wait for |
2220 | * its completion to see the correct decrypted data. |
2221 | */ |
	f2fs_wait_on_block_writeback(inode, block_nr);

	if (bio_add_page(bio, page, blocksize, 0) < blocksize)
		goto submit_and_realloc;

	inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
	f2fs_update_iostat(F2FS_I_SB(inode), NULL, FS_DATA_READ_IO,
							F2FS_BLKSIZE);
2230 | *last_block_in_bio = block_nr; |
2231 | out: |
2232 | *bio_ret = bio; |
2233 | return ret; |
2234 | } |
2235 | |
2236 | #ifdef CONFIG_F2FS_FS_COMPRESSION |
2237 | int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, |
2238 | unsigned nr_pages, sector_t *last_block_in_bio, |
2239 | bool is_readahead, bool for_write) |
2240 | { |
2241 | struct dnode_of_data dn; |
2242 | struct inode *inode = cc->inode; |
2243 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
2244 | struct bio *bio = *bio_ret; |
2245 | unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size; |
2246 | sector_t last_block_in_file; |
	const unsigned blocksize = blks_to_bytes(inode, 1);
2248 | struct decompress_io_ctx *dic = NULL; |
2249 | struct extent_info ei = {}; |
2250 | bool from_dnode = true; |
2251 | int i; |
2252 | int ret = 0; |
2253 | |
2254 | f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc)); |
2255 | |
	last_block_in_file = bytes_to_blks(inode,
			f2fs_readpage_limit(inode) + blocksize - 1);
2258 | |
2259 | /* get rid of pages beyond EOF */ |
2260 | for (i = 0; i < cc->cluster_size; i++) { |
2261 | struct page *page = cc->rpages[i]; |
2262 | |
2263 | if (!page) |
2264 | continue; |
2265 | if ((sector_t)page->index >= last_block_in_file) { |
			zero_user_segment(page, 0, PAGE_SIZE);
2267 | if (!PageUptodate(page)) |
2268 | SetPageUptodate(page); |
2269 | } else if (!PageUptodate(page)) { |
2270 | continue; |
2271 | } |
2272 | unlock_page(page); |
2273 | if (for_write) |
2274 | put_page(page); |
2275 | cc->rpages[i] = NULL; |
2276 | cc->nr_rpages--; |
2277 | } |
2278 | |
2279 | /* we are done since all pages are beyond EOF */ |
2280 | if (f2fs_cluster_is_empty(cc)) |
2281 | goto out; |
2282 | |
	if (f2fs_lookup_read_extent_cache(inode, start_idx, &ei))
2284 | from_dnode = false; |
2285 | |
2286 | if (!from_dnode) |
2287 | goto skip_reading_dnode; |
2288 | |
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
2291 | if (ret) |
2292 | goto out; |
2293 | |
2294 | if (unlikely(f2fs_cp_error(sbi))) { |
2295 | ret = -EIO; |
2296 | goto out_put_dnode; |
2297 | } |
2298 | f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR); |
2299 | |
2300 | skip_reading_dnode: |
2301 | for (i = 1; i < cc->cluster_size; i++) { |
2302 | block_t blkaddr; |
2303 | |
		blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
					dn.ofs_in_node + i) :
					ei.blk + i - 1;
2307 | |
2308 | if (!__is_valid_data_blkaddr(blkaddr)) |
2309 | break; |
2310 | |
		if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
2312 | ret = -EFAULT; |
2313 | goto out_put_dnode; |
2314 | } |
2315 | cc->nr_cpages++; |
2316 | |
2317 | if (!from_dnode && i >= ei.c_len) |
2318 | break; |
2319 | } |
2320 | |
2321 | /* nothing to decompress */ |
2322 | if (cc->nr_cpages == 0) { |
2323 | ret = 0; |
2324 | goto out_put_dnode; |
2325 | } |
2326 | |
2327 | dic = f2fs_alloc_dic(cc); |
	if (IS_ERR(dic)) {
		ret = PTR_ERR(dic);
2330 | goto out_put_dnode; |
2331 | } |
2332 | |
2333 | for (i = 0; i < cc->nr_cpages; i++) { |
2334 | struct page *page = dic->cpages[i]; |
2335 | block_t blkaddr; |
2336 | struct bio_post_read_ctx *ctx; |
2337 | |
		blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
					dn.ofs_in_node + i + 1) :
					ei.blk + i;
2341 | |
2342 | f2fs_wait_on_block_writeback(inode, blkaddr); |
2343 | |
2344 | if (f2fs_load_compressed_page(sbi, page, blkaddr)) { |
			if (atomic_dec_and_test(&dic->remaining_pages)) {
				f2fs_decompress_cluster(dic, true);
2347 | break; |
2348 | } |
2349 | continue; |
2350 | } |
2351 | |
		if (bio && (!page_is_mergeable(sbi, bio,
					*last_block_in_bio, blkaddr) ||
		    !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
submit_and_realloc:
			f2fs_submit_read_bio(sbi, bio, DATA);
2357 | bio = NULL; |
2358 | } |
2359 | |
2360 | if (!bio) { |
			bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
					is_readahead ? REQ_RAHEAD : 0,
					page->index, for_write);
			if (IS_ERR(bio)) {
				ret = PTR_ERR(bio);
				f2fs_decompress_end_io(dic, ret, true);
				f2fs_put_dnode(&dn);
2368 | *bio_ret = NULL; |
2369 | return ret; |
2370 | } |
2371 | } |
2372 | |
		if (bio_add_page(bio, page, blocksize, 0) < blocksize)
2374 | goto submit_and_realloc; |
2375 | |
2376 | ctx = get_post_read_ctx(bio); |
2377 | ctx->enabled_steps |= STEP_DECOMPRESS; |
		refcount_inc(&dic->refcnt);
2379 | |
		inc_page_count(sbi, F2FS_RD_DATA);
		f2fs_update_iostat(sbi, inode, FS_DATA_READ_IO, F2FS_BLKSIZE);
2382 | *last_block_in_bio = blkaddr; |
2383 | } |
2384 | |
2385 | if (from_dnode) |
		f2fs_put_dnode(&dn);
2387 | |
2388 | *bio_ret = bio; |
2389 | return 0; |
2390 | |
2391 | out_put_dnode: |
2392 | if (from_dnode) |
		f2fs_put_dnode(&dn);
2394 | out: |
2395 | for (i = 0; i < cc->cluster_size; i++) { |
2396 | if (cc->rpages[i]) { |
			ClearPageUptodate(cc->rpages[i]);
			unlock_page(cc->rpages[i]);
2399 | } |
2400 | } |
2401 | *bio_ret = bio; |
2402 | return ret; |
2403 | } |
2404 | #endif |
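
/*
 * Cluster addressing example for the reader above: with
 * log_cluster_size == 2 (four pages per cluster), cluster_idx 5 starts at
 * page index 5 << 2 == 20 and covers pages 20..23.  In the dnode, the
 * first slot of a compressed cluster holds the COMPRESS_ADDR marker and
 * the following slots hold the block addresses of the compressed payload,
 * which is why the loops above start at i == 1 and use dn.ofs_in_node + i.
 */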
2405 | |
2406 | /* |
2407 | * This function was originally taken from fs/mpage.c, and customized for f2fs. |
2408 | * Major change was from block_size == page_size in f2fs by default. |
2409 | */ |
2410 | static int f2fs_mpage_readpages(struct inode *inode, |
2411 | struct readahead_control *rac, struct page *page) |
2412 | { |
2413 | struct bio *bio = NULL; |
2414 | sector_t last_block_in_bio = 0; |
2415 | struct f2fs_map_blocks map; |
2416 | #ifdef CONFIG_F2FS_FS_COMPRESSION |
2417 | struct compress_ctx cc = { |
2418 | .inode = inode, |
2419 | .log_cluster_size = F2FS_I(inode)->i_log_cluster_size, |
2420 | .cluster_size = F2FS_I(inode)->i_cluster_size, |
2421 | .cluster_idx = NULL_CLUSTER, |
2422 | .rpages = NULL, |
2423 | .cpages = NULL, |
2424 | .nr_rpages = 0, |
2425 | .nr_cpages = 0, |
2426 | }; |
2427 | pgoff_t nc_cluster_idx = NULL_CLUSTER; |
2428 | #endif |
2429 | unsigned nr_pages = rac ? readahead_count(rac) : 1; |
2430 | unsigned max_nr_pages = nr_pages; |
2431 | int ret = 0; |
2432 | |
2433 | map.m_pblk = 0; |
2434 | map.m_lblk = 0; |
2435 | map.m_len = 0; |
2436 | map.m_flags = 0; |
2437 | map.m_next_pgofs = NULL; |
2438 | map.m_next_extent = NULL; |
2439 | map.m_seg_type = NO_CHECK_TYPE; |
2440 | map.m_may_create = false; |
2441 | |
2442 | for (; nr_pages; nr_pages--) { |
2443 | if (rac) { |
			page = readahead_page(rac);
			prefetchw(&page->flags);
2446 | } |
2447 | |
2448 | #ifdef CONFIG_F2FS_FS_COMPRESSION |
2449 | if (f2fs_compressed_file(inode)) { |
2450 | /* there are remained compressed pages, submit them */ |
			if (!f2fs_cluster_can_merge_page(&cc, page->index)) {
				ret = f2fs_read_multi_pages(&cc, &bio,
							max_nr_pages,
							&last_block_in_bio,
							rac != NULL, false);
				f2fs_destroy_compress_ctx(&cc, false);
2457 | if (ret) |
2458 | goto set_error_page; |
2459 | } |
2460 | if (cc.cluster_idx == NULL_CLUSTER) { |
2461 | if (nc_cluster_idx == |
2462 | page->index >> cc.log_cluster_size) { |
2463 | goto read_single_page; |
2464 | } |
2465 | |
				ret = f2fs_is_compressed_cluster(inode, page->index);
2467 | if (ret < 0) |
2468 | goto set_error_page; |
2469 | else if (!ret) { |
2470 | nc_cluster_idx = |
2471 | page->index >> cc.log_cluster_size; |
2472 | goto read_single_page; |
2473 | } |
2474 | |
2475 | nc_cluster_idx = NULL_CLUSTER; |
2476 | } |
			ret = f2fs_init_compress_ctx(&cc);
2478 | if (ret) |
2479 | goto set_error_page; |
2480 | |
			f2fs_compress_ctx_add_page(&cc, page);
2482 | |
2483 | goto next_page; |
2484 | } |
2485 | read_single_page: |
2486 | #endif |
2487 | |
		ret = f2fs_read_single_page(inode, page, max_nr_pages, &map,
					&bio, &last_block_in_bio, rac);
2490 | if (ret) { |
2491 | #ifdef CONFIG_F2FS_FS_COMPRESSION |
2492 | set_error_page: |
2493 | #endif |
			zero_user_segment(page, 0, PAGE_SIZE);
2495 | unlock_page(page); |
2496 | } |
2497 | #ifdef CONFIG_F2FS_FS_COMPRESSION |
2498 | next_page: |
2499 | #endif |
2500 | if (rac) |
2501 | put_page(page); |
2502 | |
2503 | #ifdef CONFIG_F2FS_FS_COMPRESSION |
2504 | if (f2fs_compressed_file(inode)) { |
2505 | /* last page */ |
			if (nr_pages == 1 && !f2fs_cluster_is_empty(&cc)) {
				ret = f2fs_read_multi_pages(&cc, &bio,
							max_nr_pages,
							&last_block_in_bio,
							rac != NULL, false);
				f2fs_destroy_compress_ctx(&cc, false);
2512 | } |
2513 | } |
2514 | #endif |
2515 | } |
2516 | if (bio) |
		f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
2518 | return ret; |
2519 | } |
2520 | |
2521 | static int f2fs_read_data_folio(struct file *file, struct folio *folio) |
2522 | { |
2523 | struct page *page = &folio->page; |
2524 | struct inode *inode = page_file_mapping(page)->host; |
2525 | int ret = -EAGAIN; |
2526 | |
	trace_f2fs_readpage(page, DATA);
2528 | |
2529 | if (!f2fs_is_compress_backend_ready(inode)) { |
2530 | unlock_page(page); |
2531 | return -EOPNOTSUPP; |
2532 | } |
2533 | |
2534 | /* If the file has inline data, try to read it directly */ |
2535 | if (f2fs_has_inline_data(inode)) |
2536 | ret = f2fs_read_inline_data(inode, page); |
2537 | if (ret == -EAGAIN) |
2538 | ret = f2fs_mpage_readpages(inode, NULL, page); |
2539 | return ret; |
2540 | } |
2541 | |
2542 | static void f2fs_readahead(struct readahead_control *rac) |
2543 | { |
2544 | struct inode *inode = rac->mapping->host; |
2545 | |
	trace_f2fs_readpages(inode, readahead_index(rac), readahead_count(rac));
2547 | |
2548 | if (!f2fs_is_compress_backend_ready(inode)) |
2549 | return; |
2550 | |
2551 | /* If the file has inline data, skip readahead */ |
2552 | if (f2fs_has_inline_data(inode)) |
2553 | return; |
2554 | |
2555 | f2fs_mpage_readpages(inode, rac, NULL); |
2556 | } |
2557 | |
2558 | int f2fs_encrypt_one_page(struct f2fs_io_info *fio) |
2559 | { |
2560 | struct inode *inode = fio->page->mapping->host; |
2561 | struct page *mpage, *page; |
2562 | gfp_t gfp_flags = GFP_NOFS; |
2563 | |
2564 | if (!f2fs_encrypted_file(inode)) |
2565 | return 0; |
2566 | |
2567 | page = fio->compressed_page ? fio->compressed_page : fio->page; |
2568 | |
2569 | /* wait for GCed page writeback via META_MAPPING */ |
	f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);
2571 | |
2572 | if (fscrypt_inode_uses_inline_crypto(inode)) |
2573 | return 0; |
2574 | |
2575 | retry_encrypt: |
	fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page,
					PAGE_SIZE, 0, gfp_flags);
	if (IS_ERR(fio->encrypted_page)) {
		/* flush pending IOs and wait for a while in the ENOMEM case */
		if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
			f2fs_flush_merged_writes(fio->sbi);
			memalloc_retry_wait(GFP_NOFS);
			gfp_flags |= __GFP_NOFAIL;
			goto retry_encrypt;
		}
		return PTR_ERR(fio->encrypted_page);
	}
2588 | |
	mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
	if (mpage) {
		if (PageUptodate(mpage))
			memcpy(page_address(mpage),
				page_address(fio->encrypted_page), PAGE_SIZE);
		f2fs_put_page(mpage, 1);
	}
2596 | return 0; |
2597 | } |
2598 | |
2599 | static inline bool check_inplace_update_policy(struct inode *inode, |
2600 | struct f2fs_io_info *fio) |
2601 | { |
2602 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
2603 | |
2604 | if (IS_F2FS_IPU_HONOR_OPU_WRITE(sbi) && |
	    is_inode_flag_set(inode, FI_OPU_WRITE))
2606 | return false; |
2607 | if (IS_F2FS_IPU_FORCE(sbi)) |
2608 | return true; |
2609 | if (IS_F2FS_IPU_SSR(sbi) && f2fs_need_SSR(sbi)) |
2610 | return true; |
2611 | if (IS_F2FS_IPU_UTIL(sbi) && utilization(sbi) > SM_I(sbi)->min_ipu_util) |
2612 | return true; |
2613 | if (IS_F2FS_IPU_SSR_UTIL(sbi) && f2fs_need_SSR(sbi) && |
2614 | utilization(sbi) > SM_I(sbi)->min_ipu_util) |
2615 | return true; |
2616 | |
2617 | /* |
2618 | * IPU for rewrite async pages |
2619 | */ |
2620 | if (IS_F2FS_IPU_ASYNC(sbi) && fio && fio->op == REQ_OP_WRITE && |
2621 | !(fio->op_flags & REQ_SYNC) && !IS_ENCRYPTED(inode)) |
2622 | return true; |
2623 | |
2624 | /* this is only set during fdatasync */ |
	if (IS_F2FS_IPU_FSYNC(sbi) && is_inode_flag_set(inode, FI_NEED_IPU))
2626 | return true; |
2627 | |
2628 | if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) && |
2629 | !f2fs_is_checkpointed_data(sbi, fio->old_blkaddr))) |
2630 | return true; |
2631 | |
2632 | return false; |
2633 | } |
2634 | |
2635 | bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio) |
2636 | { |
2637 | /* swap file is migrating in aligned write mode */ |
	if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
2639 | return false; |
2640 | |
2641 | if (f2fs_is_pinned_file(inode)) |
2642 | return true; |
2643 | |
2644 | /* if this is cold file, we should overwrite to avoid fragmentation */ |
	if (file_is_cold(inode) && !is_inode_flag_set(inode, FI_OPU_WRITE))
2646 | return true; |
2647 | |
2648 | return check_inplace_update_policy(inode, fio); |
2649 | } |
2650 | |
2651 | bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) |
2652 | { |
2653 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
2654 | |
2655 | /* The below cases were checked when setting it. */ |
2656 | if (f2fs_is_pinned_file(inode)) |
2657 | return false; |
	if (fio && is_sbi_flag_set(sbi, SBI_NEED_FSCK))
2659 | return true; |
2660 | if (f2fs_lfs_mode(sbi)) |
2661 | return true; |
2662 | if (S_ISDIR(inode->i_mode)) |
2663 | return true; |
2664 | if (IS_NOQUOTA(inode)) |
2665 | return true; |
2666 | if (f2fs_is_atomic_file(inode)) |
2667 | return true; |
2668 | /* rewrite low ratio compress data w/ OPU mode to avoid fragmentation */ |
2669 | if (f2fs_compressed_file(inode) && |
2670 | F2FS_OPTION(sbi).compress_mode == COMPR_MODE_USER && |
	    is_inode_flag_set(inode, FI_ENABLE_COMPRESS))
2672 | return true; |
2673 | |
2674 | /* swap file is migrating in aligned write mode */ |
	if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
2676 | return true; |
2677 | |
	if (is_inode_flag_set(inode, FI_OPU_WRITE))
2679 | return true; |
2680 | |
2681 | if (fio) { |
		if (page_private_gcing(fio->page))
			return true;
		if (page_private_dummy(fio->page))
2685 | return true; |
2686 | if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) && |
2687 | f2fs_is_checkpointed_data(sbi, fio->old_blkaddr))) |
2688 | return true; |
2689 | } |
2690 | return false; |
2691 | } |
2692 | |
2693 | static inline bool need_inplace_update(struct f2fs_io_info *fio) |
2694 | { |
2695 | struct inode *inode = fio->page->mapping->host; |
2696 | |
2697 | if (f2fs_should_update_outplace(inode, fio)) |
2698 | return false; |
2699 | |
2700 | return f2fs_should_update_inplace(inode, fio); |
2701 | } |
2702 | |
2703 | int f2fs_do_write_data_page(struct f2fs_io_info *fio) |
2704 | { |
2705 | struct page *page = fio->page; |
2706 | struct inode *inode = page->mapping->host; |
2707 | struct dnode_of_data dn; |
2708 | struct node_info ni; |
2709 | bool ipu_force = false; |
2710 | int err = 0; |
2711 | |
2712 | /* Use COW inode to make dnode_of_data for atomic write */ |
2713 | if (f2fs_is_atomic_file(inode)) |
		set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0);
	else
		set_new_dnode(&dn, inode, NULL, NULL, 0);
2717 | |
	if (need_inplace_update(fio) &&
	    f2fs_lookup_read_extent_cache_block(inode, page->index,
						&fio->old_blkaddr)) {
		if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
						DATA_GENERIC_ENHANCE)) {
			f2fs_handle_error(fio->sbi,
					ERROR_INVALID_BLKADDR);
2725 | return -EFSCORRUPTED; |
2726 | } |
2727 | |
2728 | ipu_force = true; |
2729 | fio->need_lock = LOCK_DONE; |
2730 | goto got_it; |
2731 | } |
2732 | |
	/* Avoid deadlock between page->lock and f2fs_lock_op */
	if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
2735 | return -EAGAIN; |
2736 | |
	err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
2738 | if (err) |
2739 | goto out; |
2740 | |
2741 | fio->old_blkaddr = dn.data_blkaddr; |
2742 | |
2743 | /* This page is already truncated */ |
2744 | if (fio->old_blkaddr == NULL_ADDR) { |
2745 | ClearPageUptodate(page); |
2746 | clear_page_private_gcing(page); |
2747 | goto out_writepage; |
2748 | } |
2749 | got_it: |
	if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
	    !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
					DATA_GENERIC_ENHANCE)) {
		err = -EFSCORRUPTED;
		f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR);
2755 | goto out_writepage; |
2756 | } |
2757 | |
2758 | /* |
2759 | * If current allocation needs SSR, |
2760 | * it had better in-place writes for updated data. |
2761 | */ |
2762 | if (ipu_force || |
2763 | (__is_valid_data_blkaddr(blkaddr: fio->old_blkaddr) && |
2764 | need_inplace_update(fio))) { |
2765 | err = f2fs_encrypt_one_page(fio); |
2766 | if (err) |
2767 | goto out_writepage; |
2768 | |
2769 | set_page_writeback(page); |
		f2fs_put_dnode(&dn);
		if (fio->need_lock == LOCK_REQ)
			f2fs_unlock_op(fio->sbi);
2773 | err = f2fs_inplace_write_data(fio); |
2774 | if (err) { |
2775 | if (fscrypt_inode_uses_fs_layer_crypto(inode)) |
				fscrypt_finalize_bounce_page(&fio->encrypted_page);
2777 | if (PageWriteback(page)) |
2778 | end_page_writeback(page); |
2779 | } else { |
			set_inode_flag(inode, FI_UPDATE_WRITE);
2781 | } |
		trace_f2fs_do_write_data_page(fio->page, IPU);
2783 | return err; |
2784 | } |
2785 | |
2786 | if (fio->need_lock == LOCK_RETRY) { |
		if (!f2fs_trylock_op(fio->sbi)) {
2788 | err = -EAGAIN; |
2789 | goto out_writepage; |
2790 | } |
2791 | fio->need_lock = LOCK_REQ; |
2792 | } |
2793 | |
	err = f2fs_get_node_info(fio->sbi, dn.nid, &ni, false);
2795 | if (err) |
2796 | goto out_writepage; |
2797 | |
2798 | fio->version = ni.version; |
2799 | |
2800 | err = f2fs_encrypt_one_page(fio); |
2801 | if (err) |
2802 | goto out_writepage; |
2803 | |
2804 | set_page_writeback(page); |
2805 | |
2806 | if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR) |
		f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false);
2808 | |
2809 | /* LFS mode write path */ |
	f2fs_outplace_write_data(&dn, fio);
	trace_f2fs_do_write_data_page(page, OPU);
	set_inode_flag(inode, FI_APPEND_WRITE);
	if (page->index == 0)
		set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
2815 | out_writepage: |
	f2fs_put_dnode(&dn);
2817 | out: |
2818 | if (fio->need_lock == LOCK_REQ) |
		f2fs_unlock_op(fio->sbi);
2820 | return err; |
2821 | } |
2822 | |
2823 | int f2fs_write_single_data_page(struct page *page, int *submitted, |
2824 | struct bio **bio, |
2825 | sector_t *last_block, |
2826 | struct writeback_control *wbc, |
2827 | enum iostat_type io_type, |
2828 | int compr_blocks, |
2829 | bool allow_balance) |
2830 | { |
2831 | struct inode *inode = page->mapping->host; |
2832 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
2833 | loff_t i_size = i_size_read(inode); |
2834 | const pgoff_t end_index = ((unsigned long long)i_size) |
2835 | >> PAGE_SHIFT; |
2836 | loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT; |
2837 | unsigned offset = 0; |
2838 | bool need_balance_fs = false; |
2839 | bool quota_inode = IS_NOQUOTA(inode); |
2840 | int err = 0; |
2841 | struct f2fs_io_info fio = { |
2842 | .sbi = sbi, |
2843 | .ino = inode->i_ino, |
2844 | .type = DATA, |
2845 | .op = REQ_OP_WRITE, |
2846 | .op_flags = wbc_to_write_flags(wbc), |
2847 | .old_blkaddr = NULL_ADDR, |
2848 | .page = page, |
2849 | .encrypted_page = NULL, |
2850 | .submitted = 0, |
2851 | .compr_blocks = compr_blocks, |
2852 | .need_lock = LOCK_RETRY, |
2853 | .post_read = f2fs_post_read_required(inode) ? 1 : 0, |
2854 | .io_type = io_type, |
2855 | .io_wbc = wbc, |
2856 | .bio = bio, |
2857 | .last_block = last_block, |
2858 | }; |
2859 | |
	trace_f2fs_writepage(page, DATA);
2861 | |
	/* we should bypass data pages so that the kworker jobs can proceed */
2863 | if (unlikely(f2fs_cp_error(sbi))) { |
		mapping_set_error(page->mapping, -EIO);
		/*
		 * don't drop any dirty dentry pages for keeping the latest
		 * directory structure.
		 */
2869 | if (S_ISDIR(inode->i_mode) && |
		    !is_sbi_flag_set(sbi, SBI_IS_CLOSE))
2871 | goto redirty_out; |
2872 | |
2873 | /* keep data pages in remount-ro mode */ |
2874 | if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY) |
2875 | goto redirty_out; |
2876 | goto out; |
2877 | } |
2878 | |
2879 | if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) |
2880 | goto redirty_out; |
2881 | |
2882 | if (page->index < end_index || |
2883 | f2fs_verity_in_progress(inode) || |
2884 | compr_blocks) |
2885 | goto write; |
2886 | |
2887 | /* |
2888 | * If the offset is out-of-range of file size, |
2889 | * this page does not have to be written to disk. |
2890 | */ |
2891 | offset = i_size & (PAGE_SIZE - 1); |
2892 | if ((page->index >= end_index + 1) || !offset) |
2893 | goto out; |
2894 | |
	zero_user_segment(page, offset, PAGE_SIZE);
2896 | write: |
2897 | if (f2fs_is_drop_cache(inode)) |
2898 | goto out; |
2899 | |
2900 | /* Dentry/quota blocks are controlled by checkpoint */ |
2901 | if (S_ISDIR(inode->i_mode) || quota_inode) { |
2902 | /* |
2903 | * We need to wait for node_write to avoid block allocation during |
2904 | * checkpoint. This can only happen to quota writes which can cause |
2905 | * the below discard race condition. |
2906 | */ |
2907 | if (quota_inode) |
			f2fs_down_read(&sbi->node_write);
2909 | |
2910 | fio.need_lock = LOCK_DONE; |
		err = f2fs_do_write_data_page(&fio);
2912 | |
2913 | if (quota_inode) |
			f2fs_up_read(&sbi->node_write);
2915 | |
2916 | goto done; |
2917 | } |
2918 | |
2919 | if (!wbc->for_reclaim) |
2920 | need_balance_fs = true; |
	else if (has_not_enough_free_secs(sbi, 0, 0))
2922 | goto redirty_out; |
2923 | else |
		set_inode_flag(inode, FI_HOT_DATA);
2925 | |
2926 | err = -EAGAIN; |
2927 | if (f2fs_has_inline_data(inode)) { |
2928 | err = f2fs_write_inline_data(inode, page); |
2929 | if (!err) |
2930 | goto out; |
2931 | } |
2932 | |
2933 | if (err == -EAGAIN) { |
		err = f2fs_do_write_data_page(&fio);
		if (err == -EAGAIN) {
			fio.need_lock = LOCK_REQ;
			err = f2fs_do_write_data_page(&fio);
2938 | } |
2939 | } |
2940 | |
2941 | if (err) { |
2942 | file_set_keep_isize(inode); |
2943 | } else { |
		spin_lock(&F2FS_I(inode)->i_size_lock);
		if (F2FS_I(inode)->last_disk_size < psize)
			F2FS_I(inode)->last_disk_size = psize;
		spin_unlock(&F2FS_I(inode)->i_size_lock);
2948 | } |
2949 | |
2950 | done: |
2951 | if (err && err != -ENOENT) |
2952 | goto redirty_out; |
2953 | |
2954 | out: |
2955 | inode_dec_dirty_pages(inode); |
2956 | if (err) { |
2957 | ClearPageUptodate(page); |
2958 | clear_page_private_gcing(page); |
2959 | } |
2960 | |
2961 | if (wbc->for_reclaim) { |
		f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA);
		clear_inode_flag(inode, FI_HOT_DATA);
2964 | f2fs_remove_dirty_inode(inode); |
2965 | submitted = NULL; |
2966 | } |
2967 | unlock_page(page); |
2968 | if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && |
2969 | !F2FS_I(inode)->wb_task && allow_balance) |
		f2fs_balance_fs(sbi, need_balance_fs);
2971 | |
2972 | if (unlikely(f2fs_cp_error(sbi))) { |
		f2fs_submit_merged_write(sbi, DATA);
2974 | if (bio && *bio) |
2975 | f2fs_submit_merged_ipu_write(sbi, bio, NULL); |
2976 | submitted = NULL; |
2977 | } |
2978 | |
2979 | if (submitted) |
2980 | *submitted = fio.submitted; |
2981 | |
2982 | return 0; |
2983 | |
2984 | redirty_out: |
2985 | redirty_page_for_writepage(wbc, page); |
2986 | /* |
2987 | * pageout() in MM translates EAGAIN, so calls handle_write_error() |
2988 | * -> mapping_set_error() -> set_bit(AS_EIO, ...). |
2989 | * file_write_and_wait_range() will see EIO error, which is critical |
2990 | * to return value of fsync() followed by atomic_write failure to user. |
2991 | */ |
2992 | if (!err || wbc->for_reclaim) |
2993 | return AOP_WRITEPAGE_ACTIVATE; |
2994 | unlock_page(page); |
2995 | return err; |
2996 | } |
2997 | |
2998 | static int f2fs_write_data_page(struct page *page, |
2999 | struct writeback_control *wbc) |
3000 | { |
3001 | #ifdef CONFIG_F2FS_FS_COMPRESSION |
3002 | struct inode *inode = page->mapping->host; |
3003 | |
3004 | if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) |
3005 | goto out; |
3006 | |
3007 | if (f2fs_compressed_file(inode)) { |
		if (f2fs_is_compressed_cluster(inode, page->index)) {
3009 | redirty_page_for_writepage(wbc, page); |
3010 | return AOP_WRITEPAGE_ACTIVATE; |
3011 | } |
3012 | } |
3013 | out: |
3014 | #endif |
3015 | |
	return f2fs_write_single_data_page(page, NULL, NULL, NULL,
						wbc, FS_DATA_IO, 0, true);
3018 | } |
3019 | |
3020 | /* |
3021 | * This function was copied from write_cache_pages from mm/page-writeback.c. |
3022 | * The major change is making write step of cold data page separately from |
3023 | * warm/hot data page. |
3024 | */ |
3025 | static int f2fs_write_cache_pages(struct address_space *mapping, |
3026 | struct writeback_control *wbc, |
3027 | enum iostat_type io_type) |
3028 | { |
3029 | int ret = 0; |
3030 | int done = 0, retry = 0; |
3031 | struct page *pages_local[F2FS_ONSTACK_PAGES]; |
3032 | struct page **pages = pages_local; |
3033 | struct folio_batch fbatch; |
3034 | struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); |
3035 | struct bio *bio = NULL; |
3036 | sector_t last_block; |
3037 | #ifdef CONFIG_F2FS_FS_COMPRESSION |
3038 | struct inode *inode = mapping->host; |
3039 | struct compress_ctx cc = { |
3040 | .inode = inode, |
3041 | .log_cluster_size = F2FS_I(inode)->i_log_cluster_size, |
3042 | .cluster_size = F2FS_I(inode)->i_cluster_size, |
3043 | .cluster_idx = NULL_CLUSTER, |
3044 | .rpages = NULL, |
3045 | .nr_rpages = 0, |
3046 | .cpages = NULL, |
3047 | .valid_nr_cpages = 0, |
3048 | .rbuf = NULL, |
3049 | .cbuf = NULL, |
3050 | .rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size, |
3051 | .private = NULL, |
3052 | }; |
3053 | #endif |
3054 | int nr_folios, p, idx; |
3055 | int nr_pages; |
3056 | unsigned int max_pages = F2FS_ONSTACK_PAGES; |
3057 | pgoff_t index; |
3058 | pgoff_t end; /* Inclusive */ |
3059 | pgoff_t done_index; |
3060 | int range_whole = 0; |
3061 | xa_mark_t tag; |
3062 | int nwritten = 0; |
3063 | int submitted = 0; |
3064 | int i; |
3065 | |
3066 | #ifdef CONFIG_F2FS_FS_COMPRESSION |
3067 | if (f2fs_compressed_file(inode) && |
3068 | 1 << cc.log_cluster_size > F2FS_ONSTACK_PAGES) { |
		pages = f2fs_kzalloc(sbi, sizeof(struct page *) <<
				cc.log_cluster_size, GFP_NOFS | __GFP_NOFAIL);
3071 | max_pages = 1 << cc.log_cluster_size; |
3072 | } |
3073 | #endif |
3074 | |
	folio_batch_init(&fbatch);
3076 | |
	if (get_dirty_pages(mapping->host) <=
				SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
		set_inode_flag(mapping->host, FI_HOT_DATA);
	else
		clear_inode_flag(mapping->host, FI_HOT_DATA);
3082 | |
3083 | if (wbc->range_cyclic) { |
3084 | index = mapping->writeback_index; /* prev offset */ |
3085 | end = -1; |
3086 | } else { |
3087 | index = wbc->range_start >> PAGE_SHIFT; |
3088 | end = wbc->range_end >> PAGE_SHIFT; |
3089 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) |
3090 | range_whole = 1; |
3091 | } |
3092 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) |
3093 | tag = PAGECACHE_TAG_TOWRITE; |
3094 | else |
3095 | tag = PAGECACHE_TAG_DIRTY; |
3096 | retry: |
3097 | retry = 0; |
3098 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) |
		tag_pages_for_writeback(mapping, index, end);
3100 | done_index = index; |
3101 | while (!done && !retry && (index <= end)) { |
3102 | nr_pages = 0; |
3103 | again: |
		nr_folios = filemap_get_folios_tag(mapping, &index, end,
				tag, &fbatch);
3106 | if (nr_folios == 0) { |
3107 | if (nr_pages) |
3108 | goto write; |
3109 | break; |
3110 | } |
3111 | |
3112 | for (i = 0; i < nr_folios; i++) { |
3113 | struct folio *folio = fbatch.folios[i]; |
3114 | |
3115 | idx = 0; |
3116 | p = folio_nr_pages(folio); |
3117 | add_more: |
3118 | pages[nr_pages] = folio_page(folio, idx); |
3119 | folio_get(folio); |
3120 | if (++nr_pages == max_pages) { |
3121 | index = folio->index + idx + 1; |
				folio_batch_release(&fbatch);
3123 | goto write; |
3124 | } |
3125 | if (++idx < p) |
3126 | goto add_more; |
3127 | } |
		folio_batch_release(&fbatch);
3129 | goto again; |
3130 | write: |
3131 | for (i = 0; i < nr_pages; i++) { |
3132 | struct page *page = pages[i]; |
3133 | struct folio *folio = page_folio(page); |
3134 | bool need_readd; |
3135 | readd: |
3136 | need_readd = false; |
3137 | #ifdef CONFIG_F2FS_FS_COMPRESSION |
3138 | if (f2fs_compressed_file(inode)) { |
3139 | void *fsdata = NULL; |
3140 | struct page *pagep; |
3141 | int ret2; |
3142 | |
				ret = f2fs_init_compress_ctx(&cc);
3144 | if (ret) { |
3145 | done = 1; |
3146 | break; |
3147 | } |
3148 | |
				if (!f2fs_cluster_can_merge_page(&cc,
							folio->index)) {
					ret = f2fs_write_multi_pages(&cc,
						&submitted, wbc, io_type);
3153 | if (!ret) |
3154 | need_readd = true; |
3155 | goto result; |
3156 | } |
3157 | |
3158 | if (unlikely(f2fs_cp_error(sbi))) |
3159 | goto lock_folio; |
3160 | |
				if (!f2fs_cluster_is_empty(&cc))
					goto lock_folio;

				if (f2fs_all_cluster_page_ready(&cc,
					pages, i, nr_pages, true))
					goto lock_folio;

				ret2 = f2fs_prepare_compress_overwrite(
							inode, &pagep,
							folio->index, &fsdata);
				if (ret2 < 0) {
					ret = ret2;
					done = 1;
					break;
				} else if (ret2 &&
					(!f2fs_compress_write_end(inode,
						fsdata, folio->index, 1) ||
					 !f2fs_all_cluster_page_ready(&cc,
						pages, i, nr_pages,
						false))) {
3181 | retry = 1; |
3182 | break; |
3183 | } |
3184 | } |
3185 | #endif |
3186 | /* give a priority to WB_SYNC threads */ |
			if (atomic_read(&sbi->wb_sync_req[DATA]) &&
3188 | wbc->sync_mode == WB_SYNC_NONE) { |
3189 | done = 1; |
3190 | break; |
3191 | } |
3192 | #ifdef CONFIG_F2FS_FS_COMPRESSION |
3193 | lock_folio: |
3194 | #endif |
3195 | done_index = folio->index; |
3196 | retry_write: |
3197 | folio_lock(folio); |
3198 | |
3199 | if (unlikely(folio->mapping != mapping)) { |
3200 | continue_unlock: |
3201 | folio_unlock(folio); |
3202 | continue; |
3203 | } |
3204 | |
3205 | if (!folio_test_dirty(folio)) { |
3206 | /* someone wrote it for us */ |
3207 | goto continue_unlock; |
3208 | } |
3209 | |
3210 | if (folio_test_writeback(folio)) { |
3211 | if (wbc->sync_mode == WB_SYNC_NONE) |
3212 | goto continue_unlock; |
				f2fs_wait_on_page_writeback(&folio->page, DATA, true, true);
3214 | } |
3215 | |
3216 | if (!folio_clear_dirty_for_io(folio)) |
3217 | goto continue_unlock; |
3218 | |
3219 | #ifdef CONFIG_F2FS_FS_COMPRESSION |
3220 | if (f2fs_compressed_file(inode)) { |
3221 | folio_get(folio); |
				f2fs_compress_ctx_add_page(&cc, &folio->page);
3223 | continue; |
3224 | } |
3225 | #endif |
			ret = f2fs_write_single_data_page(&folio->page,
					&submitted, &bio, &last_block,
					wbc, io_type, 0, true);
3229 | if (ret == AOP_WRITEPAGE_ACTIVATE) |
3230 | folio_unlock(folio); |
3231 | #ifdef CONFIG_F2FS_FS_COMPRESSION |
3232 | result: |
3233 | #endif |
3234 | nwritten += submitted; |
3235 | wbc->nr_to_write -= submitted; |
3236 | |
3237 | if (unlikely(ret)) { |
3238 | /* |
3239 | * keep nr_to_write, since vfs uses this to |
3240 | * get # of written pages. |
3241 | */ |
3242 | if (ret == AOP_WRITEPAGE_ACTIVATE) { |
3243 | ret = 0; |
3244 | goto next; |
3245 | } else if (ret == -EAGAIN) { |
3246 | ret = 0; |
3247 | if (wbc->sync_mode == WB_SYNC_ALL) { |
3248 | f2fs_io_schedule_timeout( |
3249 | DEFAULT_IO_TIMEOUT); |
3250 | goto retry_write; |
3251 | } |
3252 | goto next; |
3253 | } |
3254 | done_index = folio_next_index(folio); |
3255 | done = 1; |
3256 | break; |
3257 | } |
3258 | |
3259 | if (wbc->nr_to_write <= 0 && |
3260 | wbc->sync_mode == WB_SYNC_NONE) { |
3261 | done = 1; |
3262 | break; |
3263 | } |
3264 | next: |
3265 | if (need_readd) |
3266 | goto readd; |
3267 | } |
		release_pages(pages, nr_pages);
3269 | cond_resched(); |
3270 | } |
3271 | #ifdef CONFIG_F2FS_FS_COMPRESSION |
	/* flush remaining pages in the compress cluster */
	if (f2fs_compressed_file(inode) && !f2fs_cluster_is_empty(&cc)) {
		ret = f2fs_write_multi_pages(&cc, &submitted, wbc, io_type);
3275 | nwritten += submitted; |
3276 | wbc->nr_to_write -= submitted; |
3277 | if (ret) { |
3278 | done = 1; |
3279 | retry = 0; |
3280 | } |
3281 | } |
3282 | if (f2fs_compressed_file(inode)) |
		f2fs_destroy_compress_ctx(&cc, false);
3284 | #endif |
3285 | if (retry) { |
3286 | index = 0; |
3287 | end = -1; |
3288 | goto retry; |
3289 | } |
3290 | if (wbc->range_cyclic && !done) |
3291 | done_index = 0; |
3292 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) |
3293 | mapping->writeback_index = done_index; |
3294 | |
3295 | if (nwritten) |
		f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
								NULL, 0, DATA);
3298 | /* submit cached bio of IPU write */ |
3299 | if (bio) |
		f2fs_submit_merged_ipu_write(sbi, &bio, NULL);
3301 | |
3302 | #ifdef CONFIG_F2FS_FS_COMPRESSION |
3303 | if (pages != pages_local) |
		kfree(pages);
3305 | #endif |
3306 | |
3307 | return ret; |
3308 | } |
3309 | |
3310 | static inline bool __should_serialize_io(struct inode *inode, |
3311 | struct writeback_control *wbc) |
3312 | { |
3313 | /* to avoid deadlock in path of data flush */ |
3314 | if (F2FS_I(inode)->wb_task) |
3315 | return false; |
3316 | |
3317 | if (!S_ISREG(inode->i_mode)) |
3318 | return false; |
3319 | if (IS_NOQUOTA(inode)) |
3320 | return false; |
3321 | |
3322 | if (f2fs_need_compress_data(inode)) |
3323 | return true; |
3324 | if (wbc->sync_mode != WB_SYNC_ALL) |
3325 | return true; |
	if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
3327 | return true; |
3328 | return false; |
3329 | } |
3330 | |
3331 | static int __f2fs_write_data_pages(struct address_space *mapping, |
3332 | struct writeback_control *wbc, |
3333 | enum iostat_type io_type) |
3334 | { |
3335 | struct inode *inode = mapping->host; |
3336 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
3337 | struct blk_plug plug; |
3338 | int ret; |
3339 | bool locked = false; |
3340 | |
3341 | /* deal with chardevs and other special file */ |
3342 | if (!mapping->a_ops->writepage) |
3343 | return 0; |
3344 | |
3345 | /* skip writing if there is no dirty page in this inode */ |
3346 | if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE) |
3347 | return 0; |
3348 | |
3349 | /* during POR, we don't need to trigger writepage at all. */ |
3350 | if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) |
3351 | goto skip_write; |
3352 | |
3353 | if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) && |
3354 | wbc->sync_mode == WB_SYNC_NONE && |
			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
			f2fs_available_free_memory(sbi, DIRTY_DENTS))
3357 | goto skip_write; |
3358 | |
3359 | /* skip writing in file defragment preparing stage */ |
	if (is_inode_flag_set(inode, FI_SKIP_WRITES))
3361 | goto skip_write; |
3362 | |
	trace_f2fs_writepages(mapping->host, wbc, DATA);
3364 | |
	/* to avoid splitting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
3366 | if (wbc->sync_mode == WB_SYNC_ALL) |
		atomic_inc(&sbi->wb_sync_req[DATA]);
	else if (atomic_read(&sbi->wb_sync_req[DATA])) {
3369 | /* to avoid potential deadlock */ |
3370 | if (current->plug) |
3371 | blk_finish_plug(current->plug); |
3372 | goto skip_write; |
3373 | } |
3374 | |
3375 | if (__should_serialize_io(inode, wbc)) { |
3376 | mutex_lock(&sbi->writepages); |
3377 | locked = true; |
3378 | } |
3379 | |
3380 | blk_start_plug(&plug); |
3381 | ret = f2fs_write_cache_pages(mapping, wbc, io_type); |
3382 | blk_finish_plug(&plug); |
3383 | |
3384 | if (locked) |
		mutex_unlock(&sbi->writepages);
3386 | |
3387 | if (wbc->sync_mode == WB_SYNC_ALL) |
		atomic_dec(&sbi->wb_sync_req[DATA]);
3389 | /* |
3390 | * if some pages were truncated, we cannot guarantee its mapping->host |
3391 | * to detect pending bios. |
3392 | */ |
3393 | |
3394 | f2fs_remove_dirty_inode(inode); |
3395 | return ret; |
3396 | |
3397 | skip_write: |
3398 | wbc->pages_skipped += get_dirty_pages(inode); |
	trace_f2fs_writepages(mapping->host, wbc, DATA);
3400 | return 0; |
3401 | } |
3402 | |
3403 | static int f2fs_write_data_pages(struct address_space *mapping, |
3404 | struct writeback_control *wbc) |
3405 | { |
3406 | struct inode *inode = mapping->host; |
3407 | |
	return __f2fs_write_data_pages(mapping, wbc,
			F2FS_I(inode)->cp_task == current ?
			FS_CP_DATA_IO : FS_DATA_IO);
3411 | } |
3412 | |
3413 | void f2fs_write_failed(struct inode *inode, loff_t to) |
3414 | { |
3415 | loff_t i_size = i_size_read(inode); |
3416 | |
3417 | if (IS_NOQUOTA(inode)) |
3418 | return; |
3419 | |
3420 | /* In the fs-verity case, f2fs_end_enable_verity() does the truncate */ |
3421 | if (to > i_size && !f2fs_verity_in_progress(inode)) { |
		f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
		filemap_invalidate_lock(inode->i_mapping);

		truncate_pagecache(inode, i_size);
		f2fs_truncate_blocks(inode, i_size, true);

		filemap_invalidate_unlock(inode->i_mapping);
		f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3430 | } |
3431 | } |
3432 | |
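/*
 * Resolve the block address backing @page for ->write_begin.  Inline data
 * is either served directly from the inode page or converted to a regular
 * block, and a new block is reserved for the hole case.  On success,
 * *blk_addr and *node_changed describe what write_begin still has to do.
 */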
static int prepare_write_begin(struct f2fs_sb_info *sbi,
			struct page *page, loff_t pos, unsigned len,
			block_t *blk_addr, bool *node_changed)
{
	struct inode *inode = page->mapping->host;
	pgoff_t index = page->index;
	struct dnode_of_data dn;
	struct page *ipage;
	bool locked = false;
	int flag = F2FS_GET_BLOCK_PRE_AIO;
	int err = 0;

	/*
	 * If a whole page is being written and we already preallocated all the
	 * blocks, then there is no need to get a block address now.
	 */
	if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL))
		return 0;

	/* f2fs_lock_op avoids race between write CP and convert_inline_page */
	if (f2fs_has_inline_data(inode)) {
		if (pos + len > MAX_INLINE_DATA(inode))
			flag = F2FS_GET_BLOCK_DEFAULT;
		f2fs_map_lock(sbi, flag);
		locked = true;
	} else if ((pos & PAGE_MASK) >= i_size_read(inode)) {
		f2fs_map_lock(sbi, flag);
		locked = true;
	}

restart:
	/* check inline_data */
	ipage = f2fs_get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto unlock_out;
	}

	set_new_dnode(&dn, inode, ipage, ipage, 0);

	if (f2fs_has_inline_data(inode)) {
		if (pos + len <= MAX_INLINE_DATA(inode)) {
			f2fs_do_read_inline_data(page, ipage);
			set_inode_flag(inode, FI_DATA_EXIST);
			if (inode->i_nlink)
				set_page_private_inline(ipage);
			goto out;
		}
		err = f2fs_convert_inline_page(&dn, page);
		if (err || dn.data_blkaddr != NULL_ADDR)
			goto out;
	}

	if (!f2fs_lookup_read_extent_cache_block(inode, index,
						 &dn.data_blkaddr)) {
		if (locked) {
			err = f2fs_reserve_block(&dn, index);
			goto out;
		}

		/* hole case */
		err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
		if (!err && dn.data_blkaddr != NULL_ADDR)
			goto out;
		f2fs_put_dnode(&dn);
		f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO);
		WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
		locked = true;
		goto restart;
	}
out:
	if (!err) {
		/* convert_inline_page can make node_changed */
		*blk_addr = dn.data_blkaddr;
		*node_changed = dn.node_changed;
	}
	f2fs_put_dnode(&dn);
unlock_out:
	if (locked)
		f2fs_map_unlock(sbi, flag);
	return err;
}

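/*
 * Helper for the atomic-write path: look up the block address for @index
 * without taking f2fs_map_lock().  A missing dnode is reported as a hole
 * (NULL_ADDR) rather than an error.
 */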
static int __find_data_block(struct inode *inode, pgoff_t index,
				block_t *blk_addr)
{
	struct dnode_of_data dn;
	struct page *ipage;
	int err = 0;

	ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
	if (IS_ERR(ipage))
		return PTR_ERR(ipage);

	set_new_dnode(&dn, inode, ipage, ipage, 0);

	if (!f2fs_lookup_read_extent_cache_block(inode, index,
						&dn.data_blkaddr)) {
		/* hole case */
		err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
		if (err) {
			dn.data_blkaddr = NULL_ADDR;
			err = 0;
		}
	}
	*blk_addr = dn.data_blkaddr;
	f2fs_put_dnode(&dn);
	return err;
}

static int __reserve_data_block(struct inode *inode, pgoff_t index,
				block_t *blk_addr, bool *node_changed)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	struct page *ipage;
	int err = 0;

	f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO);

	ipage = f2fs_get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto unlock_out;
	}
	set_new_dnode(&dn, inode, ipage, ipage, 0);

	if (!f2fs_lookup_read_extent_cache_block(dn.inode, index,
						&dn.data_blkaddr))
		err = f2fs_reserve_block(&dn, index);

	*blk_addr = dn.data_blkaddr;
	*node_changed = dn.node_changed;
	f2fs_put_dnode(&dn);

unlock_out:
	f2fs_map_unlock(sbi, F2FS_GET_BLOCK_PRE_AIO);
	return err;
}

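/*
 * For an atomic-write inode, updates go to the COW inode.  Check the COW
 * inode first; if the block is not there (and this is not a replace-style
 * atomic write), remember the original block address so the old data can
 * be read into the page, then reserve a new block in the COW inode.
 */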
static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi,
			struct page *page, loff_t pos, unsigned int len,
			block_t *blk_addr, bool *node_changed, bool *use_cow)
{
	struct inode *inode = page->mapping->host;
	struct inode *cow_inode = F2FS_I(inode)->cow_inode;
	pgoff_t index = page->index;
	int err = 0;
	block_t ori_blk_addr = NULL_ADDR;

	/* If pos is beyond the end of file, reserve a new block in COW inode */
	if ((pos & PAGE_MASK) >= i_size_read(inode))
		goto reserve_block;

	/* Look for the block in COW inode first */
	err = __find_data_block(cow_inode, index, blk_addr);
	if (err) {
		return err;
	} else if (*blk_addr != NULL_ADDR) {
		*use_cow = true;
		return 0;
	}

	if (is_inode_flag_set(inode, FI_ATOMIC_REPLACE))
		goto reserve_block;

	/* Look for the block in the original inode */
	err = __find_data_block(inode, index, &ori_blk_addr);
	if (err)
		return err;

reserve_block:
	/* Finally, we should reserve a new block in COW inode for the update */
	err = __reserve_data_block(cow_inode, index, blk_addr, node_changed);
	if (err)
		return err;
	inc_atomic_write_cnt(inode);

	if (ori_blk_addr != NULL_ADDR)
		*blk_addr = ori_blk_addr;
	return 0;
}

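/*
 * ->write_begin: lock and return (via *pagep) the pagecache page that the
 * caller will copy into.  Writes past page #0 convert inline data first
 * (lock order: page #0, then the inode page), and a partial overwrite of
 * an existing block reads the old contents in before returning.
 */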
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, struct page **pagep, void **fsdata)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page = NULL;
	pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
	bool need_balance = false;
	bool use_cow = false;
	block_t blkaddr = NULL_ADDR;
	int err = 0;

	trace_f2fs_write_begin(inode, pos, len);

	if (!f2fs_is_checkpoint_ready(sbi)) {
		err = -ENOSPC;
		goto fail;
	}

	/*
	 * We should check this at this moment to avoid deadlock on inode page
	 * and #0 page. The locking rule for inline_data conversion should be:
	 * lock_page(page #0) -> lock_page(inode_page)
	 */
	if (index != 0) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			goto fail;
	}

#ifdef CONFIG_F2FS_FS_COMPRESSION
	if (f2fs_compressed_file(inode)) {
		int ret;

		*fsdata = NULL;

		if (len == PAGE_SIZE && !(f2fs_is_atomic_file(inode)))
			goto repeat;

		ret = f2fs_prepare_compress_overwrite(inode, pagep,
							index, fsdata);
		if (ret < 0) {
			err = ret;
			goto fail;
		} else if (ret) {
			return 0;
		}
	}
#endif

repeat:
	/*
	 * Do not use grab_cache_page_write_begin() to avoid deadlock due to
	 * wait_for_stable_page. We will wait on that below with our IO control.
	 */
	page = f2fs_pagecache_get_page(mapping, index,
				FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
	if (!page) {
		err = -ENOMEM;
		goto fail;
	}

	/* TODO: cluster can be compressed due to race with .writepage */

	*pagep = page;

	if (f2fs_is_atomic_file(inode))
		err = prepare_atomic_write_begin(sbi, page, pos, len,
					&blkaddr, &need_balance, &use_cow);
	else
		err = prepare_write_begin(sbi, page, pos, len,
					&blkaddr, &need_balance);
	if (err)
		goto fail;

	if (need_balance && !IS_NOQUOTA(inode) &&
			has_not_enough_free_secs(sbi, 0, 0)) {
		unlock_page(page);
		f2fs_balance_fs(sbi, true);
		lock_page(page);
		if (page->mapping != mapping) {
			/* The page got truncated from under us */
			f2fs_put_page(page, 1);
			goto repeat;
		}
	}

	f2fs_wait_on_page_writeback(page, DATA, false, true);

	if (len == PAGE_SIZE || PageUptodate(page))
		return 0;

	if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) &&
	    !f2fs_verity_in_progress(inode)) {
		zero_user_segment(page, len, PAGE_SIZE);
		return 0;
	}

	if (blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		SetPageUptodate(page);
	} else {
		if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
				DATA_GENERIC_ENHANCE_READ)) {
			err = -EFSCORRUPTED;
			f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
			goto fail;
		}
		err = f2fs_submit_page_read(use_cow ?
				F2FS_I(inode)->cow_inode : inode, page,
				blkaddr, 0, true);
		if (err)
			goto fail;

		lock_page(page);
		if (unlikely(page->mapping != mapping)) {
			f2fs_put_page(page, 1);
			goto repeat;
		}
		if (unlikely(!PageUptodate(page))) {
			err = -EIO;
			goto fail;
		}
	}
	return 0;

fail:
	f2fs_put_page(page, 1);
	f2fs_write_failed(inode, pos + len);
	return err;
}

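/*
 * ->write_end: mark the copied-into page dirty and extend i_size (and the
 * COW inode's i_size for atomic writes) to cover the bytes actually
 * copied, then unlock and release the page.
 */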
static int f2fs_write_end(struct file *file,
			struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;

	trace_f2fs_write_end(inode, pos, len, copied);

	/*
	 * This should have come from len == PAGE_SIZE, and we expect copied
	 * to be PAGE_SIZE. Otherwise, we treat it as a zero-byte copy and
	 * let generic_perform_write() try to copy data again through copied=0.
	 */
	if (!PageUptodate(page)) {
		if (unlikely(copied != len))
			copied = 0;
		else
			SetPageUptodate(page);
	}

#ifdef CONFIG_F2FS_FS_COMPRESSION
	/* overwrite compressed file */
	if (f2fs_compressed_file(inode) && fsdata) {
		f2fs_compress_write_end(inode, fsdata, page->index, copied);
		f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);

		if (pos + copied > i_size_read(inode) &&
				!f2fs_verity_in_progress(inode))
			f2fs_i_size_write(inode, pos + copied);
		return copied;
	}
#endif

	if (!copied)
		goto unlock_out;

	set_page_dirty(page);

	if (pos + copied > i_size_read(inode) &&
	    !f2fs_verity_in_progress(inode)) {
		f2fs_i_size_write(inode, pos + copied);
		if (f2fs_is_atomic_file(inode))
			f2fs_i_size_write(F2FS_I(inode)->cow_inode,
					pos + copied);
	}
unlock_out:
	f2fs_put_page(page, 1);
	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	return copied;
}

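/*
 * Folio invalidation: anything short of a whole-folio invalidation is
 * ignored for non-meta/node inodes; a full invalidation drops the dirty
 * accounting and clears all of the folio's f2fs-private state.
 */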
void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
{
	struct inode *inode = folio->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
				(offset || length != folio_size(folio)))
		return;

	if (folio_test_dirty(folio)) {
		if (inode->i_ino == F2FS_META_INO(sbi)) {
			dec_page_count(sbi, F2FS_DIRTY_META);
		} else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
			dec_page_count(sbi, F2FS_DIRTY_NODES);
		} else {
			inode_dec_dirty_pages(inode);
			f2fs_remove_dirty_inode(inode);
		}
	}
	clear_page_private_all(&folio->page);
}

bool f2fs_release_folio(struct folio *folio, gfp_t wait)
{
	/* If this is a dirty folio, keep its private data */
	if (folio_test_dirty(folio))
		return false;

	clear_page_private_all(&folio->page);
	return true;
}

static bool f2fs_dirty_data_folio(struct address_space *mapping,
		struct folio *folio)
{
	struct inode *inode = mapping->host;

	trace_f2fs_set_page_dirty(&folio->page, DATA);

	if (!folio_test_uptodate(folio))
		folio_mark_uptodate(folio);
	BUG_ON(folio_test_swapcache(folio));

	if (filemap_dirty_folio(mapping, folio)) {
		f2fs_update_dirty_folio(inode, folio);
		return true;
	}
	return false;
}

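/*
 * bmap for a compressed file: resolve the cluster containing @block and
 * return its on-disk block number, or 0 if the cluster is compressed or
 * the address is invalid (compressed blocks have no stable 1:1 mapping).
 */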
static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block)
{
#ifdef CONFIG_F2FS_FS_COMPRESSION
	struct dnode_of_data dn;
	sector_t start_idx, blknr = 0;
	int ret;

	start_idx = round_down(block, F2FS_I(inode)->i_cluster_size);

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
	if (ret)
		return 0;

	if (dn.data_blkaddr != COMPRESS_ADDR) {
		dn.ofs_in_node += block - start_idx;
		blknr = f2fs_data_blkaddr(&dn);
		if (!__is_valid_data_blkaddr(blknr))
			blknr = 0;
	}

	f2fs_put_dnode(&dn);
	return blknr;
#else
	return 0;
#endif
}

static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;
	sector_t blknr = 0;

	if (f2fs_has_inline_data(inode))
		goto out;

	/* make sure allocating whole blocks */
	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
		filemap_write_and_wait(mapping);

	/* Block number less than F2FS MAX BLOCKS */
	if (unlikely(block >= max_file_blocks(inode)))
		goto out;

	if (f2fs_compressed_file(inode)) {
		blknr = f2fs_bmap_compress(inode, block);
	} else {
		struct f2fs_map_blocks map;

		memset(&map, 0, sizeof(map));
		map.m_lblk = block;
		map.m_len = 1;
		map.m_next_pgofs = NULL;
		map.m_seg_type = NO_CHECK_TYPE;

		if (!f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_BMAP))
			blknr = map.m_pblk;
	}
out:
	trace_f2fs_bmap(inode, block, blknr);
	return blknr;
}

#ifdef CONFIG_SWAP
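/*
 * Swapfiles must be laid out in section-aligned, fully-mapped extents.
 * Rewrite the blocks of a misaligned range into a freshly allocated
 * pinned (cold data) section so the swap code can address them directly.
 */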
static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
							unsigned int blkcnt)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	unsigned int blkofs;
	unsigned int blk_per_sec = BLKS_PER_SEC(sbi);
	unsigned int secidx = start_blk / blk_per_sec;
	unsigned int end_sec = secidx + blkcnt / blk_per_sec;
	int ret = 0;

	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	filemap_invalidate_lock(inode->i_mapping);

	set_inode_flag(inode, FI_ALIGNED_WRITE);
	set_inode_flag(inode, FI_OPU_WRITE);

	for (; secidx < end_sec; secidx++) {
		f2fs_down_write(&sbi->pin_sem);

		f2fs_lock_op(sbi);
		f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
		f2fs_unlock_op(sbi);

		set_inode_flag(inode, FI_SKIP_WRITES);

		for (blkofs = 0; blkofs < blk_per_sec; blkofs++) {
			struct page *page;
			unsigned int blkidx = secidx * blk_per_sec + blkofs;

			page = f2fs_get_lock_data_page(inode, blkidx, true);
			if (IS_ERR(page)) {
				f2fs_up_write(&sbi->pin_sem);
				ret = PTR_ERR(page);
				goto done;
			}

			set_page_dirty(page);
			f2fs_put_page(page, 1);
		}

		clear_inode_flag(inode, FI_SKIP_WRITES);

		ret = filemap_fdatawrite(inode->i_mapping);

		f2fs_up_write(&sbi->pin_sem);

		if (ret)
			break;
	}

done:
	clear_inode_flag(inode, FI_SKIP_WRITES);
	clear_inode_flag(inode, FI_OPU_WRITE);
	clear_inode_flag(inode, FI_ALIGNED_WRITE);

	filemap_invalidate_unlock(inode->i_mapping);
	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

	return ret;
}

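/*
 * Walk the file and register each section-aligned extent with the swap
 * core via add_swap_extent().  Holes are rejected, and misaligned extents
 * are first rewritten in place through f2fs_migrate_blocks().
 */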
static int check_swap_activate(struct swap_info_struct *sis,
				struct file *swap_file, sector_t *span)
{
	struct address_space *mapping = swap_file->f_mapping;
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	sector_t cur_lblock;
	sector_t last_lblock;
	sector_t pblock;
	sector_t lowest_pblock = -1;
	sector_t highest_pblock = 0;
	int nr_extents = 0;
	unsigned long nr_pblocks;
	unsigned int blks_per_sec = BLKS_PER_SEC(sbi);
	unsigned int sec_blks_mask = BLKS_PER_SEC(sbi) - 1;
	unsigned int not_aligned = 0;
	int ret = 0;

	/*
	 * Map all the blocks into the extent list. This code doesn't try
	 * to be very smart.
	 */
	cur_lblock = 0;
	last_lblock = bytes_to_blks(inode, i_size_read(inode));

	while (cur_lblock < last_lblock && cur_lblock < sis->max) {
		struct f2fs_map_blocks map;
retry:
		cond_resched();

		memset(&map, 0, sizeof(map));
		map.m_lblk = cur_lblock;
		map.m_len = last_lblock - cur_lblock;
		map.m_next_pgofs = NULL;
		map.m_next_extent = NULL;
		map.m_seg_type = NO_CHECK_TYPE;
		map.m_may_create = false;

		ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_FIEMAP);
		if (ret)
			goto out;

		/* hole */
		if (!(map.m_flags & F2FS_MAP_FLAGS)) {
			f2fs_err(sbi, "Swapfile has holes");
			ret = -EINVAL;
			goto out;
		}

		pblock = map.m_pblk;
		nr_pblocks = map.m_len;

		if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask ||
				nr_pblocks & sec_blks_mask) {
			not_aligned++;

			nr_pblocks = roundup(nr_pblocks, blks_per_sec);
			if (cur_lblock + nr_pblocks > sis->max)
				nr_pblocks -= blks_per_sec;

			if (!nr_pblocks) {
				/* this extent is last one */
				nr_pblocks = map.m_len;
				f2fs_warn(sbi, "Swapfile: last extent is not aligned to section");
				goto next;
			}

			ret = f2fs_migrate_blocks(inode, cur_lblock,
							nr_pblocks);
			if (ret)
				goto out;
			goto retry;
		}
next:
		if (cur_lblock + nr_pblocks >= sis->max)
			nr_pblocks = sis->max - cur_lblock;

		if (cur_lblock) {	/* exclude the header page */
			if (pblock < lowest_pblock)
				lowest_pblock = pblock;
			if (pblock + nr_pblocks - 1 > highest_pblock)
				highest_pblock = pblock + nr_pblocks - 1;
		}

		/*
		 * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
		 */
		ret = add_swap_extent(sis, cur_lblock, nr_pblocks, pblock);
		if (ret < 0)
			goto out;
		nr_extents += ret;
		cur_lblock += nr_pblocks;
	}
	ret = nr_extents;
	*span = 1 + highest_pblock - lowest_pblock;
	if (cur_lblock == 0)
		cur_lblock = 1;	/* force Empty message */
	sis->max = cur_lblock;
	sis->pages = cur_lblock - 1;
	sis->highest_bit = cur_lblock - 1;
out:
	if (not_aligned)
		f2fs_warn(sbi, "Swapfile (%u) is not aligned to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%lu * N)",
			  not_aligned, blks_per_sec * F2FS_BLKSIZE);
	return ret;
}

static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
				sector_t *span)
{
	struct inode *inode = file_inode(file);
	int ret;

	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	if (f2fs_readonly(F2FS_I_SB(inode)->sb))
		return -EROFS;

	if (f2fs_lfs_mode(F2FS_I_SB(inode))) {
		f2fs_err(F2FS_I_SB(inode),
			"Swapfile not supported in LFS mode");
		return -EINVAL;
	}

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		return ret;

	if (!f2fs_disable_compressed_file(inode))
		return -EINVAL;

	f2fs_precache_extents(inode);

	ret = check_swap_activate(sis, file, span);
	if (ret < 0)
		return ret;

	stat_inc_swapfile_inode(inode);
	set_inode_flag(inode, FI_PIN_FILE);
	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	return ret;
}

static void f2fs_swap_deactivate(struct file *file)
{
	struct inode *inode = file_inode(file);

	stat_dec_swapfile_inode(inode);
	clear_inode_flag(inode, FI_PIN_FILE);
}
#else
static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
				sector_t *span)
{
	return -EOPNOTSUPP;
}

static void f2fs_swap_deactivate(struct file *file)
{
}
#endif

const struct address_space_operations f2fs_dblock_aops = {
	.read_folio	= f2fs_read_data_folio,
	.readahead	= f2fs_readahead,
	.writepage	= f2fs_write_data_page,
	.writepages	= f2fs_write_data_pages,
	.write_begin	= f2fs_write_begin,
	.write_end	= f2fs_write_end,
	.dirty_folio	= f2fs_dirty_data_folio,
	.migrate_folio	= filemap_migrate_folio,
	.invalidate_folio = f2fs_invalidate_folio,
	.release_folio	= f2fs_release_folio,
	.bmap		= f2fs_bmap,
	.swap_activate	= f2fs_swap_activate,
	.swap_deactivate = f2fs_swap_deactivate,
};

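/*
 * Clear only the PAGECACHE_TAG_DIRTY xarray tag for @page, without
 * touching the page's own dirty flag, so writeback stops considering it.
 */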
void f2fs_clear_page_cache_dirty_tag(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	unsigned long flags;

	xa_lock_irqsave(&mapping->i_pages, flags);
	__xa_clear_mark(&mapping->i_pages, page_index(page),
						PAGECACHE_TAG_DIRTY);
	xa_unlock_irqrestore(&mapping->i_pages, flags);
}

int __init f2fs_init_post_read_processing(void)
{
	bio_post_read_ctx_cache =
		kmem_cache_create("f2fs_bio_post_read_ctx",
				  sizeof(struct bio_post_read_ctx), 0, 0, NULL);
	if (!bio_post_read_ctx_cache)
		goto fail;
	bio_post_read_ctx_pool =
		mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
					 bio_post_read_ctx_cache);
	if (!bio_post_read_ctx_pool)
		goto fail_free_cache;
	return 0;

fail_free_cache:
	kmem_cache_destroy(bio_post_read_ctx_cache);
fail:
	return -ENOMEM;
}

void f2fs_destroy_post_read_processing(void)
{
	mempool_destroy(bio_post_read_ctx_pool);
	kmem_cache_destroy(bio_post_read_ctx_cache);
}

int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi)
{
	if (!f2fs_sb_has_encrypt(sbi) &&
		!f2fs_sb_has_verity(sbi) &&
		!f2fs_sb_has_compression(sbi))
		return 0;

	sbi->post_read_wq = alloc_workqueue("f2fs_post_read_wq",
						 WQ_UNBOUND | WQ_HIGHPRI,
						 num_online_cpus());
	return sbi->post_read_wq ? 0 : -ENOMEM;
}

void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi)
{
	if (sbi->post_read_wq)
		destroy_workqueue(sbi->post_read_wq);
}

int __init f2fs_init_bio_entry_cache(void)
{
	bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab",
			sizeof(struct bio_entry));
	return bio_entry_slab ? 0 : -ENOMEM;
}

void f2fs_destroy_bio_entry_cache(void)
{
	kmem_cache_destroy(bio_entry_slab);
}

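/*
 * iomap_begin for direct I/O: translate a byte range into a single mapped
 * extent or hole.  Inline-crypto constraints may shorten the returned
 * mapping so that DUNs stay contiguous within each bio.
 */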
static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
		unsigned int flags, struct iomap *iomap,
		struct iomap *srcmap)
{
	struct f2fs_map_blocks map = {};
	pgoff_t next_pgofs = 0;
	int err;

	map.m_lblk = bytes_to_blks(inode, offset);
	map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1;
	map.m_next_pgofs = &next_pgofs;
	map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
	if (flags & IOMAP_WRITE)
		map.m_may_create = true;

	err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DIO);
	if (err)
		return err;

	iomap->offset = blks_to_bytes(inode, map.m_lblk);

	/*
	 * When inline encryption is enabled, sometimes I/O to an encrypted file
	 * has to be broken up to guarantee DUN contiguity. Handle this by
	 * limiting the length of the mapping returned.
	 */
	map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len);

	/*
	 * We should never see delalloc or compressed extents here based on
	 * prior flushing and checks.
	 */
	if (WARN_ON_ONCE(map.m_pblk == NEW_ADDR))
		return -EINVAL;
	if (WARN_ON_ONCE(map.m_pblk == COMPRESS_ADDR))
		return -EINVAL;

	if (map.m_pblk != NULL_ADDR) {
		iomap->length = blks_to_bytes(inode, map.m_len);
		iomap->type = IOMAP_MAPPED;
		iomap->flags |= IOMAP_F_MERGED;
		iomap->bdev = map.m_bdev;
		iomap->addr = blks_to_bytes(inode, map.m_pblk);
	} else {
		if (flags & IOMAP_WRITE)
			return -ENOTBLK;
		iomap->length = blks_to_bytes(inode, next_pgofs) -
				iomap->offset;
		iomap->type = IOMAP_HOLE;
		iomap->addr = IOMAP_NULL_ADDR;
	}

	if (map.m_flags & F2FS_MAP_NEW)
		iomap->flags |= IOMAP_F_NEW;
	if ((inode->i_state & I_DIRTY_DATASYNC) ||
	    offset + length > i_size_read(inode))
		iomap->flags |= IOMAP_F_DIRTY;

	return 0;
}

const struct iomap_ops f2fs_iomap_ops = {
	.iomap_begin	= f2fs_iomap_begin,
};

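/*
 * Usage note (illustrative, not part of this file): the direct I/O paths
 * in fs/f2fs/file.c hand this ops table to the iomap core, roughly as
 *
 *	iomap_dio_rw(iocb, iter, &f2fs_iomap_ops, ...);
 *
 * so every bio built for DIO is mapped through f2fs_iomap_begin() above.
 */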