1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * fs/f2fs/recovery.c |
4 | * |
5 | * Copyright (c) 2012 Samsung Electronics Co., Ltd. |
6 | * http://www.samsung.com/ |
7 | */ |
8 | #include <asm/unaligned.h> |
9 | #include <linux/fs.h> |
10 | #include <linux/f2fs_fs.h> |
11 | #include <linux/sched/mm.h> |
12 | #include "f2fs.h" |
13 | #include "node.h" |
14 | #include "segment.h" |
15 | |
16 | /* |
17 | * Roll forward recovery scenarios. |
18 | * |
19 | * [Term] F: fsync_mark, D: dentry_mark |
20 | * |
21 | * 1. inode(x) | CP | inode(x) | dnode(F) |
22 | * -> Update the latest inode(x). |
23 | * |
24 | * 2. inode(x) | CP | inode(F) | dnode(F) |
25 | * -> No problem. |
26 | * |
27 | * 3. inode(x) | CP | dnode(F) | inode(x) |
28 | * -> Recover to the latest dnode(F), and drop the last inode(x) |
29 | * |
30 | * 4. inode(x) | CP | dnode(F) | inode(F) |
31 | * -> No problem. |
32 | * |
33 | * 5. CP | inode(x) | dnode(F) |
34 | * -> The inode(DF) was missing. Should drop this dnode(F). |
35 | * |
36 | * 6. CP | inode(DF) | dnode(F) |
37 | * -> No problem. |
38 | * |
39 | * 7. CP | dnode(F) | inode(DF) |
40 | * -> If f2fs_iget fails, then goto next to find inode(DF). |
41 | * |
42 | * 8. CP | dnode(F) | inode(x) |
43 | * -> If f2fs_iget fails, then goto next to find inode(DF). |
44 | * But it will fail due to no inode(DF). |
45 | */ |
46 | |
/* Slab cache for struct fsync_inode_entry; set up in f2fs_create_recovery_cache(). */
47 | static struct kmem_cache *fsync_entry_slab; |
48 | |
/* Slab that casefolded-name buffers (fname->cf_name.name) are returned to; declared extern here. */
49 | #if IS_ENABLED(CONFIG_UNICODE) |
50 | extern struct kmem_cache *f2fs_cf_name_slab; |
51 | #endif |
52 | |
53 | bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi) |
54 | { |
55 | s64 nalloc = percpu_counter_sum_positive(fbc: &sbi->alloc_valid_block_count); |
56 | |
57 | if (sbi->last_valid_block_count + nalloc > sbi->user_block_count) |
58 | return false; |
59 | if (NM_I(sbi)->max_rf_node_blocks && |
60 | percpu_counter_sum_positive(fbc: &sbi->rf_node_block_count) >= |
61 | NM_I(sbi)->max_rf_node_blocks) |
62 | return false; |
63 | return true; |
64 | } |
65 | |
66 | static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, |
67 | nid_t ino) |
68 | { |
69 | struct fsync_inode_entry *entry; |
70 | |
71 | list_for_each_entry(entry, head, list) |
72 | if (entry->inode->i_ino == ino) |
73 | return entry; |
74 | |
75 | return NULL; |
76 | } |
77 | |
78 | static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi, |
79 | struct list_head *head, nid_t ino, bool quota_inode) |
80 | { |
81 | struct inode *inode; |
82 | struct fsync_inode_entry *entry; |
83 | int err; |
84 | |
85 | inode = f2fs_iget_retry(sb: sbi->sb, ino); |
86 | if (IS_ERR(ptr: inode)) |
87 | return ERR_CAST(ptr: inode); |
88 | |
89 | err = f2fs_dquot_initialize(inode); |
90 | if (err) |
91 | goto err_out; |
92 | |
93 | if (quota_inode) { |
94 | err = dquot_alloc_inode(inode); |
95 | if (err) |
96 | goto err_out; |
97 | } |
98 | |
99 | entry = f2fs_kmem_cache_alloc(cachep: fsync_entry_slab, |
100 | GFP_F2FS_ZERO, nofail: true, NULL); |
101 | entry->inode = inode; |
102 | list_add_tail(new: &entry->list, head); |
103 | |
104 | return entry; |
105 | err_out: |
106 | iput(inode); |
107 | return ERR_PTR(error: err); |
108 | } |
109 | |
110 | static void del_fsync_inode(struct fsync_inode_entry *entry, int drop) |
111 | { |
112 | if (drop) { |
113 | /* inode should not be recovered, drop it */ |
114 | f2fs_inode_synced(inode: entry->inode); |
115 | } |
116 | iput(entry->inode); |
117 | list_del(entry: &entry->list); |
118 | kmem_cache_free(s: fsync_entry_slab, objp: entry); |
119 | } |
120 | |
121 | static int init_recovered_filename(const struct inode *dir, |
122 | struct f2fs_inode *raw_inode, |
123 | struct f2fs_filename *fname, |
124 | struct qstr *usr_fname) |
125 | { |
126 | int err; |
127 | |
128 | memset(fname, 0, sizeof(*fname)); |
129 | fname->disk_name.len = le32_to_cpu(raw_inode->i_namelen); |
130 | fname->disk_name.name = raw_inode->i_name; |
131 | |
132 | if (WARN_ON(fname->disk_name.len > F2FS_NAME_LEN)) |
133 | return -ENAMETOOLONG; |
134 | |
135 | if (!IS_ENCRYPTED(dir)) { |
136 | usr_fname->name = fname->disk_name.name; |
137 | usr_fname->len = fname->disk_name.len; |
138 | fname->usr_fname = usr_fname; |
139 | } |
140 | |
141 | /* Compute the hash of the filename */ |
142 | if (IS_ENCRYPTED(dir) && IS_CASEFOLDED(dir)) { |
143 | /* |
144 | * In this case the hash isn't computable without the key, so it |
145 | * was saved on-disk. |
146 | */ |
147 | if (fname->disk_name.len + sizeof(f2fs_hash_t) > F2FS_NAME_LEN) |
148 | return -EINVAL; |
149 | fname->hash = get_unaligned((f2fs_hash_t *) |
150 | &raw_inode->i_name[fname->disk_name.len]); |
151 | } else if (IS_CASEFOLDED(dir)) { |
152 | err = f2fs_init_casefolded_name(dir, fname); |
153 | if (err) |
154 | return err; |
155 | f2fs_hash_filename(dir, fname); |
156 | #if IS_ENABLED(CONFIG_UNICODE) |
157 | /* Case-sensitive match is fine for recovery */ |
158 | kmem_cache_free(s: f2fs_cf_name_slab, objp: fname->cf_name.name); |
159 | fname->cf_name.name = NULL; |
160 | #endif |
161 | } else { |
162 | f2fs_hash_filename(dir, fname); |
163 | } |
164 | return 0; |
165 | } |
166 | |
167 | static int recover_dentry(struct inode *inode, struct page *ipage, |
168 | struct list_head *dir_list) |
169 | { |
170 | struct f2fs_inode *raw_inode = F2FS_INODE(page: ipage); |
171 | nid_t pino = le32_to_cpu(raw_inode->i_pino); |
172 | struct f2fs_dir_entry *de; |
173 | struct f2fs_filename fname; |
174 | struct qstr usr_fname; |
175 | struct page *page; |
176 | struct inode *dir, *einode; |
177 | struct fsync_inode_entry *entry; |
178 | int err = 0; |
179 | char *name; |
180 | |
181 | entry = get_fsync_inode(head: dir_list, ino: pino); |
182 | if (!entry) { |
183 | entry = add_fsync_inode(sbi: F2FS_I_SB(inode), head: dir_list, |
184 | ino: pino, quota_inode: false); |
185 | if (IS_ERR(ptr: entry)) { |
186 | dir = ERR_CAST(ptr: entry); |
187 | err = PTR_ERR(ptr: entry); |
188 | goto out; |
189 | } |
190 | } |
191 | |
192 | dir = entry->inode; |
193 | err = init_recovered_filename(dir, raw_inode, fname: &fname, usr_fname: &usr_fname); |
194 | if (err) |
195 | goto out; |
196 | retry: |
197 | de = __f2fs_find_entry(dir, fname: &fname, res_page: &page); |
198 | if (de && inode->i_ino == le32_to_cpu(de->ino)) |
199 | goto out_put; |
200 | |
201 | if (de) { |
202 | einode = f2fs_iget_retry(sb: inode->i_sb, le32_to_cpu(de->ino)); |
203 | if (IS_ERR(ptr: einode)) { |
204 | WARN_ON(1); |
205 | err = PTR_ERR(ptr: einode); |
206 | if (err == -ENOENT) |
207 | err = -EEXIST; |
208 | goto out_put; |
209 | } |
210 | |
211 | err = f2fs_dquot_initialize(inode: einode); |
212 | if (err) { |
213 | iput(einode); |
214 | goto out_put; |
215 | } |
216 | |
217 | err = f2fs_acquire_orphan_inode(sbi: F2FS_I_SB(inode)); |
218 | if (err) { |
219 | iput(einode); |
220 | goto out_put; |
221 | } |
222 | f2fs_delete_entry(dentry: de, page, dir, inode: einode); |
223 | iput(einode); |
224 | goto retry; |
225 | } else if (IS_ERR(ptr: page)) { |
226 | err = PTR_ERR(ptr: page); |
227 | } else { |
228 | err = f2fs_add_dentry(dir, fname: &fname, inode, |
229 | ino: inode->i_ino, mode: inode->i_mode); |
230 | } |
231 | if (err == -ENOMEM) |
232 | goto retry; |
233 | goto out; |
234 | |
235 | out_put: |
236 | f2fs_put_page(page, unlock: 0); |
237 | out: |
238 | if (file_enc_name(inode)) |
239 | name = "<encrypted>" ; |
240 | else |
241 | name = raw_inode->i_name; |
242 | f2fs_notice(F2FS_I_SB(inode), "%s: ino = %x, name = %s, dir = %lx, err = %d" , |
243 | __func__, ino_of_node(ipage), name, |
244 | IS_ERR(dir) ? 0 : dir->i_ino, err); |
245 | return err; |
246 | } |
247 | |
248 | static int recover_quota_data(struct inode *inode, struct page *page) |
249 | { |
250 | struct f2fs_inode *raw = F2FS_INODE(page); |
251 | struct iattr attr; |
252 | uid_t i_uid = le32_to_cpu(raw->i_uid); |
253 | gid_t i_gid = le32_to_cpu(raw->i_gid); |
254 | int err; |
255 | |
256 | memset(&attr, 0, sizeof(attr)); |
257 | |
258 | attr.ia_vfsuid = VFSUIDT_INIT(make_kuid(inode->i_sb->s_user_ns, i_uid)); |
259 | attr.ia_vfsgid = VFSGIDT_INIT(make_kgid(inode->i_sb->s_user_ns, i_gid)); |
260 | |
261 | if (!vfsuid_eq(left: attr.ia_vfsuid, right: i_uid_into_vfsuid(idmap: &nop_mnt_idmap, inode))) |
262 | attr.ia_valid |= ATTR_UID; |
263 | if (!vfsgid_eq(left: attr.ia_vfsgid, right: i_gid_into_vfsgid(idmap: &nop_mnt_idmap, inode))) |
264 | attr.ia_valid |= ATTR_GID; |
265 | |
266 | if (!attr.ia_valid) |
267 | return 0; |
268 | |
269 | err = dquot_transfer(idmap: &nop_mnt_idmap, inode, iattr: &attr); |
270 | if (err) |
271 | set_sbi_flag(sbi: F2FS_I_SB(inode), type: SBI_QUOTA_NEED_REPAIR); |
272 | return err; |
273 | } |
274 | |
275 | static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri) |
276 | { |
277 | if (ri->i_inline & F2FS_PIN_FILE) |
278 | set_inode_flag(inode, flag: FI_PIN_FILE); |
279 | else |
280 | clear_inode_flag(inode, flag: FI_PIN_FILE); |
281 | if (ri->i_inline & F2FS_DATA_EXIST) |
282 | set_inode_flag(inode, flag: FI_DATA_EXIST); |
283 | else |
284 | clear_inode_flag(inode, flag: FI_DATA_EXIST); |
285 | } |
286 | |
287 | static int recover_inode(struct inode *inode, struct page *page) |
288 | { |
289 | struct f2fs_inode *raw = F2FS_INODE(page); |
290 | char *name; |
291 | int err; |
292 | |
293 | inode->i_mode = le16_to_cpu(raw->i_mode); |
294 | |
295 | err = recover_quota_data(inode, page); |
296 | if (err) |
297 | return err; |
298 | |
299 | i_uid_write(inode, le32_to_cpu(raw->i_uid)); |
300 | i_gid_write(inode, le32_to_cpu(raw->i_gid)); |
301 | |
302 | if (raw->i_inline & F2FS_EXTRA_ATTR) { |
303 | if (f2fs_sb_has_project_quota(sbi: F2FS_I_SB(inode)) && |
304 | F2FS_FITS_IN_INODE(raw, le16_to_cpu(raw->i_extra_isize), |
305 | i_projid)) { |
306 | projid_t i_projid; |
307 | kprojid_t kprojid; |
308 | |
309 | i_projid = (projid_t)le32_to_cpu(raw->i_projid); |
310 | kprojid = make_kprojid(from: &init_user_ns, projid: i_projid); |
311 | |
312 | if (!projid_eq(left: kprojid, right: F2FS_I(inode)->i_projid)) { |
313 | err = f2fs_transfer_project_quota(inode, |
314 | kprojid); |
315 | if (err) |
316 | return err; |
317 | F2FS_I(inode)->i_projid = kprojid; |
318 | } |
319 | } |
320 | } |
321 | |
322 | f2fs_i_size_write(inode, le64_to_cpu(raw->i_size)); |
323 | inode_set_atime(inode, le64_to_cpu(raw->i_atime), |
324 | le32_to_cpu(raw->i_atime_nsec)); |
325 | inode_set_ctime(inode, le64_to_cpu(raw->i_ctime), |
326 | le32_to_cpu(raw->i_ctime_nsec)); |
327 | inode_set_mtime(inode, le64_to_cpu(raw->i_mtime), |
328 | le32_to_cpu(raw->i_mtime_nsec)); |
329 | |
330 | F2FS_I(inode)->i_advise = raw->i_advise; |
331 | F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags); |
332 | f2fs_set_inode_flags(inode); |
333 | F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = |
334 | le16_to_cpu(raw->i_gc_failures); |
335 | |
336 | recover_inline_flags(inode, ri: raw); |
337 | |
338 | f2fs_mark_inode_dirty_sync(inode, sync: true); |
339 | |
340 | if (file_enc_name(inode)) |
341 | name = "<encrypted>" ; |
342 | else |
343 | name = F2FS_INODE(page)->i_name; |
344 | |
345 | f2fs_notice(F2FS_I_SB(inode), "recover_inode: ino = %x, name = %s, inline = %x" , |
346 | ino_of_node(page), name, raw->i_inline); |
347 | return 0; |
348 | } |
349 | |
350 | static unsigned int adjust_por_ra_blocks(struct f2fs_sb_info *sbi, |
351 | unsigned int ra_blocks, unsigned int blkaddr, |
352 | unsigned int next_blkaddr) |
353 | { |
354 | if (blkaddr + 1 == next_blkaddr) |
355 | ra_blocks = min_t(unsigned int, RECOVERY_MAX_RA_BLOCKS, |
356 | ra_blocks * 2); |
357 | else if (next_blkaddr % sbi->blocks_per_seg) |
358 | ra_blocks = max_t(unsigned int, RECOVERY_MIN_RA_BLOCKS, |
359 | ra_blocks / 2); |
360 | return ra_blocks; |
361 | } |
362 | |
363 | /* Detect looped node chain with Floyd's cycle detection algorithm. */ |
364 | static int sanity_check_node_chain(struct f2fs_sb_info *sbi, block_t blkaddr, |
365 | block_t *blkaddr_fast, bool *is_detecting) |
366 | { |
367 | unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS; |
368 | struct page *page = NULL; |
369 | int i; |
370 | |
371 | if (!*is_detecting) |
372 | return 0; |
373 | |
374 | for (i = 0; i < 2; i++) { |
375 | if (!f2fs_is_valid_blkaddr(sbi, blkaddr: *blkaddr_fast, type: META_POR)) { |
376 | *is_detecting = false; |
377 | return 0; |
378 | } |
379 | |
380 | page = f2fs_get_tmp_page(sbi, index: *blkaddr_fast); |
381 | if (IS_ERR(ptr: page)) |
382 | return PTR_ERR(ptr: page); |
383 | |
384 | if (!is_recoverable_dnode(page)) { |
385 | f2fs_put_page(page, unlock: 1); |
386 | *is_detecting = false; |
387 | return 0; |
388 | } |
389 | |
390 | ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, blkaddr: *blkaddr_fast, |
391 | next_blkaddr: next_blkaddr_of_node(node_page: page)); |
392 | |
393 | *blkaddr_fast = next_blkaddr_of_node(node_page: page); |
394 | f2fs_put_page(page, unlock: 1); |
395 | |
396 | f2fs_ra_meta_pages_cond(sbi, index: *blkaddr_fast, ra_blocks); |
397 | } |
398 | |
399 | if (*blkaddr_fast == blkaddr) { |
400 | f2fs_notice(sbi, "%s: Detect looped node chain on blkaddr:%u." |
401 | " Run fsck to fix it." , __func__, blkaddr); |
402 | return -EINVAL; |
403 | } |
404 | return 0; |
405 | } |
406 | |
407 | static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, |
408 | bool check_only) |
409 | { |
410 | struct curseg_info *curseg; |
411 | struct page *page = NULL; |
412 | block_t blkaddr, blkaddr_fast; |
413 | bool is_detecting = true; |
414 | int err = 0; |
415 | |
416 | /* get node pages in the current segment */ |
417 | curseg = CURSEG_I(sbi, type: CURSEG_WARM_NODE); |
418 | blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); |
419 | blkaddr_fast = blkaddr; |
420 | |
421 | while (1) { |
422 | struct fsync_inode_entry *entry; |
423 | |
424 | if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type: META_POR)) |
425 | return 0; |
426 | |
427 | page = f2fs_get_tmp_page(sbi, index: blkaddr); |
428 | if (IS_ERR(ptr: page)) { |
429 | err = PTR_ERR(ptr: page); |
430 | break; |
431 | } |
432 | |
433 | if (!is_recoverable_dnode(page)) { |
434 | f2fs_put_page(page, unlock: 1); |
435 | break; |
436 | } |
437 | |
438 | if (!is_fsync_dnode(page)) |
439 | goto next; |
440 | |
441 | entry = get_fsync_inode(head, ino: ino_of_node(node_page: page)); |
442 | if (!entry) { |
443 | bool quota_inode = false; |
444 | |
445 | if (!check_only && |
446 | IS_INODE(page) && is_dent_dnode(page)) { |
447 | err = f2fs_recover_inode_page(sbi, page); |
448 | if (err) { |
449 | f2fs_put_page(page, unlock: 1); |
450 | break; |
451 | } |
452 | quota_inode = true; |
453 | } |
454 | |
455 | /* |
456 | * CP | dnode(F) | inode(DF) |
457 | * For this case, we should not give up now. |
458 | */ |
459 | entry = add_fsync_inode(sbi, head, ino: ino_of_node(node_page: page), |
460 | quota_inode); |
461 | if (IS_ERR(ptr: entry)) { |
462 | err = PTR_ERR(ptr: entry); |
463 | if (err == -ENOENT) |
464 | goto next; |
465 | f2fs_put_page(page, unlock: 1); |
466 | break; |
467 | } |
468 | } |
469 | entry->blkaddr = blkaddr; |
470 | |
471 | if (IS_INODE(page) && is_dent_dnode(page)) |
472 | entry->last_dentry = blkaddr; |
473 | next: |
474 | /* check next segment */ |
475 | blkaddr = next_blkaddr_of_node(node_page: page); |
476 | f2fs_put_page(page, unlock: 1); |
477 | |
478 | err = sanity_check_node_chain(sbi, blkaddr, blkaddr_fast: &blkaddr_fast, |
479 | is_detecting: &is_detecting); |
480 | if (err) |
481 | break; |
482 | } |
483 | return err; |
484 | } |
485 | |
486 | static void destroy_fsync_dnodes(struct list_head *head, int drop) |
487 | { |
488 | struct fsync_inode_entry *entry, *tmp; |
489 | |
490 | list_for_each_entry_safe(entry, tmp, head, list) |
491 | del_fsync_inode(entry, drop); |
492 | } |
493 | |
494 | static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, |
495 | block_t blkaddr, struct dnode_of_data *dn) |
496 | { |
497 | struct seg_entry *sentry; |
498 | unsigned int segno = GET_SEGNO(sbi, blkaddr); |
499 | unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); |
500 | struct f2fs_summary_block *sum_node; |
501 | struct f2fs_summary sum; |
502 | struct page *sum_page, *node_page; |
503 | struct dnode_of_data tdn = *dn; |
504 | nid_t ino, nid; |
505 | struct inode *inode; |
506 | unsigned int offset, ofs_in_node, max_addrs; |
507 | block_t bidx; |
508 | int i; |
509 | |
510 | sentry = get_seg_entry(sbi, segno); |
511 | if (!f2fs_test_bit(nr: blkoff, addr: sentry->cur_valid_map)) |
512 | return 0; |
513 | |
514 | /* Get the previous summary */ |
515 | for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { |
516 | struct curseg_info *curseg = CURSEG_I(sbi, type: i); |
517 | |
518 | if (curseg->segno == segno) { |
519 | sum = curseg->sum_blk->entries[blkoff]; |
520 | goto got_it; |
521 | } |
522 | } |
523 | |
524 | sum_page = f2fs_get_sum_page(sbi, segno); |
525 | if (IS_ERR(ptr: sum_page)) |
526 | return PTR_ERR(ptr: sum_page); |
527 | sum_node = (struct f2fs_summary_block *)page_address(sum_page); |
528 | sum = sum_node->entries[blkoff]; |
529 | f2fs_put_page(page: sum_page, unlock: 1); |
530 | got_it: |
531 | /* Use the locked dnode page and inode */ |
532 | nid = le32_to_cpu(sum.nid); |
533 | ofs_in_node = le16_to_cpu(sum.ofs_in_node); |
534 | |
535 | max_addrs = ADDRS_PER_PAGE(dn->node_page, dn->inode); |
536 | if (ofs_in_node >= max_addrs) { |
537 | f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%lu, nid:%u, max:%u" , |
538 | ofs_in_node, dn->inode->i_ino, nid, max_addrs); |
539 | f2fs_handle_error(sbi, error: ERROR_INCONSISTENT_SUMMARY); |
540 | return -EFSCORRUPTED; |
541 | } |
542 | |
543 | if (dn->inode->i_ino == nid) { |
544 | tdn.nid = nid; |
545 | if (!dn->inode_page_locked) |
546 | lock_page(page: dn->inode_page); |
547 | tdn.node_page = dn->inode_page; |
548 | tdn.ofs_in_node = ofs_in_node; |
549 | goto truncate_out; |
550 | } else if (dn->nid == nid) { |
551 | tdn.ofs_in_node = ofs_in_node; |
552 | goto truncate_out; |
553 | } |
554 | |
555 | /* Get the node page */ |
556 | node_page = f2fs_get_node_page(sbi, nid); |
557 | if (IS_ERR(ptr: node_page)) |
558 | return PTR_ERR(ptr: node_page); |
559 | |
560 | offset = ofs_of_node(node_page); |
561 | ino = ino_of_node(node_page); |
562 | f2fs_put_page(page: node_page, unlock: 1); |
563 | |
564 | if (ino != dn->inode->i_ino) { |
565 | int ret; |
566 | |
567 | /* Deallocate previous index in the node page */ |
568 | inode = f2fs_iget_retry(sb: sbi->sb, ino); |
569 | if (IS_ERR(ptr: inode)) |
570 | return PTR_ERR(ptr: inode); |
571 | |
572 | ret = f2fs_dquot_initialize(inode); |
573 | if (ret) { |
574 | iput(inode); |
575 | return ret; |
576 | } |
577 | } else { |
578 | inode = dn->inode; |
579 | } |
580 | |
581 | bidx = f2fs_start_bidx_of_node(node_ofs: offset, inode) + |
582 | le16_to_cpu(sum.ofs_in_node); |
583 | |
584 | /* |
585 | * if inode page is locked, unlock temporarily, but its reference |
586 | * count keeps alive. |
587 | */ |
588 | if (ino == dn->inode->i_ino && dn->inode_page_locked) |
589 | unlock_page(page: dn->inode_page); |
590 | |
591 | set_new_dnode(dn: &tdn, inode, NULL, NULL, nid: 0); |
592 | if (f2fs_get_dnode_of_data(dn: &tdn, index: bidx, mode: LOOKUP_NODE)) |
593 | goto out; |
594 | |
595 | if (tdn.data_blkaddr == blkaddr) |
596 | f2fs_truncate_data_blocks_range(dn: &tdn, count: 1); |
597 | |
598 | f2fs_put_dnode(dn: &tdn); |
599 | out: |
600 | if (ino != dn->inode->i_ino) |
601 | iput(inode); |
602 | else if (dn->inode_page_locked) |
603 | lock_page(page: dn->inode_page); |
604 | return 0; |
605 | |
606 | truncate_out: |
607 | if (f2fs_data_blkaddr(dn: &tdn) == blkaddr) |
608 | f2fs_truncate_data_blocks_range(dn: &tdn, count: 1); |
609 | if (dn->inode->i_ino == nid && !dn->inode_page_locked) |
610 | unlock_page(page: dn->inode_page); |
611 | return 0; |
612 | } |
613 | |
614 | static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, |
615 | struct page *page) |
616 | { |
617 | struct dnode_of_data dn; |
618 | struct node_info ni; |
619 | unsigned int start, end; |
620 | int err = 0, recovered = 0; |
621 | |
622 | /* step 1: recover xattr */ |
623 | if (IS_INODE(page)) { |
624 | err = f2fs_recover_inline_xattr(inode, page); |
625 | if (err) |
626 | goto out; |
627 | } else if (f2fs_has_xattr_block(ofs: ofs_of_node(node_page: page))) { |
628 | err = f2fs_recover_xattr_data(inode, page); |
629 | if (!err) |
630 | recovered++; |
631 | goto out; |
632 | } |
633 | |
634 | /* step 2: recover inline data */ |
635 | err = f2fs_recover_inline_data(inode, npage: page); |
636 | if (err) { |
637 | if (err == 1) |
638 | err = 0; |
639 | goto out; |
640 | } |
641 | |
642 | /* step 3: recover data indices */ |
643 | start = f2fs_start_bidx_of_node(node_ofs: ofs_of_node(node_page: page), inode); |
644 | end = start + ADDRS_PER_PAGE(page, inode); |
645 | |
646 | set_new_dnode(dn: &dn, inode, NULL, NULL, nid: 0); |
647 | retry_dn: |
648 | err = f2fs_get_dnode_of_data(dn: &dn, index: start, mode: ALLOC_NODE); |
649 | if (err) { |
650 | if (err == -ENOMEM) { |
651 | memalloc_retry_wait(GFP_NOFS); |
652 | goto retry_dn; |
653 | } |
654 | goto out; |
655 | } |
656 | |
657 | f2fs_wait_on_page_writeback(page: dn.node_page, type: NODE, ordered: true, locked: true); |
658 | |
659 | err = f2fs_get_node_info(sbi, nid: dn.nid, ni: &ni, checkpoint_context: false); |
660 | if (err) |
661 | goto err; |
662 | |
663 | f2fs_bug_on(sbi, ni.ino != ino_of_node(page)); |
664 | |
665 | if (ofs_of_node(node_page: dn.node_page) != ofs_of_node(node_page: page)) { |
666 | f2fs_warn(sbi, "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u" , |
667 | inode->i_ino, ofs_of_node(dn.node_page), |
668 | ofs_of_node(page)); |
669 | err = -EFSCORRUPTED; |
670 | f2fs_handle_error(sbi, error: ERROR_INCONSISTENT_FOOTER); |
671 | goto err; |
672 | } |
673 | |
674 | for (; start < end; start++, dn.ofs_in_node++) { |
675 | block_t src, dest; |
676 | |
677 | src = f2fs_data_blkaddr(dn: &dn); |
678 | dest = data_blkaddr(inode: dn.inode, node_page: page, offset: dn.ofs_in_node); |
679 | |
680 | if (__is_valid_data_blkaddr(blkaddr: src) && |
681 | !f2fs_is_valid_blkaddr(sbi, blkaddr: src, type: META_POR)) { |
682 | err = -EFSCORRUPTED; |
683 | f2fs_handle_error(sbi, error: ERROR_INVALID_BLKADDR); |
684 | goto err; |
685 | } |
686 | |
687 | if (__is_valid_data_blkaddr(blkaddr: dest) && |
688 | !f2fs_is_valid_blkaddr(sbi, blkaddr: dest, type: META_POR)) { |
689 | err = -EFSCORRUPTED; |
690 | f2fs_handle_error(sbi, error: ERROR_INVALID_BLKADDR); |
691 | goto err; |
692 | } |
693 | |
694 | /* skip recovering if dest is the same as src */ |
695 | if (src == dest) |
696 | continue; |
697 | |
698 | /* dest is invalid, just invalidate src block */ |
699 | if (dest == NULL_ADDR) { |
700 | f2fs_truncate_data_blocks_range(dn: &dn, count: 1); |
701 | continue; |
702 | } |
703 | |
704 | if (!file_keep_isize(inode) && |
705 | (i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT))) |
706 | f2fs_i_size_write(inode, |
707 | i_size: (loff_t)(start + 1) << PAGE_SHIFT); |
708 | |
709 | /* |
710 | * dest is reserved block, invalidate src block |
711 | * and then reserve one new block in dnode page. |
712 | */ |
713 | if (dest == NEW_ADDR) { |
714 | f2fs_truncate_data_blocks_range(dn: &dn, count: 1); |
715 | f2fs_reserve_new_block(dn: &dn); |
716 | continue; |
717 | } |
718 | |
719 | /* dest is valid block, try to recover from src to dest */ |
720 | if (f2fs_is_valid_blkaddr(sbi, blkaddr: dest, type: META_POR)) { |
721 | |
722 | if (src == NULL_ADDR) { |
723 | err = f2fs_reserve_new_block(dn: &dn); |
724 | while (err && |
725 | IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)) |
726 | err = f2fs_reserve_new_block(dn: &dn); |
727 | /* We should not get -ENOSPC */ |
728 | f2fs_bug_on(sbi, err); |
729 | if (err) |
730 | goto err; |
731 | } |
732 | retry_prev: |
733 | /* Check the previous node page having this index */ |
734 | err = check_index_in_prev_nodes(sbi, blkaddr: dest, dn: &dn); |
735 | if (err) { |
736 | if (err == -ENOMEM) { |
737 | memalloc_retry_wait(GFP_NOFS); |
738 | goto retry_prev; |
739 | } |
740 | goto err; |
741 | } |
742 | |
743 | if (f2fs_is_valid_blkaddr(sbi, blkaddr: dest, |
744 | type: DATA_GENERIC_ENHANCE_UPDATE)) { |
745 | f2fs_err(sbi, "Inconsistent dest blkaddr:%u, ino:%lu, ofs:%u" , |
746 | dest, inode->i_ino, dn.ofs_in_node); |
747 | err = -EFSCORRUPTED; |
748 | f2fs_handle_error(sbi, |
749 | error: ERROR_INVALID_BLKADDR); |
750 | goto err; |
751 | } |
752 | |
753 | /* write dummy data page */ |
754 | f2fs_replace_block(sbi, dn: &dn, old_addr: src, new_addr: dest, |
755 | version: ni.version, recover_curseg: false, recover_newaddr: false); |
756 | recovered++; |
757 | } |
758 | } |
759 | |
760 | copy_node_footer(dst: dn.node_page, src: page); |
761 | fill_node_footer(page: dn.node_page, nid: dn.nid, ino: ni.ino, |
762 | ofs: ofs_of_node(node_page: page), reset: false); |
763 | set_page_dirty(dn.node_page); |
764 | err: |
765 | f2fs_put_dnode(dn: &dn); |
766 | out: |
767 | f2fs_notice(sbi, "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d" , |
768 | inode->i_ino, file_keep_isize(inode) ? "keep" : "recover" , |
769 | recovered, err); |
770 | return err; |
771 | } |
772 | |
773 | static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, |
774 | struct list_head *tmp_inode_list, struct list_head *dir_list) |
775 | { |
776 | struct curseg_info *curseg; |
777 | struct page *page = NULL; |
778 | int err = 0; |
779 | block_t blkaddr; |
780 | unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS; |
781 | |
782 | /* get node pages in the current segment */ |
783 | curseg = CURSEG_I(sbi, type: CURSEG_WARM_NODE); |
784 | blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); |
785 | |
786 | while (1) { |
787 | struct fsync_inode_entry *entry; |
788 | |
789 | if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type: META_POR)) |
790 | break; |
791 | |
792 | page = f2fs_get_tmp_page(sbi, index: blkaddr); |
793 | if (IS_ERR(ptr: page)) { |
794 | err = PTR_ERR(ptr: page); |
795 | break; |
796 | } |
797 | |
798 | if (!is_recoverable_dnode(page)) { |
799 | f2fs_put_page(page, unlock: 1); |
800 | break; |
801 | } |
802 | |
803 | entry = get_fsync_inode(head: inode_list, ino: ino_of_node(node_page: page)); |
804 | if (!entry) |
805 | goto next; |
806 | /* |
807 | * inode(x) | CP | inode(x) | dnode(F) |
808 | * In this case, we can lose the latest inode(x). |
809 | * So, call recover_inode for the inode update. |
810 | */ |
811 | if (IS_INODE(page)) { |
812 | err = recover_inode(inode: entry->inode, page); |
813 | if (err) { |
814 | f2fs_put_page(page, unlock: 1); |
815 | break; |
816 | } |
817 | } |
818 | if (entry->last_dentry == blkaddr) { |
819 | err = recover_dentry(inode: entry->inode, ipage: page, dir_list); |
820 | if (err) { |
821 | f2fs_put_page(page, unlock: 1); |
822 | break; |
823 | } |
824 | } |
825 | err = do_recover_data(sbi, inode: entry->inode, page); |
826 | if (err) { |
827 | f2fs_put_page(page, unlock: 1); |
828 | break; |
829 | } |
830 | |
831 | if (entry->blkaddr == blkaddr) |
832 | list_move_tail(list: &entry->list, head: tmp_inode_list); |
833 | next: |
834 | ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, blkaddr, |
835 | next_blkaddr: next_blkaddr_of_node(node_page: page)); |
836 | |
837 | /* check next segment */ |
838 | blkaddr = next_blkaddr_of_node(node_page: page); |
839 | f2fs_put_page(page, unlock: 1); |
840 | |
841 | f2fs_ra_meta_pages_cond(sbi, index: blkaddr, ra_blocks); |
842 | } |
843 | if (!err) |
844 | f2fs_allocate_new_segments(sbi); |
845 | return err; |
846 | } |
847 | |
848 | int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) |
849 | { |
850 | struct list_head inode_list, tmp_inode_list; |
851 | struct list_head dir_list; |
852 | int err; |
853 | int ret = 0; |
854 | unsigned long s_flags = sbi->sb->s_flags; |
855 | bool need_writecp = false; |
856 | bool fix_curseg_write_pointer = false; |
857 | |
858 | if (is_sbi_flag_set(sbi, type: SBI_IS_WRITABLE)) |
859 | f2fs_info(sbi, "recover fsync data on readonly fs" ); |
860 | |
861 | INIT_LIST_HEAD(list: &inode_list); |
862 | INIT_LIST_HEAD(list: &tmp_inode_list); |
863 | INIT_LIST_HEAD(list: &dir_list); |
864 | |
865 | /* prevent checkpoint */ |
866 | f2fs_down_write(sem: &sbi->cp_global_sem); |
867 | |
868 | /* step #1: find fsynced inode numbers */ |
869 | err = find_fsync_dnodes(sbi, head: &inode_list, check_only); |
870 | if (err || list_empty(head: &inode_list)) |
871 | goto skip; |
872 | |
873 | if (check_only) { |
874 | ret = 1; |
875 | goto skip; |
876 | } |
877 | |
878 | need_writecp = true; |
879 | |
880 | /* step #2: recover data */ |
881 | err = recover_data(sbi, inode_list: &inode_list, tmp_inode_list: &tmp_inode_list, dir_list: &dir_list); |
882 | if (!err) |
883 | f2fs_bug_on(sbi, !list_empty(&inode_list)); |
884 | else |
885 | f2fs_bug_on(sbi, sbi->sb->s_flags & SB_ACTIVE); |
886 | skip: |
887 | fix_curseg_write_pointer = !check_only || list_empty(head: &inode_list); |
888 | |
889 | destroy_fsync_dnodes(head: &inode_list, drop: err); |
890 | destroy_fsync_dnodes(head: &tmp_inode_list, drop: err); |
891 | |
892 | /* truncate meta pages to be used by the recovery */ |
893 | truncate_inode_pages_range(META_MAPPING(sbi), |
894 | lstart: (loff_t)MAIN_BLKADDR(sbi) << PAGE_SHIFT, lend: -1); |
895 | |
896 | if (err) { |
897 | truncate_inode_pages_final(NODE_MAPPING(sbi)); |
898 | truncate_inode_pages_final(META_MAPPING(sbi)); |
899 | } |
900 | |
901 | /* |
902 | * If fsync data succeeds or there is no fsync data to recover, |
903 | * and the f2fs is not read only, check and fix zoned block devices' |
904 | * write pointer consistency. |
905 | */ |
906 | if (!err && fix_curseg_write_pointer && !f2fs_readonly(sb: sbi->sb) && |
907 | f2fs_sb_has_blkzoned(sbi)) { |
908 | err = f2fs_fix_curseg_write_pointer(sbi); |
909 | ret = err; |
910 | } |
911 | |
912 | if (!err) |
913 | clear_sbi_flag(sbi, type: SBI_POR_DOING); |
914 | |
915 | f2fs_up_write(sem: &sbi->cp_global_sem); |
916 | |
917 | /* let's drop all the directory inodes for clean checkpoint */ |
918 | destroy_fsync_dnodes(head: &dir_list, drop: err); |
919 | |
920 | if (need_writecp) { |
921 | set_sbi_flag(sbi, type: SBI_IS_RECOVERED); |
922 | |
923 | if (!err) { |
924 | struct cp_control cpc = { |
925 | .reason = CP_RECOVERY, |
926 | }; |
927 | stat_inc_cp_call_count(sbi, TOTAL_CALL); |
928 | err = f2fs_write_checkpoint(sbi, cpc: &cpc); |
929 | } |
930 | } |
931 | |
932 | sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */ |
933 | |
934 | return ret ? ret : err; |
935 | } |
936 | |
937 | int __init f2fs_create_recovery_cache(void) |
938 | { |
939 | fsync_entry_slab = f2fs_kmem_cache_create(name: "f2fs_fsync_inode_entry" , |
940 | size: sizeof(struct fsync_inode_entry)); |
941 | return fsync_entry_slab ? 0 : -ENOMEM; |
942 | } |
943 | |
944 | void f2fs_destroy_recovery_cache(void) |
945 | { |
946 | kmem_cache_destroy(s: fsync_entry_slab); |
947 | } |
948 | |