1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * Copyright (C) 2018-2023 Oracle. All Rights Reserved. |
4 | * Author: Darrick J. Wong <djwong@kernel.org> |
5 | */ |
6 | #include "xfs.h" |
7 | #include "xfs_fs.h" |
8 | #include "xfs_shared.h" |
9 | #include "xfs_format.h" |
10 | #include "xfs_trans_resv.h" |
11 | #include "xfs_mount.h" |
12 | #include "xfs_btree.h" |
13 | #include "xfs_log_format.h" |
14 | #include "xfs_trans.h" |
15 | #include "xfs_sb.h" |
16 | #include "xfs_alloc.h" |
17 | #include "xfs_alloc_btree.h" |
18 | #include "xfs_ialloc.h" |
19 | #include "xfs_ialloc_btree.h" |
20 | #include "xfs_rmap.h" |
21 | #include "xfs_rmap_btree.h" |
22 | #include "xfs_refcount_btree.h" |
23 | #include "xfs_ag.h" |
24 | #include "scrub/scrub.h" |
25 | #include "scrub/common.h" |
26 | #include "scrub/trace.h" |
27 | #include "scrub/repair.h" |
28 | #include "scrub/bitmap.h" |
29 | #include "scrub/agb_bitmap.h" |
30 | #include "scrub/reap.h" |
31 | |
32 | /* Superblock */ |
33 | |
34 | /* Repair the superblock. */ |
35 | int |
36 | xrep_superblock( |
37 | struct xfs_scrub *sc) |
38 | { |
39 | struct xfs_mount *mp = sc->mp; |
40 | struct xfs_buf *bp; |
41 | xfs_agnumber_t agno; |
42 | int error; |
43 | |
44 | /* Don't try to repair AG 0's sb; let xfs_repair deal with it. */ |
45 | agno = sc->sm->sm_agno; |
46 | if (agno == 0) |
47 | return -EOPNOTSUPP; |
48 | |
49 | error = xfs_sb_get_secondary(mp, sc->tp, agno, &bp); |
50 | if (error) |
51 | return error; |
52 | |
53 | /* Last chance to abort before we start committing fixes. */ |
54 | if (xchk_should_terminate(sc, &error)) |
55 | return error; |
56 | |
57 | /* Copy AG 0's superblock to this one. */ |
58 | xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); |
59 | xfs_sb_to_disk(bp->b_addr, &mp->m_sb); |
60 | |
61 | /* |
62 | * Don't write out a secondary super with NEEDSREPAIR or log incompat |
63 | * features set, since both are ignored when set on a secondary. |
64 | */ |
65 | if (xfs_has_crc(mp)) { |
66 | struct xfs_dsb *sb = bp->b_addr; |
67 | |
68 | sb->sb_features_incompat &= |
69 | ~cpu_to_be32(XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR); |
70 | sb->sb_features_log_incompat = 0; |
71 | } |
72 | |
73 | /* Write this to disk. */ |
74 | xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_SB_BUF); |
75 | xfs_trans_log_buf(sc->tp, bp, 0, BBTOB(bp->b_length) - 1); |
76 | return 0; |
77 | } |
78 | |
79 | /* AGF */ |
80 | |
/* Accumulator handed to xrep_agf_walk_allocbt() as its private data. */
struct xrep_agf_allocbt {
	/* Scrub context, consulted for termination requests during the walk. */
	struct xfs_scrub	*sc;
	/* Running sum of ar_blockcount over every record visited. */
	xfs_agblock_t		freeblks;
	/* Largest ar_blockcount seen in any single record. */
	xfs_agblock_t		longest;
};
86 | |
87 | /* Record free space shape information. */ |
88 | STATIC int |
89 | xrep_agf_walk_allocbt( |
90 | struct xfs_btree_cur *cur, |
91 | const struct xfs_alloc_rec_incore *rec, |
92 | void *priv) |
93 | { |
94 | struct xrep_agf_allocbt *raa = priv; |
95 | int error = 0; |
96 | |
97 | if (xchk_should_terminate(raa->sc, &error)) |
98 | return error; |
99 | |
100 | raa->freeblks += rec->ar_blockcount; |
101 | if (rec->ar_blockcount > raa->longest) |
102 | raa->longest = rec->ar_blockcount; |
103 | return error; |
104 | } |
105 | |
106 | /* Does this AGFL block look sane? */ |
107 | STATIC int |
108 | xrep_agf_check_agfl_block( |
109 | struct xfs_mount *mp, |
110 | xfs_agblock_t agbno, |
111 | void *priv) |
112 | { |
113 | struct xfs_scrub *sc = priv; |
114 | |
115 | if (!xfs_verify_agbno(sc->sa.pag, agbno)) |
116 | return -EFSCORRUPTED; |
117 | return 0; |
118 | } |
119 | |
/*
 * Slots in the xrep_find_ag_btree array used by the AGF repair code, one per
 * btree type.  These are deliberately dense indices rather than XFS_BTNUM_
 * values so that the array is not sparse.  XREP_AGF_END is the terminating
 * slot and XREP_AGF_MAX is the array size.
 */
enum {
	XREP_AGF_BNOBT		= 0,
	XREP_AGF_CNTBT		= 1,
	XREP_AGF_RMAPBT		= 2,
	XREP_AGF_REFCOUNTBT	= 3,
	XREP_AGF_END		= 4,
	XREP_AGF_MAX		= 5
};
132 | |
133 | /* Check a btree root candidate. */ |
134 | static inline bool |
135 | xrep_check_btree_root( |
136 | struct xfs_scrub *sc, |
137 | struct xrep_find_ag_btree *fab) |
138 | { |
139 | return xfs_verify_agbno(sc->sa.pag, fab->root) && |
140 | fab->height <= fab->maxlevels; |
141 | } |
142 | |
143 | /* |
144 | * Given the btree roots described by *fab, find the roots, check them for |
145 | * sanity, and pass the root data back out via *fab. |
146 | * |
147 | * This is /also/ a chicken and egg problem because we have to use the rmapbt |
148 | * (rooted in the AGF) to find the btrees rooted in the AGF. We also have no |
149 | * idea if the btrees make any sense. If we hit obvious corruptions in those |
150 | * btrees we'll bail out. |
151 | */ |
152 | STATIC int |
153 | xrep_agf_find_btrees( |
154 | struct xfs_scrub *sc, |
155 | struct xfs_buf *agf_bp, |
156 | struct xrep_find_ag_btree *fab, |
157 | struct xfs_buf *agfl_bp) |
158 | { |
159 | struct xfs_agf *old_agf = agf_bp->b_addr; |
160 | int error; |
161 | |
162 | /* Go find the root data. */ |
163 | error = xrep_find_ag_btree_roots(sc, agf_bp, fab, agfl_bp); |
164 | if (error) |
165 | return error; |
166 | |
167 | /* We must find the bnobt, cntbt, and rmapbt roots. */ |
168 | if (!xrep_check_btree_root(sc, &fab[XREP_AGF_BNOBT]) || |
169 | !xrep_check_btree_root(sc, &fab[XREP_AGF_CNTBT]) || |
170 | !xrep_check_btree_root(sc, &fab[XREP_AGF_RMAPBT])) |
171 | return -EFSCORRUPTED; |
172 | |
173 | /* |
174 | * We relied on the rmapbt to reconstruct the AGF. If we get a |
175 | * different root then something's seriously wrong. |
176 | */ |
177 | if (fab[XREP_AGF_RMAPBT].root != be32_to_cpu(old_agf->agf_rmap_root)) |
178 | return -EFSCORRUPTED; |
179 | |
180 | /* We must find the refcountbt root if that feature is enabled. */ |
181 | if (xfs_has_reflink(sc->mp) && |
182 | !xrep_check_btree_root(sc, &fab[XREP_AGF_REFCOUNTBT])) |
183 | return -EFSCORRUPTED; |
184 | |
185 | return 0; |
186 | } |
187 | |
188 | /* |
189 | * Reinitialize the AGF header, making an in-core copy of the old contents so |
190 | * that we know which in-core state needs to be reinitialized. |
191 | */ |
192 | STATIC void |
193 | ( |
194 | struct xfs_scrub *sc, |
195 | struct xfs_buf *agf_bp, |
196 | struct xfs_agf *old_agf) |
197 | { |
198 | struct xfs_mount *mp = sc->mp; |
199 | struct xfs_perag *pag = sc->sa.pag; |
200 | struct xfs_agf *agf = agf_bp->b_addr; |
201 | |
202 | memcpy(old_agf, agf, sizeof(*old_agf)); |
203 | memset(agf, 0, BBTOB(agf_bp->b_length)); |
204 | agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC); |
205 | agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION); |
206 | agf->agf_seqno = cpu_to_be32(pag->pag_agno); |
207 | agf->agf_length = cpu_to_be32(pag->block_count); |
208 | agf->agf_flfirst = old_agf->agf_flfirst; |
209 | agf->agf_fllast = old_agf->agf_fllast; |
210 | agf->agf_flcount = old_agf->agf_flcount; |
211 | if (xfs_has_crc(mp)) |
212 | uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid); |
213 | |
214 | /* Mark the incore AGF data stale until we're done fixing things. */ |
215 | ASSERT(xfs_perag_initialised_agf(pag)); |
216 | clear_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate); |
217 | } |
218 | |
219 | /* Set btree root information in an AGF. */ |
220 | STATIC void |
221 | xrep_agf_set_roots( |
222 | struct xfs_scrub *sc, |
223 | struct xfs_agf *agf, |
224 | struct xrep_find_ag_btree *fab) |
225 | { |
226 | agf->agf_bno_root = cpu_to_be32(fab[XREP_AGF_BNOBT].root); |
227 | agf->agf_bno_level = cpu_to_be32(fab[XREP_AGF_BNOBT].height); |
228 | |
229 | agf->agf_cnt_root = cpu_to_be32(fab[XREP_AGF_CNTBT].root); |
230 | agf->agf_cnt_level = cpu_to_be32(fab[XREP_AGF_CNTBT].height); |
231 | |
232 | agf->agf_rmap_root = cpu_to_be32(fab[XREP_AGF_RMAPBT].root); |
233 | agf->agf_rmap_level = cpu_to_be32(fab[XREP_AGF_RMAPBT].height); |
234 | |
235 | if (xfs_has_reflink(sc->mp)) { |
236 | agf->agf_refcount_root = |
237 | cpu_to_be32(fab[XREP_AGF_REFCOUNTBT].root); |
238 | agf->agf_refcount_level = |
239 | cpu_to_be32(fab[XREP_AGF_REFCOUNTBT].height); |
240 | } |
241 | } |
242 | |
243 | /* Update all AGF fields which derive from btree contents. */ |
244 | STATIC int |
245 | xrep_agf_calc_from_btrees( |
246 | struct xfs_scrub *sc, |
247 | struct xfs_buf *agf_bp) |
248 | { |
249 | struct xrep_agf_allocbt raa = { .sc = sc }; |
250 | struct xfs_btree_cur *cur = NULL; |
251 | struct xfs_agf *agf = agf_bp->b_addr; |
252 | struct xfs_mount *mp = sc->mp; |
253 | xfs_agblock_t btreeblks; |
254 | xfs_agblock_t blocks; |
255 | int error; |
256 | |
257 | /* Update the AGF counters from the bnobt. */ |
258 | cur = xfs_bnobt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag); |
259 | error = xfs_alloc_query_all(cur, xrep_agf_walk_allocbt, &raa); |
260 | if (error) |
261 | goto err; |
262 | error = xfs_btree_count_blocks(cur, &blocks); |
263 | if (error) |
264 | goto err; |
265 | xfs_btree_del_cursor(cur, error); |
266 | btreeblks = blocks - 1; |
267 | agf->agf_freeblks = cpu_to_be32(raa.freeblks); |
268 | agf->agf_longest = cpu_to_be32(raa.longest); |
269 | |
270 | /* Update the AGF counters from the cntbt. */ |
271 | cur = xfs_cntbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag); |
272 | error = xfs_btree_count_blocks(cur, &blocks); |
273 | if (error) |
274 | goto err; |
275 | xfs_btree_del_cursor(cur, error); |
276 | btreeblks += blocks - 1; |
277 | |
278 | /* Update the AGF counters from the rmapbt. */ |
279 | cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag); |
280 | error = xfs_btree_count_blocks(cur, &blocks); |
281 | if (error) |
282 | goto err; |
283 | xfs_btree_del_cursor(cur, error); |
284 | agf->agf_rmap_blocks = cpu_to_be32(blocks); |
285 | btreeblks += blocks - 1; |
286 | |
287 | agf->agf_btreeblks = cpu_to_be32(btreeblks); |
288 | |
289 | /* Update the AGF counters from the refcountbt. */ |
290 | if (xfs_has_reflink(mp)) { |
291 | cur = xfs_refcountbt_init_cursor(mp, sc->tp, agf_bp, |
292 | sc->sa.pag); |
293 | error = xfs_btree_count_blocks(cur, &blocks); |
294 | if (error) |
295 | goto err; |
296 | xfs_btree_del_cursor(cur, error); |
297 | agf->agf_refcount_blocks = cpu_to_be32(blocks); |
298 | } |
299 | |
300 | return 0; |
301 | err: |
302 | xfs_btree_del_cursor(cur, error); |
303 | return error; |
304 | } |
305 | |
306 | /* Commit the new AGF and reinitialize the incore state. */ |
307 | STATIC int |
308 | xrep_agf_commit_new( |
309 | struct xfs_scrub *sc, |
310 | struct xfs_buf *agf_bp) |
311 | { |
312 | struct xfs_perag *pag; |
313 | struct xfs_agf *agf = agf_bp->b_addr; |
314 | |
315 | /* Trigger fdblocks recalculation */ |
316 | xfs_force_summary_recalc(sc->mp); |
317 | |
318 | /* Write this to disk. */ |
319 | xfs_trans_buf_set_type(sc->tp, agf_bp, XFS_BLFT_AGF_BUF); |
320 | xfs_trans_log_buf(sc->tp, agf_bp, 0, BBTOB(agf_bp->b_length) - 1); |
321 | |
322 | /* Now reinitialize the in-core counters we changed. */ |
323 | pag = sc->sa.pag; |
324 | pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks); |
325 | pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); |
326 | pag->pagf_longest = be32_to_cpu(agf->agf_longest); |
327 | pag->pagf_bno_level = be32_to_cpu(agf->agf_bno_level); |
328 | pag->pagf_cnt_level = be32_to_cpu(agf->agf_cnt_level); |
329 | pag->pagf_rmap_level = be32_to_cpu(agf->agf_rmap_level); |
330 | pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level); |
331 | set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate); |
332 | |
333 | return xrep_roll_ag_trans(sc); |
334 | } |
335 | |
336 | /* Repair the AGF. v5 filesystems only. */ |
337 | int |
338 | xrep_agf( |
339 | struct xfs_scrub *sc) |
340 | { |
341 | struct xrep_find_ag_btree fab[XREP_AGF_MAX] = { |
342 | [XREP_AGF_BNOBT] = { |
343 | .rmap_owner = XFS_RMAP_OWN_AG, |
344 | .buf_ops = &xfs_bnobt_buf_ops, |
345 | .maxlevels = sc->mp->m_alloc_maxlevels, |
346 | }, |
347 | [XREP_AGF_CNTBT] = { |
348 | .rmap_owner = XFS_RMAP_OWN_AG, |
349 | .buf_ops = &xfs_cntbt_buf_ops, |
350 | .maxlevels = sc->mp->m_alloc_maxlevels, |
351 | }, |
352 | [XREP_AGF_RMAPBT] = { |
353 | .rmap_owner = XFS_RMAP_OWN_AG, |
354 | .buf_ops = &xfs_rmapbt_buf_ops, |
355 | .maxlevels = sc->mp->m_rmap_maxlevels, |
356 | }, |
357 | [XREP_AGF_REFCOUNTBT] = { |
358 | .rmap_owner = XFS_RMAP_OWN_REFC, |
359 | .buf_ops = &xfs_refcountbt_buf_ops, |
360 | .maxlevels = sc->mp->m_refc_maxlevels, |
361 | }, |
362 | [XREP_AGF_END] = { |
363 | .buf_ops = NULL, |
364 | }, |
365 | }; |
366 | struct xfs_agf old_agf; |
367 | struct xfs_mount *mp = sc->mp; |
368 | struct xfs_buf *agf_bp; |
369 | struct xfs_buf *agfl_bp; |
370 | struct xfs_agf *agf; |
371 | int error; |
372 | |
373 | /* We require the rmapbt to rebuild anything. */ |
374 | if (!xfs_has_rmapbt(mp)) |
375 | return -EOPNOTSUPP; |
376 | |
377 | /* |
378 | * Make sure we have the AGF buffer, as scrub might have decided it |
379 | * was corrupt after xfs_alloc_read_agf failed with -EFSCORRUPTED. |
380 | */ |
381 | error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp, |
382 | XFS_AG_DADDR(mp, sc->sa.pag->pag_agno, |
383 | XFS_AGF_DADDR(mp)), |
384 | XFS_FSS_TO_BB(mp, 1), 0, &agf_bp, NULL); |
385 | if (error) |
386 | return error; |
387 | agf_bp->b_ops = &xfs_agf_buf_ops; |
388 | agf = agf_bp->b_addr; |
389 | |
390 | /* |
391 | * Load the AGFL so that we can screen out OWN_AG blocks that are on |
392 | * the AGFL now; these blocks might have once been part of the |
393 | * bno/cnt/rmap btrees but are not now. This is a chicken and egg |
394 | * problem: the AGF is corrupt, so we have to trust the AGFL contents |
395 | * because we can't do any serious cross-referencing with any of the |
396 | * btrees rooted in the AGF. If the AGFL contents are obviously bad |
397 | * then we'll bail out. |
398 | */ |
399 | error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp); |
400 | if (error) |
401 | return error; |
402 | |
403 | /* |
404 | * Spot-check the AGFL blocks; if they're obviously corrupt then |
405 | * there's nothing we can do but bail out. |
406 | */ |
407 | error = xfs_agfl_walk(sc->mp, agf_bp->b_addr, agfl_bp, |
408 | xrep_agf_check_agfl_block, sc); |
409 | if (error) |
410 | return error; |
411 | |
412 | /* |
413 | * Find the AGF btree roots. This is also a chicken-and-egg situation; |
414 | * see the function for more details. |
415 | */ |
416 | error = xrep_agf_find_btrees(sc, agf_bp, fab, agfl_bp); |
417 | if (error) |
418 | return error; |
419 | |
420 | /* Last chance to abort before we start committing fixes. */ |
421 | if (xchk_should_terminate(sc, &error)) |
422 | return error; |
423 | |
424 | /* Start rewriting the header and implant the btrees we found. */ |
425 | xrep_agf_init_header(sc, agf_bp, &old_agf); |
426 | xrep_agf_set_roots(sc, agf, fab); |
427 | error = xrep_agf_calc_from_btrees(sc, agf_bp); |
428 | if (error) |
429 | goto out_revert; |
430 | |
431 | /* Commit the changes and reinitialize incore state. */ |
432 | return xrep_agf_commit_new(sc, agf_bp); |
433 | |
434 | out_revert: |
435 | /* Mark the incore AGF state stale and revert the AGF. */ |
436 | clear_bit(XFS_AGSTATE_AGF_INIT, &sc->sa.pag->pag_opstate); |
437 | memcpy(agf, &old_agf, sizeof(old_agf)); |
438 | return error; |
439 | } |
440 | |
441 | /* AGFL */ |
442 | |
/* Working state shared by the rmap and bitmap walks that rebuild the AGFL. */
struct xrep_agfl {
	/* Bitmap of alleged AGFL blocks that we're not going to add. */
	struct xagb_bitmap	crossed;

	/* Bitmap of other OWN_AG metadata blocks. */
	struct xagb_bitmap	agmetablocks;

	/* Bitmap of free space. */
	struct xagb_bitmap	*freesp;

	/* rmapbt cursor for finding crosslinked blocks */
	struct xfs_btree_cur	*rmap_cur;

	/* Scrub context, consulted for termination requests during walks. */
	struct xfs_scrub	*sc;
};
458 | |
459 | /* Record all OWN_AG (free space btree) information from the rmap data. */ |
460 | STATIC int |
461 | xrep_agfl_walk_rmap( |
462 | struct xfs_btree_cur *cur, |
463 | const struct xfs_rmap_irec *rec, |
464 | void *priv) |
465 | { |
466 | struct xrep_agfl *ra = priv; |
467 | int error = 0; |
468 | |
469 | if (xchk_should_terminate(ra->sc, &error)) |
470 | return error; |
471 | |
472 | /* Record all the OWN_AG blocks. */ |
473 | if (rec->rm_owner == XFS_RMAP_OWN_AG) { |
474 | error = xagb_bitmap_set(ra->freesp, rec->rm_startblock, |
475 | rec->rm_blockcount); |
476 | if (error) |
477 | return error; |
478 | } |
479 | |
480 | return xagb_bitmap_set_btcur_path(&ra->agmetablocks, cur); |
481 | } |
482 | |
483 | /* Strike out the blocks that are cross-linked according to the rmapbt. */ |
484 | STATIC int |
485 | xrep_agfl_check_extent( |
486 | uint32_t agbno, |
487 | uint32_t len, |
488 | void *priv) |
489 | { |
490 | struct xrep_agfl *ra = priv; |
491 | xfs_agblock_t last_agbno = agbno + len - 1; |
492 | int error; |
493 | |
494 | while (agbno <= last_agbno) { |
495 | bool other_owners; |
496 | |
497 | error = xfs_rmap_has_other_keys(ra->rmap_cur, agbno, 1, |
498 | &XFS_RMAP_OINFO_AG, &other_owners); |
499 | if (error) |
500 | return error; |
501 | |
502 | if (other_owners) { |
503 | error = xagb_bitmap_set(&ra->crossed, agbno, 1); |
504 | if (error) |
505 | return error; |
506 | } |
507 | |
508 | if (xchk_should_terminate(ra->sc, &error)) |
509 | return error; |
510 | agbno++; |
511 | } |
512 | |
513 | return 0; |
514 | } |
515 | |
516 | /* |
517 | * Map out all the non-AGFL OWN_AG space in this AG so that we can deduce |
518 | * which blocks belong to the AGFL. |
519 | * |
520 | * Compute the set of old AGFL blocks by subtracting from the list of OWN_AG |
521 | * blocks the list of blocks owned by all other OWN_AG metadata (bnobt, cntbt, |
522 | * rmapbt). These are the old AGFL blocks, so return that list and the number |
523 | * of blocks we're actually going to put back on the AGFL. |
524 | */ |
525 | STATIC int |
526 | xrep_agfl_collect_blocks( |
527 | struct xfs_scrub *sc, |
528 | struct xfs_buf *agf_bp, |
529 | struct xagb_bitmap *agfl_extents, |
530 | xfs_agblock_t *flcount) |
531 | { |
532 | struct xrep_agfl ra; |
533 | struct xfs_mount *mp = sc->mp; |
534 | struct xfs_btree_cur *cur; |
535 | int error; |
536 | |
537 | ra.sc = sc; |
538 | ra.freesp = agfl_extents; |
539 | xagb_bitmap_init(&ra.agmetablocks); |
540 | xagb_bitmap_init(&ra.crossed); |
541 | |
542 | /* Find all space used by the free space btrees & rmapbt. */ |
543 | cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag); |
544 | error = xfs_rmap_query_all(cur, xrep_agfl_walk_rmap, &ra); |
545 | xfs_btree_del_cursor(cur, error); |
546 | if (error) |
547 | goto out_bmp; |
548 | |
549 | /* Find all blocks currently being used by the bnobt. */ |
550 | cur = xfs_bnobt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag); |
551 | error = xagb_bitmap_set_btblocks(&ra.agmetablocks, cur); |
552 | xfs_btree_del_cursor(cur, error); |
553 | if (error) |
554 | goto out_bmp; |
555 | |
556 | /* Find all blocks currently being used by the cntbt. */ |
557 | cur = xfs_cntbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag); |
558 | error = xagb_bitmap_set_btblocks(&ra.agmetablocks, cur); |
559 | xfs_btree_del_cursor(cur, error); |
560 | if (error) |
561 | goto out_bmp; |
562 | |
563 | /* |
564 | * Drop the freesp meta blocks that are in use by btrees. |
565 | * The remaining blocks /should/ be AGFL blocks. |
566 | */ |
567 | error = xagb_bitmap_disunion(agfl_extents, &ra.agmetablocks); |
568 | if (error) |
569 | goto out_bmp; |
570 | |
571 | /* Strike out the blocks that are cross-linked. */ |
572 | ra.rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag); |
573 | error = xagb_bitmap_walk(agfl_extents, xrep_agfl_check_extent, &ra); |
574 | xfs_btree_del_cursor(ra.rmap_cur, error); |
575 | if (error) |
576 | goto out_bmp; |
577 | error = xagb_bitmap_disunion(agfl_extents, &ra.crossed); |
578 | if (error) |
579 | goto out_bmp; |
580 | |
581 | /* |
582 | * Calculate the new AGFL size. If we found more blocks than fit in |
583 | * the AGFL we'll free them later. |
584 | */ |
585 | *flcount = min_t(uint64_t, xagb_bitmap_hweight(agfl_extents), |
586 | xfs_agfl_size(mp)); |
587 | |
588 | out_bmp: |
589 | xagb_bitmap_destroy(&ra.crossed); |
590 | xagb_bitmap_destroy(&ra.agmetablocks); |
591 | return error; |
592 | } |
593 | |
594 | /* Update the AGF and reset the in-core state. */ |
595 | STATIC void |
596 | xrep_agfl_update_agf( |
597 | struct xfs_scrub *sc, |
598 | struct xfs_buf *agf_bp, |
599 | xfs_agblock_t flcount) |
600 | { |
601 | struct xfs_agf *agf = agf_bp->b_addr; |
602 | |
603 | ASSERT(flcount <= xfs_agfl_size(sc->mp)); |
604 | |
605 | /* Trigger fdblocks recalculation */ |
606 | xfs_force_summary_recalc(sc->mp); |
607 | |
608 | /* Update the AGF counters. */ |
609 | if (xfs_perag_initialised_agf(sc->sa.pag)) { |
610 | sc->sa.pag->pagf_flcount = flcount; |
611 | clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, |
612 | &sc->sa.pag->pag_opstate); |
613 | } |
614 | agf->agf_flfirst = cpu_to_be32(0); |
615 | agf->agf_flcount = cpu_to_be32(flcount); |
616 | if (flcount) |
617 | agf->agf_fllast = cpu_to_be32(flcount - 1); |
618 | else |
619 | agf->agf_fllast = cpu_to_be32(xfs_agfl_size(sc->mp) - 1); |
620 | |
621 | xfs_alloc_log_agf(sc->tp, agf_bp, |
622 | XFS_AGF_FLFIRST | XFS_AGF_FLLAST | XFS_AGF_FLCOUNT); |
623 | } |
624 | |
/* State for xrep_agfl_fill() while it copies extents into the new AGFL. */
struct xrep_agfl_fill {
	/* Extents recorded by xrep_agfl_fill() as it consumes them. */
	struct xagb_bitmap	used_extents;
	/* Scrub context; supplies the perag for tracing. */
	struct xfs_scrub	*sc;
	/* The AGFL block number array that is being filled. */
	__be32			*agfl_bno;
	/* Number of entries the new AGFL is to hold. */
	xfs_agblock_t		flcount;
	/* Index of the next unused slot in agfl_bno. */
	unsigned int		fl_off;
};
632 | |
633 | /* Fill the AGFL with whatever blocks are in this extent. */ |
634 | static int |
635 | xrep_agfl_fill( |
636 | uint32_t start, |
637 | uint32_t len, |
638 | void *priv) |
639 | { |
640 | struct xrep_agfl_fill *af = priv; |
641 | struct xfs_scrub *sc = af->sc; |
642 | xfs_agblock_t agbno = start; |
643 | int error; |
644 | |
645 | trace_xrep_agfl_insert(sc->sa.pag, agbno, len); |
646 | |
647 | while (agbno < start + len && af->fl_off < af->flcount) |
648 | af->agfl_bno[af->fl_off++] = cpu_to_be32(agbno++); |
649 | |
650 | error = xagb_bitmap_set(&af->used_extents, start, agbno - 1); |
651 | if (error) |
652 | return error; |
653 | |
654 | if (af->fl_off == af->flcount) |
655 | return -ECANCELED; |
656 | |
657 | return 0; |
658 | } |
659 | |
660 | /* Write out a totally new AGFL. */ |
661 | STATIC int |
662 | ( |
663 | struct xfs_scrub *sc, |
664 | struct xfs_buf *agfl_bp, |
665 | struct xagb_bitmap *agfl_extents, |
666 | xfs_agblock_t flcount) |
667 | { |
668 | struct xrep_agfl_fill af = { |
669 | .sc = sc, |
670 | .flcount = flcount, |
671 | }; |
672 | struct xfs_mount *mp = sc->mp; |
673 | struct xfs_agfl *agfl; |
674 | int error; |
675 | |
676 | ASSERT(flcount <= xfs_agfl_size(mp)); |
677 | |
678 | /* |
679 | * Start rewriting the header by setting the bno[] array to |
680 | * NULLAGBLOCK, then setting AGFL header fields. |
681 | */ |
682 | agfl = XFS_BUF_TO_AGFL(agfl_bp); |
683 | memset(agfl, 0xFF, BBTOB(agfl_bp->b_length)); |
684 | agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC); |
685 | agfl->agfl_seqno = cpu_to_be32(sc->sa.pag->pag_agno); |
686 | uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid); |
687 | |
688 | /* |
689 | * Fill the AGFL with the remaining blocks. If agfl_extents has more |
690 | * blocks than fit in the AGFL, they will be freed in a subsequent |
691 | * step. |
692 | */ |
693 | xagb_bitmap_init(&af.used_extents); |
694 | af.agfl_bno = xfs_buf_to_agfl_bno(agfl_bp), |
695 | xagb_bitmap_walk(agfl_extents, xrep_agfl_fill, &af); |
696 | error = xagb_bitmap_disunion(agfl_extents, &af.used_extents); |
697 | if (error) |
698 | return error; |
699 | |
700 | /* Write new AGFL to disk. */ |
701 | xfs_trans_buf_set_type(sc->tp, agfl_bp, XFS_BLFT_AGFL_BUF); |
702 | xfs_trans_log_buf(sc->tp, agfl_bp, 0, BBTOB(agfl_bp->b_length) - 1); |
703 | xagb_bitmap_destroy(&af.used_extents); |
704 | return 0; |
705 | } |
706 | |
707 | /* Repair the AGFL. */ |
708 | int |
709 | xrep_agfl( |
710 | struct xfs_scrub *sc) |
711 | { |
712 | struct xagb_bitmap agfl_extents; |
713 | struct xfs_mount *mp = sc->mp; |
714 | struct xfs_buf *agf_bp; |
715 | struct xfs_buf *agfl_bp; |
716 | xfs_agblock_t flcount; |
717 | int error; |
718 | |
719 | /* We require the rmapbt to rebuild anything. */ |
720 | if (!xfs_has_rmapbt(mp)) |
721 | return -EOPNOTSUPP; |
722 | |
723 | xagb_bitmap_init(&agfl_extents); |
724 | |
725 | /* |
726 | * Read the AGF so that we can query the rmapbt. We hope that there's |
727 | * nothing wrong with the AGF, but all the AG header repair functions |
728 | * have this chicken-and-egg problem. |
729 | */ |
730 | error = xfs_alloc_read_agf(sc->sa.pag, sc->tp, 0, &agf_bp); |
731 | if (error) |
732 | return error; |
733 | |
734 | /* |
735 | * Make sure we have the AGFL buffer, as scrub might have decided it |
736 | * was corrupt after xfs_alloc_read_agfl failed with -EFSCORRUPTED. |
737 | */ |
738 | error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp, |
739 | XFS_AG_DADDR(mp, sc->sa.pag->pag_agno, |
740 | XFS_AGFL_DADDR(mp)), |
741 | XFS_FSS_TO_BB(mp, 1), 0, &agfl_bp, NULL); |
742 | if (error) |
743 | return error; |
744 | agfl_bp->b_ops = &xfs_agfl_buf_ops; |
745 | |
746 | /* Gather all the extents we're going to put on the new AGFL. */ |
747 | error = xrep_agfl_collect_blocks(sc, agf_bp, &agfl_extents, &flcount); |
748 | if (error) |
749 | goto err; |
750 | |
751 | /* Last chance to abort before we start committing fixes. */ |
752 | if (xchk_should_terminate(sc, &error)) |
753 | goto err; |
754 | |
755 | /* |
756 | * Update AGF and AGFL. We reset the global free block counter when |
757 | * we adjust the AGF flcount (which can fail) so avoid updating any |
758 | * buffers until we know that part works. |
759 | */ |
760 | xrep_agfl_update_agf(sc, agf_bp, flcount); |
761 | error = xrep_agfl_init_header(sc, agfl_bp, &agfl_extents, flcount); |
762 | if (error) |
763 | goto err; |
764 | |
765 | /* |
766 | * Ok, the AGFL should be ready to go now. Roll the transaction to |
767 | * make the new AGFL permanent before we start using it to return |
768 | * freespace overflow to the freespace btrees. |
769 | */ |
770 | sc->sa.agf_bp = agf_bp; |
771 | error = xrep_roll_ag_trans(sc); |
772 | if (error) |
773 | goto err; |
774 | |
775 | /* Dump any AGFL overflow. */ |
776 | error = xrep_reap_agblocks(sc, &agfl_extents, &XFS_RMAP_OINFO_AG, |
777 | XFS_AG_RESV_AGFL); |
778 | if (error) |
779 | goto err; |
780 | |
781 | err: |
782 | xagb_bitmap_destroy(&agfl_extents); |
783 | return error; |
784 | } |
785 | |
786 | /* AGI */ |
787 | |
/*
 * Slots in the xrep_find_ag_btree array used by the AGI repair code, one per
 * btree type.  These are deliberately dense indices rather than XFS_BTNUM_
 * values so that the array is not sparse.  XREP_AGI_END is the terminating
 * slot and XREP_AGI_MAX is the array size.
 */
enum {
	XREP_AGI_INOBT		= 0,
	XREP_AGI_FINOBT		= 1,
	XREP_AGI_END		= 2,
	XREP_AGI_MAX		= 3
};
798 | |
799 | /* |
800 | * Given the inode btree roots described by *fab, find the roots, check them |
801 | * for sanity, and pass the root data back out via *fab. |
802 | */ |
803 | STATIC int |
804 | xrep_agi_find_btrees( |
805 | struct xfs_scrub *sc, |
806 | struct xrep_find_ag_btree *fab) |
807 | { |
808 | struct xfs_buf *agf_bp; |
809 | struct xfs_mount *mp = sc->mp; |
810 | int error; |
811 | |
812 | /* Read the AGF. */ |
813 | error = xfs_alloc_read_agf(sc->sa.pag, sc->tp, 0, &agf_bp); |
814 | if (error) |
815 | return error; |
816 | |
817 | /* Find the btree roots. */ |
818 | error = xrep_find_ag_btree_roots(sc, agf_bp, fab, NULL); |
819 | if (error) |
820 | return error; |
821 | |
822 | /* We must find the inobt root. */ |
823 | if (!xrep_check_btree_root(sc, &fab[XREP_AGI_INOBT])) |
824 | return -EFSCORRUPTED; |
825 | |
826 | /* We must find the finobt root if that feature is enabled. */ |
827 | if (xfs_has_finobt(mp) && |
828 | !xrep_check_btree_root(sc, &fab[XREP_AGI_FINOBT])) |
829 | return -EFSCORRUPTED; |
830 | |
831 | return 0; |
832 | } |
833 | |
834 | /* |
835 | * Reinitialize the AGI header, making an in-core copy of the old contents so |
836 | * that we know which in-core state needs to be reinitialized. |
837 | */ |
838 | STATIC void |
839 | ( |
840 | struct xfs_scrub *sc, |
841 | struct xfs_buf *agi_bp, |
842 | struct xfs_agi *old_agi) |
843 | { |
844 | struct xfs_agi *agi = agi_bp->b_addr; |
845 | struct xfs_perag *pag = sc->sa.pag; |
846 | struct xfs_mount *mp = sc->mp; |
847 | |
848 | memcpy(old_agi, agi, sizeof(*old_agi)); |
849 | memset(agi, 0, BBTOB(agi_bp->b_length)); |
850 | agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC); |
851 | agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION); |
852 | agi->agi_seqno = cpu_to_be32(pag->pag_agno); |
853 | agi->agi_length = cpu_to_be32(pag->block_count); |
854 | agi->agi_newino = cpu_to_be32(NULLAGINO); |
855 | agi->agi_dirino = cpu_to_be32(NULLAGINO); |
856 | if (xfs_has_crc(mp)) |
857 | uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid); |
858 | |
859 | /* We don't know how to fix the unlinked list yet. */ |
860 | memcpy(&agi->agi_unlinked, &old_agi->agi_unlinked, |
861 | sizeof(agi->agi_unlinked)); |
862 | |
863 | /* Mark the incore AGF data stale until we're done fixing things. */ |
864 | ASSERT(xfs_perag_initialised_agi(pag)); |
865 | clear_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate); |
866 | } |
867 | |
868 | /* Set btree root information in an AGI. */ |
869 | STATIC void |
870 | xrep_agi_set_roots( |
871 | struct xfs_scrub *sc, |
872 | struct xfs_agi *agi, |
873 | struct xrep_find_ag_btree *fab) |
874 | { |
875 | agi->agi_root = cpu_to_be32(fab[XREP_AGI_INOBT].root); |
876 | agi->agi_level = cpu_to_be32(fab[XREP_AGI_INOBT].height); |
877 | |
878 | if (xfs_has_finobt(sc->mp)) { |
879 | agi->agi_free_root = cpu_to_be32(fab[XREP_AGI_FINOBT].root); |
880 | agi->agi_free_level = cpu_to_be32(fab[XREP_AGI_FINOBT].height); |
881 | } |
882 | } |
883 | |
884 | /* Update the AGI counters. */ |
885 | STATIC int |
886 | xrep_agi_calc_from_btrees( |
887 | struct xfs_scrub *sc, |
888 | struct xfs_buf *agi_bp) |
889 | { |
890 | struct xfs_btree_cur *cur; |
891 | struct xfs_agi *agi = agi_bp->b_addr; |
892 | struct xfs_mount *mp = sc->mp; |
893 | xfs_agino_t count; |
894 | xfs_agino_t freecount; |
895 | int error; |
896 | |
897 | cur = xfs_inobt_init_cursor(sc->sa.pag, sc->tp, agi_bp); |
898 | error = xfs_ialloc_count_inodes(cur, &count, &freecount); |
899 | if (error) |
900 | goto err; |
901 | if (xfs_has_inobtcounts(mp)) { |
902 | xfs_agblock_t blocks; |
903 | |
904 | error = xfs_btree_count_blocks(cur, &blocks); |
905 | if (error) |
906 | goto err; |
907 | agi->agi_iblocks = cpu_to_be32(blocks); |
908 | } |
909 | xfs_btree_del_cursor(cur, error); |
910 | |
911 | agi->agi_count = cpu_to_be32(count); |
912 | agi->agi_freecount = cpu_to_be32(freecount); |
913 | |
914 | if (xfs_has_finobt(mp) && xfs_has_inobtcounts(mp)) { |
915 | xfs_agblock_t blocks; |
916 | |
917 | cur = xfs_finobt_init_cursor(sc->sa.pag, sc->tp, agi_bp); |
918 | error = xfs_btree_count_blocks(cur, &blocks); |
919 | if (error) |
920 | goto err; |
921 | xfs_btree_del_cursor(cur, error); |
922 | agi->agi_fblocks = cpu_to_be32(blocks); |
923 | } |
924 | |
925 | return 0; |
926 | err: |
927 | xfs_btree_del_cursor(cur, error); |
928 | return error; |
929 | } |
930 | |
931 | /* Trigger reinitialization of the in-core data. */ |
932 | STATIC int |
933 | xrep_agi_commit_new( |
934 | struct xfs_scrub *sc, |
935 | struct xfs_buf *agi_bp) |
936 | { |
937 | struct xfs_perag *pag; |
938 | struct xfs_agi *agi = agi_bp->b_addr; |
939 | |
940 | /* Trigger inode count recalculation */ |
941 | xfs_force_summary_recalc(sc->mp); |
942 | |
943 | /* Write this to disk. */ |
944 | xfs_trans_buf_set_type(sc->tp, agi_bp, XFS_BLFT_AGI_BUF); |
945 | xfs_trans_log_buf(sc->tp, agi_bp, 0, BBTOB(agi_bp->b_length) - 1); |
946 | |
947 | /* Now reinitialize the in-core counters if necessary. */ |
948 | pag = sc->sa.pag; |
949 | pag->pagi_count = be32_to_cpu(agi->agi_count); |
950 | pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); |
951 | set_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate); |
952 | |
953 | return xrep_roll_ag_trans(sc); |
954 | } |
955 | |
956 | /* Repair the AGI. */ |
957 | int |
958 | xrep_agi( |
959 | struct xfs_scrub *sc) |
960 | { |
961 | struct xrep_find_ag_btree fab[XREP_AGI_MAX] = { |
962 | [XREP_AGI_INOBT] = { |
963 | .rmap_owner = XFS_RMAP_OWN_INOBT, |
964 | .buf_ops = &xfs_inobt_buf_ops, |
965 | .maxlevels = M_IGEO(sc->mp)->inobt_maxlevels, |
966 | }, |
967 | [XREP_AGI_FINOBT] = { |
968 | .rmap_owner = XFS_RMAP_OWN_INOBT, |
969 | .buf_ops = &xfs_finobt_buf_ops, |
970 | .maxlevels = M_IGEO(sc->mp)->inobt_maxlevels, |
971 | }, |
972 | [XREP_AGI_END] = { |
973 | .buf_ops = NULL |
974 | }, |
975 | }; |
976 | struct xfs_agi old_agi; |
977 | struct xfs_mount *mp = sc->mp; |
978 | struct xfs_buf *agi_bp; |
979 | struct xfs_agi *agi; |
980 | int error; |
981 | |
982 | /* We require the rmapbt to rebuild anything. */ |
983 | if (!xfs_has_rmapbt(mp)) |
984 | return -EOPNOTSUPP; |
985 | |
986 | /* |
987 | * Make sure we have the AGI buffer, as scrub might have decided it |
988 | * was corrupt after xfs_ialloc_read_agi failed with -EFSCORRUPTED. |
989 | */ |
990 | error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp, |
991 | XFS_AG_DADDR(mp, sc->sa.pag->pag_agno, |
992 | XFS_AGI_DADDR(mp)), |
993 | XFS_FSS_TO_BB(mp, 1), 0, &agi_bp, NULL); |
994 | if (error) |
995 | return error; |
996 | agi_bp->b_ops = &xfs_agi_buf_ops; |
997 | agi = agi_bp->b_addr; |
998 | |
999 | /* Find the AGI btree roots. */ |
1000 | error = xrep_agi_find_btrees(sc, fab); |
1001 | if (error) |
1002 | return error; |
1003 | |
1004 | /* Last chance to abort before we start committing fixes. */ |
1005 | if (xchk_should_terminate(sc, &error)) |
1006 | return error; |
1007 | |
1008 | /* Start rewriting the header and implant the btrees we found. */ |
1009 | xrep_agi_init_header(sc, agi_bp, &old_agi); |
1010 | xrep_agi_set_roots(sc, agi, fab); |
1011 | error = xrep_agi_calc_from_btrees(sc, agi_bp); |
1012 | if (error) |
1013 | goto out_revert; |
1014 | |
1015 | /* Reinitialize in-core state. */ |
1016 | return xrep_agi_commit_new(sc, agi_bp); |
1017 | |
1018 | out_revert: |
1019 | /* Mark the incore AGI state stale and revert the AGI. */ |
1020 | clear_bit(XFS_AGSTATE_AGI_INIT, &sc->sa.pag->pag_opstate); |
1021 | memcpy(agi, &old_agi, sizeof(old_agi)); |
1022 | return error; |
1023 | } |
1024 | |