1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * Copyright (C) 2017-2023 Oracle. All Rights Reserved. |
4 | * Author: Darrick J. Wong <djwong@kernel.org> |
5 | */ |
6 | #include "xfs.h" |
7 | #include "xfs_fs.h" |
8 | #include "xfs_shared.h" |
9 | #include "xfs_format.h" |
10 | #include "xfs_trans_resv.h" |
11 | #include "xfs_mount.h" |
12 | #include "xfs_btree.h" |
13 | #include "xfs_log_format.h" |
14 | #include "xfs_trans.h" |
15 | #include "xfs_inode.h" |
16 | #include "xfs_ialloc.h" |
17 | #include "xfs_ialloc_btree.h" |
18 | #include "xfs_icache.h" |
19 | #include "xfs_rmap.h" |
20 | #include "scrub/scrub.h" |
21 | #include "scrub/common.h" |
22 | #include "scrub/btree.h" |
23 | #include "scrub/trace.h" |
24 | #include "xfs_ag.h" |
25 | |
26 | /* |
27 | * Set us up to scrub inode btrees. |
28 | * If we detect a discrepancy between the inobt and the inode, |
29 | * try again after forcing logged inode cores out to disk. |
30 | */ |
31 | int |
32 | xchk_setup_ag_iallocbt( |
33 | struct xfs_scrub *sc) |
34 | { |
35 | if (xchk_need_intent_drain(sc)) |
36 | xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN); |
37 | return xchk_setup_ag_btree(sc, sc->flags & XCHK_TRY_HARDER); |
38 | } |
39 | |
40 | /* Inode btree scrubber. */ |
41 | |
42 | struct xchk_iallocbt { |
43 | /* Number of inodes we see while scanning inobt. */ |
44 | unsigned long long inodes; |
45 | |
46 | /* Expected next startino, for big block filesystems. */ |
47 | xfs_agino_t next_startino; |
48 | |
49 | /* Expected end of the current inode cluster. */ |
50 | xfs_agino_t next_cluster_ino; |
51 | }; |
52 | |
53 | /* |
54 | * Does the finobt have a record for this inode with the same hole/free state? |
55 | * This is a bit complicated because of the following: |
56 | * |
57 | * - The finobt need not have a record if all inodes in the inobt record are |
58 | * allocated. |
59 | * - The finobt need not have a record if all inodes in the inobt record are |
60 | * free. |
61 | * - The finobt need not have a record if the inobt record says this is a hole. |
62 | * This likely doesn't happen in practice. |
63 | */ |
64 | STATIC int |
65 | xchk_inobt_xref_finobt( |
66 | struct xfs_scrub *sc, |
67 | struct xfs_inobt_rec_incore *irec, |
68 | xfs_agino_t agino, |
69 | bool free, |
70 | bool hole) |
71 | { |
72 | struct xfs_inobt_rec_incore frec; |
73 | struct xfs_btree_cur *cur = sc->sa.fino_cur; |
74 | bool ffree, fhole; |
75 | unsigned int frec_idx, fhole_idx; |
76 | int has_record; |
77 | int error; |
78 | |
79 | ASSERT(xfs_btree_is_fino(cur->bc_ops)); |
80 | |
81 | error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has_record); |
82 | if (error) |
83 | return error; |
84 | if (!has_record) |
85 | goto no_record; |
86 | |
87 | error = xfs_inobt_get_rec(cur, &frec, &has_record); |
88 | if (!has_record) |
89 | return -EFSCORRUPTED; |
90 | |
91 | if (frec.ir_startino + XFS_INODES_PER_CHUNK <= agino) |
92 | goto no_record; |
93 | |
94 | /* There's a finobt record; free and hole status must match. */ |
95 | frec_idx = agino - frec.ir_startino; |
96 | ffree = frec.ir_free & (1ULL << frec_idx); |
97 | fhole_idx = frec_idx / XFS_INODES_PER_HOLEMASK_BIT; |
98 | fhole = frec.ir_holemask & (1U << fhole_idx); |
99 | |
100 | if (ffree != free) |
101 | xchk_btree_xref_set_corrupt(sc, cur, 0); |
102 | if (fhole != hole) |
103 | xchk_btree_xref_set_corrupt(sc, cur, 0); |
104 | return 0; |
105 | |
106 | no_record: |
107 | /* inobt record is fully allocated */ |
108 | if (irec->ir_free == 0) |
109 | return 0; |
110 | |
111 | /* inobt record is totally unallocated */ |
112 | if (irec->ir_free == XFS_INOBT_ALL_FREE) |
113 | return 0; |
114 | |
115 | /* inobt record says this is a hole */ |
116 | if (hole) |
117 | return 0; |
118 | |
119 | /* finobt doesn't care about allocated inodes */ |
120 | if (!free) |
121 | return 0; |
122 | |
123 | xchk_btree_xref_set_corrupt(sc, cur, 0); |
124 | return 0; |
125 | } |
126 | |
127 | /* |
128 | * Make sure that each inode of this part of an inobt record has the same |
129 | * sparse and free status as the finobt. |
130 | */ |
131 | STATIC void |
132 | xchk_inobt_chunk_xref_finobt( |
133 | struct xfs_scrub *sc, |
134 | struct xfs_inobt_rec_incore *irec, |
135 | xfs_agino_t agino, |
136 | unsigned int nr_inodes) |
137 | { |
138 | xfs_agino_t i; |
139 | unsigned int rec_idx; |
140 | int error; |
141 | |
142 | ASSERT(sc->sm->sm_type == XFS_SCRUB_TYPE_INOBT); |
143 | |
144 | if (!sc->sa.fino_cur || xchk_skip_xref(sc->sm)) |
145 | return; |
146 | |
147 | for (i = agino, rec_idx = agino - irec->ir_startino; |
148 | i < agino + nr_inodes; |
149 | i++, rec_idx++) { |
150 | bool free, hole; |
151 | unsigned int hole_idx; |
152 | |
153 | free = irec->ir_free & (1ULL << rec_idx); |
154 | hole_idx = rec_idx / XFS_INODES_PER_HOLEMASK_BIT; |
155 | hole = irec->ir_holemask & (1U << hole_idx); |
156 | |
157 | error = xchk_inobt_xref_finobt(sc, irec, i, free, hole); |
158 | if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur)) |
159 | return; |
160 | } |
161 | } |
162 | |
163 | /* |
164 | * Does the inobt have a record for this inode with the same hole/free state? |
165 | * The inobt must always have a record if there's a finobt record. |
166 | */ |
167 | STATIC int |
168 | xchk_finobt_xref_inobt( |
169 | struct xfs_scrub *sc, |
170 | struct xfs_inobt_rec_incore *frec, |
171 | xfs_agino_t agino, |
172 | bool ffree, |
173 | bool fhole) |
174 | { |
175 | struct xfs_inobt_rec_incore irec; |
176 | struct xfs_btree_cur *cur = sc->sa.ino_cur; |
177 | bool free, hole; |
178 | unsigned int rec_idx, hole_idx; |
179 | int has_record; |
180 | int error; |
181 | |
182 | ASSERT(xfs_btree_is_ino(cur->bc_ops)); |
183 | |
184 | error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has_record); |
185 | if (error) |
186 | return error; |
187 | if (!has_record) |
188 | goto no_record; |
189 | |
190 | error = xfs_inobt_get_rec(cur, &irec, &has_record); |
191 | if (!has_record) |
192 | return -EFSCORRUPTED; |
193 | |
194 | if (irec.ir_startino + XFS_INODES_PER_CHUNK <= agino) |
195 | goto no_record; |
196 | |
197 | /* There's an inobt record; free and hole status must match. */ |
198 | rec_idx = agino - irec.ir_startino; |
199 | free = irec.ir_free & (1ULL << rec_idx); |
200 | hole_idx = rec_idx / XFS_INODES_PER_HOLEMASK_BIT; |
201 | hole = irec.ir_holemask & (1U << hole_idx); |
202 | |
203 | if (ffree != free) |
204 | xchk_btree_xref_set_corrupt(sc, cur, 0); |
205 | if (fhole != hole) |
206 | xchk_btree_xref_set_corrupt(sc, cur, 0); |
207 | return 0; |
208 | |
209 | no_record: |
210 | /* finobt should never have a record for which the inobt does not */ |
211 | xchk_btree_xref_set_corrupt(sc, cur, 0); |
212 | return 0; |
213 | } |
214 | |
215 | /* |
216 | * Make sure that each inode of this part of an finobt record has the same |
217 | * sparse and free status as the inobt. |
218 | */ |
219 | STATIC void |
220 | xchk_finobt_chunk_xref_inobt( |
221 | struct xfs_scrub *sc, |
222 | struct xfs_inobt_rec_incore *frec, |
223 | xfs_agino_t agino, |
224 | unsigned int nr_inodes) |
225 | { |
226 | xfs_agino_t i; |
227 | unsigned int rec_idx; |
228 | int error; |
229 | |
230 | ASSERT(sc->sm->sm_type == XFS_SCRUB_TYPE_FINOBT); |
231 | |
232 | if (!sc->sa.ino_cur || xchk_skip_xref(sc->sm)) |
233 | return; |
234 | |
235 | for (i = agino, rec_idx = agino - frec->ir_startino; |
236 | i < agino + nr_inodes; |
237 | i++, rec_idx++) { |
238 | bool ffree, fhole; |
239 | unsigned int hole_idx; |
240 | |
241 | ffree = frec->ir_free & (1ULL << rec_idx); |
242 | hole_idx = rec_idx / XFS_INODES_PER_HOLEMASK_BIT; |
243 | fhole = frec->ir_holemask & (1U << hole_idx); |
244 | |
245 | error = xchk_finobt_xref_inobt(sc, frec, i, ffree, fhole); |
246 | if (!xchk_should_check_xref(sc, &error, &sc->sa.ino_cur)) |
247 | return; |
248 | } |
249 | } |
250 | |
251 | /* Is this chunk worth checking and cross-referencing? */ |
252 | STATIC bool |
253 | xchk_iallocbt_chunk( |
254 | struct xchk_btree *bs, |
255 | struct xfs_inobt_rec_incore *irec, |
256 | xfs_agino_t agino, |
257 | unsigned int nr_inodes) |
258 | { |
259 | struct xfs_scrub *sc = bs->sc; |
260 | struct xfs_mount *mp = bs->cur->bc_mp; |
261 | struct xfs_perag *pag = bs->cur->bc_ag.pag; |
262 | xfs_agblock_t agbno; |
263 | xfs_extlen_t len; |
264 | |
265 | agbno = XFS_AGINO_TO_AGBNO(mp, agino); |
266 | len = XFS_B_TO_FSB(mp, nr_inodes * mp->m_sb.sb_inodesize); |
267 | |
268 | if (!xfs_verify_agbext(pag, agbno, len)) |
269 | xchk_btree_set_corrupt(bs->sc, bs->cur, 0); |
270 | |
271 | if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) |
272 | return false; |
273 | |
274 | xchk_xref_is_used_space(sc, agbno, len); |
275 | if (sc->sm->sm_type == XFS_SCRUB_TYPE_INOBT) |
276 | xchk_inobt_chunk_xref_finobt(sc, irec, agino, nr_inodes); |
277 | else |
278 | xchk_finobt_chunk_xref_inobt(sc, irec, agino, nr_inodes); |
279 | xchk_xref_is_only_owned_by(sc, agbno, len, &XFS_RMAP_OINFO_INODES); |
280 | xchk_xref_is_not_shared(sc, agbno, len); |
281 | xchk_xref_is_not_cow_staging(sc, agbno, len); |
282 | return true; |
283 | } |
284 | |
285 | /* |
286 | * Check that an inode's allocation status matches ir_free in the inobt |
287 | * record. First we try querying the in-core inode state, and if the inode |
288 | * isn't loaded we examine the on-disk inode directly. |
289 | * |
290 | * Since there can be 1:M and M:1 mappings between inobt records and inode |
291 | * clusters, we pass in the inode location information as an inobt record; |
292 | * the index of an inode cluster within the inobt record (as well as the |
293 | * cluster buffer itself); and the index of the inode within the cluster. |
294 | * |
295 | * @irec is the inobt record. |
296 | * @irec_ino is the inode offset from the start of the record. |
297 | * @dip is the on-disk inode. |
298 | */ |
299 | STATIC int |
300 | xchk_iallocbt_check_cluster_ifree( |
301 | struct xchk_btree *bs, |
302 | struct xfs_inobt_rec_incore *irec, |
303 | unsigned int irec_ino, |
304 | struct xfs_dinode *dip) |
305 | { |
306 | struct xfs_mount *mp = bs->cur->bc_mp; |
307 | xfs_ino_t fsino; |
308 | xfs_agino_t agino; |
309 | bool irec_free; |
310 | bool ino_inuse; |
311 | bool freemask_ok; |
312 | int error = 0; |
313 | |
314 | if (xchk_should_terminate(bs->sc, &error)) |
315 | return error; |
316 | |
317 | /* |
318 | * Given an inobt record and the offset of an inode from the start of |
319 | * the record, compute which fs inode we're talking about. |
320 | */ |
321 | agino = irec->ir_startino + irec_ino; |
322 | fsino = XFS_AGINO_TO_INO(mp, bs->cur->bc_ag.pag->pag_agno, agino); |
323 | irec_free = (irec->ir_free & XFS_INOBT_MASK(irec_ino)); |
324 | |
325 | if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC || |
326 | (dip->di_version >= 3 && be64_to_cpu(dip->di_ino) != fsino)) { |
327 | xchk_btree_set_corrupt(bs->sc, bs->cur, 0); |
328 | goto out; |
329 | } |
330 | |
331 | error = xchk_inode_is_allocated(bs->sc, agino, &ino_inuse); |
332 | if (error == -ENODATA) { |
333 | /* Not cached, just read the disk buffer */ |
334 | freemask_ok = irec_free ^ !!(dip->di_mode); |
335 | if (!(bs->sc->flags & XCHK_TRY_HARDER) && !freemask_ok) |
336 | return -EDEADLOCK; |
337 | } else if (error < 0) { |
338 | /* |
339 | * Inode is only half assembled, or there was an IO error, |
340 | * or the verifier failed, so don't bother trying to check. |
341 | * The inode scrubber can deal with this. |
342 | */ |
343 | goto out; |
344 | } else { |
345 | /* Inode is all there. */ |
346 | freemask_ok = irec_free ^ ino_inuse; |
347 | } |
348 | if (!freemask_ok) |
349 | xchk_btree_set_corrupt(bs->sc, bs->cur, 0); |
350 | out: |
351 | return 0; |
352 | } |
353 | |
354 | /* |
355 | * Check that the holemask and freemask of a hypothetical inode cluster match |
356 | * what's actually on disk. If sparse inodes are enabled, the cluster does |
357 | * not actually have to map to inodes if the corresponding holemask bit is set. |
358 | * |
359 | * @cluster_base is the first inode in the cluster within the @irec. |
360 | */ |
361 | STATIC int |
362 | xchk_iallocbt_check_cluster( |
363 | struct xchk_btree *bs, |
364 | struct xfs_inobt_rec_incore *irec, |
365 | unsigned int cluster_base) |
366 | { |
367 | struct xfs_imap imap; |
368 | struct xfs_mount *mp = bs->cur->bc_mp; |
369 | struct xfs_buf *cluster_bp; |
370 | unsigned int nr_inodes; |
371 | xfs_agnumber_t agno = bs->cur->bc_ag.pag->pag_agno; |
372 | xfs_agblock_t agbno; |
373 | unsigned int cluster_index; |
374 | uint16_t cluster_mask = 0; |
375 | uint16_t ir_holemask; |
376 | int error = 0; |
377 | |
378 | nr_inodes = min_t(unsigned int, XFS_INODES_PER_CHUNK, |
379 | M_IGEO(mp)->inodes_per_cluster); |
380 | |
381 | /* Map this inode cluster */ |
382 | agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino + cluster_base); |
383 | |
384 | /* Compute a bitmask for this cluster that can be used for holemask. */ |
385 | for (cluster_index = 0; |
386 | cluster_index < nr_inodes; |
387 | cluster_index += XFS_INODES_PER_HOLEMASK_BIT) |
388 | cluster_mask |= XFS_INOBT_MASK((cluster_base + cluster_index) / |
389 | XFS_INODES_PER_HOLEMASK_BIT); |
390 | |
391 | /* |
392 | * Map the first inode of this cluster to a buffer and offset. |
393 | * Be careful about inobt records that don't align with the start of |
394 | * the inode buffer when block sizes are large enough to hold multiple |
395 | * inode chunks. When this happens, cluster_base will be zero but |
396 | * ir_startino can be large enough to make im_boffset nonzero. |
397 | */ |
398 | ir_holemask = (irec->ir_holemask & cluster_mask); |
399 | imap.im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno); |
400 | imap.im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster); |
401 | imap.im_boffset = XFS_INO_TO_OFFSET(mp, irec->ir_startino) << |
402 | mp->m_sb.sb_inodelog; |
403 | |
404 | if (imap.im_boffset != 0 && cluster_base != 0) { |
405 | ASSERT(imap.im_boffset == 0 || cluster_base == 0); |
406 | xchk_btree_set_corrupt(bs->sc, bs->cur, 0); |
407 | return 0; |
408 | } |
409 | |
410 | trace_xchk_iallocbt_check_cluster(mp, agno, irec->ir_startino, |
411 | imap.im_blkno, imap.im_len, cluster_base, nr_inodes, |
412 | cluster_mask, ir_holemask, |
413 | XFS_INO_TO_OFFSET(mp, irec->ir_startino + |
414 | cluster_base)); |
415 | |
416 | /* The whole cluster must be a hole or not a hole. */ |
417 | if (ir_holemask != cluster_mask && ir_holemask != 0) { |
418 | xchk_btree_set_corrupt(bs->sc, bs->cur, 0); |
419 | return 0; |
420 | } |
421 | |
422 | /* If any part of this is a hole, skip it. */ |
423 | if (ir_holemask) { |
424 | xchk_xref_is_not_owned_by(bs->sc, agbno, |
425 | M_IGEO(mp)->blocks_per_cluster, |
426 | &XFS_RMAP_OINFO_INODES); |
427 | return 0; |
428 | } |
429 | |
430 | xchk_xref_is_only_owned_by(bs->sc, agbno, M_IGEO(mp)->blocks_per_cluster, |
431 | &XFS_RMAP_OINFO_INODES); |
432 | |
433 | /* Grab the inode cluster buffer. */ |
434 | error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap, &cluster_bp); |
435 | if (!xchk_btree_xref_process_error(bs->sc, bs->cur, 0, &error)) |
436 | return error; |
437 | |
438 | /* Check free status of each inode within this cluster. */ |
439 | for (cluster_index = 0; cluster_index < nr_inodes; cluster_index++) { |
440 | struct xfs_dinode *dip; |
441 | |
442 | if (imap.im_boffset >= BBTOB(cluster_bp->b_length)) { |
443 | xchk_btree_set_corrupt(bs->sc, bs->cur, 0); |
444 | break; |
445 | } |
446 | |
447 | dip = xfs_buf_offset(cluster_bp, imap.im_boffset); |
448 | error = xchk_iallocbt_check_cluster_ifree(bs, irec, |
449 | cluster_base + cluster_index, dip); |
450 | if (error) |
451 | break; |
452 | imap.im_boffset += mp->m_sb.sb_inodesize; |
453 | } |
454 | |
455 | xfs_trans_brelse(bs->cur->bc_tp, cluster_bp); |
456 | return error; |
457 | } |
458 | |
459 | /* |
460 | * For all the inode clusters that could map to this inobt record, make sure |
461 | * that the holemask makes sense and that the allocation status of each inode |
462 | * matches the freemask. |
463 | */ |
464 | STATIC int |
465 | xchk_iallocbt_check_clusters( |
466 | struct xchk_btree *bs, |
467 | struct xfs_inobt_rec_incore *irec) |
468 | { |
469 | unsigned int cluster_base; |
470 | int error = 0; |
471 | |
472 | /* |
473 | * For the common case where this inobt record maps to multiple inode |
474 | * clusters this will call _check_cluster for each cluster. |
475 | * |
476 | * For the case that multiple inobt records map to a single cluster, |
477 | * this will call _check_cluster once. |
478 | */ |
479 | for (cluster_base = 0; |
480 | cluster_base < XFS_INODES_PER_CHUNK; |
481 | cluster_base += M_IGEO(bs->sc->mp)->inodes_per_cluster) { |
482 | error = xchk_iallocbt_check_cluster(bs, irec, cluster_base); |
483 | if (error) |
484 | break; |
485 | } |
486 | |
487 | return error; |
488 | } |
489 | |
490 | /* |
491 | * Make sure this inode btree record is aligned properly. Because a fs block |
492 | * contains multiple inodes, we check that the inobt record is aligned to the |
493 | * correct inode, not just the correct block on disk. This results in a finer |
494 | * grained corruption check. |
495 | */ |
496 | STATIC void |
497 | xchk_iallocbt_rec_alignment( |
498 | struct xchk_btree *bs, |
499 | struct xfs_inobt_rec_incore *irec) |
500 | { |
501 | struct xfs_mount *mp = bs->sc->mp; |
502 | struct xchk_iallocbt *iabt = bs->private; |
503 | struct xfs_ino_geometry *igeo = M_IGEO(mp); |
504 | |
505 | /* |
506 | * finobt records have different positioning requirements than inobt |
507 | * records: each finobt record must have a corresponding inobt record. |
508 | * That is checked in the xref function, so for now we only catch the |
509 | * obvious case where the record isn't at all aligned properly. |
510 | * |
511 | * Note that if a fs block contains more than a single chunk of inodes, |
512 | * we will have finobt records only for those chunks containing free |
513 | * inodes, and therefore expect chunk alignment of finobt records. |
514 | * Otherwise, we expect that the finobt record is aligned to the |
515 | * cluster alignment as told by the superblock. |
516 | */ |
517 | if (xfs_btree_is_fino(bs->cur->bc_ops)) { |
518 | unsigned int imask; |
519 | |
520 | imask = min_t(unsigned int, XFS_INODES_PER_CHUNK, |
521 | igeo->cluster_align_inodes) - 1; |
522 | if (irec->ir_startino & imask) |
523 | xchk_btree_set_corrupt(bs->sc, bs->cur, 0); |
524 | return; |
525 | } |
526 | |
527 | if (iabt->next_startino != NULLAGINO) { |
528 | /* |
529 | * We're midway through a cluster of inodes that is mapped by |
530 | * multiple inobt records. Did we get the record for the next |
531 | * irec in the sequence? |
532 | */ |
533 | if (irec->ir_startino != iabt->next_startino) { |
534 | xchk_btree_set_corrupt(bs->sc, bs->cur, 0); |
535 | return; |
536 | } |
537 | |
538 | iabt->next_startino += XFS_INODES_PER_CHUNK; |
539 | |
540 | /* Are we done with the cluster? */ |
541 | if (iabt->next_startino >= iabt->next_cluster_ino) { |
542 | iabt->next_startino = NULLAGINO; |
543 | iabt->next_cluster_ino = NULLAGINO; |
544 | } |
545 | return; |
546 | } |
547 | |
548 | /* inobt records must be aligned to cluster and inoalignmnt size. */ |
549 | if (irec->ir_startino & (igeo->cluster_align_inodes - 1)) { |
550 | xchk_btree_set_corrupt(bs->sc, bs->cur, 0); |
551 | return; |
552 | } |
553 | |
554 | if (irec->ir_startino & (igeo->inodes_per_cluster - 1)) { |
555 | xchk_btree_set_corrupt(bs->sc, bs->cur, 0); |
556 | return; |
557 | } |
558 | |
559 | if (igeo->inodes_per_cluster <= XFS_INODES_PER_CHUNK) |
560 | return; |
561 | |
562 | /* |
563 | * If this is the start of an inode cluster that can be mapped by |
564 | * multiple inobt records, the next inobt record must follow exactly |
565 | * after this one. |
566 | */ |
567 | iabt->next_startino = irec->ir_startino + XFS_INODES_PER_CHUNK; |
568 | iabt->next_cluster_ino = irec->ir_startino + igeo->inodes_per_cluster; |
569 | } |
570 | |
571 | /* Scrub an inobt/finobt record. */ |
572 | STATIC int |
573 | xchk_iallocbt_rec( |
574 | struct xchk_btree *bs, |
575 | const union xfs_btree_rec *rec) |
576 | { |
577 | struct xfs_mount *mp = bs->cur->bc_mp; |
578 | struct xchk_iallocbt *iabt = bs->private; |
579 | struct xfs_inobt_rec_incore irec; |
580 | uint64_t holes; |
581 | xfs_agino_t agino; |
582 | int holecount; |
583 | int i; |
584 | int error = 0; |
585 | uint16_t holemask; |
586 | |
587 | xfs_inobt_btrec_to_irec(mp, rec, &irec); |
588 | if (xfs_inobt_check_irec(bs->cur->bc_ag.pag, &irec) != NULL) { |
589 | xchk_btree_set_corrupt(bs->sc, bs->cur, 0); |
590 | return 0; |
591 | } |
592 | |
593 | agino = irec.ir_startino; |
594 | |
595 | xchk_iallocbt_rec_alignment(bs, &irec); |
596 | if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) |
597 | goto out; |
598 | |
599 | iabt->inodes += irec.ir_count; |
600 | |
601 | /* Handle non-sparse inodes */ |
602 | if (!xfs_inobt_issparse(irec.ir_holemask)) { |
603 | if (irec.ir_count != XFS_INODES_PER_CHUNK) |
604 | xchk_btree_set_corrupt(bs->sc, bs->cur, 0); |
605 | |
606 | if (!xchk_iallocbt_chunk(bs, &irec, agino, |
607 | XFS_INODES_PER_CHUNK)) |
608 | goto out; |
609 | goto check_clusters; |
610 | } |
611 | |
612 | /* Check each chunk of a sparse inode cluster. */ |
613 | holemask = irec.ir_holemask; |
614 | holecount = 0; |
615 | holes = ~xfs_inobt_irec_to_allocmask(&irec); |
616 | if ((holes & irec.ir_free) != holes || |
617 | irec.ir_freecount > irec.ir_count) |
618 | xchk_btree_set_corrupt(bs->sc, bs->cur, 0); |
619 | |
620 | for (i = 0; i < XFS_INOBT_HOLEMASK_BITS; i++) { |
621 | if (holemask & 1) |
622 | holecount += XFS_INODES_PER_HOLEMASK_BIT; |
623 | else if (!xchk_iallocbt_chunk(bs, &irec, agino, |
624 | XFS_INODES_PER_HOLEMASK_BIT)) |
625 | goto out; |
626 | holemask >>= 1; |
627 | agino += XFS_INODES_PER_HOLEMASK_BIT; |
628 | } |
629 | |
630 | if (holecount > XFS_INODES_PER_CHUNK || |
631 | holecount + irec.ir_count != XFS_INODES_PER_CHUNK) |
632 | xchk_btree_set_corrupt(bs->sc, bs->cur, 0); |
633 | |
634 | check_clusters: |
635 | if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) |
636 | goto out; |
637 | |
638 | error = xchk_iallocbt_check_clusters(bs, &irec); |
639 | if (error) |
640 | goto out; |
641 | |
642 | out: |
643 | return error; |
644 | } |
645 | |
646 | /* |
647 | * Make sure the inode btrees are as large as the rmap thinks they are. |
648 | * Don't bother if we're missing btree cursors, as we're already corrupt. |
649 | */ |
650 | STATIC void |
651 | xchk_iallocbt_xref_rmap_btreeblks( |
652 | struct xfs_scrub *sc) |
653 | { |
654 | xfs_filblks_t blocks; |
655 | xfs_extlen_t inobt_blocks = 0; |
656 | xfs_extlen_t finobt_blocks = 0; |
657 | int error; |
658 | |
659 | if (!sc->sa.ino_cur || !sc->sa.rmap_cur || |
660 | (xfs_has_finobt(sc->mp) && !sc->sa.fino_cur) || |
661 | xchk_skip_xref(sc->sm)) |
662 | return; |
663 | |
664 | /* Check that we saw as many inobt blocks as the rmap says. */ |
665 | error = xfs_btree_count_blocks(sc->sa.ino_cur, &inobt_blocks); |
666 | if (!xchk_process_error(sc, 0, 0, &error)) |
667 | return; |
668 | |
669 | if (sc->sa.fino_cur) { |
670 | error = xfs_btree_count_blocks(sc->sa.fino_cur, &finobt_blocks); |
671 | if (!xchk_process_error(sc, 0, 0, &error)) |
672 | return; |
673 | } |
674 | |
675 | error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, |
676 | &XFS_RMAP_OINFO_INOBT, &blocks); |
677 | if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur)) |
678 | return; |
679 | if (blocks != inobt_blocks + finobt_blocks) |
680 | xchk_btree_set_corrupt(sc, sc->sa.ino_cur, 0); |
681 | } |
682 | |
683 | /* |
684 | * Make sure that the inobt records point to the same number of blocks as |
685 | * the rmap says are owned by inodes. |
686 | */ |
687 | STATIC void |
688 | xchk_iallocbt_xref_rmap_inodes( |
689 | struct xfs_scrub *sc, |
690 | unsigned long long inodes) |
691 | { |
692 | xfs_filblks_t blocks; |
693 | xfs_filblks_t inode_blocks; |
694 | int error; |
695 | |
696 | if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm)) |
697 | return; |
698 | |
699 | /* Check that we saw as many inode blocks as the rmap knows about. */ |
700 | error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, |
701 | &XFS_RMAP_OINFO_INODES, &blocks); |
702 | if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur)) |
703 | return; |
704 | inode_blocks = XFS_B_TO_FSB(sc->mp, inodes * sc->mp->m_sb.sb_inodesize); |
705 | if (blocks != inode_blocks) |
706 | xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); |
707 | } |
708 | |
709 | /* Scrub one of the inode btrees for some AG. */ |
710 | int |
711 | xchk_iallocbt( |
712 | struct xfs_scrub *sc) |
713 | { |
714 | struct xfs_btree_cur *cur; |
715 | struct xchk_iallocbt iabt = { |
716 | .inodes = 0, |
717 | .next_startino = NULLAGINO, |
718 | .next_cluster_ino = NULLAGINO, |
719 | }; |
720 | int error; |
721 | |
722 | switch (sc->sm->sm_type) { |
723 | case XFS_SCRUB_TYPE_INOBT: |
724 | cur = sc->sa.ino_cur; |
725 | break; |
726 | case XFS_SCRUB_TYPE_FINOBT: |
727 | cur = sc->sa.fino_cur; |
728 | break; |
729 | default: |
730 | ASSERT(0); |
731 | return -EIO; |
732 | } |
733 | |
734 | error = xchk_btree(sc, cur, xchk_iallocbt_rec, &XFS_RMAP_OINFO_INOBT, |
735 | &iabt); |
736 | if (error) |
737 | return error; |
738 | |
739 | xchk_iallocbt_xref_rmap_btreeblks(sc); |
740 | |
741 | /* |
742 | * If we're scrubbing the inode btree, inode_blocks is the number of |
743 | * blocks pointed to by all the inode chunk records. Therefore, we |
744 | * should compare to the number of inode chunk blocks that the rmap |
745 | * knows about. We can't do this for the finobt since it only points |
746 | * to inode chunks with free inodes. |
747 | */ |
748 | if (sc->sm->sm_type == XFS_SCRUB_TYPE_INOBT) |
749 | xchk_iallocbt_xref_rmap_inodes(sc, iabt.inodes); |
750 | return error; |
751 | } |
752 | |
753 | /* See if an inode btree has (or doesn't have) an inode chunk record. */ |
754 | static inline void |
755 | xchk_xref_inode_check( |
756 | struct xfs_scrub *sc, |
757 | xfs_agblock_t agbno, |
758 | xfs_extlen_t len, |
759 | struct xfs_btree_cur **icur, |
760 | enum xbtree_recpacking expected) |
761 | { |
762 | enum xbtree_recpacking outcome; |
763 | int error; |
764 | |
765 | if (!(*icur) || xchk_skip_xref(sc->sm)) |
766 | return; |
767 | |
768 | error = xfs_ialloc_has_inodes_at_extent(*icur, agbno, len, &outcome); |
769 | if (!xchk_should_check_xref(sc, &error, icur)) |
770 | return; |
771 | if (outcome != expected) |
772 | xchk_btree_xref_set_corrupt(sc, *icur, 0); |
773 | } |
774 | |
775 | /* xref check that the extent is not covered by inodes */ |
776 | void |
777 | xchk_xref_is_not_inode_chunk( |
778 | struct xfs_scrub *sc, |
779 | xfs_agblock_t agbno, |
780 | xfs_extlen_t len) |
781 | { |
782 | xchk_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, |
783 | XBTREE_RECPACKING_EMPTY); |
784 | xchk_xref_inode_check(sc, agbno, len, &sc->sa.fino_cur, |
785 | XBTREE_RECPACKING_EMPTY); |
786 | } |
787 | |
788 | /* xref check that the extent is covered by inodes */ |
789 | void |
790 | xchk_xref_is_inode_chunk( |
791 | struct xfs_scrub *sc, |
792 | xfs_agblock_t agbno, |
793 | xfs_extlen_t len) |
794 | { |
795 | xchk_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, |
796 | XBTREE_RECPACKING_FULL); |
797 | } |
798 | |