1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * Copyright (C) 2017-2023 Oracle. All Rights Reserved. |
4 | * Author: Darrick J. Wong <djwong@kernel.org> |
5 | */ |
6 | #include "xfs.h" |
7 | #include "xfs_fs.h" |
8 | #include "xfs_shared.h" |
9 | #include "xfs_format.h" |
10 | #include "xfs_trans_resv.h" |
11 | #include "xfs_mount.h" |
12 | #include "xfs_log_format.h" |
13 | #include "xfs_trans.h" |
14 | #include "xfs_inode.h" |
15 | #include "xfs_dir2.h" |
16 | #include "xfs_dir2_priv.h" |
17 | #include "xfs_attr_leaf.h" |
18 | #include "scrub/scrub.h" |
19 | #include "scrub/common.h" |
20 | #include "scrub/trace.h" |
21 | #include "scrub/dabtree.h" |
22 | |
23 | /* Directory/Attribute Btree */ |
24 | |
25 | /* |
26 | * Check for da btree operation errors. See the section about handling |
27 | * operational errors in common.c. |
28 | */ |
29 | bool |
30 | xchk_da_process_error( |
31 | struct xchk_da_btree *ds, |
32 | int level, |
33 | int *error) |
34 | { |
35 | struct xfs_scrub *sc = ds->sc; |
36 | |
37 | if (*error == 0) |
38 | return true; |
39 | |
40 | switch (*error) { |
41 | case -EDEADLOCK: |
42 | case -ECHRNG: |
43 | /* Used to restart an op with deadlock avoidance. */ |
44 | trace_xchk_deadlock_retry(sc->ip, sc->sm, *error); |
45 | break; |
46 | case -EFSBADCRC: |
47 | case -EFSCORRUPTED: |
48 | /* Note the badness but don't abort. */ |
49 | sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; |
50 | *error = 0; |
51 | fallthrough; |
52 | default: |
53 | trace_xchk_file_op_error(sc, ds->dargs.whichfork, |
54 | xfs_dir2_da_to_db(ds->dargs.geo, |
55 | ds->state->path.blk[level].blkno), |
56 | *error, __return_address); |
57 | break; |
58 | } |
59 | return false; |
60 | } |
61 | |
62 | /* |
63 | * Check for da btree corruption. See the section about handling |
64 | * operational errors in common.c. |
65 | */ |
66 | void |
67 | xchk_da_set_corrupt( |
68 | struct xchk_da_btree *ds, |
69 | int level) |
70 | { |
71 | struct xfs_scrub *sc = ds->sc; |
72 | |
73 | sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; |
74 | |
75 | trace_xchk_fblock_error(sc, ds->dargs.whichfork, |
76 | xfs_dir2_da_to_db(ds->dargs.geo, |
77 | ds->state->path.blk[level].blkno), |
78 | __return_address); |
79 | } |
80 | |
81 | static struct xfs_da_node_entry * |
82 | xchk_da_btree_node_entry( |
83 | struct xchk_da_btree *ds, |
84 | int level) |
85 | { |
86 | struct xfs_da_state_blk *blk = &ds->state->path.blk[level]; |
87 | struct xfs_da3_icnode_hdr hdr; |
88 | |
89 | ASSERT(blk->magic == XFS_DA_NODE_MAGIC); |
90 | |
91 | xfs_da3_node_hdr_from_disk(ds->sc->mp, &hdr, blk->bp->b_addr); |
92 | return hdr.btree + blk->index; |
93 | } |
94 | |
95 | /* Scrub a da btree hash (key). */ |
96 | int |
97 | xchk_da_btree_hash( |
98 | struct xchk_da_btree *ds, |
99 | int level, |
100 | __be32 *hashp) |
101 | { |
102 | struct xfs_da_node_entry *entry; |
103 | xfs_dahash_t hash; |
104 | xfs_dahash_t parent_hash; |
105 | |
106 | /* Is this hash in order? */ |
107 | hash = be32_to_cpu(*hashp); |
108 | if (hash < ds->hashes[level]) |
109 | xchk_da_set_corrupt(ds, level); |
110 | ds->hashes[level] = hash; |
111 | |
112 | if (level == 0) |
113 | return 0; |
114 | |
115 | /* Is this hash no larger than the parent hash? */ |
116 | entry = xchk_da_btree_node_entry(ds, level: level - 1); |
117 | parent_hash = be32_to_cpu(entry->hashval); |
118 | if (parent_hash < hash) |
119 | xchk_da_set_corrupt(ds, level); |
120 | |
121 | return 0; |
122 | } |
123 | |
124 | /* |
125 | * Check a da btree pointer. Returns true if it's ok to use this |
126 | * pointer. |
127 | */ |
128 | STATIC bool |
129 | xchk_da_btree_ptr_ok( |
130 | struct xchk_da_btree *ds, |
131 | int level, |
132 | xfs_dablk_t blkno) |
133 | { |
134 | if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) { |
135 | xchk_da_set_corrupt(ds, level); |
136 | return false; |
137 | } |
138 | |
139 | return true; |
140 | } |
141 | |
142 | /* |
143 | * The da btree scrubber can handle leaf1 blocks as a degenerate |
144 | * form of leafn blocks. Since the regular da code doesn't handle |
145 | * leaf1, we must multiplex the verifiers. |
146 | */ |
147 | static void |
148 | xchk_da_btree_read_verify( |
149 | struct xfs_buf *bp) |
150 | { |
151 | struct xfs_da_blkinfo *info = bp->b_addr; |
152 | |
153 | switch (be16_to_cpu(info->magic)) { |
154 | case XFS_DIR2_LEAF1_MAGIC: |
155 | case XFS_DIR3_LEAF1_MAGIC: |
156 | bp->b_ops = &xfs_dir3_leaf1_buf_ops; |
157 | bp->b_ops->verify_read(bp); |
158 | return; |
159 | default: |
160 | /* |
161 | * xfs_da3_node_buf_ops already know how to handle |
162 | * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks. |
163 | */ |
164 | bp->b_ops = &xfs_da3_node_buf_ops; |
165 | bp->b_ops->verify_read(bp); |
166 | return; |
167 | } |
168 | } |
169 | static void |
170 | xchk_da_btree_write_verify( |
171 | struct xfs_buf *bp) |
172 | { |
173 | struct xfs_da_blkinfo *info = bp->b_addr; |
174 | |
175 | switch (be16_to_cpu(info->magic)) { |
176 | case XFS_DIR2_LEAF1_MAGIC: |
177 | case XFS_DIR3_LEAF1_MAGIC: |
178 | bp->b_ops = &xfs_dir3_leaf1_buf_ops; |
179 | bp->b_ops->verify_write(bp); |
180 | return; |
181 | default: |
182 | /* |
183 | * xfs_da3_node_buf_ops already know how to handle |
184 | * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks. |
185 | */ |
186 | bp->b_ops = &xfs_da3_node_buf_ops; |
187 | bp->b_ops->verify_write(bp); |
188 | return; |
189 | } |
190 | } |
191 | static void * |
192 | xchk_da_btree_verify( |
193 | struct xfs_buf *bp) |
194 | { |
195 | struct xfs_da_blkinfo *info = bp->b_addr; |
196 | |
197 | switch (be16_to_cpu(info->magic)) { |
198 | case XFS_DIR2_LEAF1_MAGIC: |
199 | case XFS_DIR3_LEAF1_MAGIC: |
200 | bp->b_ops = &xfs_dir3_leaf1_buf_ops; |
201 | return bp->b_ops->verify_struct(bp); |
202 | default: |
203 | bp->b_ops = &xfs_da3_node_buf_ops; |
204 | return bp->b_ops->verify_struct(bp); |
205 | } |
206 | } |
207 | |
208 | static const struct xfs_buf_ops xchk_da_btree_buf_ops = { |
209 | .name = "xchk_da_btree" , |
210 | .verify_read = xchk_da_btree_read_verify, |
211 | .verify_write = xchk_da_btree_write_verify, |
212 | .verify_struct = xchk_da_btree_verify, |
213 | }; |
214 | |
215 | /* Check a block's sibling. */ |
216 | STATIC int |
217 | xchk_da_btree_block_check_sibling( |
218 | struct xchk_da_btree *ds, |
219 | int level, |
220 | int direction, |
221 | xfs_dablk_t sibling) |
222 | { |
223 | struct xfs_da_state_path *path = &ds->state->path; |
224 | struct xfs_da_state_path *altpath = &ds->state->altpath; |
225 | int retval; |
226 | int plevel; |
227 | int error; |
228 | |
229 | memcpy(altpath, path, sizeof(ds->state->altpath)); |
230 | |
231 | /* |
232 | * If the pointer is null, we shouldn't be able to move the upper |
233 | * level pointer anywhere. |
234 | */ |
235 | if (sibling == 0) { |
236 | error = xfs_da3_path_shift(ds->state, altpath, direction, |
237 | false, &retval); |
238 | if (error == 0 && retval == 0) |
239 | xchk_da_set_corrupt(ds, level); |
240 | error = 0; |
241 | goto out; |
242 | } |
243 | |
244 | /* Move the alternate cursor one block in the direction given. */ |
245 | error = xfs_da3_path_shift(ds->state, altpath, direction, false, |
246 | &retval); |
247 | if (!xchk_da_process_error(ds, level, &error)) |
248 | goto out; |
249 | if (retval) { |
250 | xchk_da_set_corrupt(ds, level); |
251 | goto out; |
252 | } |
253 | if (altpath->blk[level].bp) |
254 | xchk_buffer_recheck(ds->sc, altpath->blk[level].bp); |
255 | |
256 | /* Compare upper level pointer to sibling pointer. */ |
257 | if (altpath->blk[level].blkno != sibling) |
258 | xchk_da_set_corrupt(ds, level); |
259 | |
260 | out: |
261 | /* Free all buffers in the altpath that aren't referenced from path. */ |
262 | for (plevel = 0; plevel < altpath->active; plevel++) { |
263 | if (altpath->blk[plevel].bp == NULL || |
264 | (plevel < path->active && |
265 | altpath->blk[plevel].bp == path->blk[plevel].bp)) |
266 | continue; |
267 | |
268 | xfs_trans_brelse(ds->dargs.trans, altpath->blk[plevel].bp); |
269 | altpath->blk[plevel].bp = NULL; |
270 | } |
271 | |
272 | return error; |
273 | } |
274 | |
275 | /* Check a block's sibling pointers. */ |
276 | STATIC int |
277 | xchk_da_btree_block_check_siblings( |
278 | struct xchk_da_btree *ds, |
279 | int level, |
280 | struct xfs_da_blkinfo *hdr) |
281 | { |
282 | xfs_dablk_t forw; |
283 | xfs_dablk_t back; |
284 | int error = 0; |
285 | |
286 | forw = be32_to_cpu(hdr->forw); |
287 | back = be32_to_cpu(hdr->back); |
288 | |
289 | /* Top level blocks should not have sibling pointers. */ |
290 | if (level == 0) { |
291 | if (forw != 0 || back != 0) |
292 | xchk_da_set_corrupt(ds, level); |
293 | return 0; |
294 | } |
295 | |
296 | /* |
297 | * Check back (left) and forw (right) pointers. These functions |
298 | * absorb error codes for us. |
299 | */ |
300 | error = xchk_da_btree_block_check_sibling(ds, level, 0, back); |
301 | if (error) |
302 | goto out; |
303 | error = xchk_da_btree_block_check_sibling(ds, level, 1, forw); |
304 | |
305 | out: |
306 | memset(&ds->state->altpath, 0, sizeof(ds->state->altpath)); |
307 | return error; |
308 | } |
309 | |
310 | /* Load a dir/attribute block from a btree. */ |
311 | STATIC int |
312 | xchk_da_btree_block( |
313 | struct xchk_da_btree *ds, |
314 | int level, |
315 | xfs_dablk_t blkno) |
316 | { |
317 | struct xfs_da_state_blk *blk; |
318 | struct xfs_da_intnode *node; |
319 | struct xfs_da_node_entry *btree; |
320 | struct xfs_da3_blkinfo *hdr3; |
321 | struct xfs_da_args *dargs = &ds->dargs; |
322 | struct xfs_inode *ip = ds->dargs.dp; |
323 | xfs_ino_t owner; |
324 | int *pmaxrecs; |
325 | struct xfs_da3_icnode_hdr nodehdr; |
326 | int error = 0; |
327 | |
328 | blk = &ds->state->path.blk[level]; |
329 | ds->state->path.active = level + 1; |
330 | |
331 | /* Release old block. */ |
332 | if (blk->bp) { |
333 | xfs_trans_brelse(dargs->trans, blk->bp); |
334 | blk->bp = NULL; |
335 | } |
336 | |
337 | /* Check the pointer. */ |
338 | blk->blkno = blkno; |
339 | if (!xchk_da_btree_ptr_ok(ds, level, blkno)) |
340 | goto out_nobuf; |
341 | |
342 | /* Read the buffer. */ |
343 | error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno, |
344 | XFS_DABUF_MAP_HOLE_OK, &blk->bp, dargs->whichfork, |
345 | &xchk_da_btree_buf_ops); |
346 | if (!xchk_da_process_error(ds, level, &error)) |
347 | goto out_nobuf; |
348 | if (blk->bp) |
349 | xchk_buffer_recheck(ds->sc, blk->bp); |
350 | |
351 | /* |
352 | * We didn't find a dir btree root block, which means that |
353 | * there's no LEAF1/LEAFN tree (at least not where it's supposed |
354 | * to be), so jump out now. |
355 | */ |
356 | if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 && |
357 | blk->bp == NULL) |
358 | goto out_nobuf; |
359 | |
360 | /* It's /not/ ok for attr trees not to have a da btree. */ |
361 | if (blk->bp == NULL) { |
362 | xchk_da_set_corrupt(ds, level); |
363 | goto out_nobuf; |
364 | } |
365 | |
366 | hdr3 = blk->bp->b_addr; |
367 | blk->magic = be16_to_cpu(hdr3->hdr.magic); |
368 | pmaxrecs = &ds->maxrecs[level]; |
369 | |
370 | /* We only started zeroing the header on v5 filesystems. */ |
371 | if (xfs_has_crc(ds->sc->mp) && hdr3->hdr.pad) |
372 | xchk_da_set_corrupt(ds, level); |
373 | |
374 | /* Check the owner. */ |
375 | if (xfs_has_crc(ip->i_mount)) { |
376 | owner = be64_to_cpu(hdr3->owner); |
377 | if (owner != ip->i_ino) |
378 | xchk_da_set_corrupt(ds, level); |
379 | } |
380 | |
381 | /* Check the siblings. */ |
382 | error = xchk_da_btree_block_check_siblings(ds, level, &hdr3->hdr); |
383 | if (error) |
384 | goto out; |
385 | |
386 | /* Interpret the buffer. */ |
387 | switch (blk->magic) { |
388 | case XFS_ATTR_LEAF_MAGIC: |
389 | case XFS_ATTR3_LEAF_MAGIC: |
390 | xfs_trans_buf_set_type(dargs->trans, blk->bp, |
391 | XFS_BLFT_ATTR_LEAF_BUF); |
392 | blk->magic = XFS_ATTR_LEAF_MAGIC; |
393 | blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs); |
394 | if (ds->tree_level != 0) |
395 | xchk_da_set_corrupt(ds, level); |
396 | break; |
397 | case XFS_DIR2_LEAFN_MAGIC: |
398 | case XFS_DIR3_LEAFN_MAGIC: |
399 | xfs_trans_buf_set_type(dargs->trans, blk->bp, |
400 | XFS_BLFT_DIR_LEAFN_BUF); |
401 | blk->magic = XFS_DIR2_LEAFN_MAGIC; |
402 | blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs); |
403 | if (ds->tree_level != 0) |
404 | xchk_da_set_corrupt(ds, level); |
405 | break; |
406 | case XFS_DIR2_LEAF1_MAGIC: |
407 | case XFS_DIR3_LEAF1_MAGIC: |
408 | xfs_trans_buf_set_type(dargs->trans, blk->bp, |
409 | XFS_BLFT_DIR_LEAF1_BUF); |
410 | blk->magic = XFS_DIR2_LEAF1_MAGIC; |
411 | blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs); |
412 | if (ds->tree_level != 0) |
413 | xchk_da_set_corrupt(ds, level); |
414 | break; |
415 | case XFS_DA_NODE_MAGIC: |
416 | case XFS_DA3_NODE_MAGIC: |
417 | xfs_trans_buf_set_type(dargs->trans, blk->bp, |
418 | XFS_BLFT_DA_NODE_BUF); |
419 | blk->magic = XFS_DA_NODE_MAGIC; |
420 | node = blk->bp->b_addr; |
421 | xfs_da3_node_hdr_from_disk(ip->i_mount, &nodehdr, node); |
422 | btree = nodehdr.btree; |
423 | *pmaxrecs = nodehdr.count; |
424 | blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval); |
425 | if (level == 0) { |
426 | if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) { |
427 | xchk_da_set_corrupt(ds, level); |
428 | goto out_freebp; |
429 | } |
430 | ds->tree_level = nodehdr.level; |
431 | } else { |
432 | if (ds->tree_level != nodehdr.level) { |
433 | xchk_da_set_corrupt(ds, level); |
434 | goto out_freebp; |
435 | } |
436 | } |
437 | |
438 | /* XXX: Check hdr3.pad32 once we know how to fix it. */ |
439 | break; |
440 | default: |
441 | xchk_da_set_corrupt(ds, level); |
442 | goto out_freebp; |
443 | } |
444 | |
445 | /* |
446 | * If we've been handed a block that is below the dabtree root, does |
447 | * its hashval match what the parent block expected to see? |
448 | */ |
449 | if (level > 0) { |
450 | struct xfs_da_node_entry *key; |
451 | |
452 | key = xchk_da_btree_node_entry(ds, level: level - 1); |
453 | if (be32_to_cpu(key->hashval) != blk->hashval) { |
454 | xchk_da_set_corrupt(ds, level); |
455 | goto out_freebp; |
456 | } |
457 | } |
458 | |
459 | out: |
460 | return error; |
461 | out_freebp: |
462 | xfs_trans_brelse(dargs->trans, blk->bp); |
463 | blk->bp = NULL; |
464 | out_nobuf: |
465 | blk->blkno = 0; |
466 | return error; |
467 | } |
468 | |
469 | /* Visit all nodes and leaves of a da btree. */ |
470 | int |
471 | xchk_da_btree( |
472 | struct xfs_scrub *sc, |
473 | int whichfork, |
474 | xchk_da_btree_rec_fn scrub_fn, |
475 | void *private) |
476 | { |
477 | struct xchk_da_btree *ds; |
478 | struct xfs_mount *mp = sc->mp; |
479 | struct xfs_da_state_blk *blks; |
480 | struct xfs_da_node_entry *key; |
481 | xfs_dablk_t blkno; |
482 | int level; |
483 | int error; |
484 | |
485 | /* Skip short format data structures; no btree to scan. */ |
486 | if (!xfs_ifork_has_extents(xfs_ifork_ptr(sc->ip, whichfork))) |
487 | return 0; |
488 | |
489 | /* Set up initial da state. */ |
490 | ds = kzalloc(sizeof(struct xchk_da_btree), XCHK_GFP_FLAGS); |
491 | if (!ds) |
492 | return -ENOMEM; |
493 | ds->dargs.dp = sc->ip; |
494 | ds->dargs.whichfork = whichfork; |
495 | ds->dargs.trans = sc->tp; |
496 | ds->dargs.op_flags = XFS_DA_OP_OKNOENT; |
497 | ds->state = xfs_da_state_alloc(&ds->dargs); |
498 | ds->sc = sc; |
499 | ds->private = private; |
500 | if (whichfork == XFS_ATTR_FORK) { |
501 | ds->dargs.geo = mp->m_attr_geo; |
502 | ds->lowest = 0; |
503 | ds->highest = 0; |
504 | } else { |
505 | ds->dargs.geo = mp->m_dir_geo; |
506 | ds->lowest = ds->dargs.geo->leafblk; |
507 | ds->highest = ds->dargs.geo->freeblk; |
508 | } |
509 | blkno = ds->lowest; |
510 | level = 0; |
511 | |
512 | /* Find the root of the da tree, if present. */ |
513 | blks = ds->state->path.blk; |
514 | error = xchk_da_btree_block(ds, level, blkno); |
515 | if (error) |
516 | goto out_state; |
517 | /* |
518 | * We didn't find a block at ds->lowest, which means that there's |
519 | * no LEAF1/LEAFN tree (at least not where it's supposed to be), |
520 | * so jump out now. |
521 | */ |
522 | if (blks[level].bp == NULL) |
523 | goto out_state; |
524 | |
525 | blks[level].index = 0; |
526 | while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) { |
527 | /* Handle leaf block. */ |
528 | if (blks[level].magic != XFS_DA_NODE_MAGIC) { |
529 | /* End of leaf, pop back towards the root. */ |
530 | if (blks[level].index >= ds->maxrecs[level]) { |
531 | if (level > 0) |
532 | blks[level - 1].index++; |
533 | ds->tree_level++; |
534 | level--; |
535 | continue; |
536 | } |
537 | |
538 | /* Dispatch record scrubbing. */ |
539 | error = scrub_fn(ds, level); |
540 | if (error) |
541 | break; |
542 | if (xchk_should_terminate(sc, &error) || |
543 | (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) |
544 | break; |
545 | |
546 | blks[level].index++; |
547 | continue; |
548 | } |
549 | |
550 | |
551 | /* End of node, pop back towards the root. */ |
552 | if (blks[level].index >= ds->maxrecs[level]) { |
553 | if (level > 0) |
554 | blks[level - 1].index++; |
555 | ds->tree_level++; |
556 | level--; |
557 | continue; |
558 | } |
559 | |
560 | /* Hashes in order for scrub? */ |
561 | key = xchk_da_btree_node_entry(ds, level); |
562 | error = xchk_da_btree_hash(ds, level, &key->hashval); |
563 | if (error) |
564 | goto out; |
565 | |
566 | /* Drill another level deeper. */ |
567 | blkno = be32_to_cpu(key->before); |
568 | level++; |
569 | if (level >= XFS_DA_NODE_MAXDEPTH) { |
570 | /* Too deep! */ |
571 | xchk_da_set_corrupt(ds, level: level - 1); |
572 | break; |
573 | } |
574 | ds->tree_level--; |
575 | error = xchk_da_btree_block(ds, level, blkno); |
576 | if (error) |
577 | goto out; |
578 | if (blks[level].bp == NULL) |
579 | goto out; |
580 | |
581 | blks[level].index = 0; |
582 | } |
583 | |
584 | out: |
585 | /* Release all the buffers we're tracking. */ |
586 | for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) { |
587 | if (blks[level].bp == NULL) |
588 | continue; |
589 | xfs_trans_brelse(sc->tp, blks[level].bp); |
590 | blks[level].bp = NULL; |
591 | } |
592 | |
593 | out_state: |
594 | xfs_da_state_free(ds->state); |
595 | kfree(ds); |
596 | return error; |
597 | } |
598 | |