1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * Copyright (C) 2017-2023 Oracle. All Rights Reserved. |
4 | * Author: Darrick J. Wong <djwong@kernel.org> |
5 | */ |
6 | #include "xfs.h" |
7 | #include "xfs_fs.h" |
8 | #include "xfs_shared.h" |
9 | #include "xfs_format.h" |
10 | #include "xfs_trans_resv.h" |
11 | #include "xfs_mount.h" |
12 | #include "xfs_btree.h" |
13 | #include "xfs_log_format.h" |
14 | #include "xfs_trans.h" |
15 | #include "xfs_ag.h" |
16 | #include "xfs_inode.h" |
17 | #include "xfs_ialloc.h" |
18 | #include "xfs_icache.h" |
19 | #include "xfs_da_format.h" |
20 | #include "xfs_reflink.h" |
21 | #include "xfs_rmap.h" |
22 | #include "xfs_bmap_util.h" |
23 | #include "xfs_rtbitmap.h" |
24 | #include "scrub/scrub.h" |
25 | #include "scrub/common.h" |
26 | #include "scrub/btree.h" |
27 | #include "scrub/trace.h" |
28 | #include "scrub/repair.h" |
29 | |
30 | /* Prepare the attached inode for scrubbing. */ |
31 | static inline int |
32 | xchk_prepare_iscrub( |
33 | struct xfs_scrub *sc) |
34 | { |
35 | int error; |
36 | |
37 | xchk_ilock(sc, XFS_IOLOCK_EXCL); |
38 | |
39 | error = xchk_trans_alloc(sc, 0); |
40 | if (error) |
41 | return error; |
42 | |
43 | error = xchk_ino_dqattach(sc); |
44 | if (error) |
45 | return error; |
46 | |
47 | xchk_ilock(sc, XFS_ILOCK_EXCL); |
48 | return 0; |
49 | } |
50 | |
/*
 * Install the scrub-by-handle inode @ip into @sc, then lock it and set up the
 * transaction via xchk_prepare_iscrub().  Returns the install error, if any,
 * otherwise the result of the preparation step.
 */
static inline int
xchk_install_handle_iscrub(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip)
{
	int			ret = xchk_install_handle_inode(sc, ip);

	return ret ? ret : xchk_prepare_iscrub(sc);
}
65 | |
/*
 * Grab total control of the inode metadata.  In the best case, we grab the
 * incore inode and take all locks on it.  If the incore inode cannot be
 * constructed due to corruption problems, lock the AGI so that we can single
 * step the loading process to fix everything that can go wrong.
 *
 * The inode to scrub is named by sc->sm->sm_ino; zero, or the inode number of
 * the file that the scrub was invoked on, selects that already-open file.
 *
 * Return values, as established by the code below:
 *   - the result of xchk_prepare_iscrub() / xchk_install_handle_iscrub() when
 *     an incore inode could be obtained;
 *   - 0 with the scrub transaction still allocated and no inode installed by
 *     this function, when the inode mapping lookup succeeded but both igets
 *     failed;
 *   - -ENOENT when the inode number is an internal metadata inode, fails
 *     xfs_verify_ino(), or the lookups report that the inode is gone;
 *   - -ECANCELED if no AGI buffer was returned by the second iget attempt;
 *   - -EFSCORRUPTED if the perag for the inode's AG cannot be obtained;
 *   - any other error from the helpers, after emitting a trace_xchk_op_error
 *     tracepoint.
 */
int
xchk_setup_inode(
	struct xfs_scrub	*sc)
{
	struct xfs_imap		imap;
	struct xfs_inode	*ip;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_inode	*ip_in = XFS_I(file_inode(sc->file));
	struct xfs_buf		*agi_bp;
	struct xfs_perag	*pag;
	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, sc->sm->sm_ino);
	int			error;

	if (xchk_need_intent_drain(sc))
		xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);

	/* We want to scan the opened inode, so lock it and exit. */
	if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) {
		error = xchk_install_live_inode(sc, ip_in);
		if (error)
			return error;

		return xchk_prepare_iscrub(sc);
	}

	/* Reject internal metadata files and obviously bad inode numbers. */
	if (xfs_internal_inum(mp, sc->sm->sm_ino))
		return -ENOENT;
	if (!xfs_verify_ino(sc->mp, sc->sm->sm_ino))
		return -ENOENT;

	/*
	 * Try a safe untrusted iget.  No transaction has been allocated yet,
	 * so the failure paths here must not go through out_cancel/out_gone.
	 */
	error = xchk_iget_safe(sc, sc->sm->sm_ino, &ip);
	if (!error)
		return xchk_install_handle_iscrub(sc, ip);
	if (error == -ENOENT)
		return error;
	if (error != -EFSCORRUPTED && error != -EFSBADCRC && error != -EINVAL)
		goto out_error;

	/*
	 * EINVAL with IGET_UNTRUSTED probably means one of several things:
	 * userspace gave us an inode number that doesn't correspond to fs
	 * space; the inode btree lacks a record for this inode; or there is
	 * a record, and it says this inode is free.
	 *
	 * EFSCORRUPTED/EFSBADCRC could mean that the inode was mappable, but
	 * some other metadata corruption (e.g. inode forks) prevented
	 * instantiation of the incore inode.  Or it could mean the inobt is
	 * corrupt.
	 *
	 * We want to look up this inode in the inobt directly to distinguish
	 * three different scenarios: (1) the inobt says the inode is free,
	 * in which case there's nothing to do; (2) the inobt is corrupt so we
	 * should flag the corruption and exit to userspace to let it fix the
	 * inobt; and (3) the inobt says the inode is allocated, but loading it
	 * failed due to corruption.
	 *
	 * Allocate a transaction and grab the AGI to prevent inobt activity in
	 * this AG.  Retry the iget in case someone allocated a new inode after
	 * the first iget failed.
	 */
	error = xchk_trans_alloc(sc, 0);
	if (error)
		goto out_error;

	error = xchk_iget_agi(sc, sc->sm->sm_ino, &agi_bp, &ip);
	if (error == 0) {
		/* Actually got the incore inode, so install it and proceed. */
		xchk_trans_cancel(sc);
		return xchk_install_handle_iscrub(sc, ip);
	}
	if (error == -ENOENT)
		goto out_gone;
	if (error != -EFSCORRUPTED && error != -EFSBADCRC && error != -EINVAL)
		goto out_cancel;

	/*
	 * Ensure that we have protected against inode allocation/freeing.
	 * NOTE(review): agi_bp has no initializer here, so this test relies on
	 * xchk_iget_agi() storing to it even when it fails -- confirm.
	 */
	if (agi_bp == NULL) {
		ASSERT(agi_bp != NULL);
		error = -ECANCELED;
		goto out_cancel;
	}

	/*
	 * Untrusted iget failed a second time.  Let's try an inobt lookup.
	 * If the inobt doesn't think this is an allocated inode then we'll
	 * return ENOENT to signal that the check can be skipped.
	 *
	 * If the lookup signals corruption, we'll mark this inode corrupt and
	 * exit to userspace.  There's little chance of fixing anything until
	 * the inobt is straightened out, but there's nothing we can do here.
	 *
	 * If the lookup encounters a runtime error, exit to userspace.
	 */
	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sc->sm->sm_ino));
	if (!pag) {
		error = -EFSCORRUPTED;
		goto out_cancel;
	}

	/* The perag reference is only needed for the duration of the lookup. */
	error = xfs_imap(pag, sc->tp, sc->sm->sm_ino, &imap,
			XFS_IGET_UNTRUSTED);
	xfs_perag_put(pag);
	if (error == -EINVAL || error == -ENOENT)
		goto out_gone;
	if (error)
		goto out_cancel;

	/*
	 * The lookup succeeded.  Chances are the ondisk inode is corrupt and
	 * preventing iget from reading it.  Retain the scrub transaction and
	 * the AGI buffer to prevent anyone from allocating or freeing inodes.
	 * This ensures that we preserve the inconsistency between the inobt
	 * saying the inode is allocated and the icache being unable to load
	 * the inode until we can flag the corruption in xchk_inode.  The
	 * scrub function has to note the corruption, since we're not really
	 * supposed to do that from the setup function.  Save the mapping to
	 * make repairs to the ondisk inode buffer.
	 */
	if (xchk_could_repair(sc))
		xrep_setup_inode(sc, &imap);
	return 0;

out_cancel:
	/* Runtime or corruption error after the transaction was allocated. */
	xchk_trans_cancel(sc);
out_error:
	/* Record the failure against the AG and AG block of the inode number. */
	trace_xchk_op_error(sc, agno, XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
			error, __return_address);
	return error;
out_gone:
	/* The file is gone, so there's nothing to check. */
	xchk_trans_cancel(sc);
	return -ENOENT;
}
207 | |
208 | /* Inode core */ |
209 | |
210 | /* Validate di_extsize hint. */ |
211 | STATIC void |
212 | xchk_inode_extsize( |
213 | struct xfs_scrub *sc, |
214 | struct xfs_dinode *dip, |
215 | xfs_ino_t ino, |
216 | uint16_t mode, |
217 | uint16_t flags) |
218 | { |
219 | xfs_failaddr_t fa; |
220 | uint32_t value = be32_to_cpu(dip->di_extsize); |
221 | |
222 | fa = xfs_inode_validate_extsize(sc->mp, value, mode, flags); |
223 | if (fa) |
224 | xchk_ino_set_corrupt(sc, ino); |
225 | |
226 | /* |
227 | * XFS allows a sysadmin to change the rt extent size when adding a rt |
228 | * section to a filesystem after formatting. If there are any |
229 | * directories with extszinherit and rtinherit set, the hint could |
230 | * become misaligned with the new rextsize. The verifier doesn't check |
231 | * this, because we allow rtinherit directories even without an rt |
232 | * device. Flag this as an administrative warning since we will clean |
233 | * this up eventually. |
234 | */ |
235 | if ((flags & XFS_DIFLAG_RTINHERIT) && |
236 | (flags & XFS_DIFLAG_EXTSZINHERIT) && |
237 | xfs_extlen_to_rtxmod(sc->mp, value) > 0) |
238 | xchk_ino_set_warning(sc, ino); |
239 | } |
240 | |
241 | /* |
242 | * Validate di_cowextsize hint. |
243 | * |
244 | * The rules are documented at xfs_ioctl_setattr_check_cowextsize(). |
245 | * These functions must be kept in sync with each other. |
246 | */ |
247 | STATIC void |
248 | xchk_inode_cowextsize( |
249 | struct xfs_scrub *sc, |
250 | struct xfs_dinode *dip, |
251 | xfs_ino_t ino, |
252 | uint16_t mode, |
253 | uint16_t flags, |
254 | uint64_t flags2) |
255 | { |
256 | xfs_failaddr_t fa; |
257 | |
258 | fa = xfs_inode_validate_cowextsize(sc->mp, |
259 | be32_to_cpu(dip->di_cowextsize), mode, flags, |
260 | flags2); |
261 | if (fa) |
262 | xchk_ino_set_corrupt(sc, ino); |
263 | } |
264 | |
265 | /* Make sure the di_flags make sense for the inode. */ |
266 | STATIC void |
267 | xchk_inode_flags( |
268 | struct xfs_scrub *sc, |
269 | struct xfs_dinode *dip, |
270 | xfs_ino_t ino, |
271 | uint16_t mode, |
272 | uint16_t flags) |
273 | { |
274 | struct xfs_mount *mp = sc->mp; |
275 | |
276 | /* di_flags are all taken, last bit cannot be used */ |
277 | if (flags & ~XFS_DIFLAG_ANY) |
278 | goto bad; |
279 | |
280 | /* rt flags require rt device */ |
281 | if ((flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp) |
282 | goto bad; |
283 | |
284 | /* new rt bitmap flag only valid for rbmino */ |
285 | if ((flags & XFS_DIFLAG_NEWRTBM) && ino != mp->m_sb.sb_rbmino) |
286 | goto bad; |
287 | |
288 | /* directory-only flags */ |
289 | if ((flags & (XFS_DIFLAG_RTINHERIT | |
290 | XFS_DIFLAG_EXTSZINHERIT | |
291 | XFS_DIFLAG_PROJINHERIT | |
292 | XFS_DIFLAG_NOSYMLINKS)) && |
293 | !S_ISDIR(mode)) |
294 | goto bad; |
295 | |
296 | /* file-only flags */ |
297 | if ((flags & (XFS_DIFLAG_REALTIME | FS_XFLAG_EXTSIZE)) && |
298 | !S_ISREG(mode)) |
299 | goto bad; |
300 | |
301 | /* filestreams and rt make no sense */ |
302 | if ((flags & XFS_DIFLAG_FILESTREAM) && (flags & XFS_DIFLAG_REALTIME)) |
303 | goto bad; |
304 | |
305 | return; |
306 | bad: |
307 | xchk_ino_set_corrupt(sc, ino); |
308 | } |
309 | |
310 | /* Make sure the di_flags2 make sense for the inode. */ |
311 | STATIC void |
312 | xchk_inode_flags2( |
313 | struct xfs_scrub *sc, |
314 | struct xfs_dinode *dip, |
315 | xfs_ino_t ino, |
316 | uint16_t mode, |
317 | uint16_t flags, |
318 | uint64_t flags2) |
319 | { |
320 | struct xfs_mount *mp = sc->mp; |
321 | |
322 | /* Unknown di_flags2 could be from a future kernel */ |
323 | if (flags2 & ~XFS_DIFLAG2_ANY) |
324 | xchk_ino_set_warning(sc, ino); |
325 | |
326 | /* reflink flag requires reflink feature */ |
327 | if ((flags2 & XFS_DIFLAG2_REFLINK) && |
328 | !xfs_has_reflink(mp)) |
329 | goto bad; |
330 | |
331 | /* cowextsize flag is checked w.r.t. mode separately */ |
332 | |
333 | /* file/dir-only flags */ |
334 | if ((flags2 & XFS_DIFLAG2_DAX) && !(S_ISREG(mode) || S_ISDIR(mode))) |
335 | goto bad; |
336 | |
337 | /* file-only flags */ |
338 | if ((flags2 & XFS_DIFLAG2_REFLINK) && !S_ISREG(mode)) |
339 | goto bad; |
340 | |
341 | /* realtime and reflink make no sense, currently */ |
342 | if ((flags & XFS_DIFLAG_REALTIME) && (flags2 & XFS_DIFLAG2_REFLINK)) |
343 | goto bad; |
344 | |
345 | /* no bigtime iflag without the bigtime feature */ |
346 | if (xfs_dinode_has_bigtime(dip) && !xfs_has_bigtime(mp)) |
347 | goto bad; |
348 | |
349 | /* no large extent counts without the filesystem feature */ |
350 | if ((flags2 & XFS_DIFLAG2_NREXT64) && !xfs_has_large_extent_counts(mp)) |
351 | goto bad; |
352 | |
353 | return; |
354 | bad: |
355 | xchk_ino_set_corrupt(sc, ino); |
356 | } |
357 | |
358 | static inline void |
359 | xchk_dinode_nsec( |
360 | struct xfs_scrub *sc, |
361 | xfs_ino_t ino, |
362 | struct xfs_dinode *dip, |
363 | const xfs_timestamp_t ts) |
364 | { |
365 | struct timespec64 tv; |
366 | |
367 | tv = xfs_inode_from_disk_ts(dip, ts); |
368 | if (tv.tv_nsec < 0 || tv.tv_nsec >= NSEC_PER_SEC) |
369 | xchk_ino_set_corrupt(sc, ino); |
370 | } |
371 | |
/*
 * Scrub all the ondisk inode fields.
 *
 * @sc:  scrub context; supplies the mount and accumulates the verdict
 * @dip: ondisk-format inode to examine
 * @ino: inode number that findings are reported against
 *
 * Each check below records its finding (corrupt, warning or preen) through
 * the xchk_ino_set_* helpers and then carries on with the next field.  The
 * only early exit is an unrecognized di_version, after which none of the
 * remaining fields are examined.
 */
STATIC void
xchk_dinode(
	struct xfs_scrub	*sc,
	struct xfs_dinode	*dip,
	xfs_ino_t		ino)
{
	struct xfs_mount	*mp = sc->mp;
	size_t			fork_recs;
	unsigned long long	isize;
	uint64_t		flags2;
	xfs_extnum_t		nextents;
	xfs_extnum_t		naextents;
	prid_t			prid;
	uint16_t		flags;
	uint16_t		mode;

	/* di_flags2 is only read from v3 and newer inodes. */
	flags = be16_to_cpu(dip->di_flags);
	if (dip->di_version >= 3)
		flags2 = be64_to_cpu(dip->di_flags2);
	else
		flags2 = 0;

	/* di_mode */
	mode = be16_to_cpu(dip->di_mode);
	switch (mode & S_IFMT) {
	case S_IFLNK:
	case S_IFREG:
	case S_IFDIR:
	case S_IFCHR:
	case S_IFBLK:
	case S_IFIFO:
	case S_IFSOCK:
		/* mode is recognized */
		break;
	default:
		xchk_ino_set_corrupt(sc, ino);
		break;
	}

	/* v1/v2 fields */
	switch (dip->di_version) {
	case 1:
		/*
		 * We autoconvert v1 inodes into v2 inodes on writeout,
		 * so just mark this inode for preening.
		 */
		xchk_ino_set_preen(sc, ino);
		prid = 0;
		break;
	case 2:
	case 3:
		/* di_onlink must be zero on v2 and v3 inodes. */
		if (dip->di_onlink != 0)
			xchk_ino_set_corrupt(sc, ino);

		/* A zero mode is only flagged when an incore inode exists. */
		if (dip->di_mode == 0 && sc->ip)
			xchk_ino_set_corrupt(sc, ino);

		/* The high projid half needs the projid32 feature. */
		if (dip->di_projid_hi != 0 &&
		    !xfs_has_projid32(mp))
			xchk_ino_set_corrupt(sc, ino);

		prid = be16_to_cpu(dip->di_projid_lo);
		break;
	default:
		/* Unknown version: flag it and skip all remaining checks. */
		xchk_ino_set_corrupt(sc, ino);
		return;
	}

	/* Fold in the upper 16 bits of the project id when supported. */
	if (xfs_has_projid32(mp))
		prid |= (prid_t)be16_to_cpu(dip->di_projid_hi) << 16;

	/*
	 * di_uid/di_gid -- -1 isn't invalid, but there's no way that
	 * userspace could have created that.
	 */
	if (dip->di_uid == cpu_to_be32(-1U) ||
	    dip->di_gid == cpu_to_be32(-1U))
		xchk_ino_set_warning(sc, ino);

	/*
	 * project id of -1 isn't supposed to be valid, but the kernel didn't
	 * always validate that.
	 */
	if (prid == -1U)
		xchk_ino_set_warning(sc, ino);

	/* di_format: each data fork format is only valid for some file types */
	switch (dip->di_format) {
	case XFS_DINODE_FMT_DEV:
		if (!S_ISCHR(mode) && !S_ISBLK(mode) &&
		    !S_ISFIFO(mode) && !S_ISSOCK(mode))
			xchk_ino_set_corrupt(sc, ino);
		break;
	case XFS_DINODE_FMT_LOCAL:
		if (!S_ISDIR(mode) && !S_ISLNK(mode))
			xchk_ino_set_corrupt(sc, ino);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		if (!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode))
			xchk_ino_set_corrupt(sc, ino);
		break;
	case XFS_DINODE_FMT_BTREE:
		if (!S_ISREG(mode) && !S_ISDIR(mode))
			xchk_ino_set_corrupt(sc, ino);
		break;
	case XFS_DINODE_FMT_UUID:
	default:
		/* The UUID format and any unknown format are always flagged. */
		xchk_ino_set_corrupt(sc, ino);
		break;
	}

	/* di_[amc]time.nsec */
	xchk_dinode_nsec(sc, ino, dip, dip->di_atime);
	xchk_dinode_nsec(sc, ino, dip, dip->di_mtime);
	xchk_dinode_nsec(sc, ino, dip, dip->di_ctime);

	/*
	 * di_size.  xfs_dinode_verify checks for things that screw up
	 * the VFS such as the upper bit being set and zero-length
	 * symlinks/directories, but we can do more here.
	 */
	isize = be64_to_cpu(dip->di_size);
	if (isize & (1ULL << 63))
		xchk_ino_set_corrupt(sc, ino);

	/* Devices, fifos, and sockets must have zero size */
	if (!S_ISDIR(mode) && !S_ISREG(mode) && !S_ISLNK(mode) && isize != 0)
		xchk_ino_set_corrupt(sc, ino);

	/* Directories can't be larger than the data section size (32G) */
	if (S_ISDIR(mode) && (isize == 0 || isize >= XFS_DIR2_SPACE_SIZE))
		xchk_ino_set_corrupt(sc, ino);

	/* Symlinks can't be larger than SYMLINK_MAXLEN */
	if (S_ISLNK(mode) && (isize == 0 || isize >= XFS_SYMLINK_MAXLEN))
		xchk_ino_set_corrupt(sc, ino);

	/*
	 * Warn if the running kernel can't handle the kinds of offsets
	 * needed to deal with the file size.  In other words, if the
	 * pagecache can't cache all the blocks in this file due to
	 * overly large offsets, flag the inode for admin review.
	 */
	if (isize > mp->m_super->s_maxbytes)
		xchk_ino_set_warning(sc, ino);

	/* di_nblocks */
	if (flags2 & XFS_DIFLAG2_REFLINK) {
		; /* nblocks can exceed dblocks */
	} else if (flags & XFS_DIFLAG_REALTIME) {
		/*
		 * nblocks is the sum of data extents (in the rtdev),
		 * attr extents (in the datadev), and both forks' bmbt
		 * blocks (in the datadev).  This clumsy check is the
		 * best we can do without cross-referencing with the
		 * inode forks.
		 */
		if (be64_to_cpu(dip->di_nblocks) >=
		    mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks)
			xchk_ino_set_corrupt(sc, ino);
	} else {
		/* Everything else is bounded by the data device size alone. */
		if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks)
			xchk_ino_set_corrupt(sc, ino);
	}

	xchk_inode_flags(sc, dip, ino, mode, flags);

	xchk_inode_extsize(sc, dip, ino, mode, flags);

	nextents = xfs_dfork_data_extents(dip);
	naextents = xfs_dfork_attr_extents(dip);

	/*
	 * di_nextents: fork_recs is how many extent records fit in the data
	 * fork area.  Extents format must not exceed that, btree format must
	 * exceed it, and every other format must have no extents at all.
	 */
	fork_recs = XFS_DFORK_DSIZE(dip, mp) / sizeof(struct xfs_bmbt_rec);
	switch (dip->di_format) {
	case XFS_DINODE_FMT_EXTENTS:
		if (nextents > fork_recs)
			xchk_ino_set_corrupt(sc, ino);
		break;
	case XFS_DINODE_FMT_BTREE:
		if (nextents <= fork_recs)
			xchk_ino_set_corrupt(sc, ino);
		break;
	default:
		if (nextents != 0)
			xchk_ino_set_corrupt(sc, ino);
		break;
	}

	/*
	 * di_forkoff: the attr fork must start inside the inode, and an
	 * inode without an attr fork (forkoff of zero) must have no attr
	 * extents and must have the attr format set to extents.
	 */
	if (XFS_DFORK_BOFF(dip) >= mp->m_sb.sb_inodesize)
		xchk_ino_set_corrupt(sc, ino);
	if (naextents != 0 && dip->di_forkoff == 0)
		xchk_ino_set_corrupt(sc, ino);
	if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS)
		xchk_ino_set_corrupt(sc, ino);

	/* di_aformat */
	if (dip->di_aformat != XFS_DINODE_FMT_LOCAL &&
	    dip->di_aformat != XFS_DINODE_FMT_EXTENTS &&
	    dip->di_aformat != XFS_DINODE_FMT_BTREE)
		xchk_ino_set_corrupt(sc, ino);

	/* di_anextents: same rules as di_nextents, applied to the attr fork */
	fork_recs = XFS_DFORK_ASIZE(dip, mp) / sizeof(struct xfs_bmbt_rec);
	switch (dip->di_aformat) {
	case XFS_DINODE_FMT_EXTENTS:
		if (naextents > fork_recs)
			xchk_ino_set_corrupt(sc, ino);
		break;
	case XFS_DINODE_FMT_BTREE:
		if (naextents <= fork_recs)
			xchk_ino_set_corrupt(sc, ino);
		break;
	default:
		if (naextents != 0)
			xchk_ino_set_corrupt(sc, ino);
	}

	/* Fields that are only checked on v3 and newer inodes. */
	if (dip->di_version >= 3) {
		xchk_dinode_nsec(sc, ino, dip, dip->di_crtime);
		xchk_inode_flags2(sc, dip, ino, mode, flags, flags2);
		xchk_inode_cowextsize(sc, dip, ino, mode, flags,
				flags2);
	}
}
599 | |
600 | /* |
601 | * Make sure the finobt doesn't think this inode is free. |
602 | * We don't have to check the inobt ourselves because we got the inode via |
603 | * IGET_UNTRUSTED, which checks the inobt for us. |
604 | */ |
605 | static void |
606 | xchk_inode_xref_finobt( |
607 | struct xfs_scrub *sc, |
608 | xfs_ino_t ino) |
609 | { |
610 | struct xfs_inobt_rec_incore rec; |
611 | xfs_agino_t agino; |
612 | int has_record; |
613 | int error; |
614 | |
615 | if (!sc->sa.fino_cur || xchk_skip_xref(sc->sm)) |
616 | return; |
617 | |
618 | agino = XFS_INO_TO_AGINO(sc->mp, ino); |
619 | |
620 | /* |
621 | * Try to get the finobt record. If we can't get it, then we're |
622 | * in good shape. |
623 | */ |
624 | error = xfs_inobt_lookup(sc->sa.fino_cur, agino, XFS_LOOKUP_LE, |
625 | &has_record); |
626 | if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) || |
627 | !has_record) |
628 | return; |
629 | |
630 | error = xfs_inobt_get_rec(sc->sa.fino_cur, &rec, &has_record); |
631 | if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) || |
632 | !has_record) |
633 | return; |
634 | |
635 | /* |
636 | * Otherwise, make sure this record either doesn't cover this inode, |
637 | * or that it does but it's marked present. |
638 | */ |
639 | if (rec.ir_startino > agino || |
640 | rec.ir_startino + XFS_INODES_PER_CHUNK <= agino) |
641 | return; |
642 | |
643 | if (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)) |
644 | xchk_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0); |
645 | } |
646 | |
647 | /* Cross reference the inode fields with the forks. */ |
648 | STATIC void |
649 | xchk_inode_xref_bmap( |
650 | struct xfs_scrub *sc, |
651 | struct xfs_dinode *dip) |
652 | { |
653 | xfs_extnum_t nextents; |
654 | xfs_filblks_t count; |
655 | xfs_filblks_t acount; |
656 | int error; |
657 | |
658 | if (xchk_skip_xref(sc->sm)) |
659 | return; |
660 | |
661 | /* Walk all the extents to check nextents/naextents/nblocks. */ |
662 | error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK, |
663 | &nextents, &count); |
664 | if (!xchk_should_check_xref(sc, &error, NULL)) |
665 | return; |
666 | if (nextents < xfs_dfork_data_extents(dip)) |
667 | xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); |
668 | |
669 | error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK, |
670 | &nextents, &acount); |
671 | if (!xchk_should_check_xref(sc, &error, NULL)) |
672 | return; |
673 | if (nextents != xfs_dfork_attr_extents(dip)) |
674 | xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); |
675 | |
676 | /* Check nblocks against the inode. */ |
677 | if (count + acount != be64_to_cpu(dip->di_nblocks)) |
678 | xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); |
679 | } |
680 | |
681 | /* Cross-reference with the other btrees. */ |
682 | STATIC void |
683 | xchk_inode_xref( |
684 | struct xfs_scrub *sc, |
685 | xfs_ino_t ino, |
686 | struct xfs_dinode *dip) |
687 | { |
688 | xfs_agnumber_t agno; |
689 | xfs_agblock_t agbno; |
690 | int error; |
691 | |
692 | if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) |
693 | return; |
694 | |
695 | agno = XFS_INO_TO_AGNO(sc->mp, ino); |
696 | agbno = XFS_INO_TO_AGBNO(sc->mp, ino); |
697 | |
698 | error = xchk_ag_init_existing(sc, agno, &sc->sa); |
699 | if (!xchk_xref_process_error(sc, agno, agbno, &error)) |
700 | goto out_free; |
701 | |
702 | xchk_xref_is_used_space(sc, agbno, 1); |
703 | xchk_inode_xref_finobt(sc, ino); |
704 | xchk_xref_is_only_owned_by(sc, agbno, 1, &XFS_RMAP_OINFO_INODES); |
705 | xchk_xref_is_not_shared(sc, agbno, 1); |
706 | xchk_xref_is_not_cow_staging(sc, agbno, 1); |
707 | xchk_inode_xref_bmap(sc, dip); |
708 | |
709 | out_free: |
710 | xchk_ag_free(sc, &sc->sa); |
711 | } |
712 | |
713 | /* |
714 | * If the reflink iflag disagrees with a scan for shared data fork extents, |
715 | * either flag an error (shared extents w/ no flag) or a preen (flag set w/o |
716 | * any shared extents). We already checked for reflink iflag set on a non |
717 | * reflink filesystem. |
718 | */ |
719 | static void |
720 | xchk_inode_check_reflink_iflag( |
721 | struct xfs_scrub *sc, |
722 | xfs_ino_t ino) |
723 | { |
724 | struct xfs_mount *mp = sc->mp; |
725 | bool has_shared; |
726 | int error; |
727 | |
728 | if (!xfs_has_reflink(mp)) |
729 | return; |
730 | |
731 | error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, |
732 | &has_shared); |
733 | if (!xchk_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino), |
734 | XFS_INO_TO_AGBNO(mp, ino), &error)) |
735 | return; |
736 | if (xfs_is_reflink_inode(sc->ip) && !has_shared) |
737 | xchk_ino_set_preen(sc, ino); |
738 | else if (!xfs_is_reflink_inode(sc->ip) && has_shared) |
739 | xchk_ino_set_corrupt(sc, ino); |
740 | } |
741 | |
742 | /* Scrub an inode. */ |
743 | int |
744 | xchk_inode( |
745 | struct xfs_scrub *sc) |
746 | { |
747 | struct xfs_dinode di; |
748 | int error = 0; |
749 | |
750 | /* |
751 | * If sc->ip is NULL, that means that the setup function called |
752 | * xfs_iget to look up the inode. xfs_iget returned a EFSCORRUPTED |
753 | * and a NULL inode, so flag the corruption error and return. |
754 | */ |
755 | if (!sc->ip) { |
756 | xchk_ino_set_corrupt(sc, sc->sm->sm_ino); |
757 | return 0; |
758 | } |
759 | |
760 | /* Scrub the inode core. */ |
761 | xfs_inode_to_disk(sc->ip, &di, 0); |
762 | xchk_dinode(sc, &di, sc->ip->i_ino); |
763 | if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) |
764 | goto out; |
765 | |
766 | /* |
767 | * Look for discrepancies between file's data blocks and the reflink |
768 | * iflag. We already checked the iflag against the file mode when |
769 | * we scrubbed the dinode. |
770 | */ |
771 | if (S_ISREG(VFS_I(sc->ip)->i_mode)) |
772 | xchk_inode_check_reflink_iflag(sc, sc->ip->i_ino); |
773 | |
774 | xchk_inode_xref(sc, sc->ip->i_ino, &di); |
775 | out: |
776 | return error; |
777 | } |
778 | |