1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * Copyright (C) 2017-2023 Oracle. All Rights Reserved. |
4 | * Author: Darrick J. Wong <djwong@kernel.org> |
5 | */ |
6 | #include "xfs.h" |
7 | #include "xfs_fs.h" |
8 | #include "xfs_shared.h" |
9 | #include "xfs_format.h" |
10 | #include "xfs_trans_resv.h" |
11 | #include "xfs_mount.h" |
12 | #include "xfs_log_format.h" |
13 | #include "xfs_trans.h" |
14 | #include "xfs_inode.h" |
15 | #include "xfs_quota.h" |
16 | #include "xfs_qm.h" |
17 | #include "xfs_scrub.h" |
18 | #include "xfs_buf_mem.h" |
19 | #include "xfs_rmap.h" |
20 | #include "scrub/scrub.h" |
21 | #include "scrub/common.h" |
22 | #include "scrub/trace.h" |
23 | #include "scrub/repair.h" |
24 | #include "scrub/health.h" |
25 | #include "scrub/stats.h" |
26 | #include "scrub/xfile.h" |
27 | |
28 | /* |
29 | * Online Scrub and Repair |
30 | * |
31 | * Traditionally, XFS (the kernel driver) did not know how to check or |
32 | * repair on-disk data structures. That task was left to the xfs_check |
33 | * and xfs_repair tools, both of which require taking the filesystem |
34 | * offline for a thorough but time consuming examination. Online |
35 | * scrub & repair, on the other hand, enables us to check the metadata |
36 | * for obvious errors while carefully stepping around the filesystem's |
37 | * ongoing operations, locking rules, etc. |
38 | * |
39 | * Given that most XFS metadata consist of records stored in a btree, |
40 | * most of the checking functions iterate the btree blocks themselves |
41 | * looking for irregularities. When a record block is encountered, each |
42 | * record can be checked for obviously bad values. Record values can |
43 | * also be cross-referenced against other btrees to look for potential |
44 | * misunderstandings between pieces of metadata. |
45 | * |
46 | * It is expected that the checkers responsible for per-AG metadata |
47 | * structures will lock the AG headers (AGI, AGF, AGFL), iterate the |
48 | * metadata structure, and perform any relevant cross-referencing before |
49 | * unlocking the AG and returning the results to userspace. These |
50 | * scrubbers must not keep an AG locked for too long to avoid tying up |
51 | * the block and inode allocators. |
52 | * |
53 | * Block maps and b-trees rooted in an inode present a special challenge |
54 | * because they can involve extents from any AG. The general scrubber |
55 | * structure of lock -> check -> xref -> unlock still holds, but AG |
56 | * locking order rules /must/ be obeyed to avoid deadlocks. The |
57 | * ordering rule, of course, is that we must lock in increasing AG |
58 | * order. Helper functions are provided to track which AG headers we've |
59 | * already locked. If we detect an imminent locking order violation, we |
60 | * can signal a potential deadlock, in which case the scrubber can jump |
61 | * out to the top level, lock all the AGs in order, and retry the scrub. |
62 | * |
63 | * For file data (directories, extended attributes, symlinks) scrub, we |
64 | * can simply lock the inode and walk the data. For btree data |
65 | * (directories and attributes) we follow the same btree-scrubbing |
66 | * strategy outlined previously to check the records. |
67 | * |
68 | * We use a bit of trickery with transactions to avoid buffer deadlocks |
69 | * if there is a cycle in the metadata. The basic problem is that |
70 | * travelling down a btree involves locking the current buffer at each |
71 | * tree level. If a pointer should somehow point back to a buffer that |
72 | * we've already examined, we will deadlock due to the second buffer |
73 | * locking attempt. Note however that grabbing a buffer in transaction |
74 | * context links the locked buffer to the transaction. If we try to |
75 | * re-grab the buffer in the context of the same transaction, we avoid |
76 | * the second lock attempt and continue. Between the verifier and the |
77 | * scrubber, something will notice that something is amiss and report |
78 | * the corruption. Therefore, each scrubber will allocate an empty |
79 | * transaction, attach buffers to it, and cancel the transaction at the |
80 | * end of the scrub run. Cancelling a non-dirty transaction simply |
81 | * unlocks the buffers. |
82 | * |
 * There are several pieces of data that scrub can communicate to
 * userspace.  The first is the error code (errno), which can be used to
 * communicate operational errors in performing the scrub.  The rest are
 * flags that can be set in the scrub context.  If the data structure
 * itself is corrupt, the CORRUPT flag will be set.  If the metadata is
 * correct but otherwise suboptimal, the PREEN flag will be set.  The
 * flags that report cross-referencing results are described below.
90 | * |
91 | * We perform secondary validation of filesystem metadata by |
92 | * cross-referencing every record with all other available metadata. |
93 | * For example, for block mapping extents, we verify that there are no |
94 | * records in the free space and inode btrees corresponding to that |
95 | * space extent and that there is a corresponding entry in the reverse |
96 | * mapping btree. Inconsistent metadata is noted by setting the |
97 | * XCORRUPT flag; btree query function errors are noted by setting the |
98 | * XFAIL flag and deleting the cursor to prevent further attempts to |
99 | * cross-reference with a defective btree. |
100 | * |
101 | * If a piece of metadata proves corrupt or suboptimal, the userspace |
102 | * program can ask the kernel to apply some tender loving care (TLC) to |
103 | * the metadata object by setting the REPAIR flag and re-calling the |
104 | * scrub ioctl. "Corruption" is defined by metadata violating the |
105 | * on-disk specification; operations cannot continue if the violation is |
106 | * left untreated. It is possible for XFS to continue if an object is |
107 | * "suboptimal", however performance may be degraded. Repairs are |
108 | * usually performed by rebuilding the metadata entirely out of |
109 | * redundant metadata. Optimizing, on the other hand, can sometimes be |
110 | * done without rebuilding entire structures. |
111 | * |
112 | * Generally speaking, the repair code has the following code structure: |
113 | * Lock -> scrub -> repair -> commit -> re-lock -> re-scrub -> unlock. |
114 | * The first check helps us figure out if we need to rebuild or simply |
115 | * optimize the structure so that the rebuild knows what to do. The |
116 | * second check evaluates the completeness of the repair; that is what |
117 | * is reported to userspace. |
118 | * |
119 | * A quick note on symbol prefixes: |
120 | * - "xfs_" are general XFS symbols. |
121 | * - "xchk_" are symbols related to metadata checking. |
122 | * - "xrep_" are symbols related to metadata repair. |
123 | * - "xfs_scrub_" are symbols that tie online fsck to the rest of XFS. |
124 | */ |
125 | |
/*
 * Scrub probe -- the cheapest possible scrub request.
 *
 * xfs_scrub issues this to find out whether the running kernel is willing
 * to scrub (and repair) metadata on a given mountpoint.  No metadata is
 * examined here: by the time this function runs the ioctl inputs have been
 * validated and the filesystem has been prepared for a scrub (or repair),
 * so all that remains is to return straight away.  Userspace looks at the
 * resulting errno and structure state to decide, in broad terms, whether
 * scrub/repair are supported.
 */
static int
xchk_probe(
	struct xfs_scrub	*sc)
{
	int			ret = 0;

	/*
	 * The only way to fail is for xchk_should_terminate() to say so, in
	 * which case the error it filled in is handed back to the caller.
	 */
	if (!xchk_should_terminate(sc, &ret))
		return 0;

	return ret;
}
147 | |
148 | /* Scrub setup and teardown */ |
149 | |
150 | static inline void |
151 | xchk_fsgates_disable( |
152 | struct xfs_scrub *sc) |
153 | { |
154 | if (!(sc->flags & XCHK_FSGATES_ALL)) |
155 | return; |
156 | |
157 | trace_xchk_fsgates_disable(sc, sc->flags & XCHK_FSGATES_ALL); |
158 | |
159 | if (sc->flags & XCHK_FSGATES_DRAIN) |
160 | xfs_drain_wait_disable(); |
161 | |
162 | if (sc->flags & XCHK_FSGATES_QUOTA) |
163 | xfs_dqtrx_hook_disable(); |
164 | |
165 | if (sc->flags & XCHK_FSGATES_DIRENTS) |
166 | xfs_dir_hook_disable(); |
167 | |
168 | if (sc->flags & XCHK_FSGATES_RMAP) |
169 | xfs_rmap_hook_disable(); |
170 | |
171 | sc->flags &= ~XCHK_FSGATES_ALL; |
172 | } |
173 | |
/*
 * Free all the resources and finish the transactions.
 *
 * Lets go of whatever the scrub context @sc is still holding and resets the
 * corresponding pointer or flag after each release.  xfs_scrub_metadata()
 * relies on this to bring the context back to a state from which it can jump
 * to its retry label and run setup again.
 *
 * @error is the status of the operation so far.  The return value is @error,
 * except when a transaction is committed below, in which case it is the
 * result of xfs_trans_commit().
 */
STATIC int
xchk_teardown(
	struct xfs_scrub	*sc,
	int			error)
{
	xchk_ag_free(sc, &sc->sa);
	if (sc->tp) {
		/*
		 * Commit only if nothing has gone wrong so far and the caller
		 * asked for repairs; in every other case the transaction is
		 * cancelled.
		 */
		if (error == 0 && (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
			error = xfs_trans_commit(sc->tp);
		else
			xfs_trans_cancel(sc->tp);
		sc->tp = NULL;
	}
	if (sc->ip) {
		/* Drop the locks recorded in ilock_flags before the inode. */
		if (sc->ilock_flags)
			xchk_iunlock(sc, sc->ilock_flags);
		xchk_irele(sc, sc->ip);
		sc->ip = NULL;
	}
	if (sc->flags & XCHK_HAVE_FREEZE_PROT) {
		/* Pairs with mnt_want_write_file() in xfs_scrub_metadata(). */
		sc->flags &= ~XCHK_HAVE_FREEZE_PROT;
		mnt_drop_write_file(sc->file);
	}
	if (sc->xmbtp) {
		xmbuf_free(sc->xmbtp);
		sc->xmbtp = NULL;
	}
	if (sc->xfile) {
		xfile_destroy(sc->xfile);
		sc->xfile = NULL;
	}
	if (sc->buf) {
		/*
		 * Run the optional cleanup hook on the buffer contents first,
		 * then free the buffer itself.
		 */
		if (sc->buf_cleanup)
			sc->buf_cleanup(sc->buf);
		kvfree(sc->buf);
		sc->buf_cleanup = NULL;
		sc->buf = NULL;
	}

	/* Last of all, turn off any fs gates still flagged in sc->flags. */
	xchk_fsgates_disable(sc);
	return error;
}
217 | |
218 | /* Scrubbing dispatch. */ |
219 | |
/*
 * Per-type dispatch table, indexed by the XFS_SCRUB_TYPE_* value that the
 * caller supplies in sm_type.  For each entry:
 *
 *  - .type is the ST_* class.  xchk_validate_inputs() switches on it to
 *    decide which of sm_ino, sm_gen and sm_agno the caller may fill in.
 *  - .setup and .scrub are mandatory; a slot missing either one makes
 *    xchk_validate_inputs() fail with -ENOENT.
 *  - .has, when present, is called with the mount to ask whether this
 *    filesystem supports the metadata type; a "no" is also -ENOENT.
 *  - .repair is the type's repair hook.  Nothing in the code shown here
 *    calls it directly (presumably xrep_attempt() does -- confirm).
 *  - .repair_eval, when present, is run in place of .scrub once
 *    XREP_ALREADY_FIXED is set in the scrub context's flags.
 *
 * A BUILD_BUG_ON in xfs_scrub_metadata() ties the size of this table to
 * XFS_SCRUB_TYPE_NR entries.
 */
static const struct xchk_meta_ops meta_scrub_ops[] = {
	[XFS_SCRUB_TYPE_PROBE] = {	/* ioctl presence test */
		.type	= ST_NONE,
		.setup	= xchk_setup_fs,
		.scrub	= xchk_probe,
		.repair = xrep_probe,
	},
	[XFS_SCRUB_TYPE_SB] = {		/* superblock */
		.type	= ST_PERAG,
		.setup	= xchk_setup_agheader,
		.scrub	= xchk_superblock,
		.repair	= xrep_superblock,
	},
	[XFS_SCRUB_TYPE_AGF] = {	/* agf */
		.type	= ST_PERAG,
		.setup	= xchk_setup_agheader,
		.scrub	= xchk_agf,
		.repair	= xrep_agf,
	},
	[XFS_SCRUB_TYPE_AGFL]= {	/* agfl */
		.type	= ST_PERAG,
		.setup	= xchk_setup_agheader,
		.scrub	= xchk_agfl,
		.repair	= xrep_agfl,
	},
	[XFS_SCRUB_TYPE_AGI] = {	/* agi */
		.type	= ST_PERAG,
		.setup	= xchk_setup_agheader,
		.scrub	= xchk_agi,
		.repair	= xrep_agi,
	},
	[XFS_SCRUB_TYPE_BNOBT] = {	/* bnobt */
		.type	= ST_PERAG,
		.setup	= xchk_setup_ag_allocbt,
		.scrub	= xchk_allocbt,
		.repair	= xrep_allocbt,
		.repair_eval = xrep_revalidate_allocbt,
	},
	[XFS_SCRUB_TYPE_CNTBT] = {	/* cntbt */
		.type	= ST_PERAG,
		.setup	= xchk_setup_ag_allocbt,
		.scrub	= xchk_allocbt,
		.repair	= xrep_allocbt,
		.repair_eval = xrep_revalidate_allocbt,
	},
	[XFS_SCRUB_TYPE_INOBT] = {	/* inobt */
		.type	= ST_PERAG,
		.setup	= xchk_setup_ag_iallocbt,
		.scrub	= xchk_iallocbt,
		.repair	= xrep_iallocbt,
		.repair_eval = xrep_revalidate_iallocbt,
	},
	[XFS_SCRUB_TYPE_FINOBT] = {	/* finobt */
		.type	= ST_PERAG,
		.setup	= xchk_setup_ag_iallocbt,
		.scrub	= xchk_iallocbt,
		.has	= xfs_has_finobt,
		.repair	= xrep_iallocbt,
		.repair_eval = xrep_revalidate_iallocbt,
	},
	[XFS_SCRUB_TYPE_RMAPBT] = {	/* rmapbt */
		.type	= ST_PERAG,
		.setup	= xchk_setup_ag_rmapbt,
		.scrub	= xchk_rmapbt,
		.has	= xfs_has_rmapbt,
		.repair	= xrep_rmapbt,
	},
	[XFS_SCRUB_TYPE_REFCNTBT] = {	/* refcountbt */
		.type	= ST_PERAG,
		.setup	= xchk_setup_ag_refcountbt,
		.scrub	= xchk_refcountbt,
		.has	= xfs_has_reflink,
		.repair	= xrep_refcountbt,
	},
	[XFS_SCRUB_TYPE_INODE] = {	/* inode record */
		.type	= ST_INODE,
		.setup	= xchk_setup_inode,
		.scrub	= xchk_inode,
		.repair	= xrep_inode,
	},
	[XFS_SCRUB_TYPE_BMBTD] = {	/* inode data fork */
		.type	= ST_INODE,
		.setup	= xchk_setup_inode_bmap,
		.scrub	= xchk_bmap_data,
		.repair	= xrep_bmap_data,
	},
	[XFS_SCRUB_TYPE_BMBTA] = {	/* inode attr fork */
		.type	= ST_INODE,
		.setup	= xchk_setup_inode_bmap,
		.scrub	= xchk_bmap_attr,
		.repair	= xrep_bmap_attr,
	},
	[XFS_SCRUB_TYPE_BMBTC] = {	/* inode CoW fork */
		.type	= ST_INODE,
		.setup	= xchk_setup_inode_bmap,
		.scrub	= xchk_bmap_cow,
		.repair	= xrep_bmap_cow,
	},
	[XFS_SCRUB_TYPE_DIR] = {	/* directory */
		.type	= ST_INODE,
		.setup	= xchk_setup_directory,
		.scrub	= xchk_directory,
		.repair	= xrep_notsupported,
	},
	[XFS_SCRUB_TYPE_XATTR] = {	/* extended attributes */
		.type	= ST_INODE,
		.setup	= xchk_setup_xattr,
		.scrub	= xchk_xattr,
		.repair	= xrep_notsupported,
	},
	[XFS_SCRUB_TYPE_SYMLINK] = {	/* symbolic link */
		.type	= ST_INODE,
		.setup	= xchk_setup_symlink,
		.scrub	= xchk_symlink,
		.repair	= xrep_notsupported,
	},
	[XFS_SCRUB_TYPE_PARENT] = {	/* parent pointers */
		.type	= ST_INODE,
		.setup	= xchk_setup_parent,
		.scrub	= xchk_parent,
		.repair	= xrep_notsupported,
	},
	[XFS_SCRUB_TYPE_RTBITMAP] = {	/* realtime bitmap */
		.type	= ST_FS,
		.setup	= xchk_setup_rtbitmap,
		.scrub	= xchk_rtbitmap,
		.repair	= xrep_rtbitmap,
	},
	[XFS_SCRUB_TYPE_RTSUM] = {	/* realtime summary */
		.type	= ST_FS,
		.setup	= xchk_setup_rtsummary,
		.scrub	= xchk_rtsummary,
		.repair	= xrep_notsupported,
	},
	[XFS_SCRUB_TYPE_UQUOTA] = {	/* user quota */
		.type	= ST_FS,
		.setup	= xchk_setup_quota,
		.scrub	= xchk_quota,
		.repair	= xrep_quota,
	},
	[XFS_SCRUB_TYPE_GQUOTA] = {	/* group quota */
		.type	= ST_FS,
		.setup	= xchk_setup_quota,
		.scrub	= xchk_quota,
		.repair	= xrep_quota,
	},
	[XFS_SCRUB_TYPE_PQUOTA] = {	/* project quota */
		.type	= ST_FS,
		.setup	= xchk_setup_quota,
		.scrub	= xchk_quota,
		.repair	= xrep_quota,
	},
	[XFS_SCRUB_TYPE_FSCOUNTERS] = {	/* fs summary counters */
		.type	= ST_FS,
		.setup	= xchk_setup_fscounters,
		.scrub	= xchk_fscounters,
		.repair	= xrep_fscounters,
	},
	[XFS_SCRUB_TYPE_QUOTACHECK] = {	/* quota counters */
		.type	= ST_FS,
		.setup	= xchk_setup_quotacheck,
		.scrub	= xchk_quotacheck,
		.repair	= xrep_quotacheck,
	},
	[XFS_SCRUB_TYPE_NLINKS] = {	/* inode link counts */
		.type	= ST_FS,
		.setup	= xchk_setup_nlinks,
		.scrub	= xchk_nlinks,
		.repair	= xrep_nlinks,
	},
	[XFS_SCRUB_TYPE_HEALTHY] = {	/* fs healthy; clean all reminders */
		.type	= ST_FS,
		.setup	= xchk_setup_fs,
		.scrub	= xchk_health_record,
		.repair = xrep_notsupported,
	},
};
397 | |
398 | static int |
399 | xchk_validate_inputs( |
400 | struct xfs_mount *mp, |
401 | struct xfs_scrub_metadata *sm) |
402 | { |
403 | int error; |
404 | const struct xchk_meta_ops *ops; |
405 | |
406 | error = -EINVAL; |
407 | /* Check our inputs. */ |
408 | sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT; |
409 | if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN) |
410 | goto out; |
411 | /* sm_reserved[] must be zero */ |
412 | if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved))) |
413 | goto out; |
414 | |
415 | error = -ENOENT; |
416 | /* Do we know about this type of metadata? */ |
417 | if (sm->sm_type >= XFS_SCRUB_TYPE_NR) |
418 | goto out; |
419 | ops = &meta_scrub_ops[sm->sm_type]; |
420 | if (ops->setup == NULL || ops->scrub == NULL) |
421 | goto out; |
422 | /* Does this fs even support this type of metadata? */ |
423 | if (ops->has && !ops->has(mp)) |
424 | goto out; |
425 | |
426 | error = -EINVAL; |
427 | /* restricting fields must be appropriate for type */ |
428 | switch (ops->type) { |
429 | case ST_NONE: |
430 | case ST_FS: |
431 | if (sm->sm_ino || sm->sm_gen || sm->sm_agno) |
432 | goto out; |
433 | break; |
434 | case ST_PERAG: |
435 | if (sm->sm_ino || sm->sm_gen || |
436 | sm->sm_agno >= mp->m_sb.sb_agcount) |
437 | goto out; |
438 | break; |
439 | case ST_INODE: |
440 | if (sm->sm_agno || (sm->sm_gen && !sm->sm_ino)) |
441 | goto out; |
442 | break; |
443 | default: |
444 | goto out; |
445 | } |
446 | |
447 | /* No rebuild without repair. */ |
448 | if ((sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) && |
449 | !(sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)) |
450 | return -EINVAL; |
451 | |
452 | /* |
453 | * We only want to repair read-write v5+ filesystems. Defer the check |
454 | * for ops->repair until after our scrub confirms that we need to |
455 | * perform repairs so that we avoid failing due to not supporting |
456 | * repairing an object that doesn't need repairs. |
457 | */ |
458 | if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) { |
459 | error = -EOPNOTSUPP; |
460 | if (!xfs_has_crc(mp)) |
461 | goto out; |
462 | |
463 | error = -EROFS; |
464 | if (xfs_is_readonly(mp)) |
465 | goto out; |
466 | } |
467 | |
468 | error = 0; |
469 | out: |
470 | return error; |
471 | } |
472 | |
473 | #ifdef CONFIG_XFS_ONLINE_REPAIR |
474 | static inline void xchk_postmortem(struct xfs_scrub *sc) |
475 | { |
476 | /* |
477 | * Userspace asked us to repair something, we repaired it, rescanned |
478 | * it, and the rescan says it's still broken. Scream about this in |
479 | * the system logs. |
480 | */ |
481 | if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) && |
482 | (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT | |
483 | XFS_SCRUB_OFLAG_XCORRUPT))) |
484 | xrep_failure(sc->mp); |
485 | } |
486 | #else |
487 | static inline void xchk_postmortem(struct xfs_scrub *sc) |
488 | { |
489 | /* |
490 | * Userspace asked us to scrub something, it's broken, and we have no |
491 | * way of fixing it. Scream in the logs. |
492 | */ |
493 | if (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT | |
494 | XFS_SCRUB_OFLAG_XCORRUPT)) |
495 | xfs_alert_ratelimited(sc->mp, |
496 | "Corruption detected during scrub." ); |
497 | } |
498 | #endif /* CONFIG_XFS_ONLINE_REPAIR */ |
499 | |
/*
 * Dispatch metadata scrubbing.
 *
 * @file: file whose inode identifies the XFS mount to operate on; it is also
 *        the file handed to mnt_want_write_file() when repairs are requested.
 * @sm:   the scrub request.  sm_type selects the meta_scrub_ops entry and
 *        sm_flags carries the input flags in and the output flags back out.
 *
 * Flow: reject shut down or norecovery mounts, validate @sm, allocate a
 * scrub context, then run setup -> scrub (-> repair) with a full teardown
 * between attempts.  A setup or scrub call that returns -EDEADLOCK or
 * -ECHRNG is retried with XCHK_TRY_HARDER or XCHK_NEED_DRAIN set, as long as
 * that flag was not already set; -EAGAIN from xrep_attempt() also jumps back
 * to retry_op.
 *
 * Returns 0 or a negative errno.  -EFSCORRUPTED and -EFSBADCRC are never
 * returned: they are converted into XFS_SCRUB_OFLAG_CORRUPT in sm->sm_flags
 * plus a return value of 0.
 */
int
xfs_scrub_metadata(
	struct file			*file,
	struct xfs_scrub_metadata	*sm)
{
	struct xchk_stats_run		run = { };
	struct xfs_scrub		*sc;
	struct xfs_mount		*mp = XFS_I(file_inode(file))->i_mount;
	u64				check_start;
	int				error = 0;

	/* The ops table must have exactly XFS_SCRUB_TYPE_NR slots. */
	BUILD_BUG_ON(sizeof(meta_scrub_ops) !=
		(sizeof(struct xchk_meta_ops) * XFS_SCRUB_TYPE_NR));

	trace_xchk_start(XFS_I(file_inode(file)), sm, error);

	/* Forbidden if we are shut down or mounted norecovery. */
	error = -ESHUTDOWN;
	if (xfs_is_shutdown(mp))
		goto out;
	error = -ENOTRECOVERABLE;
	if (xfs_has_norecovery(mp))
		goto out;

	error = xchk_validate_inputs(mp, sm);
	if (error)
		goto out;

	xfs_warn_mount(mp, XFS_OPSTATE_WARNED_SCRUB,
 "EXPERIMENTAL online scrub feature in use. Use at your own risk!");

	sc = kzalloc(sizeof(struct xfs_scrub), XCHK_GFP_FLAGS);
	if (!sc) {
		error = -ENOMEM;
		goto out;
	}

	/* sm_type was range-checked by xchk_validate_inputs() above. */
	sc->mp = mp;
	sc->file = file;
	sc->sm = sm;
	sc->ops = &meta_scrub_ops[sm->sm_type];
	sc->sick_mask = xchk_health_mask_for_scrub_type(sm->sm_type);
retry_op:
	/*
	 * Every jump back to this label is preceded by a successful
	 * xchk_teardown(), which drops freeze protection, so it has to be
	 * taken again on each pass.
	 *
	 * When repairs are allowed, prevent freezing or readonly remount while
	 * scrub is running with a real transaction.
	 */
	if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) {
		error = mnt_want_write_file(sc->file);
		if (error)
			goto out_sc;

		sc->flags |= XCHK_HAVE_FREEZE_PROT;
	}

	/*
	 * Set up for the operation.  -EDEADLOCK and -ECHRNG each trigger a
	 * retry only while the matching flag is still clear; with the flag
	 * already set the error falls through to out_teardown.
	 */
	error = sc->ops->setup(sc);
	if (error == -EDEADLOCK && !(sc->flags & XCHK_TRY_HARDER))
		goto try_harder;
	if (error == -ECHRNG && !(sc->flags & XCHK_NEED_DRAIN))
		goto need_drain;
	if (error)
		goto out_teardown;

	/*
	 * Scrub for errors.  If XREP_ALREADY_FIXED is set and this type
	 * supplies a repair_eval hook, that hook runs in place of the regular
	 * scrub function.  Either way the elapsed time is added to
	 * run.scrub_ns.
	 */
	check_start = xchk_stats_now();
	if ((sc->flags & XREP_ALREADY_FIXED) && sc->ops->repair_eval != NULL)
		error = sc->ops->repair_eval(sc);
	else
		error = sc->ops->scrub(sc);
	run.scrub_ns += xchk_stats_elapsed_ns(check_start);
	if (error == -EDEADLOCK && !(sc->flags & XCHK_TRY_HARDER))
		goto try_harder;
	if (error == -ECHRNG && !(sc->flags & XCHK_NEED_DRAIN))
		goto need_drain;
	/* An errored or incomplete scan skips the health update and repair. */
	if (error || (sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE))
		goto out_teardown;

	xchk_update_health(sc);

	if (xchk_could_repair(sc)) {
		/*
		 * If userspace asked for a repair but it wasn't necessary,
		 * report that back to userspace.
		 */
		if (!xrep_will_attempt(sc)) {
			sc->sm->sm_flags |= XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED;
			goto out_nofix;
		}

		/*
		 * If it's broken, userspace wants us to fix it, and we haven't
		 * already tried to fix it, then attempt a repair.
		 */
		error = xrep_attempt(sc, &run);
		if (error == -EAGAIN) {
			/*
			 * Either the repair function succeeded or it couldn't
			 * get all the resources it needs; either way, we go
			 * back to the beginning and call the scrub function.
			 */
			error = xchk_teardown(sc, 0);
			if (error) {
				xrep_failure(mp);
				goto out_sc;
			}
			goto retry_op;
		}
	}

out_nofix:
	xchk_postmortem(sc);
out_teardown:
	error = xchk_teardown(sc, error);
out_sc:
	/* Every outcome except -ENOENT is merged into the scrub stats. */
	if (error != -ENOENT)
		xchk_stats_merge(mp, sm, &run);
	kfree(sc);
out:
	/*
	 * The tracepoint fires before the corruption errnos are folded into
	 * the output flag, so it sees the unconverted error value.
	 */
	trace_xchk_done(XFS_I(file_inode(file)), sm, error);
	if (error == -EFSCORRUPTED || error == -EFSBADCRC) {
		sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
		error = 0;
	}
	return error;
need_drain:
	/*
	 * Setup or scrub returned -ECHRNG with XCHK_NEED_DRAIN still clear.
	 * Tear everything down and go around again with the flag set; if the
	 * teardown itself fails, give up with that error.
	 */
	error = xchk_teardown(sc, 0);
	if (error)
		goto out_sc;
	sc->flags |= XCHK_NEED_DRAIN;
	run.retries++;
	goto retry_op;
try_harder:
	/*
	 * Scrubbers return -EDEADLOCK to mean 'try harder'.  Tear down
	 * everything we hold, then set up again with preparation for
	 * worst-case scenarios.
	 */
	error = xchk_teardown(sc, 0);
	if (error)
		goto out_sc;
	sc->flags |= XCHK_TRY_HARDER;
	run.retries++;
	goto retry_op;
}
646 | |