1// SPDX-License-Identifier: GPL-2.0
2#include "bcachefs.h"
3#include "bbpos.h"
4#include "alloc_background.h"
5#include "backpointers.h"
6#include "bkey_buf.h"
7#include "btree_cache.h"
8#include "btree_update.h"
9#include "btree_update_interior.h"
10#include "btree_write_buffer.h"
11#include "checksum.h"
12#include "error.h"
13
14#include <linux/mm.h>
15
16static bool extent_matches_bp(struct bch_fs *c,
17 enum btree_id btree_id, unsigned level,
18 struct bkey_s_c k,
19 struct bpos bucket,
20 struct bch_backpointer bp)
21{
22 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
23 const union bch_extent_entry *entry;
24 struct extent_ptr_decoded p;
25
26 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
27 struct bpos bucket2;
28 struct bch_backpointer bp2;
29
30 if (p.ptr.cached)
31 continue;
32
33 bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, bucket_pos: &bucket2, bp: &bp2);
34 if (bpos_eq(l: bucket, r: bucket2) &&
35 !memcmp(p: &bp, q: &bp2, size: sizeof(bp)))
36 return true;
37 }
38
39 return false;
40}
41
/*
 * bkey validation for KEY_TYPE_backpointer keys.
 *
 * Only checks that can't be left to fsck are done here: the key's
 * position must round-trip through bucket_pos_to_bp(), and the bucket
 * offset must lie within the bucket.  Backpointers for nonexistent
 * devices are ignored here and repaired by fsck.
 */
int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,
			     enum bkey_invalid_flags flags,
			     struct printbuf *err)
{
	struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);

	/* these will be caught by fsck */
	if (!bch2_dev_exists2(c, dev: bp.k->p.inode))
		return 0;

	struct bch_dev *ca = bch_dev_bkey_exists(c, idx: bp.k->p.inode);
	struct bpos bucket = bp_pos_to_bucket(c, bp_pos: bp.k->p);
	int ret = 0;

	bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size ||
			 !bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
			 c, err,
			 backpointer_bucket_offset_wrong,
			 "backpointer bucket_offset wrong");
fsck_err:
	return ret;
}
64
/*
 * Print a bch_backpointer: btree id, level, bucket offset (split into
 * sector and sub-sector components), length, and the key position it
 * points to.
 */
void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp)
{
	prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=",
		   bch2_btree_id_str(bp->btree_id),
		   bp->level,
		   (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT),
		   (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
		   bp->bucket_len);
	bch2_bpos_to_text(out, bp->pos);
}
75
/*
 * Print a backpointer bkey, prefixed with the device:bucket it lives in
 * (only when the device still exists).
 */
void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
{
	if (bch2_dev_exists2(c, dev: k.k->p.inode)) {
		prt_str(out, str: "bucket=");
		bch2_bpos_to_text(out, bp_pos_to_bucket(c, bp_pos: k.k->p));
		prt_str(out, str: " ");
	}

	bch2_backpointer_to_text(out, bp: bkey_s_c_to_backpointer(k).v);
}
86
/* Byte-swap a backpointer value, for reading foreign-endian btree nodes. */
void bch2_backpointer_swab(struct bkey_s k)
{
	struct bkey_s_backpointer bp = bkey_s_to_backpointer(k);

	/* bucket_offset is a 40 bit field, hence swab40: */
	bp.v->bucket_offset = swab40(x: bp.v->bucket_offset);
	bp.v->bucket_len = swab32(bp.v->bucket_len);
	bch2_bpos_swab(&bp.v->pos);
}
95
96static noinline int backpointer_mod_err(struct btree_trans *trans,
97 struct bch_backpointer bp,
98 struct bkey_s_c bp_k,
99 struct bkey_s_c orig_k,
100 bool insert)
101{
102 struct bch_fs *c = trans->c;
103 struct printbuf buf = PRINTBUF;
104
105 if (insert) {
106 prt_printf(&buf, "existing backpointer found when inserting ");
107 bch2_backpointer_to_text(out: &buf, bp: &bp);
108 prt_newline(&buf);
109 printbuf_indent_add(&buf, 2);
110
111 prt_printf(&buf, "found ");
112 bch2_bkey_val_to_text(&buf, c, bp_k);
113 prt_newline(&buf);
114
115 prt_printf(&buf, "for ");
116 bch2_bkey_val_to_text(&buf, c, orig_k);
117
118 bch_err(c, "%s", buf.buf);
119 } else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
120 prt_printf(&buf, "backpointer not found when deleting");
121 prt_newline(&buf);
122 printbuf_indent_add(&buf, 2);
123
124 prt_printf(&buf, "searching for ");
125 bch2_backpointer_to_text(out: &buf, bp: &bp);
126 prt_newline(&buf);
127
128 prt_printf(&buf, "got ");
129 bch2_bkey_val_to_text(&buf, c, bp_k);
130 prt_newline(&buf);
131
132 prt_printf(&buf, "for ");
133 bch2_bkey_val_to_text(&buf, c, orig_k);
134
135 bch_err(c, "%s", buf.buf);
136 }
137
138 printbuf_exit(&buf);
139
140 if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
141 return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0;
142 } else {
143 return 0;
144 }
145}
146
/*
 * Insert or delete a backpointer directly in the backpointers btree,
 * bypassing the btree write buffer.
 *
 * On insert, finding anything already at the target position is an
 * inconsistency; on delete, so is a missing or mismatched backpointer -
 * both reported via backpointer_mod_err().
 */
int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
				struct bpos bucket,
				struct bch_backpointer bp,
				struct bkey_s_c orig_k,
				bool insert)
{
	struct btree_iter bp_iter;
	struct bkey_s_c k;
	struct bkey_i_backpointer *bp_k;
	int ret;

	bp_k = bch2_trans_kmalloc_nomemzero(trans, size: sizeof(struct bkey_i_backpointer));
	ret = PTR_ERR_OR_ZERO(ptr: bp_k);
	if (ret)
		return ret;

	bkey_backpointer_init(k: &bp_k->k_i);
	bp_k->k.p = bucket_pos_to_bp(c: trans->c, bucket, bucket_offset: bp.bucket_offset);
	bp_k->v = bp;

	/* Deletion is expressed as a whiteout update: */
	if (!insert) {
		bp_k->k.type = KEY_TYPE_deleted;
		set_bkey_val_u64s(k: &bp_k->k, val_u64s: 0);
	}

	k = bch2_bkey_get_iter(trans, iter: &bp_iter, btree_id: BTREE_ID_backpointers,
			       pos: bp_k->k.p,
			       flags: BTREE_ITER_INTENT|
			       BTREE_ITER_SLOTS|
			       BTREE_ITER_WITH_UPDATES);
	ret = bkey_err(k);
	if (ret)
		goto err;

	/*
	 * Insert expects an empty slot; delete expects to find exactly the
	 * backpointer being removed:
	 */
	if (insert
	    ? k.k->type
	    : (k.k->type != KEY_TYPE_backpointer ||
	       memcmp(p: bkey_s_c_to_backpointer(k).v, q: &bp, size: sizeof(bp)))) {
		ret = backpointer_mod_err(trans, bp, bp_k: k, orig_k, insert);
		if (ret)
			goto err;
	}

	ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0);
err:
	bch2_trans_iter_exit(trans, &bp_iter);
	return ret;
}
195
/*
 * Find the next backpointer >= *bp_offset:
 *
 * If @gen >= 0, the bucket's alloc key is checked first and we bail out
 * (returning "no more backpointers") if the bucket generation doesn't
 * match - the bucket has since been reused and its backpointers are
 * stale.
 *
 * On success *bp_pos and *bp describe the backpointer found; *bp_pos is
 * set to SPOS_MAX when there are no more backpointers for this bucket.
 */
int bch2_get_next_backpointer(struct btree_trans *trans,
			      struct bpos bucket, int gen,
			      struct bpos *bp_pos,
			      struct bch_backpointer *bp,
			      unsigned iter_flags)
{
	struct bch_fs *c = trans->c;
	/* Backpointers for the next bucket mark the end of our range: */
	struct bpos bp_end_pos = bucket_pos_to_bp(c, bucket: bpos_nosnap_successor(p: bucket), bucket_offset: 0);
	struct btree_iter alloc_iter = { NULL }, bp_iter = { NULL };
	struct bkey_s_c k;
	int ret = 0;

	if (bpos_ge(l: *bp_pos, r: bp_end_pos))
		goto done;

	if (gen >= 0) {
		k = bch2_bkey_get_iter(trans, iter: &alloc_iter, btree_id: BTREE_ID_alloc,
				       pos: bucket, flags: BTREE_ITER_CACHED|iter_flags);
		ret = bkey_err(k);
		if (ret)
			goto out;

		/* Gen mismatch: bucket was reused, nothing to return */
		if (k.k->type != KEY_TYPE_alloc_v4 ||
		    bkey_s_c_to_alloc_v4(k).v->gen != gen)
			goto done;
	}

	*bp_pos = bpos_max(l: *bp_pos, r: bucket_pos_to_bp(c, bucket, bucket_offset: 0));

	for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers,
				     *bp_pos, iter_flags, k, ret) {
		if (bpos_ge(l: k.k->p, r: bp_end_pos))
			break;

		*bp_pos = k.k->p;
		*bp = *bkey_s_c_to_backpointer(k).v;
		goto out;
	}
done:
	*bp_pos = SPOS_MAX;
out:
	bch2_trans_iter_exit(trans, &bp_iter);
	bch2_trans_iter_exit(trans, &alloc_iter);
	return ret;
}
244
/*
 * Report a backpointer whose target extent/btree node wasn't found.
 *
 * In the normal configuration backpointer updates go through the btree
 * write buffer, so the backpointer may legitimately have been deleted
 * since we looked it up - then this is not an error and we return
 * silently.
 */
static void backpointer_not_found(struct btree_trans *trans,
				  struct bpos bp_pos,
				  struct bch_backpointer bp,
				  struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	struct bpos bucket = bp_pos_to_bucket(c, bp_pos);

	/*
	 * If we're using the btree write buffer, the backpointer we were
	 * looking at may have already been deleted - failure to find what it
	 * pointed to is not an error:
	 */
	if (likely(!bch2_backpointers_no_use_write_buffer))
		return;

	prt_printf(&buf, "backpointer doesn't match %s it points to:\n ",
		   bp.level ? "btree node" : "extent");
	prt_printf(&buf, "bucket: ");
	bch2_bpos_to_text(&buf, bucket);
	prt_printf(&buf, "\n ");

	prt_printf(&buf, "backpointer pos: ");
	bch2_bpos_to_text(&buf, bp_pos);
	prt_printf(&buf, "\n ");

	bch2_backpointer_to_text(out: &buf, bp: &bp);
	prt_printf(&buf, "\n ");
	bch2_bkey_val_to_text(&buf, c, k);
	/* Before/during the fsck pass just log; afterwards it's an inconsistency: */
	if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers)
		bch_err_ratelimited(c, "%s", buf.buf);
	else
		bch2_trans_inconsistent(trans, "%s", buf.buf);

	printbuf_exit(&buf);
}
282
/*
 * Return the key that backpointer @bp (found at @bp_pos) points to.
 *
 * For extent backpointers (bp.level == 0): look up the extent and verify
 * it really has a pointer matching @bp; on success @iter points at the
 * key and must be exited by the caller.  Returns bkey_s_c_null if
 * there's no match (possibly because the backpointer was concurrently
 * deleted via the write buffer - see backpointer_not_found()).
 *
 * For btree node backpointers, the node's key is returned via
 * bch2_backpointer_get_node(), which handles @iter itself on failure.
 */
struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
					 struct btree_iter *iter,
					 struct bpos bp_pos,
					 struct bch_backpointer bp,
					 unsigned iter_flags)
{
	if (likely(!bp.level)) {
		struct bch_fs *c = trans->c;
		struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
		struct bkey_s_c k;

		bch2_trans_node_iter_init(trans, iter,
					  bp.btree_id,
					  bp.pos,
					  0, 0,
					  iter_flags);
		k = bch2_btree_iter_peek_slot(iter);
		if (bkey_err(k)) {
			bch2_trans_iter_exit(trans, iter);
			return k;
		}

		if (k.k && extent_matches_bp(c, btree_id: bp.btree_id, level: bp.level, k, bucket, bp))
			return k;

		bch2_trans_iter_exit(trans, iter);
		backpointer_not_found(trans, bp_pos, bp, k);
		return bkey_s_c_null;
	} else {
		struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp);

		if (IS_ERR_OR_NULL(ptr: b)) {
			bch2_trans_iter_exit(trans, iter);
			return IS_ERR(ptr: b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null;
		}
		return bkey_i_to_s_c(k: &b->key);
	}
}
321
/*
 * Return the btree node that backpointer @bp points to; only valid for
 * btree node backpointers (bp.level != 0).
 *
 * Returns NULL if the node no longer matches @bp, or
 * ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node) if the node
 * is about to be replaced by an interior update (caller should retry).
 * @iter is exited on every path except successful return of the node.
 */
struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
					struct btree_iter *iter,
					struct bpos bp_pos,
					struct bch_backpointer bp)
{
	struct bch_fs *c = trans->c;
	struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
	struct btree *b;

	BUG_ON(!bp.level);

	/* A node at level L is referenced by a key iterated at level L - 1: */
	bch2_trans_node_iter_init(trans, iter,
				  bp.btree_id,
				  bp.pos,
				  0,
				  bp.level - 1,
				  0);
	b = bch2_btree_iter_peek_node(iter);
	if (IS_ERR_OR_NULL(ptr: b))
		goto err;

	BUG_ON(b->c.level != bp.level - 1);

	if (extent_matches_bp(c, btree_id: bp.btree_id, level: bp.level,
			      k: bkey_i_to_s_c(k: &b->key),
			      bucket, bp))
		return b;

	if (btree_node_will_make_reachable(b)) {
		/* Node is being rewritten; its new version isn't visible yet */
		b = ERR_PTR(error: -BCH_ERR_backpointer_to_overwritten_btree_node);
	} else {
		backpointer_not_found(trans, bp_pos, bp, k: bkey_i_to_s_c(k: &b->key));
		b = NULL;
	}
err:
	bch2_trans_iter_exit(trans, iter);
	return b;
}
360
/*
 * Check a single backpointer key: its device must exist, and the bucket
 * it lives in must have an alloc_v4 key; otherwise the backpointer is
 * deleted.
 */
static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter,
					struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct btree_iter alloc_iter = { NULL };
	struct bkey_s_c alloc_k;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c,
			backpointer_to_missing_device,
			"backpointer for missing device:\n%s",
			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
		ret = bch2_btree_delete_at(trans, bp_iter, 0);
		goto out;
	}

	alloc_k = bch2_bkey_get_iter(trans, iter: &alloc_iter, btree_id: BTREE_ID_alloc,
				     pos: bp_pos_to_bucket(c, bp_pos: k.k->p), flags: 0);
	ret = bkey_err(alloc_k);
	if (ret)
		goto out;

	if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, c,
			backpointer_to_missing_alloc,
			"backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
			alloc_iter.pos.inode, alloc_iter.pos.offset,
			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
		ret = bch2_btree_delete_at(trans, bp_iter, 0);
		goto out;
	}
out:
fsck_err:
	bch2_trans_iter_exit(trans, &alloc_iter);
	printbuf_exit(&buf);
	return ret;
}
398
/* verify that every backpointer has a corresponding alloc key */
int bch2_check_btree_backpointers(struct bch_fs *c)
{
	/* Walk the whole backpointers btree, committing any fix per key: */
	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter,
				BTREE_ID_backpointers, POS_MIN, 0, k,
				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
			bch2_check_btree_backpointer(trans, &iter, k)));
	bch_err_fn(c, ret);
	return ret;
}
410
411static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
412{
413 return bpos_eq(l: l.k->p, r: r.k->p) &&
414 bkey_bytes(l.k) == bkey_bytes(r.k) &&
415 !memcmp(p: l.v, q: r.v, size: bkey_val_bytes(k: l.k));
416}
417
/* State carried across one pass of the extents -> backpointers check: */
struct extents_to_bp_state {
	/* Bucket range whose backpointers btree nodes are pinned this pass */
	struct bpos bucket_start;
	struct bpos bucket_end;
	/* Last key for which we flushed the btree write buffer */
	struct bkey_buf last_flushed;
};
423
424static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree,
425 struct bkey_s_c extent, unsigned dev)
426{
427 struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, k: extent);
428 int ret = PTR_ERR_OR_ZERO(ptr: n);
429 if (ret)
430 return ret;
431
432 bch2_bkey_drop_device(bkey_i_to_s(k: n), dev);
433 return bch2_btree_insert_trans(trans, btree, n, 0);
434}
435
/*
 * Two extents claim the same disk space: read the data back through
 * @extent's pointer to @dev and verify its checksum; if the checksum is
 * bad, drop that pointer (the other extent, @extent2, is presumably the
 * valid owner).
 *
 * Returns: < 0 on error, 1 if the pointer was dropped, 0 otherwise
 * (no checksum to check, device unavailable, or checksum good).
 */
static int check_extent_checksum(struct btree_trans *trans,
				 enum btree_id btree, struct bkey_s_c extent,
				 enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k: extent);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	struct printbuf buf = PRINTBUF;
	void *data_buf = NULL;
	struct bio *bio = NULL;
	size_t bytes;
	int ret = 0;

	/* Btree node checksums aren't verified this way: */
	if (bkey_is_btree_ptr(k: extent.k))
		return false;

	/* Caller guarantees @extent has a pointer to @dev: */
	bkey_for_each_ptr_decode(extent.k, ptrs, p, entry)
		if (p.ptr.dev == dev)
			goto found;
	BUG();
found:
	/* Nothing to verify without a checksum: */
	if (!p.crc.csum_type)
		return false;

	bytes = p.crc.compressed_size << 9;

	struct bch_dev *ca = bch_dev_bkey_exists(c, idx: dev);
	if (!bch2_dev_get_ioref(ca, READ))
		return false;

	data_buf = kvmalloc(size: bytes, GFP_KERNEL);
	if (!data_buf) {
		ret = -ENOMEM;
		goto err;
	}

	/* Synchronously read the (possibly compressed) data back: */
	bio = bio_alloc(bdev: ca->disk_sb.bdev, nr_vecs: buf_pages(p: data_buf, len: bytes), opf: REQ_OP_READ, GFP_KERNEL);
	bio->bi_iter.bi_sector = p.ptr.offset;
	bch2_bio_map(bio, base: data_buf, bytes);
	ret = submit_bio_wait(bio);
	if (ret)
		goto err;

	/* Build the fsck message up front; only shown if the csum is bad: */
	prt_str(out: &buf, str: "extents pointing to same space, but first extent checksum bad:");
	prt_printf(&buf, "\n %s ", bch2_btree_id_str(btree));
	bch2_bkey_val_to_text(&buf, c, extent);
	prt_printf(&buf, "\n %s ", bch2_btree_id_str(o_btree));
	bch2_bkey_val_to_text(&buf, c, extent2);

	struct nonce nonce = extent_nonce(version: extent.k->version, crc: p.crc);
	struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes);
	if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum),
			c, dup_backpointer_to_bad_csum_extent,
			"%s", buf.buf))
		/* ?: 1 signals to the caller that the pointer was dropped */
		ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1;
fsck_err:
err:
	if (bio)
		bio_put(bio);
	kvfree(addr: data_buf);
	percpu_ref_put(ref: &ca->io_ref);
	printbuf_exit(&buf);
	return ret;
}
501
502static int check_bp_exists(struct btree_trans *trans,
503 struct extents_to_bp_state *s,
504 struct bpos bucket,
505 struct bch_backpointer bp,
506 struct bkey_s_c orig_k)
507{
508 struct bch_fs *c = trans->c;
509 struct btree_iter bp_iter = {};
510 struct btree_iter other_extent_iter = {};
511 struct printbuf buf = PRINTBUF;
512 struct bkey_s_c bp_k;
513 struct bkey_buf tmp;
514 int ret;
515
516 bch2_bkey_buf_init(s: &tmp);
517
518 if (!bch2_dev_bucket_exists(c, pos: bucket)) {
519 prt_str(out: &buf, str: "extent for nonexistent device:bucket ");
520 bch2_bpos_to_text(&buf, bucket);
521 prt_str(out: &buf, str: "\n ");
522 bch2_bkey_val_to_text(&buf, c, orig_k);
523 bch_err(c, "%s", buf.buf);
524 return -BCH_ERR_fsck_repair_unimplemented;
525 }
526
527 if (bpos_lt(l: bucket, r: s->bucket_start) ||
528 bpos_gt(l: bucket, r: s->bucket_end))
529 return 0;
530
531 bp_k = bch2_bkey_get_iter(trans, iter: &bp_iter, btree_id: BTREE_ID_backpointers,
532 pos: bucket_pos_to_bp(c, bucket, bucket_offset: bp.bucket_offset),
533 flags: 0);
534 ret = bkey_err(bp_k);
535 if (ret)
536 goto err;
537
538 if (bp_k.k->type != KEY_TYPE_backpointer ||
539 memcmp(p: bkey_s_c_to_backpointer(k: bp_k).v, q: &bp, size: sizeof(bp))) {
540 bch2_bkey_buf_reassemble(s: &tmp, c, k: orig_k);
541
542 if (!bkey_and_val_eq(l: orig_k, r: bkey_i_to_s_c(k: s->last_flushed.k))) {
543 if (bp.level) {
544 bch2_trans_unlock(trans);
545 bch2_btree_interior_updates_flush(c);
546 }
547
548 ret = bch2_btree_write_buffer_flush_sync(trans);
549 if (ret)
550 goto err;
551
552 bch2_bkey_buf_copy(s: &s->last_flushed, c, src: tmp.k);
553 ret = -BCH_ERR_transaction_restart_write_buffer_flush;
554 goto out;
555 }
556
557 goto check_existing_bp;
558 }
559out:
560err:
561fsck_err:
562 bch2_trans_iter_exit(trans, &other_extent_iter);
563 bch2_trans_iter_exit(trans, &bp_iter);
564 bch2_bkey_buf_exit(s: &tmp, c);
565 printbuf_exit(&buf);
566 return ret;
567check_existing_bp:
568 /* Do we have a backpointer for a different extent? */
569 if (bp_k.k->type != KEY_TYPE_backpointer)
570 goto missing;
571
572 struct bch_backpointer other_bp = *bkey_s_c_to_backpointer(k: bp_k).v;
573
574 struct bkey_s_c other_extent =
575 bch2_backpointer_get_key(trans, iter: &other_extent_iter, bp_pos: bp_k.k->p, bp: other_bp, iter_flags: 0);
576 ret = bkey_err(other_extent);
577 if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
578 ret = 0;
579 if (ret)
580 goto err;
581
582 if (!other_extent.k)
583 goto missing;
584
585 if (bch2_extents_match(orig_k, other_extent)) {
586 printbuf_reset(buf: &buf);
587 prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n ");
588 bch2_bkey_val_to_text(&buf, c, orig_k);
589 prt_str(out: &buf, str: "\n ");
590 bch2_bkey_val_to_text(&buf, c, other_extent);
591 bch_err(c, "%s", buf.buf);
592
593 if (other_extent.k->size <= orig_k.k->size) {
594 ret = drop_dev_and_update(trans, btree: other_bp.btree_id, extent: other_extent, dev: bucket.inode);
595 if (ret)
596 goto err;
597 goto out;
598 } else {
599 ret = drop_dev_and_update(trans, btree: bp.btree_id, extent: orig_k, dev: bucket.inode);
600 if (ret)
601 goto err;
602 goto missing;
603 }
604 }
605
606 ret = check_extent_checksum(trans, btree: other_bp.btree_id, extent: other_extent, o_btree: bp.btree_id, extent2: orig_k, dev: bucket.inode);
607 if (ret < 0)
608 goto err;
609 if (ret) {
610 ret = 0;
611 goto missing;
612 }
613
614 ret = check_extent_checksum(trans, btree: bp.btree_id, extent: orig_k, o_btree: other_bp.btree_id, extent2: other_extent, dev: bucket.inode);
615 if (ret < 0)
616 goto err;
617 if (ret) {
618 ret = 0;
619 goto out;
620 }
621
622 printbuf_reset(buf: &buf);
623 prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bucket.inode);
624 bch2_bkey_val_to_text(&buf, c, orig_k);
625 prt_str(out: &buf, str: "\n ");
626 bch2_bkey_val_to_text(&buf, c, other_extent);
627 bch_err(c, "%s", buf.buf);
628 ret = -BCH_ERR_fsck_repair_unimplemented;
629 goto err;
630missing:
631 printbuf_reset(buf: &buf);
632 prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
633 bch2_btree_id_str(bp.btree_id), bp.level);
634 bch2_bkey_val_to_text(&buf, c, orig_k);
635 prt_printf(&buf, "\n got: ");
636 bch2_bkey_val_to_text(&buf, c, bp_k);
637
638 struct bkey_i_backpointer n_bp_k;
639 bkey_backpointer_init(k: &n_bp_k.k_i);
640 n_bp_k.k.p = bucket_pos_to_bp(c: trans->c, bucket, bucket_offset: bp.bucket_offset);
641 n_bp_k.v = bp;
642 prt_printf(&buf, "\n want: ");
643 bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k: &n_bp_k.k_i));
644
645 if (fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf))
646 ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, insert: true);
647
648 goto out;
649}
650
651static int check_extent_to_backpointers(struct btree_trans *trans,
652 struct extents_to_bp_state *s,
653 enum btree_id btree, unsigned level,
654 struct bkey_s_c k)
655{
656 struct bch_fs *c = trans->c;
657 struct bkey_ptrs_c ptrs;
658 const union bch_extent_entry *entry;
659 struct extent_ptr_decoded p;
660 int ret;
661
662 ptrs = bch2_bkey_ptrs_c(k);
663 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
664 struct bpos bucket_pos;
665 struct bch_backpointer bp;
666
667 if (p.ptr.cached)
668 continue;
669
670 bch2_extent_ptr_to_bp(c, btree_id: btree, level, k, p, entry, bucket_pos: &bucket_pos, bp: &bp);
671
672 ret = check_bp_exists(trans, s, bucket: bucket_pos, bp, orig_k: k);
673 if (ret)
674 return ret;
675 }
676
677 return 0;
678}
679
/*
 * Check backpointers for a btree root's key: roots aren't visited by
 * the normal per-level walk, so they're handled here separately.
 *
 * *level is set to the root's level so the caller knows where to start
 * walking this btree.
 */
static int check_btree_root_to_backpointers(struct btree_trans *trans,
					    struct extents_to_bp_state *s,
					    enum btree_id btree_id,
					    int *level)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct btree *b;
	struct bkey_s_c k;
	int ret;
retry:
	bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN,
				  0, bch2_btree_id_root(c, id: btree_id)->b->c.level, 0);
	b = bch2_btree_iter_peek_node(&iter);
	ret = PTR_ERR_OR_ZERO(ptr: b);
	if (ret)
		goto err;

	/* Raced with a root split/rewrite - retry against the new root: */
	if (b != btree_node_root(c, b)) {
		bch2_trans_iter_exit(trans, &iter);
		goto retry;
	}

	*level = b->c.level;

	/* The root's key is logically referenced from one level up: */
	k = bkey_i_to_s_c(k: &b->key);
	ret = check_extent_to_backpointers(trans, s, btree: btree_id, level: b->c.level + 1, k);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
711
712static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
713{
714 return (struct bbpos) {
715 .btree = bp.btree_id,
716 .pos = bp.pos,
717 };
718}
719
720static u64 mem_may_pin_bytes(struct bch_fs *c)
721{
722 struct sysinfo i;
723 si_meminfo(val: &i);
724
725 u64 mem_bytes = i.totalram * i.mem_unit;
726 return div_u64(dividend: mem_bytes * c->opts.fsck_memory_usage_percent, divisor: 100);
727}
728
729static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
730{
731 return div_u64(dividend: mem_may_pin_bytes(c), divisor: c->opts.btree_node_size);
732}
733
734static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
735 u64 btree_leaf_mask,
736 u64 btree_interior_mask,
737 struct bbpos start, struct bbpos *end)
738{
739 struct bch_fs *c = trans->c;
740 s64 mem_may_pin = mem_may_pin_bytes(c);
741 int ret = 0;
742
743 btree_interior_mask |= btree_leaf_mask;
744
745 c->btree_cache.pinned_nodes_leaf_mask = btree_leaf_mask;
746 c->btree_cache.pinned_nodes_interior_mask = btree_interior_mask;
747 c->btree_cache.pinned_nodes_start = start;
748 c->btree_cache.pinned_nodes_end = *end = BBPOS_MAX;
749
750 for (enum btree_id btree = start.btree;
751 btree < BTREE_ID_NR && !ret;
752 btree++) {
753 unsigned depth = ((1U << btree) & btree_leaf_mask) ? 0 : 1;
754 struct btree_iter iter;
755 struct btree *b;
756
757 if (!((1U << btree) & btree_leaf_mask) &&
758 !((1U << btree) & btree_interior_mask))
759 continue;
760
761 __for_each_btree_node(trans, iter, btree,
762 btree == start.btree ? start.pos : POS_MIN,
763 0, depth, BTREE_ITER_PREFETCH, b, ret) {
764 mem_may_pin -= btree_buf_bytes(b);
765 if (mem_may_pin <= 0) {
766 c->btree_cache.pinned_nodes_end = *end =
767 BBPOS(btree, pos: b->key.k.p);
768 bch2_trans_iter_exit(trans, &iter);
769 return 0;
770 }
771 }
772 bch2_trans_iter_exit(trans, &iter);
773 }
774
775 return ret;
776}
777
/*
 * One pass of extents -> backpointers checking: walk every btree a
 * level at a time, from the root (handled by
 * check_btree_root_to_backpointers()) down to @depth - level 0 only for
 * btrees whose leaf keys contain pointers.
 */
static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
						   struct extents_to_bp_state *s)
{
	struct bch_fs *c = trans->c;
	int ret = 0;

	for (enum btree_id btree_id = 0;
	     btree_id < btree_id_nr_alive(c);
	     btree_id++) {
		int level, depth = btree_type_has_ptrs(id: btree_id) ? 0 : 1;

		ret = commit_do(trans, NULL, NULL,
				BCH_TRANS_COMMIT_no_enospc,
				check_btree_root_to_backpointers(trans, s, btree_id, &level));
		if (ret)
			return ret;

		while (level >= depth) {
			struct btree_iter iter;
			bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0,
						  level,
						  BTREE_ITER_PREFETCH);
			while (1) {
				bch2_trans_begin(trans);

				struct bkey_s_c k = bch2_btree_iter_peek(iter: &iter);
				if (!k.k)
					break;
				ret = bkey_err(k) ?:
					check_extent_to_backpointers(trans, s, btree: btree_id, level, k) ?:
					bch2_trans_commit(trans, NULL, NULL,
							  flags: BCH_TRANS_COMMIT_no_enospc);
				/* On a transaction restart, retry the same key: */
				if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
					ret = 0;
					continue;
				}
				if (ret)
					break;
				if (bpos_eq(l: iter.pos, SPOS_MAX))
					break;
				bch2_btree_iter_advance(&iter);
			}
			bch2_trans_iter_exit(trans, &iter);

			if (ret)
				return ret;

			--level;
		}
	}

	return 0;
}
831
/*
 * Check that every extent/btree node pointer has a matching backpointer.
 *
 * When the backpointers btree doesn't fit in memory, run in multiple
 * passes: each pass pins a range of backpointers btree nodes in the
 * btree node cache and checks only backpointers for buckets in
 * [bucket_start, bucket_end].
 */
int bch2_check_extents_to_backpointers(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct extents_to_bp_state s = { .bucket_start = POS_MIN };
	int ret;

	bch2_bkey_buf_init(s: &s.last_flushed);
	bkey_init(k: &s.last_flushed.k->k);

	while (1) {
		struct bbpos end;
		ret = bch2_get_btree_in_memory_pos(trans,
				BIT_ULL(BTREE_ID_backpointers),
				BIT_ULL(BTREE_ID_backpointers),
				start: BBPOS(btree: BTREE_ID_backpointers, pos: s.bucket_start), end: &end);
		if (ret)
			break;

		s.bucket_end = end.pos;

		if ( bpos_eq(l: s.bucket_start, POS_MIN) &&
		    !bpos_eq(l: s.bucket_end, SPOS_MAX))
			bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
				    __func__, btree_nodes_fit_in_ram(c));

		if (!bpos_eq(l: s.bucket_start, POS_MIN) ||
		    !bpos_eq(l: s.bucket_end, SPOS_MAX)) {
			struct printbuf buf = PRINTBUF;

			prt_str(out: &buf, str: "check_extents_to_backpointers(): ");
			bch2_bpos_to_text(&buf, s.bucket_start);
			prt_str(out: &buf, str: "-");
			bch2_bpos_to_text(&buf, s.bucket_end);

			bch_verbose(c, "%s", buf.buf);
			printbuf_exit(&buf);
		}

		ret = bch2_check_extents_to_backpointers_pass(trans, s: &s);
		if (ret || bpos_eq(l: s.bucket_end, SPOS_MAX))
			break;

		s.bucket_start = bpos_successor(p: s.bucket_end);
	}
	bch2_trans_put(trans);
	bch2_bkey_buf_exit(s: &s.last_flushed, c);

	/* Unpin the nodes pinned by bch2_get_btree_in_memory_pos(): */
	c->btree_cache.pinned_nodes_leaf_mask = 0;
	c->btree_cache.pinned_nodes_interior_mask = 0;

	bch_err_fn(c, ret);
	return ret;
}
885
/*
 * Check a single backpointer: look up the extent or btree node it points
 * to, and delete the backpointer if the target is missing.
 *
 * Targets outside [start, end] - the range of extents/reflink btree
 * nodes pinned in memory this pass - are skipped.
 *
 * Backpointer updates go through the btree write buffer, so a missing
 * target only counts as an error once the write buffer has been flushed
 * for this position (tracked in *last_flushed_pos).
 */
static int check_one_backpointer(struct btree_trans *trans,
				 struct bbpos start,
				 struct bbpos end,
				 struct bkey_s_c_backpointer bp,
				 struct bpos *last_flushed_pos)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bbpos pos = bp_to_bbpos(bp: *bp.v);
	struct bkey_s_c k;
	struct printbuf buf = PRINTBUF;
	int ret;

	if (bbpos_cmp(l: pos, r: start) < 0 ||
	    bbpos_cmp(l: pos, r: end) > 0)
		return 0;

	k = bch2_backpointer_get_key(trans, iter: &iter, bp_pos: bp.k->p, bp: *bp.v, iter_flags: 0);
	ret = bkey_err(k);
	/* Target node was rewritten - stale backpointer, not an error: */
	if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
		return 0;
	if (ret)
		return ret;

	if (!k.k && !bpos_eq(l: *last_flushed_pos, r: bp.k->p)) {
		*last_flushed_pos = bp.k->p;
		/* Flush the write buffer, then retry this backpointer: */
		ret = bch2_btree_write_buffer_flush_sync(trans) ?:
			-BCH_ERR_transaction_restart_write_buffer_flush;
		goto out;
	}

	if (fsck_err_on(!k.k, c,
			backpointer_to_missing_ptr,
			"backpointer for missing %s\n %s",
			bp.v->level ? "btree node" : "extent",
			(bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
		ret = bch2_btree_delete_at_buffered(trans, btree: BTREE_ID_backpointers, pos: bp.k->p);
		goto out;
	}
out:
fsck_err:
	bch2_trans_iter_exit(trans, &iter);
	printbuf_exit(&buf);
	return ret;
}
931
/*
 * One pass of backpointers -> extents checking: walk every backpointer,
 * committing deletions of stale ones as we go.
 */
static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
						   struct bbpos start,
						   struct bbpos end)
{
	/* SPOS_MAX == no write buffer flush has been done yet this pass */
	struct bpos last_flushed_pos = SPOS_MAX;

	return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
					 POS_MIN, BTREE_ITER_PREFETCH, k,
					 NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
		check_one_backpointer(trans, start, end,
				      bkey_s_c_to_backpointer(k),
				      &last_flushed_pos));
}
945
946int bch2_check_backpointers_to_extents(struct bch_fs *c)
947{
948 struct btree_trans *trans = bch2_trans_get(c);
949 struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
950 int ret;
951
952 while (1) {
953 ret = bch2_get_btree_in_memory_pos(trans,
954 btree_leaf_mask: (1U << BTREE_ID_extents)|
955 (1U << BTREE_ID_reflink),
956 btree_interior_mask: ~0,
957 start, end: &end);
958 if (ret)
959 break;
960
961 if (!bbpos_cmp(l: start, BBPOS_MIN) &&
962 bbpos_cmp(l: end, BBPOS_MAX))
963 bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass",
964 __func__, btree_nodes_fit_in_ram(c));
965
966 if (bbpos_cmp(l: start, BBPOS_MIN) ||
967 bbpos_cmp(l: end, BBPOS_MAX)) {
968 struct printbuf buf = PRINTBUF;
969
970 prt_str(out: &buf, str: "check_backpointers_to_extents(): ");
971 bch2_bbpos_to_text(out: &buf, pos: start);
972 prt_str(out: &buf, str: "-");
973 bch2_bbpos_to_text(out: &buf, pos: end);
974
975 bch_verbose(c, "%s", buf.buf);
976 printbuf_exit(&buf);
977 }
978
979 ret = bch2_check_backpointers_to_extents_pass(trans, start, end);
980 if (ret || !bbpos_cmp(l: end, BBPOS_MAX))
981 break;
982
983 start = bbpos_successor(pos: end);
984 }
985 bch2_trans_put(trans);
986
987 c->btree_cache.pinned_nodes_leaf_mask = 0;
988 c->btree_cache.pinned_nodes_interior_mask = 0;
989
990 bch_err_fn(c, ret);
991 return ret;
992}
993

/* source: linux/fs/bcachefs/backpointers.c */