1 | // SPDX-License-Identifier: GPL-2.0 |
2 | |
3 | #include "bcachefs.h" |
4 | #include "checksum.h" |
5 | #include "disk_groups.h" |
6 | #include "ec.h" |
7 | #include "error.h" |
8 | #include "journal.h" |
9 | #include "journal_sb.h" |
10 | #include "journal_seq_blacklist.h" |
11 | #include "recovery_passes.h" |
12 | #include "replicas.h" |
13 | #include "quota.h" |
14 | #include "sb-clean.h" |
15 | #include "sb-counters.h" |
16 | #include "sb-downgrade.h" |
17 | #include "sb-errors.h" |
18 | #include "sb-members.h" |
19 | #include "super-io.h" |
20 | #include "super.h" |
21 | #include "trace.h" |
22 | #include "vstructs.h" |
23 | |
24 | #include <linux/backing-dev.h> |
25 | #include <linux/sort.h> |
26 | |
27 | static const struct blk_holder_ops bch2_sb_handle_bdev_ops = { |
28 | }; |
29 | |
30 | struct bch2_metadata_version { |
31 | u16 version; |
32 | const char *name; |
33 | }; |
34 | |
35 | static const struct bch2_metadata_version bch2_metadata_versions[] = { |
36 | #define x(n, v) { \ |
37 | .version = v, \ |
38 | .name = #n, \ |
39 | }, |
40 | BCH_METADATA_VERSIONS() |
41 | #undef x |
42 | }; |
43 | |
44 | void bch2_version_to_text(struct printbuf *out, unsigned v) |
45 | { |
46 | const char *str = "(unknown version)" ; |
47 | |
48 | for (unsigned i = 0; i < ARRAY_SIZE(bch2_metadata_versions); i++) |
49 | if (bch2_metadata_versions[i].version == v) { |
50 | str = bch2_metadata_versions[i].name; |
51 | break; |
52 | } |
53 | |
54 | prt_printf(out, "%u.%u: %s" , BCH_VERSION_MAJOR(v), BCH_VERSION_MINOR(v), str); |
55 | } |
56 | |
57 | unsigned bch2_latest_compatible_version(unsigned v) |
58 | { |
59 | if (!BCH_VERSION_MAJOR(v)) |
60 | return v; |
61 | |
62 | for (unsigned i = 0; i < ARRAY_SIZE(bch2_metadata_versions); i++) |
63 | if (bch2_metadata_versions[i].version > v && |
64 | BCH_VERSION_MAJOR(bch2_metadata_versions[i].version) == |
65 | BCH_VERSION_MAJOR(v)) |
66 | v = bch2_metadata_versions[i].version; |
67 | |
68 | return v; |
69 | } |
70 | |
71 | const char * const bch2_sb_fields[] = { |
72 | #define x(name, nr) #name, |
73 | BCH_SB_FIELDS() |
74 | #undef x |
75 | NULL |
76 | }; |
77 | |
/* Forward declaration; used by bch2_sb_validate() below. */
static int bch2_sb_field_validate(struct bch_sb *sb,
				  struct bch_sb_field *f,
				  struct printbuf *err);
80 | |
81 | struct bch_sb_field *bch2_sb_field_get_id(struct bch_sb *sb, |
82 | enum bch_sb_field_type type) |
83 | { |
84 | /* XXX: need locking around superblock to access optional fields */ |
85 | |
86 | vstruct_for_each(sb, f) |
87 | if (le32_to_cpu(f->type) == type) |
88 | return f; |
89 | return NULL; |
90 | } |
91 | |
92 | static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb_handle *sb, |
93 | struct bch_sb_field *f, |
94 | unsigned u64s) |
95 | { |
96 | unsigned old_u64s = f ? le32_to_cpu(f->u64s) : 0; |
97 | unsigned sb_u64s = le32_to_cpu(sb->sb->u64s) + u64s - old_u64s; |
98 | |
99 | BUG_ON(__vstruct_bytes(struct bch_sb, sb_u64s) > sb->buffer_size); |
100 | |
101 | if (!f && !u64s) { |
102 | /* nothing to do: */ |
103 | } else if (!f) { |
104 | f = vstruct_last(sb->sb); |
105 | memset(f, 0, sizeof(u64) * u64s); |
106 | f->u64s = cpu_to_le32(u64s); |
107 | f->type = 0; |
108 | } else { |
109 | void *src, *dst; |
110 | |
111 | src = vstruct_end(f); |
112 | |
113 | if (u64s) { |
114 | f->u64s = cpu_to_le32(u64s); |
115 | dst = vstruct_end(f); |
116 | } else { |
117 | dst = f; |
118 | } |
119 | |
120 | memmove(dst, src, vstruct_end(sb->sb) - src); |
121 | |
122 | if (dst > src) |
123 | memset(src, 0, dst - src); |
124 | } |
125 | |
126 | sb->sb->u64s = cpu_to_le32(sb_u64s); |
127 | |
128 | return u64s ? f : NULL; |
129 | } |
130 | |
131 | void bch2_sb_field_delete(struct bch_sb_handle *sb, |
132 | enum bch_sb_field_type type) |
133 | { |
134 | struct bch_sb_field *f = bch2_sb_field_get_id(sb: sb->sb, type); |
135 | |
136 | if (f) |
137 | __bch2_sb_field_resize(sb, f, u64s: 0); |
138 | } |
139 | |
140 | /* Superblock realloc/free: */ |
141 | |
142 | void bch2_free_super(struct bch_sb_handle *sb) |
143 | { |
144 | kfree(objp: sb->bio); |
145 | if (!IS_ERR_OR_NULL(ptr: sb->s_bdev_file)) |
146 | bdev_fput(bdev_file: sb->s_bdev_file); |
147 | kfree(objp: sb->holder); |
148 | kfree(objp: sb->sb_name); |
149 | |
150 | kfree(objp: sb->sb); |
151 | memset(sb, 0, sizeof(*sb)); |
152 | } |
153 | |
154 | int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s) |
155 | { |
156 | size_t new_bytes = __vstruct_bytes(struct bch_sb, u64s); |
157 | size_t new_buffer_size; |
158 | struct bch_sb *new_sb; |
159 | struct bio *bio; |
160 | |
161 | if (sb->bdev) |
162 | new_bytes = max_t(size_t, new_bytes, bdev_logical_block_size(sb->bdev)); |
163 | |
164 | new_buffer_size = roundup_pow_of_two(new_bytes); |
165 | |
166 | if (sb->sb && sb->buffer_size >= new_buffer_size) |
167 | return 0; |
168 | |
169 | if (sb->sb && sb->have_layout) { |
170 | u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits; |
171 | |
172 | if (new_bytes > max_bytes) { |
173 | struct printbuf buf = PRINTBUF; |
174 | |
175 | prt_bdevname(out: &buf, bdev: sb->bdev); |
176 | prt_printf(&buf, ": superblock too big: want %zu but have %llu" , new_bytes, max_bytes); |
177 | pr_err("%s" , buf.buf); |
178 | printbuf_exit(&buf); |
179 | return -BCH_ERR_ENOSPC_sb; |
180 | } |
181 | } |
182 | |
183 | if (sb->buffer_size >= new_buffer_size && sb->sb) |
184 | return 0; |
185 | |
186 | if (dynamic_fault("bcachefs:add:super_realloc" )) |
187 | return -BCH_ERR_ENOMEM_sb_realloc_injected; |
188 | |
189 | new_sb = krealloc(objp: sb->sb, new_size: new_buffer_size, GFP_NOFS|__GFP_ZERO); |
190 | if (!new_sb) |
191 | return -BCH_ERR_ENOMEM_sb_buf_realloc; |
192 | |
193 | sb->sb = new_sb; |
194 | |
195 | if (sb->have_bio) { |
196 | unsigned nr_bvecs = buf_pages(p: sb->sb, len: new_buffer_size); |
197 | |
198 | bio = bio_kmalloc(nr_vecs: nr_bvecs, GFP_KERNEL); |
199 | if (!bio) |
200 | return -BCH_ERR_ENOMEM_sb_bio_realloc; |
201 | |
202 | bio_init(bio, NULL, table: bio->bi_inline_vecs, max_vecs: nr_bvecs, opf: 0); |
203 | |
204 | kfree(objp: sb->bio); |
205 | sb->bio = bio; |
206 | } |
207 | |
208 | sb->buffer_size = new_buffer_size; |
209 | |
210 | return 0; |
211 | } |
212 | |
213 | struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb, |
214 | enum bch_sb_field_type type, |
215 | unsigned u64s) |
216 | { |
217 | struct bch_sb_field *f = bch2_sb_field_get_id(sb: sb->sb, type); |
218 | ssize_t old_u64s = f ? le32_to_cpu(f->u64s) : 0; |
219 | ssize_t d = -old_u64s + u64s; |
220 | |
221 | if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d)) |
222 | return NULL; |
223 | |
224 | if (sb->fs_sb) { |
225 | struct bch_fs *c = container_of(sb, struct bch_fs, disk_sb); |
226 | |
227 | lockdep_assert_held(&c->sb_lock); |
228 | |
229 | /* XXX: we're not checking that offline device have enough space */ |
230 | |
231 | for_each_online_member(c, ca) { |
232 | struct bch_sb_handle *dev_sb = &ca->disk_sb; |
233 | |
234 | if (bch2_sb_realloc(sb: dev_sb, le32_to_cpu(dev_sb->sb->u64s) + d)) { |
235 | percpu_ref_put(ref: &ca->ref); |
236 | return NULL; |
237 | } |
238 | } |
239 | } |
240 | |
241 | f = bch2_sb_field_get_id(sb: sb->sb, type); |
242 | f = __bch2_sb_field_resize(sb, f, u64s); |
243 | if (f) |
244 | f->type = cpu_to_le32(type); |
245 | return f; |
246 | } |
247 | |
248 | struct bch_sb_field *bch2_sb_field_get_minsize_id(struct bch_sb_handle *sb, |
249 | enum bch_sb_field_type type, |
250 | unsigned u64s) |
251 | { |
252 | struct bch_sb_field *f = bch2_sb_field_get_id(sb: sb->sb, type); |
253 | |
254 | if (!f || le32_to_cpu(f->u64s) < u64s) |
255 | f = bch2_sb_field_resize_id(sb, type, u64s); |
256 | return f; |
257 | } |
258 | |
259 | /* Superblock validate: */ |
260 | |
261 | static int validate_sb_layout(struct bch_sb_layout *layout, struct printbuf *out) |
262 | { |
263 | u64 offset, prev_offset, max_sectors; |
264 | unsigned i; |
265 | |
266 | BUILD_BUG_ON(sizeof(struct bch_sb_layout) != 512); |
267 | |
268 | if (!uuid_equal(u1: &layout->magic, u2: &BCACHE_MAGIC) && |
269 | !uuid_equal(u1: &layout->magic, u2: &BCHFS_MAGIC)) { |
270 | prt_printf(out, "Not a bcachefs superblock layout" ); |
271 | return -BCH_ERR_invalid_sb_layout; |
272 | } |
273 | |
274 | if (layout->layout_type != 0) { |
275 | prt_printf(out, "Invalid superblock layout type %u" , |
276 | layout->layout_type); |
277 | return -BCH_ERR_invalid_sb_layout_type; |
278 | } |
279 | |
280 | if (!layout->nr_superblocks) { |
281 | prt_printf(out, "Invalid superblock layout: no superblocks" ); |
282 | return -BCH_ERR_invalid_sb_layout_nr_superblocks; |
283 | } |
284 | |
285 | if (layout->nr_superblocks > ARRAY_SIZE(layout->sb_offset)) { |
286 | prt_printf(out, "Invalid superblock layout: too many superblocks" ); |
287 | return -BCH_ERR_invalid_sb_layout_nr_superblocks; |
288 | } |
289 | |
290 | max_sectors = 1 << layout->sb_max_size_bits; |
291 | |
292 | prev_offset = le64_to_cpu(layout->sb_offset[0]); |
293 | |
294 | for (i = 1; i < layout->nr_superblocks; i++) { |
295 | offset = le64_to_cpu(layout->sb_offset[i]); |
296 | |
297 | if (offset < prev_offset + max_sectors) { |
298 | prt_printf(out, "Invalid superblock layout: superblocks overlap\n" |
299 | " (sb %u ends at %llu next starts at %llu" , |
300 | i - 1, prev_offset + max_sectors, offset); |
301 | return -BCH_ERR_invalid_sb_layout_superblocks_overlap; |
302 | } |
303 | prev_offset = offset; |
304 | } |
305 | |
306 | return 0; |
307 | } |
308 | |
309 | static int bch2_sb_compatible(struct bch_sb *sb, struct printbuf *out) |
310 | { |
311 | u16 version = le16_to_cpu(sb->version); |
312 | u16 version_min = le16_to_cpu(sb->version_min); |
313 | |
314 | if (!bch2_version_compatible(version)) { |
315 | prt_str(out, str: "Unsupported superblock version " ); |
316 | bch2_version_to_text(out, v: version); |
317 | prt_str(out, str: " (min " ); |
318 | bch2_version_to_text(out, v: bcachefs_metadata_version_min); |
319 | prt_str(out, str: ", max " ); |
320 | bch2_version_to_text(out, bcachefs_metadata_version_current); |
321 | prt_str(out, str: ")" ); |
322 | return -BCH_ERR_invalid_sb_version; |
323 | } |
324 | |
325 | if (!bch2_version_compatible(version: version_min)) { |
326 | prt_str(out, str: "Unsupported superblock version_min " ); |
327 | bch2_version_to_text(out, v: version_min); |
328 | prt_str(out, str: " (min " ); |
329 | bch2_version_to_text(out, v: bcachefs_metadata_version_min); |
330 | prt_str(out, str: ", max " ); |
331 | bch2_version_to_text(out, bcachefs_metadata_version_current); |
332 | prt_str(out, str: ")" ); |
333 | return -BCH_ERR_invalid_sb_version; |
334 | } |
335 | |
336 | if (version_min > version) { |
337 | prt_str(out, str: "Bad minimum version " ); |
338 | bch2_version_to_text(out, v: version_min); |
339 | prt_str(out, str: ", greater than version field " ); |
340 | bch2_version_to_text(out, v: version); |
341 | return -BCH_ERR_invalid_sb_version; |
342 | } |
343 | |
344 | return 0; |
345 | } |
346 | |
347 | static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out, |
348 | int rw) |
349 | { |
350 | struct bch_sb *sb = disk_sb->sb; |
351 | struct bch_sb_field_members_v1 *mi; |
352 | enum bch_opt_id opt_id; |
353 | u16 block_size; |
354 | int ret; |
355 | |
356 | ret = bch2_sb_compatible(sb, out); |
357 | if (ret) |
358 | return ret; |
359 | |
360 | if (sb->features[1] || |
361 | (le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR))) { |
362 | prt_printf(out, "Filesystem has incompatible features" ); |
363 | return -BCH_ERR_invalid_sb_features; |
364 | } |
365 | |
366 | block_size = le16_to_cpu(sb->block_size); |
367 | |
368 | if (block_size > PAGE_SECTORS) { |
369 | prt_printf(out, "Block size too big (got %u, max %u)" , |
370 | block_size, PAGE_SECTORS); |
371 | return -BCH_ERR_invalid_sb_block_size; |
372 | } |
373 | |
374 | if (bch2_is_zero(sb->user_uuid.b, sizeof(sb->user_uuid))) { |
375 | prt_printf(out, "Bad user UUID (got zeroes)" ); |
376 | return -BCH_ERR_invalid_sb_uuid; |
377 | } |
378 | |
379 | if (bch2_is_zero(sb->uuid.b, sizeof(sb->uuid))) { |
380 | prt_printf(out, "Bad internal UUID (got zeroes)" ); |
381 | return -BCH_ERR_invalid_sb_uuid; |
382 | } |
383 | |
384 | if (!sb->nr_devices || |
385 | sb->nr_devices > BCH_SB_MEMBERS_MAX) { |
386 | prt_printf(out, "Bad number of member devices %u (max %u)" , |
387 | sb->nr_devices, BCH_SB_MEMBERS_MAX); |
388 | return -BCH_ERR_invalid_sb_too_many_members; |
389 | } |
390 | |
391 | if (sb->dev_idx >= sb->nr_devices) { |
392 | prt_printf(out, "Bad dev_idx (got %u, nr_devices %u)" , |
393 | sb->dev_idx, sb->nr_devices); |
394 | return -BCH_ERR_invalid_sb_dev_idx; |
395 | } |
396 | |
397 | if (!sb->time_precision || |
398 | le32_to_cpu(sb->time_precision) > NSEC_PER_SEC) { |
399 | prt_printf(out, "Invalid time precision: %u (min 1, max %lu)" , |
400 | le32_to_cpu(sb->time_precision), NSEC_PER_SEC); |
401 | return -BCH_ERR_invalid_sb_time_precision; |
402 | } |
403 | |
404 | if (rw == READ) { |
405 | /* |
406 | * Been seeing a bug where these are getting inexplicably |
407 | * zeroed, so we're now validating them, but we have to be |
408 | * careful not to preven people's filesystems from mounting: |
409 | */ |
410 | if (!BCH_SB_JOURNAL_FLUSH_DELAY(k: sb)) |
411 | SET_BCH_SB_JOURNAL_FLUSH_DELAY(k: sb, v: 1000); |
412 | if (!BCH_SB_JOURNAL_RECLAIM_DELAY(k: sb)) |
413 | SET_BCH_SB_JOURNAL_RECLAIM_DELAY(k: sb, v: 1000); |
414 | |
415 | if (!BCH_SB_VERSION_UPGRADE_COMPLETE(k: sb)) |
416 | SET_BCH_SB_VERSION_UPGRADE_COMPLETE(k: sb, le16_to_cpu(sb->version)); |
417 | } |
418 | |
419 | for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) { |
420 | const struct bch_option *opt = bch2_opt_table + opt_id; |
421 | |
422 | if (opt->get_sb != BCH2_NO_SB_OPT) { |
423 | u64 v = bch2_opt_from_sb(sb, opt_id); |
424 | |
425 | prt_printf(out, "Invalid option " ); |
426 | ret = bch2_opt_validate(opt, v, out); |
427 | if (ret) |
428 | return ret; |
429 | |
430 | printbuf_reset(buf: out); |
431 | } |
432 | } |
433 | |
434 | /* validate layout */ |
435 | ret = validate_sb_layout(layout: &sb->layout, out); |
436 | if (ret) |
437 | return ret; |
438 | |
439 | vstruct_for_each(sb, f) { |
440 | if (!f->u64s) { |
441 | prt_printf(out, "Invalid superblock: optional field with size 0 (type %u)" , |
442 | le32_to_cpu(f->type)); |
443 | return -BCH_ERR_invalid_sb_field_size; |
444 | } |
445 | |
446 | if (vstruct_next(f) > vstruct_last(sb)) { |
447 | prt_printf(out, "Invalid superblock: optional field extends past end of superblock (type %u)" , |
448 | le32_to_cpu(f->type)); |
449 | return -BCH_ERR_invalid_sb_field_size; |
450 | } |
451 | } |
452 | |
453 | /* members must be validated first: */ |
454 | mi = bch2_sb_field_get(sb, members_v1); |
455 | if (!mi) { |
456 | prt_printf(out, "Invalid superblock: member info area missing" ); |
457 | return -BCH_ERR_invalid_sb_members_missing; |
458 | } |
459 | |
460 | ret = bch2_sb_field_validate(sb, &mi->field, out); |
461 | if (ret) |
462 | return ret; |
463 | |
464 | vstruct_for_each(sb, f) { |
465 | if (le32_to_cpu(f->type) == BCH_SB_FIELD_members_v1) |
466 | continue; |
467 | |
468 | ret = bch2_sb_field_validate(sb, f, out); |
469 | if (ret) |
470 | return ret; |
471 | } |
472 | |
473 | if (rw == WRITE && |
474 | bch2_sb_member_get(sb, i: sb->dev_idx).seq != sb->seq) { |
475 | prt_printf(out, "Invalid superblock: member seq %llu != sb seq %llu" , |
476 | le64_to_cpu(bch2_sb_member_get(sb, sb->dev_idx).seq), |
477 | le64_to_cpu(sb->seq)); |
478 | return -BCH_ERR_invalid_sb_members_missing; |
479 | } |
480 | |
481 | return 0; |
482 | } |
483 | |
484 | /* device open: */ |
485 | |
/*
 * Convert a little endian unsigned long to CPU byte order, using the 64 or
 * 32 bit conversion depending on sizeof(unsigned long).
 */
static unsigned long le_ulong_to_cpu(unsigned long v)
{
	if (sizeof(unsigned long) == 8)
		return le64_to_cpu(v);

	return le32_to_cpu(v);
}
492 | |
493 | static void le_bitvector_to_cpu(unsigned long *dst, unsigned long *src, unsigned nr) |
494 | { |
495 | BUG_ON(nr & (BITS_PER_TYPE(long) - 1)); |
496 | |
497 | for (unsigned i = 0; i < BITS_TO_LONGS(nr); i++) |
498 | dst[i] = le_ulong_to_cpu(v: src[i]); |
499 | } |
500 | |
501 | static void bch2_sb_update(struct bch_fs *c) |
502 | { |
503 | struct bch_sb *src = c->disk_sb.sb; |
504 | |
505 | lockdep_assert_held(&c->sb_lock); |
506 | |
507 | c->sb.uuid = src->uuid; |
508 | c->sb.user_uuid = src->user_uuid; |
509 | c->sb.version = le16_to_cpu(src->version); |
510 | c->sb.version_min = le16_to_cpu(src->version_min); |
511 | c->sb.version_upgrade_complete = BCH_SB_VERSION_UPGRADE_COMPLETE(k: src); |
512 | c->sb.nr_devices = src->nr_devices; |
513 | c->sb.clean = BCH_SB_CLEAN(k: src); |
514 | c->sb.encryption_type = BCH_SB_ENCRYPTION_TYPE(k: src); |
515 | |
516 | c->sb.nsec_per_time_unit = le32_to_cpu(src->time_precision); |
517 | c->sb.time_units_per_sec = NSEC_PER_SEC / c->sb.nsec_per_time_unit; |
518 | |
519 | /* XXX this is wrong, we need a 96 or 128 bit integer type */ |
520 | c->sb.time_base_lo = div_u64(le64_to_cpu(src->time_base_lo), |
521 | divisor: c->sb.nsec_per_time_unit); |
522 | c->sb.time_base_hi = le32_to_cpu(src->time_base_hi); |
523 | |
524 | c->sb.features = le64_to_cpu(src->features[0]); |
525 | c->sb.compat = le64_to_cpu(src->compat[0]); |
526 | |
527 | memset(c->sb.errors_silent, 0, sizeof(c->sb.errors_silent)); |
528 | |
529 | struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext); |
530 | if (ext) { |
531 | le_bitvector_to_cpu(dst: c->sb.errors_silent, src: (void *) ext->errors_silent, |
532 | nr: sizeof(c->sb.errors_silent) * 8); |
533 | c->sb.btrees_lost_data = le64_to_cpu(ext->btrees_lost_data); |
534 | } |
535 | |
536 | for_each_member_device(c, ca) { |
537 | struct bch_member m = bch2_sb_member_get(sb: src, i: ca->dev_idx); |
538 | ca->mi = bch2_mi_to_cpu(mi: &m); |
539 | } |
540 | } |
541 | |
542 | static int __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src) |
543 | { |
544 | struct bch_sb_field *src_f, *dst_f; |
545 | struct bch_sb *dst = dst_handle->sb; |
546 | unsigned i; |
547 | |
548 | dst->version = src->version; |
549 | dst->version_min = src->version_min; |
550 | dst->seq = src->seq; |
551 | dst->uuid = src->uuid; |
552 | dst->user_uuid = src->user_uuid; |
553 | memcpy(dst->label, src->label, sizeof(dst->label)); |
554 | |
555 | dst->block_size = src->block_size; |
556 | dst->nr_devices = src->nr_devices; |
557 | |
558 | dst->time_base_lo = src->time_base_lo; |
559 | dst->time_base_hi = src->time_base_hi; |
560 | dst->time_precision = src->time_precision; |
561 | dst->write_time = src->write_time; |
562 | |
563 | memcpy(dst->flags, src->flags, sizeof(dst->flags)); |
564 | memcpy(dst->features, src->features, sizeof(dst->features)); |
565 | memcpy(dst->compat, src->compat, sizeof(dst->compat)); |
566 | |
567 | for (i = 0; i < BCH_SB_FIELD_NR; i++) { |
568 | int d; |
569 | |
570 | if ((1U << i) & BCH_SINGLE_DEVICE_SB_FIELDS) |
571 | continue; |
572 | |
573 | src_f = bch2_sb_field_get_id(sb: src, type: i); |
574 | dst_f = bch2_sb_field_get_id(sb: dst, type: i); |
575 | |
576 | d = (src_f ? le32_to_cpu(src_f->u64s) : 0) - |
577 | (dst_f ? le32_to_cpu(dst_f->u64s) : 0); |
578 | if (d > 0) { |
579 | int ret = bch2_sb_realloc(sb: dst_handle, |
580 | le32_to_cpu(dst_handle->sb->u64s) + d); |
581 | |
582 | if (ret) |
583 | return ret; |
584 | |
585 | dst = dst_handle->sb; |
586 | dst_f = bch2_sb_field_get_id(sb: dst, type: i); |
587 | } |
588 | |
589 | dst_f = __bch2_sb_field_resize(sb: dst_handle, f: dst_f, |
590 | u64s: src_f ? le32_to_cpu(src_f->u64s) : 0); |
591 | |
592 | if (src_f) |
593 | memcpy(dst_f, src_f, vstruct_bytes(src_f)); |
594 | } |
595 | |
596 | return 0; |
597 | } |
598 | |
599 | int bch2_sb_to_fs(struct bch_fs *c, struct bch_sb *src) |
600 | { |
601 | int ret; |
602 | |
603 | lockdep_assert_held(&c->sb_lock); |
604 | |
605 | ret = bch2_sb_realloc(sb: &c->disk_sb, u64s: 0) ?: |
606 | __copy_super(dst_handle: &c->disk_sb, src) ?: |
607 | bch2_sb_replicas_to_cpu_replicas(c) ?: |
608 | bch2_sb_disk_groups_to_cpu(c); |
609 | if (ret) |
610 | return ret; |
611 | |
612 | bch2_sb_update(c); |
613 | return 0; |
614 | } |
615 | |
616 | int bch2_sb_from_fs(struct bch_fs *c, struct bch_dev *ca) |
617 | { |
618 | return __copy_super(dst_handle: &ca->disk_sb, src: c->disk_sb.sb); |
619 | } |
620 | |
621 | /* read superblock: */ |
622 | |
623 | static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf *err) |
624 | { |
625 | size_t bytes; |
626 | int ret; |
627 | reread: |
628 | bio_reset(bio: sb->bio, bdev: sb->bdev, opf: REQ_OP_READ|REQ_SYNC|REQ_META); |
629 | sb->bio->bi_iter.bi_sector = offset; |
630 | bch2_bio_map(bio: sb->bio, base: sb->sb, sb->buffer_size); |
631 | |
632 | ret = submit_bio_wait(bio: sb->bio); |
633 | if (ret) { |
634 | prt_printf(err, "IO error: %i" , ret); |
635 | return ret; |
636 | } |
637 | |
638 | if (!uuid_equal(u1: &sb->sb->magic, u2: &BCACHE_MAGIC) && |
639 | !uuid_equal(u1: &sb->sb->magic, u2: &BCHFS_MAGIC)) { |
640 | prt_str(out: err, str: "Not a bcachefs superblock (got magic " ); |
641 | pr_uuid(out: err, uuid: sb->sb->magic.b); |
642 | prt_str(out: err, str: ")" ); |
643 | return -BCH_ERR_invalid_sb_magic; |
644 | } |
645 | |
646 | ret = bch2_sb_compatible(sb: sb->sb, out: err); |
647 | if (ret) |
648 | return ret; |
649 | |
650 | bytes = vstruct_bytes(sb->sb); |
651 | |
652 | if (bytes > 512 << sb->sb->layout.sb_max_size_bits) { |
653 | prt_printf(err, "Invalid superblock: too big (got %zu bytes, layout max %lu)" , |
654 | bytes, 512UL << sb->sb->layout.sb_max_size_bits); |
655 | return -BCH_ERR_invalid_sb_too_big; |
656 | } |
657 | |
658 | if (bytes > sb->buffer_size) { |
659 | ret = bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s)); |
660 | if (ret) |
661 | return ret; |
662 | goto reread; |
663 | } |
664 | |
665 | enum bch_csum_type csum_type = BCH_SB_CSUM_TYPE(k: sb->sb); |
666 | if (csum_type >= BCH_CSUM_NR) { |
667 | prt_printf(err, "unknown checksum type %llu" , BCH_SB_CSUM_TYPE(sb->sb)); |
668 | return -BCH_ERR_invalid_sb_csum_type; |
669 | } |
670 | |
671 | /* XXX: verify MACs */ |
672 | struct bch_csum csum = csum_vstruct(NULL, csum_type, null_nonce(), sb->sb); |
673 | if (bch2_crc_cmp(l: csum, r: sb->sb->csum)) { |
674 | bch2_csum_err_msg(out: err, type: csum_type, expected: sb->sb->csum, got: csum); |
675 | return -BCH_ERR_invalid_sb_csum; |
676 | } |
677 | |
678 | sb->seq = le64_to_cpu(sb->sb->seq); |
679 | |
680 | return 0; |
681 | } |
682 | |
683 | static int __bch2_read_super(const char *path, struct bch_opts *opts, |
684 | struct bch_sb_handle *sb, bool ignore_notbchfs_msg) |
685 | { |
686 | u64 offset = opt_get(*opts, sb); |
687 | struct bch_sb_layout layout; |
688 | struct printbuf err = PRINTBUF; |
689 | struct printbuf err2 = PRINTBUF; |
690 | __le64 *i; |
691 | int ret; |
692 | #ifndef __KERNEL__ |
693 | retry: |
694 | #endif |
695 | memset(sb, 0, sizeof(*sb)); |
696 | sb->mode = BLK_OPEN_READ; |
697 | sb->have_bio = true; |
698 | sb->holder = kmalloc(size: 1, GFP_KERNEL); |
699 | if (!sb->holder) |
700 | return -ENOMEM; |
701 | |
702 | sb->sb_name = kstrdup(s: path, GFP_KERNEL); |
703 | if (!sb->sb_name) { |
704 | ret = -ENOMEM; |
705 | prt_printf(&err, "error allocating memory for sb_name" ); |
706 | goto err; |
707 | } |
708 | |
709 | #ifndef __KERNEL__ |
710 | if (opt_get(*opts, direct_io) == false) |
711 | sb->mode |= BLK_OPEN_BUFFERED; |
712 | #endif |
713 | |
714 | if (!opt_get(*opts, noexcl)) |
715 | sb->mode |= BLK_OPEN_EXCL; |
716 | |
717 | if (!opt_get(*opts, nochanges)) |
718 | sb->mode |= BLK_OPEN_WRITE; |
719 | |
720 | sb->s_bdev_file = bdev_file_open_by_path(path, mode: sb->mode, holder: sb->holder, hops: &bch2_sb_handle_bdev_ops); |
721 | if (IS_ERR(ptr: sb->s_bdev_file) && |
722 | PTR_ERR(ptr: sb->s_bdev_file) == -EACCES && |
723 | opt_get(*opts, read_only)) { |
724 | sb->mode &= ~BLK_OPEN_WRITE; |
725 | |
726 | sb->s_bdev_file = bdev_file_open_by_path(path, mode: sb->mode, holder: sb->holder, hops: &bch2_sb_handle_bdev_ops); |
727 | if (!IS_ERR(ptr: sb->s_bdev_file)) |
728 | opt_set(*opts, nochanges, true); |
729 | } |
730 | |
731 | if (IS_ERR(ptr: sb->s_bdev_file)) { |
732 | ret = PTR_ERR(ptr: sb->s_bdev_file); |
733 | prt_printf(&err, "error opening %s: %s" , path, bch2_err_str(ret)); |
734 | goto err; |
735 | } |
736 | sb->bdev = file_bdev(bdev_file: sb->s_bdev_file); |
737 | |
738 | ret = bch2_sb_realloc(sb, u64s: 0); |
739 | if (ret) { |
740 | prt_printf(&err, "error allocating memory for superblock" ); |
741 | goto err; |
742 | } |
743 | |
744 | if (bch2_fs_init_fault("read_super" )) { |
745 | prt_printf(&err, "dynamic fault" ); |
746 | ret = -EFAULT; |
747 | goto err; |
748 | } |
749 | |
750 | ret = read_one_super(sb, offset, err: &err); |
751 | if (!ret) |
752 | goto got_super; |
753 | |
754 | if (opt_defined(*opts, sb)) |
755 | goto err; |
756 | |
757 | prt_printf(&err2, "bcachefs (%s): error reading default superblock: %s\n" , |
758 | path, err.buf); |
759 | if (ret == -BCH_ERR_invalid_sb_magic && ignore_notbchfs_msg) |
760 | bch2_print_opts(opts, KERN_INFO "%s" , err2.buf); |
761 | else |
762 | bch2_print_opts(opts, KERN_ERR "%s" , err2.buf); |
763 | |
764 | printbuf_exit(&err2); |
765 | printbuf_reset(buf: &err); |
766 | |
767 | /* |
768 | * Error reading primary superblock - read location of backup |
769 | * superblocks: |
770 | */ |
771 | bio_reset(bio: sb->bio, bdev: sb->bdev, opf: REQ_OP_READ|REQ_SYNC|REQ_META); |
772 | sb->bio->bi_iter.bi_sector = BCH_SB_LAYOUT_SECTOR; |
773 | /* |
774 | * use sb buffer to read layout, since sb buffer is page aligned but |
775 | * layout won't be: |
776 | */ |
777 | bch2_bio_map(bio: sb->bio, base: sb->sb, sizeof(struct bch_sb_layout)); |
778 | |
779 | ret = submit_bio_wait(bio: sb->bio); |
780 | if (ret) { |
781 | prt_printf(&err, "IO error: %i" , ret); |
782 | goto err; |
783 | } |
784 | |
785 | memcpy(&layout, sb->sb, sizeof(layout)); |
786 | ret = validate_sb_layout(layout: &layout, out: &err); |
787 | if (ret) |
788 | goto err; |
789 | |
790 | for (i = layout.sb_offset; |
791 | i < layout.sb_offset + layout.nr_superblocks; i++) { |
792 | offset = le64_to_cpu(*i); |
793 | |
794 | if (offset == opt_get(*opts, sb)) |
795 | continue; |
796 | |
797 | ret = read_one_super(sb, offset, err: &err); |
798 | if (!ret) |
799 | goto got_super; |
800 | } |
801 | |
802 | goto err; |
803 | |
804 | got_super: |
805 | if (le16_to_cpu(sb->sb->block_size) << 9 < |
806 | bdev_logical_block_size(bdev: sb->bdev) && |
807 | opt_get(*opts, direct_io)) { |
808 | #ifndef __KERNEL__ |
809 | opt_set(*opts, direct_io, false); |
810 | bch2_free_super(sb); |
811 | goto retry; |
812 | #endif |
813 | prt_printf(&err, "block size (%u) smaller than device block size (%u)" , |
814 | le16_to_cpu(sb->sb->block_size) << 9, |
815 | bdev_logical_block_size(sb->bdev)); |
816 | ret = -BCH_ERR_block_size_too_small; |
817 | goto err; |
818 | } |
819 | |
820 | sb->have_layout = true; |
821 | |
822 | ret = bch2_sb_validate(disk_sb: sb, out: &err, READ); |
823 | if (ret) { |
824 | bch2_print_opts(opts, KERN_ERR "bcachefs (%s): error validating superblock: %s\n" , |
825 | path, err.buf); |
826 | goto err_no_print; |
827 | } |
828 | out: |
829 | printbuf_exit(&err); |
830 | return ret; |
831 | err: |
832 | bch2_print_opts(opts, KERN_ERR "bcachefs (%s): error reading superblock: %s\n" , |
833 | path, err.buf); |
834 | err_no_print: |
835 | bch2_free_super(sb); |
836 | goto out; |
837 | } |
838 | |
839 | int bch2_read_super(const char *path, struct bch_opts *opts, |
840 | struct bch_sb_handle *sb) |
841 | { |
842 | return __bch2_read_super(path, opts, sb, ignore_notbchfs_msg: false); |
843 | } |
844 | |
845 | /* provide a silenced version for mount.bcachefs */ |
846 | |
847 | int bch2_read_super_silent(const char *path, struct bch_opts *opts, |
848 | struct bch_sb_handle *sb) |
849 | { |
850 | return __bch2_read_super(path, opts, sb, ignore_notbchfs_msg: true); |
851 | } |
852 | |
853 | /* write superblock: */ |
854 | |
855 | static void write_super_endio(struct bio *bio) |
856 | { |
857 | struct bch_dev *ca = bio->bi_private; |
858 | |
859 | /* XXX: return errors directly */ |
860 | |
861 | if (bch2_dev_io_err_on(bio->bi_status, ca, |
862 | bio_data_dir(bio) |
863 | ? BCH_MEMBER_ERROR_write |
864 | : BCH_MEMBER_ERROR_read, |
865 | "superblock %s error: %s" , |
866 | bio_data_dir(bio) ? "write" : "read" , |
867 | bch2_blk_status_to_str(bio->bi_status))) |
868 | ca->sb_write_error = 1; |
869 | |
870 | closure_put(cl: &ca->fs->sb_write); |
871 | percpu_ref_put(ref: &ca->io_ref); |
872 | } |
873 | |
874 | static void read_back_super(struct bch_fs *c, struct bch_dev *ca) |
875 | { |
876 | struct bch_sb *sb = ca->disk_sb.sb; |
877 | struct bio *bio = ca->disk_sb.bio; |
878 | |
879 | bio_reset(bio, bdev: ca->disk_sb.bdev, opf: REQ_OP_READ|REQ_SYNC|REQ_META); |
880 | bio->bi_iter.bi_sector = le64_to_cpu(sb->layout.sb_offset[0]); |
881 | bio->bi_end_io = write_super_endio; |
882 | bio->bi_private = ca; |
883 | bch2_bio_map(bio, base: ca->sb_read_scratch, PAGE_SIZE); |
884 | |
885 | this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb], |
886 | bio_sectors(bio)); |
887 | |
888 | percpu_ref_get(ref: &ca->io_ref); |
889 | closure_bio_submit(bio, &c->sb_write); |
890 | } |
891 | |
892 | static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) |
893 | { |
894 | struct bch_sb *sb = ca->disk_sb.sb; |
895 | struct bio *bio = ca->disk_sb.bio; |
896 | |
897 | sb->offset = sb->layout.sb_offset[idx]; |
898 | |
899 | SET_BCH_SB_CSUM_TYPE(k: sb, v: bch2_csum_opt_to_type(type: c->opts.metadata_checksum, data: false)); |
900 | sb->csum = csum_vstruct(c, BCH_SB_CSUM_TYPE(sb), |
901 | null_nonce(), sb); |
902 | |
903 | bio_reset(bio, bdev: ca->disk_sb.bdev, opf: REQ_OP_WRITE|REQ_SYNC|REQ_META); |
904 | bio->bi_iter.bi_sector = le64_to_cpu(sb->offset); |
905 | bio->bi_end_io = write_super_endio; |
906 | bio->bi_private = ca; |
907 | bch2_bio_map(bio, base: sb, |
908 | roundup((size_t) vstruct_bytes(sb), |
909 | bdev_logical_block_size(ca->disk_sb.bdev))); |
910 | |
911 | this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_sb], |
912 | bio_sectors(bio)); |
913 | |
914 | percpu_ref_get(ref: &ca->io_ref); |
915 | closure_bio_submit(bio, &c->sb_write); |
916 | } |
917 | |
918 | int bch2_write_super(struct bch_fs *c) |
919 | { |
920 | struct closure *cl = &c->sb_write; |
921 | struct printbuf err = PRINTBUF; |
922 | unsigned sb = 0, nr_wrote; |
923 | struct bch_devs_mask sb_written; |
924 | bool wrote, can_mount_without_written, can_mount_with_written; |
925 | unsigned degraded_flags = BCH_FORCE_IF_DEGRADED; |
926 | int ret = 0; |
927 | |
928 | trace_and_count(c, write_super, c, _RET_IP_); |
929 | |
930 | if (c->opts.very_degraded) |
931 | degraded_flags |= BCH_FORCE_IF_LOST; |
932 | |
933 | lockdep_assert_held(&c->sb_lock); |
934 | |
935 | closure_init_stack(cl); |
936 | memset(&sb_written, 0, sizeof(sb_written)); |
937 | |
938 | /* Make sure we're using the new magic numbers: */ |
939 | c->disk_sb.sb->magic = BCHFS_MAGIC; |
940 | c->disk_sb.sb->layout.magic = BCHFS_MAGIC; |
941 | |
942 | le64_add_cpu(var: &c->disk_sb.sb->seq, val: 1); |
943 | |
944 | struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); |
945 | for_each_online_member(c, ca) |
946 | __bch2_members_v2_get_mut(mi, i: ca->dev_idx)->seq = c->disk_sb.sb->seq; |
947 | c->disk_sb.sb->write_time = cpu_to_le64(ktime_get_real_seconds()); |
948 | |
949 | if (test_bit(BCH_FS_error, &c->flags)) |
950 | SET_BCH_SB_HAS_ERRORS(k: c->disk_sb.sb, v: 1); |
951 | if (test_bit(BCH_FS_topology_error, &c->flags)) |
952 | SET_BCH_SB_HAS_TOPOLOGY_ERRORS(k: c->disk_sb.sb, v: 1); |
953 | |
954 | SET_BCH_SB_BIG_ENDIAN(k: c->disk_sb.sb, CPU_BIG_ENDIAN); |
955 | |
956 | bch2_sb_counters_from_cpu(c); |
957 | bch2_sb_members_from_cpu(c); |
958 | bch2_sb_members_cpy_v2_v1(disk_sb: &c->disk_sb); |
959 | bch2_sb_errors_from_cpu(c); |
960 | bch2_sb_downgrade_update(c); |
961 | |
962 | for_each_online_member(c, ca) |
963 | bch2_sb_from_fs(c, ca); |
964 | |
965 | for_each_online_member(c, ca) { |
966 | printbuf_reset(buf: &err); |
967 | |
968 | ret = bch2_sb_validate(disk_sb: &ca->disk_sb, out: &err, WRITE); |
969 | if (ret) { |
970 | bch2_fs_inconsistent(c, "sb invalid before write: %s" , err.buf); |
971 | percpu_ref_put(ref: &ca->io_ref); |
972 | goto out; |
973 | } |
974 | } |
975 | |
976 | if (c->opts.nochanges) |
977 | goto out; |
978 | |
979 | /* |
980 | * Defer writing the superblock until filesystem initialization is |
981 | * complete - don't write out a partly initialized superblock: |
982 | */ |
983 | if (!BCH_SB_INITIALIZED(k: c->disk_sb.sb)) |
984 | goto out; |
985 | |
986 | if (le16_to_cpu(c->disk_sb.sb->version) > bcachefs_metadata_version_current) { |
987 | struct printbuf buf = PRINTBUF; |
988 | prt_printf(&buf, "attempting to write superblock that wasn't version downgraded (" ); |
989 | bch2_version_to_text(out: &buf, le16_to_cpu(c->disk_sb.sb->version)); |
990 | prt_str(out: &buf, str: " > " ); |
991 | bch2_version_to_text(out: &buf, bcachefs_metadata_version_current); |
992 | prt_str(out: &buf, str: ")" ); |
993 | bch2_fs_fatal_error(c, ": %s" , buf.buf); |
994 | printbuf_exit(&buf); |
995 | return -BCH_ERR_sb_not_downgraded; |
996 | } |
997 | |
998 | for_each_online_member(c, ca) { |
999 | __set_bit(ca->dev_idx, sb_written.d); |
1000 | ca->sb_write_error = 0; |
1001 | } |
1002 | |
1003 | for_each_online_member(c, ca) |
1004 | read_back_super(c, ca); |
1005 | closure_sync(cl); |
1006 | |
1007 | for_each_online_member(c, ca) { |
1008 | if (ca->sb_write_error) |
1009 | continue; |
1010 | |
1011 | if (le64_to_cpu(ca->sb_read_scratch->seq) < ca->disk_sb.seq) { |
1012 | bch2_fs_fatal_error(c, |
1013 | ": Superblock write was silently dropped! (seq %llu expected %llu)" , |
1014 | le64_to_cpu(ca->sb_read_scratch->seq), |
1015 | ca->disk_sb.seq); |
1016 | percpu_ref_put(ref: &ca->io_ref); |
1017 | ret = -BCH_ERR_erofs_sb_err; |
1018 | goto out; |
1019 | } |
1020 | |
1021 | if (le64_to_cpu(ca->sb_read_scratch->seq) > ca->disk_sb.seq) { |
1022 | bch2_fs_fatal_error(c, |
1023 | ": Superblock modified by another process (seq %llu expected %llu)" , |
1024 | le64_to_cpu(ca->sb_read_scratch->seq), |
1025 | ca->disk_sb.seq); |
1026 | percpu_ref_put(ref: &ca->io_ref); |
1027 | ret = -BCH_ERR_erofs_sb_err; |
1028 | goto out; |
1029 | } |
1030 | } |
1031 | |
1032 | do { |
1033 | wrote = false; |
1034 | for_each_online_member(c, ca) |
1035 | if (!ca->sb_write_error && |
1036 | sb < ca->disk_sb.sb->layout.nr_superblocks) { |
1037 | write_one_super(c, ca, idx: sb); |
1038 | wrote = true; |
1039 | } |
1040 | closure_sync(cl); |
1041 | sb++; |
1042 | } while (wrote); |
1043 | |
1044 | for_each_online_member(c, ca) { |
1045 | if (ca->sb_write_error) |
1046 | __clear_bit(ca->dev_idx, sb_written.d); |
1047 | else |
1048 | ca->disk_sb.seq = le64_to_cpu(ca->disk_sb.sb->seq); |
1049 | } |
1050 | |
1051 | nr_wrote = dev_mask_nr(devs: &sb_written); |
1052 | |
1053 | can_mount_with_written = |
1054 | bch2_have_enough_devs(c, sb_written, degraded_flags, false); |
1055 | |
1056 | for (unsigned i = 0; i < ARRAY_SIZE(sb_written.d); i++) |
1057 | sb_written.d[i] = ~sb_written.d[i]; |
1058 | |
1059 | can_mount_without_written = |
1060 | bch2_have_enough_devs(c, sb_written, degraded_flags, false); |
1061 | |
1062 | /* |
1063 | * If we would be able to mount _without_ the devices we successfully |
1064 | * wrote superblocks to, we weren't able to write to enough devices: |
1065 | * |
1066 | * Exception: if we can mount without the successes because we haven't |
1067 | * written anything (new filesystem), we continue if we'd be able to |
1068 | * mount with the devices we did successfully write to: |
1069 | */ |
1070 | if (bch2_fs_fatal_err_on(!nr_wrote || |
1071 | !can_mount_with_written || |
1072 | (can_mount_without_written && |
1073 | !can_mount_with_written), c, |
1074 | ": Unable to write superblock to sufficient devices (from %ps)" , |
1075 | (void *) _RET_IP_)) |
1076 | ret = -1; |
1077 | out: |
1078 | /* Make new options visible after they're persistent: */ |
1079 | bch2_sb_update(c); |
1080 | printbuf_exit(&err); |
1081 | return ret; |
1082 | } |
1083 | |
1084 | void __bch2_check_set_feature(struct bch_fs *c, unsigned feat) |
1085 | { |
1086 | mutex_lock(&c->sb_lock); |
1087 | if (!(c->sb.features & (1ULL << feat))) { |
1088 | c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << feat); |
1089 | |
1090 | bch2_write_super(c); |
1091 | } |
1092 | mutex_unlock(lock: &c->sb_lock); |
1093 | } |
1094 | |
1095 | /* Downgrade if superblock is at a higher version than currently supported: */ |
1096 | bool bch2_check_version_downgrade(struct bch_fs *c) |
1097 | { |
1098 | bool ret = bcachefs_metadata_version_current < c->sb.version; |
1099 | |
1100 | lockdep_assert_held(&c->sb_lock); |
1101 | |
1102 | /* |
1103 | * Downgrade, if superblock is at a higher version than currently |
1104 | * supported: |
1105 | * |
1106 | * c->sb will be checked before we write the superblock, so update it as |
1107 | * well: |
1108 | */ |
1109 | if (BCH_SB_VERSION_UPGRADE_COMPLETE(k: c->disk_sb.sb) > bcachefs_metadata_version_current) { |
1110 | SET_BCH_SB_VERSION_UPGRADE_COMPLETE(k: c->disk_sb.sb, bcachefs_metadata_version_current); |
1111 | c->sb.version_upgrade_complete = bcachefs_metadata_version_current; |
1112 | } |
1113 | if (c->sb.version > bcachefs_metadata_version_current) { |
1114 | c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current); |
1115 | c->sb.version = bcachefs_metadata_version_current; |
1116 | } |
1117 | if (c->sb.version_min > bcachefs_metadata_version_current) { |
1118 | c->disk_sb.sb->version_min = cpu_to_le16(bcachefs_metadata_version_current); |
1119 | c->sb.version_min = bcachefs_metadata_version_current; |
1120 | } |
1121 | c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1); |
1122 | return ret; |
1123 | } |
1124 | |
1125 | void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version) |
1126 | { |
1127 | lockdep_assert_held(&c->sb_lock); |
1128 | |
1129 | if (BCH_VERSION_MAJOR(new_version) > |
1130 | BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version))) |
1131 | bch2_sb_field_resize(&c->disk_sb, downgrade, 0); |
1132 | |
1133 | c->disk_sb.sb->version = cpu_to_le16(new_version); |
1134 | c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL); |
1135 | } |
1136 | |
1137 | static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f, |
1138 | struct printbuf *err) |
1139 | { |
1140 | if (vstruct_bytes(f) < 88) { |
1141 | prt_printf(err, "field too small (%zu < %u)" , vstruct_bytes(f), 88); |
1142 | return -BCH_ERR_invalid_sb_ext; |
1143 | } |
1144 | |
1145 | return 0; |
1146 | } |
1147 | |
1148 | static void bch2_sb_ext_to_text(struct printbuf *out, struct bch_sb *sb, |
1149 | struct bch_sb_field *f) |
1150 | { |
1151 | struct bch_sb_field_ext *e = field_to_type(f, ext); |
1152 | |
1153 | prt_printf(out, "Recovery passes required:" ); |
1154 | prt_tab(out); |
1155 | prt_bitflags(out, bch2_recovery_passes, |
1156 | bch2_recovery_passes_from_stable(le64_to_cpu(e->recovery_passes_required[0]))); |
1157 | prt_newline(out); |
1158 | |
1159 | unsigned long *errors_silent = kmalloc(size: sizeof(e->errors_silent), GFP_KERNEL); |
1160 | if (errors_silent) { |
1161 | le_bitvector_to_cpu(dst: errors_silent, src: (void *) e->errors_silent, nr: sizeof(e->errors_silent) * 8); |
1162 | |
1163 | prt_printf(out, "Errors to silently fix:" ); |
1164 | prt_tab(out); |
1165 | prt_bitflags_vector(out, bch2_sb_error_strs, errors_silent, sizeof(e->errors_silent) * 8); |
1166 | prt_newline(out); |
1167 | |
1168 | kfree(objp: errors_silent); |
1169 | } |
1170 | |
1171 | prt_printf(out, "Btrees with missing data:" ); |
1172 | prt_tab(out); |
1173 | prt_bitflags(out, __bch2_btree_ids, le64_to_cpu(e->btrees_lost_data)); |
1174 | prt_newline(out); |
1175 | } |
1176 | |
1177 | static const struct bch_sb_field_ops bch_sb_field_ops_ext = { |
1178 | .validate = bch2_sb_ext_validate, |
1179 | .to_text = bch2_sb_ext_to_text, |
1180 | }; |
1181 | |
1182 | static const struct bch_sb_field_ops *bch2_sb_field_ops[] = { |
1183 | #define x(f, nr) \ |
1184 | [BCH_SB_FIELD_##f] = &bch_sb_field_ops_##f, |
1185 | BCH_SB_FIELDS() |
1186 | #undef x |
1187 | }; |
1188 | |
1189 | static const struct bch_sb_field_ops bch2_sb_field_null_ops; |
1190 | |
1191 | static const struct bch_sb_field_ops *bch2_sb_field_type_ops(unsigned type) |
1192 | { |
1193 | return likely(type < ARRAY_SIZE(bch2_sb_field_ops)) |
1194 | ? bch2_sb_field_ops[type] |
1195 | : &bch2_sb_field_null_ops; |
1196 | } |
1197 | |
1198 | static int bch2_sb_field_validate(struct bch_sb *sb, struct bch_sb_field *f, |
1199 | struct printbuf *err) |
1200 | { |
1201 | unsigned type = le32_to_cpu(f->type); |
1202 | struct printbuf field_err = PRINTBUF; |
1203 | const struct bch_sb_field_ops *ops = bch2_sb_field_type_ops(type); |
1204 | int ret; |
1205 | |
1206 | ret = ops->validate ? ops->validate(sb, f, &field_err) : 0; |
1207 | if (ret) { |
1208 | prt_printf(err, "Invalid superblock section %s: %s" , |
1209 | bch2_sb_fields[type], field_err.buf); |
1210 | prt_newline(err); |
1211 | bch2_sb_field_to_text(err, sb, f); |
1212 | } |
1213 | |
1214 | printbuf_exit(&field_err); |
1215 | return ret; |
1216 | } |
1217 | |
1218 | void __bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb, |
1219 | struct bch_sb_field *f) |
1220 | { |
1221 | unsigned type = le32_to_cpu(f->type); |
1222 | const struct bch_sb_field_ops *ops = bch2_sb_field_type_ops(type); |
1223 | |
1224 | if (!out->nr_tabstops) |
1225 | printbuf_tabstop_push(out, 32); |
1226 | |
1227 | if (ops->to_text) |
1228 | ops->to_text(out, sb, f); |
1229 | } |
1230 | |
1231 | void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb, |
1232 | struct bch_sb_field *f) |
1233 | { |
1234 | unsigned type = le32_to_cpu(f->type); |
1235 | |
1236 | if (type < BCH_SB_FIELD_NR) |
1237 | prt_printf(out, "%s" , bch2_sb_fields[type]); |
1238 | else |
1239 | prt_printf(out, "(unknown field %u)" , type); |
1240 | |
1241 | prt_printf(out, " (size %zu):" , vstruct_bytes(f)); |
1242 | prt_newline(out); |
1243 | |
1244 | __bch2_sb_field_to_text(out, sb, f); |
1245 | } |
1246 | |
1247 | void bch2_sb_layout_to_text(struct printbuf *out, struct bch_sb_layout *l) |
1248 | { |
1249 | unsigned i; |
1250 | |
1251 | prt_printf(out, "Type: %u" , l->layout_type); |
1252 | prt_newline(out); |
1253 | |
1254 | prt_str(out, str: "Superblock max size: " ); |
1255 | prt_units_u64(out, 512 << l->sb_max_size_bits); |
1256 | prt_newline(out); |
1257 | |
1258 | prt_printf(out, "Nr superblocks: %u" , l->nr_superblocks); |
1259 | prt_newline(out); |
1260 | |
1261 | prt_str(out, str: "Offsets: " ); |
1262 | for (i = 0; i < l->nr_superblocks; i++) { |
1263 | if (i) |
1264 | prt_str(out, str: ", " ); |
1265 | prt_printf(out, "%llu" , le64_to_cpu(l->sb_offset[i])); |
1266 | } |
1267 | prt_newline(out); |
1268 | } |
1269 | |
1270 | void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, |
1271 | bool print_layout, unsigned fields) |
1272 | { |
1273 | u64 fields_have = 0; |
1274 | unsigned nr_devices = 0; |
1275 | |
1276 | if (!out->nr_tabstops) |
1277 | printbuf_tabstop_push(out, 44); |
1278 | |
1279 | for (int i = 0; i < sb->nr_devices; i++) |
1280 | nr_devices += bch2_dev_exists(sb, dev: i); |
1281 | |
1282 | prt_printf(out, "External UUID:" ); |
1283 | prt_tab(out); |
1284 | pr_uuid(out, uuid: sb->user_uuid.b); |
1285 | prt_newline(out); |
1286 | |
1287 | prt_printf(out, "Internal UUID:" ); |
1288 | prt_tab(out); |
1289 | pr_uuid(out, uuid: sb->uuid.b); |
1290 | prt_newline(out); |
1291 | |
1292 | prt_printf(out, "Magic number:" ); |
1293 | prt_tab(out); |
1294 | pr_uuid(out, uuid: sb->magic.b); |
1295 | prt_newline(out); |
1296 | |
1297 | prt_str(out, str: "Device index:" ); |
1298 | prt_tab(out); |
1299 | prt_printf(out, "%u" , sb->dev_idx); |
1300 | prt_newline(out); |
1301 | |
1302 | prt_str(out, str: "Label:" ); |
1303 | prt_tab(out); |
1304 | prt_printf(out, "%.*s" , (int) sizeof(sb->label), sb->label); |
1305 | prt_newline(out); |
1306 | |
1307 | prt_str(out, str: "Version:" ); |
1308 | prt_tab(out); |
1309 | bch2_version_to_text(out, le16_to_cpu(sb->version)); |
1310 | prt_newline(out); |
1311 | |
1312 | prt_str(out, str: "Version upgrade complete:" ); |
1313 | prt_tab(out); |
1314 | bch2_version_to_text(out, v: BCH_SB_VERSION_UPGRADE_COMPLETE(k: sb)); |
1315 | prt_newline(out); |
1316 | |
1317 | prt_printf(out, "Oldest version on disk:" ); |
1318 | prt_tab(out); |
1319 | bch2_version_to_text(out, le16_to_cpu(sb->version_min)); |
1320 | prt_newline(out); |
1321 | |
1322 | prt_printf(out, "Created:" ); |
1323 | prt_tab(out); |
1324 | if (sb->time_base_lo) |
1325 | bch2_prt_datetime(out, div_u64(le64_to_cpu(sb->time_base_lo), NSEC_PER_SEC)); |
1326 | else |
1327 | prt_printf(out, "(not set)" ); |
1328 | prt_newline(out); |
1329 | |
1330 | prt_printf(out, "Sequence number:" ); |
1331 | prt_tab(out); |
1332 | prt_printf(out, "%llu" , le64_to_cpu(sb->seq)); |
1333 | prt_newline(out); |
1334 | |
1335 | prt_printf(out, "Time of last write:" ); |
1336 | prt_tab(out); |
1337 | bch2_prt_datetime(out, le64_to_cpu(sb->write_time)); |
1338 | prt_newline(out); |
1339 | |
1340 | prt_printf(out, "Superblock size:" ); |
1341 | prt_tab(out); |
1342 | prt_units_u64(out, vstruct_bytes(sb)); |
1343 | prt_str(out, str: "/" ); |
1344 | prt_units_u64(out, 512ULL << sb->layout.sb_max_size_bits); |
1345 | prt_newline(out); |
1346 | |
1347 | prt_printf(out, "Clean:" ); |
1348 | prt_tab(out); |
1349 | prt_printf(out, "%llu" , BCH_SB_CLEAN(sb)); |
1350 | prt_newline(out); |
1351 | |
1352 | prt_printf(out, "Devices:" ); |
1353 | prt_tab(out); |
1354 | prt_printf(out, "%u" , nr_devices); |
1355 | prt_newline(out); |
1356 | |
1357 | prt_printf(out, "Sections:" ); |
1358 | vstruct_for_each(sb, f) |
1359 | fields_have |= 1 << le32_to_cpu(f->type); |
1360 | prt_tab(out); |
1361 | prt_bitflags(out, bch2_sb_fields, fields_have); |
1362 | prt_newline(out); |
1363 | |
1364 | prt_printf(out, "Features:" ); |
1365 | prt_tab(out); |
1366 | prt_bitflags(out, bch2_sb_features, le64_to_cpu(sb->features[0])); |
1367 | prt_newline(out); |
1368 | |
1369 | prt_printf(out, "Compat features:" ); |
1370 | prt_tab(out); |
1371 | prt_bitflags(out, bch2_sb_compat, le64_to_cpu(sb->compat[0])); |
1372 | prt_newline(out); |
1373 | |
1374 | prt_newline(out); |
1375 | prt_printf(out, "Options:" ); |
1376 | prt_newline(out); |
1377 | printbuf_indent_add(out, 2); |
1378 | { |
1379 | enum bch_opt_id id; |
1380 | |
1381 | for (id = 0; id < bch2_opts_nr; id++) { |
1382 | const struct bch_option *opt = bch2_opt_table + id; |
1383 | |
1384 | if (opt->get_sb != BCH2_NO_SB_OPT) { |
1385 | u64 v = bch2_opt_from_sb(sb, id); |
1386 | |
1387 | prt_printf(out, "%s:" , opt->attr.name); |
1388 | prt_tab(out); |
1389 | bch2_opt_to_text(out, NULL, sb, opt, v, |
1390 | OPT_HUMAN_READABLE|OPT_SHOW_FULL_LIST); |
1391 | prt_newline(out); |
1392 | } |
1393 | } |
1394 | } |
1395 | |
1396 | printbuf_indent_sub(out, 2); |
1397 | |
1398 | if (print_layout) { |
1399 | prt_newline(out); |
1400 | prt_printf(out, "layout:" ); |
1401 | prt_newline(out); |
1402 | printbuf_indent_add(out, 2); |
1403 | bch2_sb_layout_to_text(out, l: &sb->layout); |
1404 | printbuf_indent_sub(out, 2); |
1405 | } |
1406 | |
1407 | vstruct_for_each(sb, f) |
1408 | if (fields & (1 << le32_to_cpu(f->type))) { |
1409 | prt_newline(out); |
1410 | bch2_sb_field_to_text(out, sb, f); |
1411 | } |
1412 | } |
1413 | |