1 | // SPDX-License-Identifier: GPL-2.0 |
2 | |
3 | #include "bcachefs.h" |
4 | #include "btree_cache.h" |
5 | #include "disk_groups.h" |
6 | #include "opts.h" |
7 | #include "replicas.h" |
8 | #include "sb-members.h" |
9 | #include "super-io.h" |
10 | |
11 | #define x(t, n, ...) [n] = #t, |
12 | static const char * const bch2_iops_measurements[] = { |
13 | BCH_IOPS_MEASUREMENTS() |
14 | NULL |
15 | }; |
16 | |
17 | char * const bch2_member_error_strs[] = { |
18 | BCH_MEMBER_ERROR_TYPES() |
19 | NULL |
20 | }; |
21 | #undef x |
22 | |
/* Code for bch_sb_field_members_v1 and bch_sb_field_members_v2: */
24 | |
25 | struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i) |
26 | { |
27 | return __bch2_members_v2_get_mut(bch2_sb_field_get(sb, members_v2), i); |
28 | } |
29 | |
30 | static struct bch_member members_v2_get(struct bch_sb_field_members_v2 *mi, int i) |
31 | { |
32 | struct bch_member ret, *p = __bch2_members_v2_get_mut(mi, i); |
33 | memset(&ret, 0, sizeof(ret)); |
34 | memcpy(&ret, p, min_t(size_t, le16_to_cpu(mi->member_bytes), sizeof(ret))); |
35 | return ret; |
36 | } |
37 | |
38 | static struct bch_member *members_v1_get_mut(struct bch_sb_field_members_v1 *mi, int i) |
39 | { |
40 | return (void *) mi->_members + (i * BCH_MEMBER_V1_BYTES); |
41 | } |
42 | |
43 | static struct bch_member members_v1_get(struct bch_sb_field_members_v1 *mi, int i) |
44 | { |
45 | struct bch_member ret, *p = members_v1_get_mut(mi, i); |
46 | memset(&ret, 0, sizeof(ret)); |
47 | memcpy(&ret, p, min_t(size_t, BCH_MEMBER_V1_BYTES, sizeof(ret))); |
48 | return ret; |
49 | } |
50 | |
51 | struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i) |
52 | { |
53 | struct bch_sb_field_members_v2 *mi2 = bch2_sb_field_get(sb, members_v2); |
54 | if (mi2) |
55 | return members_v2_get(mi: mi2, i); |
56 | struct bch_sb_field_members_v1 *mi1 = bch2_sb_field_get(sb, members_v1); |
57 | return members_v1_get(mi: mi1, i); |
58 | } |
59 | |
/*
 * Grow each members_v2 entry in place to the current
 * sizeof(struct bch_member), for superblocks written when the struct
 * was smaller. Returns 0, or -BCH_ERR_ENOSPC_sb_members_v2 if the
 * section can't be resized.
 */
static int sb_members_v2_resize_entries(struct bch_fs *c)
{
	struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);

	if (le16_to_cpu(mi->member_bytes) < sizeof(struct bch_member)) {
		/* u64s needed for nr_devices entries at the new, larger stride */
		unsigned u64s = DIV_ROUND_UP((sizeof(*mi) + sizeof(mi->_members[0]) *
					      c->disk_sb.sb->nr_devices), 8);

		mi = bch2_sb_field_resize(&c->disk_sb, members_v2, u64s);
		if (!mi)
			return -BCH_ERR_ENOSPC_sb_members_v2;

		/*
		 * Walk backwards so each entry is moved to its new (higher)
		 * offset before any entry it would overwrite is read; then
		 * zero the newly added tail bytes of the entry.
		 */
		for (int i = c->disk_sb.sb->nr_devices - 1; i >= 0; --i) {
			void *dst = (void *) mi->_members + (i * sizeof(struct bch_member));
			memmove(dst, __bch2_members_v2_get_mut(mi, i), le16_to_cpu(mi->member_bytes));
			memset(dst + le16_to_cpu(mi->member_bytes),
			       0, (sizeof(struct bch_member) - le16_to_cpu(mi->member_bytes)));
		}
		mi->member_bytes = cpu_to_le16(sizeof(struct bch_member));
	}
	return 0;
}
82 | |
83 | int bch2_sb_members_v2_init(struct bch_fs *c) |
84 | { |
85 | struct bch_sb_field_members_v1 *mi1; |
86 | struct bch_sb_field_members_v2 *mi2; |
87 | |
88 | if (!bch2_sb_field_get(c->disk_sb.sb, members_v2)) { |
89 | mi2 = bch2_sb_field_resize(&c->disk_sb, members_v2, |
90 | DIV_ROUND_UP(sizeof(*mi2) + |
91 | sizeof(struct bch_member) * c->sb.nr_devices, |
92 | sizeof(u64))); |
93 | mi1 = bch2_sb_field_get(c->disk_sb.sb, members_v1); |
94 | memcpy(&mi2->_members[0], &mi1->_members[0], |
95 | BCH_MEMBER_V1_BYTES * c->sb.nr_devices); |
96 | memset(&mi2->pad[0], 0, sizeof(mi2->pad)); |
97 | mi2->member_bytes = cpu_to_le16(BCH_MEMBER_V1_BYTES); |
98 | } |
99 | |
100 | return sb_members_v2_resize_entries(c); |
101 | } |
102 | |
103 | int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb) |
104 | { |
105 | struct bch_sb_field_members_v1 *mi1; |
106 | struct bch_sb_field_members_v2 *mi2; |
107 | |
108 | mi1 = bch2_sb_field_resize(disk_sb, members_v1, |
109 | DIV_ROUND_UP(sizeof(*mi1) + BCH_MEMBER_V1_BYTES * |
110 | disk_sb->sb->nr_devices, sizeof(u64))); |
111 | if (!mi1) |
112 | return -BCH_ERR_ENOSPC_sb_members; |
113 | |
114 | mi2 = bch2_sb_field_get(disk_sb->sb, members_v2); |
115 | |
116 | for (unsigned i = 0; i < disk_sb->sb->nr_devices; i++) |
117 | memcpy(members_v1_get_mut(mi1, i), __bch2_members_v2_get_mut(mi2, i), BCH_MEMBER_V1_BYTES); |
118 | |
119 | return 0; |
120 | } |
121 | |
122 | static int validate_member(struct printbuf *err, |
123 | struct bch_member m, |
124 | struct bch_sb *sb, |
125 | int i) |
126 | { |
127 | if (le64_to_cpu(m.nbuckets) > LONG_MAX) { |
128 | prt_printf(err, "device %u: too many buckets (got %llu, max %lu)" , |
129 | i, le64_to_cpu(m.nbuckets), LONG_MAX); |
130 | return -BCH_ERR_invalid_sb_members; |
131 | } |
132 | |
133 | if (le64_to_cpu(m.nbuckets) - |
134 | le16_to_cpu(m.first_bucket) < BCH_MIN_NR_NBUCKETS) { |
135 | prt_printf(err, "device %u: not enough buckets (got %llu, max %u)" , |
136 | i, le64_to_cpu(m.nbuckets), BCH_MIN_NR_NBUCKETS); |
137 | return -BCH_ERR_invalid_sb_members; |
138 | } |
139 | |
140 | if (le16_to_cpu(m.bucket_size) < |
141 | le16_to_cpu(sb->block_size)) { |
142 | prt_printf(err, "device %u: bucket size %u smaller than block size %u" , |
143 | i, le16_to_cpu(m.bucket_size), le16_to_cpu(sb->block_size)); |
144 | return -BCH_ERR_invalid_sb_members; |
145 | } |
146 | |
147 | if (le16_to_cpu(m.bucket_size) < |
148 | BCH_SB_BTREE_NODE_SIZE(k: sb)) { |
149 | prt_printf(err, "device %u: bucket size %u smaller than btree node size %llu" , |
150 | i, le16_to_cpu(m.bucket_size), BCH_SB_BTREE_NODE_SIZE(sb)); |
151 | return -BCH_ERR_invalid_sb_members; |
152 | } |
153 | |
154 | return 0; |
155 | } |
156 | |
157 | static void member_to_text(struct printbuf *out, |
158 | struct bch_member m, |
159 | struct bch_sb_field_disk_groups *gi, |
160 | struct bch_sb *sb, |
161 | int i) |
162 | { |
163 | unsigned data_have = bch2_sb_dev_has_data(sb, i); |
164 | u64 bucket_size = le16_to_cpu(m.bucket_size); |
165 | u64 device_size = le64_to_cpu(m.nbuckets) * bucket_size; |
166 | |
167 | if (!bch2_member_exists(m: &m)) |
168 | return; |
169 | |
170 | prt_printf(out, "Device:" ); |
171 | prt_tab(out); |
172 | prt_printf(out, "%u" , i); |
173 | prt_newline(out); |
174 | |
175 | printbuf_indent_add(out, 2); |
176 | |
177 | prt_printf(out, "Label:" ); |
178 | prt_tab(out); |
179 | if (BCH_MEMBER_GROUP(k: &m)) { |
180 | unsigned idx = BCH_MEMBER_GROUP(k: &m) - 1; |
181 | |
182 | if (idx < disk_groups_nr(groups: gi)) |
183 | prt_printf(out, "%s (%u)" , |
184 | gi->entries[idx].label, idx); |
185 | else |
186 | prt_printf(out, "(bad disk labels section)" ); |
187 | } else { |
188 | prt_printf(out, "(none)" ); |
189 | } |
190 | prt_newline(out); |
191 | |
192 | prt_printf(out, "UUID:" ); |
193 | prt_tab(out); |
194 | pr_uuid(out, uuid: m.uuid.b); |
195 | prt_newline(out); |
196 | |
197 | prt_printf(out, "Size:" ); |
198 | prt_tab(out); |
199 | prt_units_u64(out, device_size << 9); |
200 | prt_newline(out); |
201 | |
202 | for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) { |
203 | prt_printf(out, "%s errors:" , bch2_member_error_strs[i]); |
204 | prt_tab(out); |
205 | prt_u64(out, le64_to_cpu(m.errors[i])); |
206 | prt_newline(out); |
207 | } |
208 | |
209 | for (unsigned i = 0; i < BCH_IOPS_NR; i++) { |
210 | prt_printf(out, "%s iops:" , bch2_iops_measurements[i]); |
211 | prt_tab(out); |
212 | prt_printf(out, "%u" , le32_to_cpu(m.iops[i])); |
213 | prt_newline(out); |
214 | } |
215 | |
216 | prt_printf(out, "Bucket size:" ); |
217 | prt_tab(out); |
218 | prt_units_u64(out, bucket_size << 9); |
219 | prt_newline(out); |
220 | |
221 | prt_printf(out, "First bucket:" ); |
222 | prt_tab(out); |
223 | prt_printf(out, "%u" , le16_to_cpu(m.first_bucket)); |
224 | prt_newline(out); |
225 | |
226 | prt_printf(out, "Buckets:" ); |
227 | prt_tab(out); |
228 | prt_printf(out, "%llu" , le64_to_cpu(m.nbuckets)); |
229 | prt_newline(out); |
230 | |
231 | prt_printf(out, "Last mount:" ); |
232 | prt_tab(out); |
233 | if (m.last_mount) |
234 | bch2_prt_datetime(out, le64_to_cpu(m.last_mount)); |
235 | else |
236 | prt_printf(out, "(never)" ); |
237 | prt_newline(out); |
238 | |
239 | prt_printf(out, "Last superblock write:" ); |
240 | prt_tab(out); |
241 | prt_u64(out, le64_to_cpu(m.seq)); |
242 | prt_newline(out); |
243 | |
244 | prt_printf(out, "State:" ); |
245 | prt_tab(out); |
246 | prt_printf(out, "%s" , |
247 | BCH_MEMBER_STATE(&m) < BCH_MEMBER_STATE_NR |
248 | ? bch2_member_states[BCH_MEMBER_STATE(&m)] |
249 | : "unknown" ); |
250 | prt_newline(out); |
251 | |
252 | prt_printf(out, "Data allowed:" ); |
253 | prt_tab(out); |
254 | if (BCH_MEMBER_DATA_ALLOWED(k: &m)) |
255 | prt_bitflags(out, __bch2_data_types, BCH_MEMBER_DATA_ALLOWED(&m)); |
256 | else |
257 | prt_printf(out, "(none)" ); |
258 | prt_newline(out); |
259 | |
260 | prt_printf(out, "Has data:" ); |
261 | prt_tab(out); |
262 | if (data_have) |
263 | prt_bitflags(out, __bch2_data_types, data_have); |
264 | else |
265 | prt_printf(out, "(none)" ); |
266 | prt_newline(out); |
267 | |
268 | prt_str(out, str: "Durability:" ); |
269 | prt_tab(out); |
270 | prt_printf(out, "%llu" , BCH_MEMBER_DURABILITY(&m) ? BCH_MEMBER_DURABILITY(&m) - 1 : 1); |
271 | prt_newline(out); |
272 | |
273 | prt_printf(out, "Discard:" ); |
274 | prt_tab(out); |
275 | prt_printf(out, "%llu" , BCH_MEMBER_DISCARD(&m)); |
276 | prt_newline(out); |
277 | |
278 | prt_printf(out, "Freespace initialized:" ); |
279 | prt_tab(out); |
280 | prt_printf(out, "%llu" , BCH_MEMBER_FREESPACE_INITIALIZED(&m)); |
281 | prt_newline(out); |
282 | |
283 | printbuf_indent_sub(out, 2); |
284 | } |
285 | |
286 | static int bch2_sb_members_v1_validate(struct bch_sb *sb, |
287 | struct bch_sb_field *f, |
288 | struct printbuf *err) |
289 | { |
290 | struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1); |
291 | unsigned i; |
292 | |
293 | if ((void *) members_v1_get_mut(mi, i: sb->nr_devices) > vstruct_end(&mi->field)) { |
294 | prt_printf(err, "too many devices for section size" ); |
295 | return -BCH_ERR_invalid_sb_members; |
296 | } |
297 | |
298 | for (i = 0; i < sb->nr_devices; i++) { |
299 | struct bch_member m = members_v1_get(mi, i); |
300 | |
301 | int ret = validate_member(err, m, sb, i); |
302 | if (ret) |
303 | return ret; |
304 | } |
305 | |
306 | return 0; |
307 | } |
308 | |
309 | static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb, |
310 | struct bch_sb_field *f) |
311 | { |
312 | struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1); |
313 | struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); |
314 | unsigned i; |
315 | |
316 | for (i = 0; i < sb->nr_devices; i++) |
317 | member_to_text(out, m: members_v1_get(mi, i), gi, sb, i); |
318 | } |
319 | |
/* Field ops for the legacy fixed-stride members_v1 superblock section */
const struct bch_sb_field_ops bch_sb_field_ops_members_v1 = {
	.validate	= bch2_sb_members_v1_validate,
	.to_text	= bch2_sb_members_v1_to_text,
};
324 | |
325 | static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb, |
326 | struct bch_sb_field *f) |
327 | { |
328 | struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2); |
329 | struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); |
330 | unsigned i; |
331 | |
332 | for (i = 0; i < sb->nr_devices; i++) |
333 | member_to_text(out, m: members_v2_get(mi, i), gi, sb, i); |
334 | } |
335 | |
336 | static int bch2_sb_members_v2_validate(struct bch_sb *sb, |
337 | struct bch_sb_field *f, |
338 | struct printbuf *err) |
339 | { |
340 | struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2); |
341 | size_t mi_bytes = (void *) __bch2_members_v2_get_mut(mi, i: sb->nr_devices) - |
342 | (void *) mi; |
343 | |
344 | if (mi_bytes > vstruct_bytes(&mi->field)) { |
345 | prt_printf(err, "section too small (%zu > %zu)" , |
346 | mi_bytes, vstruct_bytes(&mi->field)); |
347 | return -BCH_ERR_invalid_sb_members; |
348 | } |
349 | |
350 | for (unsigned i = 0; i < sb->nr_devices; i++) { |
351 | int ret = validate_member(err, m: members_v2_get(mi, i), sb, i); |
352 | if (ret) |
353 | return ret; |
354 | } |
355 | |
356 | return 0; |
357 | } |
358 | |
/* Field ops for the variable-stride members_v2 superblock section */
const struct bch_sb_field_ops bch_sb_field_ops_members_v2 = {
	.validate	= bch2_sb_members_v2_validate,
	.to_text	= bch2_sb_members_v2_to_text,
};
363 | |
364 | void bch2_sb_members_from_cpu(struct bch_fs *c) |
365 | { |
366 | struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); |
367 | |
368 | rcu_read_lock(); |
369 | for_each_member_device_rcu(c, ca, NULL) { |
370 | struct bch_member *m = __bch2_members_v2_get_mut(mi, i: ca->dev_idx); |
371 | |
372 | for (unsigned e = 0; e < BCH_MEMBER_ERROR_NR; e++) |
373 | m->errors[e] = cpu_to_le64(atomic64_read(&ca->errors[e])); |
374 | } |
375 | rcu_read_unlock(); |
376 | } |
377 | |
378 | void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca) |
379 | { |
380 | struct bch_fs *c = ca->fs; |
381 | struct bch_member m; |
382 | |
383 | mutex_lock(&ca->fs->sb_lock); |
384 | m = bch2_sb_member_get(sb: c->disk_sb.sb, i: ca->dev_idx); |
385 | mutex_unlock(lock: &ca->fs->sb_lock); |
386 | |
387 | printbuf_tabstop_push(out, 12); |
388 | |
389 | prt_str(out, str: "IO errors since filesystem creation" ); |
390 | prt_newline(out); |
391 | |
392 | printbuf_indent_add(out, 2); |
393 | for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) { |
394 | prt_printf(out, "%s:" , bch2_member_error_strs[i]); |
395 | prt_tab(out); |
396 | prt_u64(out, atomic64_read(&ca->errors[i])); |
397 | prt_newline(out); |
398 | } |
399 | printbuf_indent_sub(out, 2); |
400 | |
401 | prt_str(out, str: "IO errors since " ); |
402 | bch2_pr_time_units(out, (ktime_get_real_seconds() - le64_to_cpu(m.errors_reset_time)) * NSEC_PER_SEC); |
403 | prt_str(out, str: " ago" ); |
404 | prt_newline(out); |
405 | |
406 | printbuf_indent_add(out, 2); |
407 | for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) { |
408 | prt_printf(out, "%s:" , bch2_member_error_strs[i]); |
409 | prt_tab(out); |
410 | prt_u64(out, atomic64_read(&ca->errors[i]) - le64_to_cpu(m.errors_at_reset[i])); |
411 | prt_newline(out); |
412 | } |
413 | printbuf_indent_sub(out, 2); |
414 | } |
415 | |
416 | void bch2_dev_errors_reset(struct bch_dev *ca) |
417 | { |
418 | struct bch_fs *c = ca->fs; |
419 | struct bch_member *m; |
420 | |
421 | mutex_lock(&c->sb_lock); |
422 | m = bch2_members_v2_get_mut(sb: c->disk_sb.sb, i: ca->dev_idx); |
423 | for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++) |
424 | m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i])); |
425 | m->errors_reset_time = cpu_to_le64(ktime_get_real_seconds()); |
426 | |
427 | bch2_write_super(c); |
428 | mutex_unlock(lock: &c->sb_lock); |
429 | } |
430 | |
431 | /* |
432 | * Per member "range has btree nodes" bitmap: |
433 | * |
434 | * This is so that if we ever have to run the btree node scan to repair we don't |
435 | * have to scan full devices: |
436 | */ |
437 | |
438 | bool bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k) |
439 | { |
440 | bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) |
441 | if (!bch2_dev_btree_bitmap_marked_sectors(ca: bch_dev_bkey_exists(c, idx: ptr->dev), |
442 | start: ptr->offset, sectors: btree_sectors(c))) |
443 | return false; |
444 | return true; |
445 | } |
446 | |
447 | static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, unsigned dev, |
448 | u64 start, unsigned sectors) |
449 | { |
450 | struct bch_member *m = __bch2_members_v2_get_mut(mi, i: dev); |
451 | u64 bitmap = le64_to_cpu(m->btree_allocated_bitmap); |
452 | |
453 | u64 end = start + sectors; |
454 | |
455 | int resize = ilog2(roundup_pow_of_two(end)) - (m->btree_bitmap_shift + 6); |
456 | if (resize > 0) { |
457 | u64 new_bitmap = 0; |
458 | |
459 | for (unsigned i = 0; i < 64; i++) |
460 | if (bitmap & BIT_ULL(i)) |
461 | new_bitmap |= BIT_ULL(i >> resize); |
462 | bitmap = new_bitmap; |
463 | m->btree_bitmap_shift += resize; |
464 | } |
465 | |
466 | for (unsigned bit = start >> m->btree_bitmap_shift; |
467 | (u64) bit << m->btree_bitmap_shift < end; |
468 | bit++) |
469 | bitmap |= BIT_ULL(bit); |
470 | |
471 | m->btree_allocated_bitmap = cpu_to_le64(bitmap); |
472 | } |
473 | |
474 | void bch2_dev_btree_bitmap_mark(struct bch_fs *c, struct bkey_s_c k) |
475 | { |
476 | lockdep_assert_held(&c->sb_lock); |
477 | |
478 | struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); |
479 | bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) |
480 | __bch2_dev_btree_bitmap_mark(mi, dev: ptr->dev, start: ptr->offset, sectors: btree_sectors(c)); |
481 | } |
482 | |