// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "bbpos.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_io.h"
#include "btree_iter.h"
#include "btree_locking.h"
#include "debug.h"
#include "errcode.h"
#include "error.h"
#include "journal.h"
#include "trace.h"

#include <linux/prefetch.h>
#include <linux/sched/mm.h>

const char * const bch2_btree_node_flags[] = {
#define x(f)	#f,
	BTREE_FLAGS()
#undef x
	NULL
};

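/*
 * Size the reserve of btree nodes kept in the cache: a base of 16, plus 8 if
 * the first btree root hasn't been allocated yet, plus 8 for each btree whose
 * root is an interior node. The shrinker never frees below this reserve, so
 * allocating nodes for splits and interior updates can always succeed.
 */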
void bch2_recalc_btree_reserve(struct bch_fs *c)
{
	unsigned i, reserve = 16;

	if (!c->btree_roots_known[0].b)
		reserve += 8;

	for (i = 0; i < btree_id_nr_alive(c); i++) {
		struct btree_root *r = bch2_btree_id_root(c, i);

		if (r->b)
			reserve += min_t(unsigned, 1, r->b->c.level) * 8;
	}

	c->btree_cache.reserve = reserve;
}

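/* Number of cached nodes the shrinker may free: everything above the reserve */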
static inline unsigned btree_cache_can_free(struct btree_cache *bc)
{
	return max_t(int, 0, bc->used - bc->reserve);
}

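/*
 * struct btree itself is never freed while the filesystem is mounted; nodes
 * whose data buffers have been released go on a freed list for reuse. Nodes
 * whose six lock uses per-cpu reader counts go on a separate list, so that a
 * reused node comes back with the right lock flavour.
 */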
static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b)
{
	if (b->c.lock.readers)
		list_move(&b->list, &bc->freed_pcpu);
	else
		list_move(&b->list, &bc->freed_nonpcpu);
}

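/*
 * Free a node's data buffers - the on disk image and the auxiliary search
 * trees - but keep struct btree itself on a freed list for reuse.
 */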
static void btree_node_data_free(struct bch_fs *c, struct btree *b)
{
	struct btree_cache *bc = &c->btree_cache;

	EBUG_ON(btree_node_write_in_flight(b));

	clear_btree_node_just_written(b);

	kvfree(b->data);
	b->data = NULL;
#ifdef __KERNEL__
	kvfree(b->aux_data);
#else
	munmap(b->aux_data, btree_aux_data_bytes(b));
#endif
	b->aux_data = NULL;

	bc->used--;

	btree_node_to_freedlist(bc, b);
}

static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg,
				   const void *obj)
{
	const struct btree *b = obj;
	const u64 *v = arg->key;

	return b->hash_val == *v ? 0 : 1;
}

static const struct rhashtable_params bch_btree_cache_params = {
	.head_offset	= offsetof(struct btree, hash),
	.key_offset	= offsetof(struct btree, hash_val),
	.key_len	= sizeof(u64),
	.obj_cmpfn	= bch2_btree_cache_cmp_fn,
};

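/*
 * Allocate the buffers backing a node: b->data for the on disk image,
 * b->aux_data for the auxiliary search trees. The mmap() path covers builds
 * outside the kernel (the userspace tools); it is mapped PROT_EXEC, presumably
 * because the generated bkey unpack function lives in that buffer.
 */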
static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
{
	BUG_ON(b->data || b->aux_data);

	b->data = kvmalloc(btree_buf_bytes(b), gfp);
	if (!b->data)
		return -BCH_ERR_ENOMEM_btree_node_mem_alloc;
#ifdef __KERNEL__
	b->aux_data = kvmalloc(btree_aux_data_bytes(b), gfp);
#else
	b->aux_data = mmap(NULL, btree_aux_data_bytes(b),
			   PROT_READ|PROT_WRITE|PROT_EXEC,
			   MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
	if (b->aux_data == MAP_FAILED)
		b->aux_data = NULL;
#endif
	if (!b->aux_data) {
		kvfree(b->data);
		b->data = NULL;
		return -BCH_ERR_ENOMEM_btree_node_mem_alloc;
	}

	return 0;
}

static struct btree *__btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp)
{
	struct btree *b;

	b = kzalloc(sizeof(struct btree), gfp);
	if (!b)
		return NULL;

	bkey_btree_ptr_init(&b->key);
	INIT_LIST_HEAD(&b->list);
	INIT_LIST_HEAD(&b->write_blocked);
	b->byte_order = ilog2(c->opts.btree_node_size);
	return b;
}

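/*
 * Allocate a node for the cache's reserve: fully initialized, with its lock
 * set up, parked on the freeable list until it's needed.
 */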
struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
{
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b;

	b = __btree_node_mem_alloc(c, GFP_KERNEL);
	if (!b)
		return NULL;

	if (btree_node_data_alloc(c, b, GFP_KERNEL)) {
		kfree(b);
		return NULL;
	}

	bch2_btree_lock_init(&b->c, 0);

	bc->used++;
	list_add(&b->list, &bc->freeable);
	return b;
}

/* Btree in memory cache - hash table */

void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
{
	int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params);

	BUG_ON(ret);

	/* Cause future lookups for this node to fail: */
	b->hash_val = 0;
}

int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
{
	BUG_ON(b->hash_val);
	b->hash_val = btree_ptr_hash_val(&b->key);

	return rhashtable_lookup_insert_fast(&bc->table, &b->hash,
					     bch_btree_cache_params);
}

int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b,
				unsigned level, enum btree_id id)
{
	int ret;

	b->c.level	= level;
	b->c.btree_id	= id;

	mutex_lock(&bc->lock);
	ret = __bch2_btree_node_hash_insert(bc, b);
	if (!ret)
		list_add_tail(&b->list, &bc->live);
	mutex_unlock(&bc->lock);

	return ret;
}

__flatten
static inline struct btree *btree_cache_find(struct btree_cache *bc,
					     const struct bkey_i *k)
{
	u64 v = btree_ptr_hash_val(k);

	return rhashtable_lookup_fast(&bc->table, &v, bch_btree_cache_params);
}

/*
 * this version is for btree nodes that have already been freed (we're not
 * reaping a real btree node)
 */
static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
{
	struct btree_cache *bc = &c->btree_cache;
	int ret = 0;

	lockdep_assert_held(&bc->lock);

	struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p);

	u64 mask = b->c.level
		? bc->pinned_nodes_interior_mask
		: bc->pinned_nodes_leaf_mask;

	if ((mask & BIT_ULL(b->c.btree_id)) &&
	    bbpos_cmp(bc->pinned_nodes_start, pos) < 0 &&
	    bbpos_cmp(bc->pinned_nodes_end, pos) >= 0)
		return -BCH_ERR_ENOMEM_btree_node_reclaim;

wait_on_io:
	if (b->flags & ((1U << BTREE_NODE_dirty)|
			(1U << BTREE_NODE_read_in_flight)|
			(1U << BTREE_NODE_write_in_flight))) {
		if (!flush)
			return -BCH_ERR_ENOMEM_btree_node_reclaim;

		/* XXX: waiting on IO with btree cache lock held */
		bch2_btree_node_wait_on_read(b);
		bch2_btree_node_wait_on_write(b);
	}

	if (!six_trylock_intent(&b->c.lock))
		return -BCH_ERR_ENOMEM_btree_node_reclaim;

	if (!six_trylock_write(&b->c.lock))
		goto out_unlock_intent;

	/* recheck under lock */
	if (b->flags & ((1U << BTREE_NODE_read_in_flight)|
			(1U << BTREE_NODE_write_in_flight))) {
		if (!flush)
			goto out_unlock;
		six_unlock_write(&b->c.lock);
		six_unlock_intent(&b->c.lock);
		goto wait_on_io;
	}

	if (btree_node_noevict(b) ||
	    btree_node_write_blocked(b) ||
	    btree_node_will_make_reachable(b))
		goto out_unlock;

	if (btree_node_dirty(b)) {
		if (!flush)
			goto out_unlock;
		/*
		 * Using the underscore version because we don't want to compact
		 * bsets after the write, since this node is about to be evicted
		 * - unless btree verify mode is enabled, since it runs out of
		 * the post write cleanup:
		 */
		if (bch2_verify_btree_ondisk)
			bch2_btree_node_write(c, b, SIX_LOCK_intent,
					      BTREE_WRITE_cache_reclaim);
		else
			__bch2_btree_node_write(c, b,
						BTREE_WRITE_cache_reclaim);

		six_unlock_write(&b->c.lock);
		six_unlock_intent(&b->c.lock);
		goto wait_on_io;
	}
out:
	if (b->hash_val && !ret)
		trace_and_count(c, btree_cache_reap, c, b);
	return ret;
out_unlock:
	six_unlock_write(&b->c.lock);
out_unlock_intent:
	six_unlock_intent(&b->c.lock);
	ret = -BCH_ERR_ENOMEM_btree_node_reclaim;
	goto out;
}

static int btree_node_reclaim(struct bch_fs *c, struct btree *b)
{
	return __btree_node_reclaim(c, b, false);
}

static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
{
	return __btree_node_reclaim(c, b, true);
}

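/*
 * Shrinker scan callback: reclaim from the freeable list first (nodes whose
 * data buffers aren't in use by the btree), then walk the live list, clearing
 * the accessed bit on recently used nodes and evicting clean nodes whose
 * accessed bit was already clear. If most of the cache is dirty, also issue
 * writes for dirty nodes we encounter so a later scan can evict them.
 */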
static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
					   struct shrink_control *sc)
{
	struct bch_fs *c = shrink->private_data;
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b, *t;
	unsigned long nr = sc->nr_to_scan;
	unsigned long can_free = 0;
	unsigned long freed = 0;
	unsigned long touched = 0;
	unsigned i, flags;
	unsigned long ret = SHRINK_STOP;
	bool trigger_writes = atomic_read(&bc->dirty) + nr >=
		bc->used * 3 / 4;

	if (bch2_btree_shrinker_disabled)
		return SHRINK_STOP;

	mutex_lock(&bc->lock);
	flags = memalloc_nofs_save();

	/*
	 * It's _really_ critical that we don't free too many btree nodes - we
	 * have to always leave ourselves a reserve. The reserve is how we
	 * guarantee that allocating memory for a new btree node can always
	 * succeed, so that inserting keys into the btree can always succeed and
	 * IO can always make forward progress:
	 */
	can_free = btree_cache_can_free(bc);
	nr = min_t(unsigned long, nr, can_free);

	i = 0;
	list_for_each_entry_safe(b, t, &bc->freeable, list) {
		/*
		 * Leave a few nodes on the freeable list, so that a btree split
		 * won't have to hit the system allocator:
		 */
		if (++i <= 3)
			continue;

		touched++;

		if (touched >= nr)
			goto out;

		if (!btree_node_reclaim(c, b)) {
			btree_node_data_free(c, b);
			six_unlock_write(&b->c.lock);
			six_unlock_intent(&b->c.lock);
			freed++;
		}
	}
restart:
	list_for_each_entry_safe(b, t, &bc->live, list) {
		touched++;

		if (btree_node_accessed(b)) {
			clear_btree_node_accessed(b);
		} else if (!btree_node_reclaim(c, b)) {
			freed++;
			btree_node_data_free(c, b);

			bch2_btree_node_hash_remove(bc, b);
			six_unlock_write(&b->c.lock);
			six_unlock_intent(&b->c.lock);

			if (freed == nr)
				goto out_rotate;
		} else if (trigger_writes &&
			   btree_node_dirty(b) &&
			   !btree_node_will_make_reachable(b) &&
			   !btree_node_write_blocked(b) &&
			   six_trylock_read(&b->c.lock)) {
			list_move(&bc->live, &b->list);
			mutex_unlock(&bc->lock);
			__bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
			six_unlock_read(&b->c.lock);
			if (touched >= nr)
				goto out_nounlock;
			mutex_lock(&bc->lock);
			goto restart;
		}

		if (touched >= nr)
			break;
	}
out_rotate:
	if (&t->list != &bc->live)
		list_move_tail(&bc->live, &t->list);
out:
	mutex_unlock(&bc->lock);
out_nounlock:
	ret = freed;
	memalloc_nofs_restore(flags);
	trace_and_count(c, btree_cache_scan, sc->nr_to_scan, can_free, ret);
	return ret;
}

static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
					    struct shrink_control *sc)
{
	struct bch_fs *c = shrink->private_data;
	struct btree_cache *bc = &c->btree_cache;

	if (bch2_btree_shrinker_disabled)
		return 0;

	return btree_cache_can_free(bc);
}

void bch2_fs_btree_cache_exit(struct bch_fs *c)
{
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b;
	unsigned i, flags;

	shrinker_free(bc->shrink);

	/* vfree() can allocate memory: */
	flags = memalloc_nofs_save();
	mutex_lock(&bc->lock);

	if (c->verify_data)
		list_move(&c->verify_data->list, &bc->live);

	kvfree(c->verify_ondisk);

	for (i = 0; i < btree_id_nr_alive(c); i++) {
		struct btree_root *r = bch2_btree_id_root(c, i);

		if (r->b)
			list_add(&r->b->list, &bc->live);
	}

	list_splice(&bc->freeable, &bc->live);

	while (!list_empty(&bc->live)) {
		b = list_first_entry(&bc->live, struct btree, list);

		BUG_ON(btree_node_read_in_flight(b) ||
		       btree_node_write_in_flight(b));

		btree_node_data_free(c, b);
	}

	BUG_ON(!bch2_journal_error(&c->journal) &&
	       atomic_read(&c->btree_cache.dirty));

	list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu);

	while (!list_empty(&bc->freed_nonpcpu)) {
		b = list_first_entry(&bc->freed_nonpcpu, struct btree, list);
		list_del(&b->list);
		six_lock_exit(&b->c.lock);
		kfree(b);
	}

	mutex_unlock(&bc->lock);
	memalloc_nofs_restore(flags);

	if (bc->table_init_done)
		rhashtable_destroy(&bc->table);
}

int bch2_fs_btree_cache_init(struct bch_fs *c)
{
	struct btree_cache *bc = &c->btree_cache;
	struct shrinker *shrink;
	unsigned i;
	int ret = 0;

	ret = rhashtable_init(&bc->table, &bch_btree_cache_params);
	if (ret)
		goto err;

	bc->table_init_done = true;

	bch2_recalc_btree_reserve(c);

	for (i = 0; i < bc->reserve; i++)
		if (!__bch2_btree_node_mem_alloc(c))
			goto err;

	list_splice_init(&bc->live, &bc->freeable);

	mutex_init(&c->verify_lock);

	shrink = shrinker_alloc(0, "%s-btree_cache", c->name);
	if (!shrink)
		goto err;
	bc->shrink = shrink;
	shrink->count_objects	= bch2_btree_cache_count;
	shrink->scan_objects	= bch2_btree_cache_scan;
	shrink->seeks		= 4;
	shrink->private_data	= c;
	shrinker_register(shrink);

	return 0;
err:
	return -BCH_ERR_ENOMEM_fs_btree_cache_init;
}

void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
{
	mutex_init(&bc->lock);
	INIT_LIST_HEAD(&bc->live);
	INIT_LIST_HEAD(&bc->freeable);
	INIT_LIST_HEAD(&bc->freed_pcpu);
	INIT_LIST_HEAD(&bc->freed_nonpcpu);
}

/*
 * We can only have one thread cannibalizing other cached btree nodes at a time,
 * or we'll deadlock. We use an open coded mutex to ensure that, which a
 * cannibalize_bucket() will take. This means every time we unlock the root of
 * the btree, we need to release this lock if we have it held.
 */
void bch2_btree_cache_cannibalize_unlock(struct btree_trans *trans)
{
	struct bch_fs *c = trans->c;
	struct btree_cache *bc = &c->btree_cache;

	if (bc->alloc_lock == current) {
		trace_and_count(c, btree_cache_cannibalize_unlock, trans);
		bc->alloc_lock = NULL;
		closure_wake_up(&bc->alloc_wait);
	}
}

int bch2_btree_cache_cannibalize_lock(struct btree_trans *trans, struct closure *cl)
{
	struct bch_fs *c = trans->c;
	struct btree_cache *bc = &c->btree_cache;
	struct task_struct *old;

	old = cmpxchg(&bc->alloc_lock, NULL, current);
	if (old == NULL || old == current)
		goto success;

	if (!cl) {
		trace_and_count(c, btree_cache_cannibalize_lock_fail, trans);
		return -BCH_ERR_ENOMEM_btree_cache_cannibalize_lock;
	}

	closure_wait(&bc->alloc_wait, cl);

	/* Try again, after adding ourselves to waitlist */
	old = cmpxchg(&bc->alloc_lock, NULL, current);
	if (old == NULL || old == current) {
		/* We raced */
		closure_wake_up(&bc->alloc_wait);
		goto success;
	}

	trace_and_count(c, btree_cache_cannibalize_lock_fail, trans);
	return -BCH_ERR_btree_cache_cannibalize_lock_blocked;

success:
	trace_and_count(c, btree_cache_cannibalize_lock, trans);
	return 0;
}

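/*
 * Steal memory from another cached node: scan the live list for a node we can
 * reclaim without flushing, then fall back to forcing writes. Only called by
 * the thread holding the cannibalize lock, which is why it's allowed to
 * busy-wait until it succeeds.
 */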
static struct btree *btree_node_cannibalize(struct bch_fs *c)
{
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b;

	list_for_each_entry_reverse(b, &bc->live, list)
		if (!btree_node_reclaim(c, b))
			return b;

	while (1) {
		list_for_each_entry_reverse(b, &bc->live, list)
			if (!btree_node_write_and_reclaim(c, b))
				return b;

		/*
		 * Rare case: all nodes were intent-locked.
		 * Just busy-wait.
		 */
		WARN_ONCE(1, "btree cache cannibalize failed\n");
		cond_resched();
	}
}

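/*
 * Allocate a node for the btree to use: reuse a struct btree from the freed
 * lists if possible, then look for data buffers on the freeable list before
 * falling back to the allocator. If all of that fails and this thread holds
 * the cannibalize lock, evict another cached node and steal its memory.
 */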
struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_read_locks)
{
	struct bch_fs *c = trans->c;
	struct btree_cache *bc = &c->btree_cache;
	struct list_head *freed = pcpu_read_locks
		? &bc->freed_pcpu
		: &bc->freed_nonpcpu;
	struct btree *b, *b2;
	u64 start_time = local_clock();
	unsigned flags;

	flags = memalloc_nofs_save();
	mutex_lock(&bc->lock);

	/*
	 * We never free struct btree itself, just the memory that holds the on
	 * disk node. Check the freed list before allocating a new one:
	 */
	list_for_each_entry(b, freed, list)
		if (!btree_node_reclaim(c, b)) {
			list_del_init(&b->list);
			goto got_node;
		}

	b = __btree_node_mem_alloc(c, GFP_NOWAIT|__GFP_NOWARN);
	if (!b) {
		mutex_unlock(&bc->lock);
		bch2_trans_unlock(trans);
		b = __btree_node_mem_alloc(c, GFP_KERNEL);
		if (!b)
			goto err;
		mutex_lock(&bc->lock);
	}

	bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0);

	BUG_ON(!six_trylock_intent(&b->c.lock));
	BUG_ON(!six_trylock_write(&b->c.lock));
got_node:

	/*
	 * btree_free() doesn't free memory; it sticks the node on the end of
	 * the list. Check if there's any freed nodes there:
	 */
	list_for_each_entry(b2, &bc->freeable, list)
		if (!btree_node_reclaim(c, b2)) {
			swap(b->data, b2->data);
			swap(b->aux_data, b2->aux_data);
			btree_node_to_freedlist(bc, b2);
			six_unlock_write(&b2->c.lock);
			six_unlock_intent(&b2->c.lock);
			goto got_mem;
		}

	mutex_unlock(&bc->lock);

	if (btree_node_data_alloc(c, b, GFP_NOWAIT|__GFP_NOWARN)) {
		bch2_trans_unlock(trans);
		if (btree_node_data_alloc(c, b, GFP_KERNEL|__GFP_NOWARN))
			goto err;
	}

	mutex_lock(&bc->lock);
	bc->used++;
got_mem:
	mutex_unlock(&bc->lock);

	BUG_ON(btree_node_hashed(b));
	BUG_ON(btree_node_dirty(b));
	BUG_ON(btree_node_write_in_flight(b));
out:
	b->flags		= 0;
	b->written		= 0;
	b->nsets		= 0;
	b->sib_u64s[0]		= 0;
	b->sib_u64s[1]		= 0;
	b->whiteout_u64s	= 0;
	bch2_btree_keys_init(b);
	set_btree_node_accessed(b);

	bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc],
			       start_time);

	memalloc_nofs_restore(flags);
	return b;
err:
	mutex_lock(&bc->lock);

	/* Try to cannibalize another cached btree node: */
	if (bc->alloc_lock == current) {
		b2 = btree_node_cannibalize(c);
		clear_btree_node_just_written(b2);
		bch2_btree_node_hash_remove(bc, b2);

		if (b) {
			swap(b->data, b2->data);
			swap(b->aux_data, b2->aux_data);
			btree_node_to_freedlist(bc, b2);
			six_unlock_write(&b2->c.lock);
			six_unlock_intent(&b2->c.lock);
		} else {
			b = b2;
			list_del_init(&b->list);
		}

		mutex_unlock(&bc->lock);

		trace_and_count(c, btree_cache_cannibalize, trans);
		goto out;
	}

	mutex_unlock(&bc->lock);
	memalloc_nofs_restore(flags);
	return ERR_PTR(-BCH_ERR_ENOMEM_btree_node_mem_alloc);
}

/* Slowpath, don't want it inlined into btree_iter_traverse() */
static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
						   struct btree_path *path,
						   const struct bkey_i *k,
						   enum btree_id btree_id,
						   unsigned level,
						   enum six_lock_type lock_type,
						   bool sync)
{
	struct bch_fs *c = trans->c;
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b;

	if (unlikely(level >= BTREE_MAX_DEPTH)) {
		int ret = bch2_fs_topology_error(c, "attempting to get btree node at level %u, >= max depth %u",
						 level, BTREE_MAX_DEPTH);
		return ERR_PTR(ret);
	}

	if (unlikely(!bkey_is_btree_ptr(&k->k))) {
		struct printbuf buf = PRINTBUF;
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));

		int ret = bch2_fs_topology_error(c, "attempting to get btree node with non-btree key %s", buf.buf);
		printbuf_exit(&buf);
		return ERR_PTR(ret);
	}

	if (unlikely(k->k.u64s > BKEY_BTREE_PTR_U64s_MAX)) {
		struct printbuf buf = PRINTBUF;
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));

		int ret = bch2_fs_topology_error(c, "attempting to get btree node with too big key %s", buf.buf);
		printbuf_exit(&buf);
		return ERR_PTR(ret);
	}

	/*
	 * Parent node must be locked, else we could read in a btree node that's
	 * been freed:
	 */
	if (path && !bch2_btree_node_relock(trans, path, level + 1)) {
		trace_and_count(c, trans_restart_relock_parent_for_fill, trans, _THIS_IP_, path);
		return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_relock));
	}

	b = bch2_btree_node_mem_alloc(trans, level != 0);

	if (bch2_err_matches(PTR_ERR_OR_ZERO(b), ENOMEM)) {
		if (!path)
			return b;

		trans->memory_allocation_failure = true;
		trace_and_count(c, trans_restart_memory_allocation_failure, trans, _THIS_IP_, path);
		return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_mem_alloc_fail));
	}

	if (IS_ERR(b))
		return b;

	bkey_copy(&b->key, k);
	if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) {
		/* raced with another fill: */

		/* mark as unhashed... */
		b->hash_val = 0;

		mutex_lock(&bc->lock);
		list_add(&b->list, &bc->freeable);
		mutex_unlock(&bc->lock);

		six_unlock_write(&b->c.lock);
		six_unlock_intent(&b->c.lock);
		return NULL;
	}

	set_btree_node_read_in_flight(b);
	six_unlock_write(&b->c.lock);

	if (path) {
		u32 seq = six_lock_seq(&b->c.lock);

		/* Unlock before doing IO: */
		six_unlock_intent(&b->c.lock);
		bch2_trans_unlock_noassert(trans);

		bch2_btree_node_read(trans, b, sync);

		if (!sync)
			return NULL;

		if (!six_relock_type(&b->c.lock, lock_type, seq))
			b = NULL;
	} else {
		bch2_btree_node_read(trans, b, sync);
		if (lock_type == SIX_LOCK_read)
			six_lock_downgrade(&b->c.lock);
	}

	return b;
}

static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
{
	struct printbuf buf = PRINTBUF;

	if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations)
		return;

	prt_printf(&buf,
		   "btree node header doesn't match ptr\n"
		   "btree %s level %u\n"
		   "ptr: ",
		   bch2_btree_id_str(b->c.btree_id), b->c.level);
	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));

	prt_printf(&buf, "\nheader: btree %s level %llu\n"
		   "min ",
		   bch2_btree_id_str(BTREE_NODE_ID(b->data)),
		   BTREE_NODE_LEVEL(b->data));
	bch2_bpos_to_text(&buf, b->data->min_key);

	prt_printf(&buf, "\nmax ");
	bch2_bpos_to_text(&buf, b->data->max_key);

	bch2_fs_topology_error(c, "%s", buf.buf);

	printbuf_exit(&buf);
}

static inline void btree_check_header(struct bch_fs *c, struct btree *b)
{
	if (b->c.btree_id != BTREE_NODE_ID(b->data) ||
	    b->c.level != BTREE_NODE_LEVEL(b->data) ||
	    !bpos_eq(b->data->max_key, b->key.k.p) ||
	    (b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
	     !bpos_eq(b->data->min_key,
		      bkey_i_to_btree_ptr_v2(&b->key)->v.min_key)))
		btree_bad_header(c, b);
}

static struct btree *__bch2_btree_node_get(struct btree_trans *trans, struct btree_path *path,
					   const struct bkey_i *k, unsigned level,
					   enum six_lock_type lock_type,
					   unsigned long trace_ip)
{
	struct bch_fs *c = trans->c;
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b;
	struct bset_tree *t;
	bool need_relock = false;
	int ret;

	EBUG_ON(level >= BTREE_MAX_DEPTH);
retry:
	b = btree_cache_find(bc, k);
	if (unlikely(!b)) {
		/*
		 * We must have the parent locked to call bch2_btree_node_fill(),
		 * else we could read in a btree node from disk that's been
		 * freed:
		 */
		b = bch2_btree_node_fill(trans, path, k, path->btree_id,
					 level, lock_type, true);
		need_relock = true;

		/* We raced and found the btree node in the cache */
		if (!b)
			goto retry;

		if (IS_ERR(b))
			return b;
	} else {
		if (btree_node_read_locked(path, level + 1))
			btree_node_unlock(trans, path, level + 1);

		ret = btree_node_lock(trans, path, &b->c, level, lock_type, trace_ip);
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			return ERR_PTR(ret);

		BUG_ON(ret);

		if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
			     b->c.level != level ||
			     race_fault())) {
			six_unlock_type(&b->c.lock, lock_type);
			if (bch2_btree_node_relock(trans, path, level + 1))
				goto retry;

			trace_and_count(c, trans_restart_btree_node_reused, trans, trace_ip, path);
			return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_lock_node_reused));
		}

		/* avoid atomic set bit if it's not needed: */
		if (!btree_node_accessed(b))
			set_btree_node_accessed(b);
	}

	if (unlikely(btree_node_read_in_flight(b))) {
		u32 seq = six_lock_seq(&b->c.lock);

		six_unlock_type(&b->c.lock, lock_type);
		bch2_trans_unlock(trans);
		need_relock = true;

		bch2_btree_node_wait_on_read(b);

		/*
		 * should_be_locked is not set on this path yet, so we need to
		 * relock it specifically:
		 */
		if (!six_relock_type(&b->c.lock, lock_type, seq))
			goto retry;
	}

	if (unlikely(need_relock)) {
		ret = bch2_trans_relock(trans) ?:
			bch2_btree_path_relock_intent(trans, path);
		if (ret) {
			six_unlock_type(&b->c.lock, lock_type);
			return ERR_PTR(ret);
		}
	}

	prefetch(b->aux_data);

	for_each_bset(b, t) {
		void *p = (u64 *) b->aux_data + t->aux_data_offset;

		prefetch(p + L1_CACHE_BYTES * 0);
		prefetch(p + L1_CACHE_BYTES * 1);
		prefetch(p + L1_CACHE_BYTES * 2);
	}

	if (unlikely(btree_node_read_error(b))) {
		six_unlock_type(&b->c.lock, lock_type);
		return ERR_PTR(-BCH_ERR_btree_node_read_error);
	}

	EBUG_ON(b->c.btree_id != path->btree_id);
	EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
	btree_check_header(c, b);

	return b;
}

/**
 * bch2_btree_node_get - find a btree node in the cache and lock it, reading it
 * in from disk if necessary.
 *
 * @trans:	btree transaction object
 * @path:	btree_path being traversed
 * @k:		pointer to btree node (generally KEY_TYPE_btree_ptr_v2)
 * @level:	level of btree node being looked up (0 == leaf node)
 * @lock_type:	SIX_LOCK_read or SIX_LOCK_intent
 * @trace_ip:	ip of caller of btree iterator code (i.e. caller of bch2_btree_iter_peek())
 *
 * The btree node will have either a read or an intent lock held, depending on
 * @lock_type.
 *
 * Returns: btree node or ERR_PTR()
 */
struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *path,
				  const struct bkey_i *k, unsigned level,
				  enum six_lock_type lock_type,
				  unsigned long trace_ip)
{
	struct bch_fs *c = trans->c;
	struct btree *b;
	struct bset_tree *t;
	int ret;

	EBUG_ON(level >= BTREE_MAX_DEPTH);

	b = btree_node_mem_ptr(k);

	/*
	 * Check b->hash_val _before_ calling btree_node_lock() - this might not
	 * be the node we want anymore, and trying to lock the wrong node could
	 * cause an unneccessary transaction restart:
	 */
	if (unlikely(!c->opts.btree_node_mem_ptr_optimization ||
		     !b ||
		     b->hash_val != btree_ptr_hash_val(k)))
		return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip);

	if (btree_node_read_locked(path, level + 1))
		btree_node_unlock(trans, path, level + 1);

	ret = btree_node_lock(trans, path, &b->c, level, lock_type, trace_ip);
	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
		return ERR_PTR(ret);

	BUG_ON(ret);

	if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
		     b->c.level != level ||
		     race_fault())) {
		six_unlock_type(&b->c.lock, lock_type);
		if (bch2_btree_node_relock(trans, path, level + 1))
			return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip);

		trace_and_count(c, trans_restart_btree_node_reused, trans, trace_ip, path);
		return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_lock_node_reused));
	}

	if (unlikely(btree_node_read_in_flight(b))) {
		six_unlock_type(&b->c.lock, lock_type);
		return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip);
	}

	prefetch(b->aux_data);

	for_each_bset(b, t) {
		void *p = (u64 *) b->aux_data + t->aux_data_offset;

		prefetch(p + L1_CACHE_BYTES * 0);
		prefetch(p + L1_CACHE_BYTES * 1);
		prefetch(p + L1_CACHE_BYTES * 2);
	}

	/* avoid atomic set bit if it's not needed: */
	if (!btree_node_accessed(b))
		set_btree_node_accessed(b);

	if (unlikely(btree_node_read_error(b))) {
		six_unlock_type(&b->c.lock, lock_type);
		return ERR_PTR(-BCH_ERR_btree_node_read_error);
	}

	EBUG_ON(b->c.btree_id != path->btree_id);
	EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
	btree_check_header(c, b);

	return b;
}

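/*
 * Like bch2_btree_node_get(), but for callers that don't have a btree_path:
 * only takes SIX_LOCK_read, and with @nofill a node that isn't already in the
 * cache won't be read in from disk.
 */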
struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans,
					 const struct bkey_i *k,
					 enum btree_id btree_id,
					 unsigned level,
					 bool nofill)
{
	struct bch_fs *c = trans->c;
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b;
	struct bset_tree *t;
	int ret;

	EBUG_ON(level >= BTREE_MAX_DEPTH);

	if (c->opts.btree_node_mem_ptr_optimization) {
		b = btree_node_mem_ptr(k);
		if (b)
			goto lock_node;
	}
retry:
	b = btree_cache_find(bc, k);
	if (unlikely(!b)) {
		if (nofill)
			goto out;

		b = bch2_btree_node_fill(trans, NULL, k, btree_id,
					 level, SIX_LOCK_read, true);

		/* We raced and found the btree node in the cache */
		if (!b)
			goto retry;

		if (IS_ERR(b) &&
		    !bch2_btree_cache_cannibalize_lock(trans, NULL))
			goto retry;

		if (IS_ERR(b))
			goto out;
	} else {
lock_node:
		ret = btree_node_lock_nopath(trans, &b->c, SIX_LOCK_read, _THIS_IP_);
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			return ERR_PTR(ret);

		BUG_ON(ret);

		if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
			     b->c.btree_id != btree_id ||
			     b->c.level != level)) {
			six_unlock_read(&b->c.lock);
			goto retry;
		}
	}

	/* XXX: waiting on IO with btree locks held: */
	__bch2_btree_node_wait_on_read(b);

	prefetch(b->aux_data);

	for_each_bset(b, t) {
		void *p = (u64 *) b->aux_data + t->aux_data_offset;

		prefetch(p + L1_CACHE_BYTES * 0);
		prefetch(p + L1_CACHE_BYTES * 1);
		prefetch(p + L1_CACHE_BYTES * 2);
	}

	/* avoid atomic set bit if it's not needed: */
	if (!btree_node_accessed(b))
		set_btree_node_accessed(b);

	if (unlikely(btree_node_read_error(b))) {
		six_unlock_read(&b->c.lock);
		b = ERR_PTR(-BCH_ERR_btree_node_read_error);
		goto out;
	}

	EBUG_ON(b->c.btree_id != btree_id);
	EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
	btree_check_header(c, b);
out:
	bch2_btree_cache_cannibalize_unlock(trans);
	return b;
}

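/*
 * Start an asynchronous read of a btree node into the cache, if it isn't
 * already present; the node is left unlocked.
 */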
int bch2_btree_node_prefetch(struct btree_trans *trans,
			     struct btree_path *path,
			     const struct bkey_i *k,
			     enum btree_id btree_id, unsigned level)
{
	struct bch_fs *c = trans->c;
	struct btree_cache *bc = &c->btree_cache;

	BUG_ON(path && !btree_node_locked(path, level + 1));
	BUG_ON(level >= BTREE_MAX_DEPTH);

	struct btree *b = btree_cache_find(bc, k);
	if (b)
		return 0;

	b = bch2_btree_node_fill(trans, path, k, btree_id,
				 level, SIX_LOCK_read, false);
	if (!IS_ERR_OR_NULL(b))
		six_unlock_read(&b->c.lock);
	return bch2_trans_relock(trans) ?: PTR_ERR_OR_ZERO(b);
}

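/*
 * Drop a node from the in-memory cache, flushing it first if it's dirty.
 */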
void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
{
	struct bch_fs *c = trans->c;
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b;

	b = btree_cache_find(bc, k);
	if (!b)
		return;

	BUG_ON(b == btree_node_root(trans->c, b));
wait_on_io:
	/* not allowed to wait on io with btree locks held: */

	/* XXX we're called from btree_gc which will be holding other btree
	 * nodes locked
	 */
	__bch2_btree_node_wait_on_read(b);
	__bch2_btree_node_wait_on_write(b);

	btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
	btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
	if (unlikely(b->hash_val != btree_ptr_hash_val(k)))
		goto out;

	if (btree_node_dirty(b)) {
		__bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
		six_unlock_write(&b->c.lock);
		six_unlock_intent(&b->c.lock);
		goto wait_on_io;
	}

	BUG_ON(btree_node_dirty(b));

	mutex_lock(&bc->lock);
	btree_node_data_free(c, b);
	bch2_btree_node_hash_remove(bc, b);
	mutex_unlock(&bc->lock);
out:
	six_unlock_write(&b->c.lock);
	six_unlock_intent(&b->c.lock);
}

const char *bch2_btree_id_str(enum btree_id btree)
{
	return btree < BTREE_ID_NR ? __bch2_btree_ids[btree] : "(unknown)";
}

void bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b)
{
	prt_printf(out, "%s level %u/%u\n  ",
		   bch2_btree_id_str(b->c.btree_id),
		   b->c.level,
		   bch2_btree_id_root(c, b->c.btree_id)->level);
	bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key));
}

void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b)
{
	struct bset_stats stats;

	memset(&stats, 0, sizeof(stats));

	bch2_btree_keys_stats(b, &stats);

	prt_printf(out, "l %u ", b->c.level);
	bch2_bpos_to_text(out, b->data->min_key);
	prt_printf(out, " - ");
	bch2_bpos_to_text(out, b->data->max_key);
	prt_printf(out, ":\n"
		   "  ptrs: ");
	bch2_val_to_text(out, c, bkey_i_to_s_c(&b->key));
	prt_newline(out);

	prt_printf(out,
		   "  format: ");
	bch2_bkey_format_to_text(out, &b->format);

	prt_printf(out,
		   "  unpack fn len: %u\n"
		   "  bytes used %zu/%zu (%zu%% full)\n"
		   "  sib u64s: %u, %u (merge threshold %u)\n"
		   "  nr packed keys %u\n"
		   "  nr unpacked keys %u\n"
		   "  floats %zu\n"
		   "  failed unpacked %zu\n",
		   b->unpack_fn_len,
		   b->nr.live_u64s * sizeof(u64),
		   btree_buf_bytes(b) - sizeof(struct btree_node),
		   b->nr.live_u64s * 100 / btree_max_u64s(c),
		   b->sib_u64s[0],
		   b->sib_u64s[1],
		   c->btree_foreground_merge_threshold,
		   b->nr.packed_keys,
		   b->nr.unpacked_keys,
		   stats.floats,
		   stats.failed);
}

void bch2_btree_cache_to_text(struct printbuf *out, const struct bch_fs *c)
{
	prt_printf(out, "nr nodes:\t\t%u\n", c->btree_cache.used);
	prt_printf(out, "nr dirty:\t\t%u\n", atomic_read(&c->btree_cache.dirty));
	prt_printf(out, "cannibalize lock:\t%p\n", c->btree_cache.alloc_lock);
}
1249 | |