// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "btree_locking.h"
#include "btree_types.h"

static struct lock_class_key bch2_btree_node_lock_key;

void bch2_btree_lock_init(struct btree_bkey_cached_common *b,
			  enum six_lock_init_flags flags)
{
	__six_lock_init(&b->lock, "b->c.lock", &bch2_btree_node_lock_key, flags);
	lockdep_set_novalidate_class(&b->lock);
}

#ifdef CONFIG_LOCKDEP
void bch2_assert_btree_nodes_not_locked(void)
{
#if 0
	// Re-enable when lock_class_is_held() is merged:
	BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key));
#endif
}
#endif

/* Btree node locking: */

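/*
 * Count how many times @trans holds @b locked, per six lock type, via any of
 * its paths other than @skip. Used e.g. by __bch2_btree_node_lock_write() to
 * know how many of its own read locks it has to drop before taking the write
 * lock.
 */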
struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *trans,
						  struct btree_path *skip,
						  struct btree_bkey_cached_common *b,
						  unsigned level)
{
	struct btree_path *path;
	struct six_lock_count ret;
	unsigned i;

	memset(&ret, 0, sizeof(ret));

	if (IS_ERR_OR_NULL(b))
		return ret;

	trans_for_each_path(trans, path, i)
		if (path != skip && &path->l[level].b->c == b) {
			int t = btree_node_locked_type(path, level);

			if (t != BTREE_NODE_UNLOCKED)
				ret.n[t]++;
		}

	return ret;
}

/* unlock */

void bch2_btree_node_unlock_write(struct btree_trans *trans,
				  struct btree_path *path, struct btree *b)
{
	bch2_btree_node_unlock_write_inlined(trans, path, b);
}

/* lock */

/*
 * @trans wants to lock @b with type @type
 */
struct trans_waiting_for_lock {
	struct btree_trans		*trans;
	struct btree_bkey_cached_common *node_want;
	enum six_lock_type		lock_want;

	/* for iterating over held locks: */
	u8				path_idx;
	u8				level;
	u64				lock_start_time;
};

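/*
 * The lock graph is the stack of transactions visited by the deadlock
 * avoidance walk in bch2_check_for_deadlock(): g[0] is the transaction that
 * started the check, and each following entry is a transaction waiting on a
 * lock held by the entry before it. Revisiting a transaction already in the
 * graph means we've found a cycle; overflowing the fixed size array is
 * handled in lock_graph_descend().
 */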
struct lock_graph {
	struct trans_waiting_for_lock	g[8];
	unsigned			nr;
};

static noinline void print_cycle(struct printbuf *out, struct lock_graph *g)
{
	struct trans_waiting_for_lock *i;

	prt_printf(out, "Found lock cycle (%u entries):", g->nr);
	prt_newline(out);

	for (i = g->g; i < g->g + g->nr; i++) {
		struct task_struct *task = READ_ONCE(i->trans->locking_wait.task);
		if (!task)
			continue;

		bch2_btree_trans_to_text(out, i->trans);
		bch2_prt_task_backtrace(out, task, i == g->g ? 5 : 1, GFP_NOWAIT);
	}
}

static noinline void print_chain(struct printbuf *out, struct lock_graph *g)
{
	struct trans_waiting_for_lock *i;

	for (i = g->g; i != g->g + g->nr; i++) {
		struct task_struct *task = i->trans->locking_wait.task;
		if (i != g->g)
			prt_str(out, "<- ");
		prt_printf(out, "%u ", task ? task->pid : 0);
	}
	prt_newline(out);
}

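/*
 * Each entry in the graph holds a ref (closure_get()) on its transaction, so
 * the btree_trans can't go away while we're looking at it; popping an entry
 * drops that ref.
 */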
static void lock_graph_up(struct lock_graph *g)
{
	closure_put(&g->g[--g->nr].trans->ref);
}

static noinline void lock_graph_pop_all(struct lock_graph *g)
{
	while (g->nr)
		lock_graph_up(g);
}

static void __lock_graph_down(struct lock_graph *g, struct btree_trans *trans)
{
	g->g[g->nr++] = (struct trans_waiting_for_lock) {
		.trans		= trans,
		.node_want	= trans->locking,
		.lock_want	= trans->locking_wait.lock_want,
	};
}

static void lock_graph_down(struct lock_graph *g, struct btree_trans *trans)
{
	closure_get(&trans->ref);
	__lock_graph_down(g, trans);
}

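/*
 * Prune entries (other than the first) that are no longer actually waiting on
 * the lock we saw them waiting on - i.e. we raced with them making progress.
 * Returns true if anything was removed.
 */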
static bool lock_graph_remove_non_waiters(struct lock_graph *g)
{
	struct trans_waiting_for_lock *i;

	for (i = g->g + 1; i < g->g + g->nr; i++)
		if (i->trans->locking != i->node_want ||
		    i->trans->locking_wait.start_time != i[-1].lock_start_time) {
			while (g->g + g->nr > i)
				lock_graph_up(g);
			return true;
		}

	return false;
}

static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans)
{
	struct bch_fs *c = trans->c;

	count_event(c, trans_restart_would_deadlock);

	if (trace_trans_restart_would_deadlock_enabled()) {
		struct printbuf buf = PRINTBUF;

		buf.atomic++;
		print_cycle(&buf, g);

		trace_trans_restart_would_deadlock(trans, buf.buf);
		printbuf_exit(&buf);
	}
}

static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i)
{
	if (i == g->g) {
		trace_would_deadlock(g, i->trans);
		return btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock);
	} else {
		i->trans->lock_must_abort = true;
		wake_up_process(i->trans->locking_wait.task);
		return 0;
	}
}

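/*
 * Pick which transaction in a cycle to abort: a higher return value means a
 * more preferable victim. 0 means this transaction must not be aborted
 * (lock_may_not_fail) - if every entry in a cycle returns 0, that's a bug
 * (see break_cycle() below).
 */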
static int btree_trans_abort_preference(struct btree_trans *trans)
{
	if (trans->lock_may_not_fail)
		return 0;
	if (trans->locking_wait.lock_want == SIX_LOCK_write)
		return 1;
	if (!trans->in_traverse_all)
		return 2;
	return 3;
}

static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle)
{
	struct trans_waiting_for_lock *i, *abort = NULL;
	unsigned best = 0, pref;
	int ret;

	if (lock_graph_remove_non_waiters(g))
		return 0;

	/* Only checking, for debugfs: */
	if (cycle) {
		print_cycle(cycle, g);
		ret = -1;
		goto out;
	}

	for (i = g->g; i < g->g + g->nr; i++) {
		pref = btree_trans_abort_preference(i->trans);
		if (pref > best) {
			abort = i;
			best = pref;
		}
	}

	if (unlikely(!best)) {
		struct printbuf buf = PRINTBUF;

		prt_printf(&buf, bch2_fmt(g->g->trans->c, "cycle of nofail locks"));

		for (i = g->g; i < g->g + g->nr; i++) {
			struct btree_trans *trans = i->trans;

			bch2_btree_trans_to_text(&buf, trans);

			prt_printf(&buf, "backtrace:");
			prt_newline(&buf);
			printbuf_indent_add(&buf, 2);
			bch2_prt_task_backtrace(&buf, trans->locking_wait.task, 2, GFP_NOWAIT);
			printbuf_indent_sub(&buf, 2);
			prt_newline(&buf);
		}

		bch2_print_string_as_lines(KERN_ERR, buf.buf);
		printbuf_exit(&buf);
		BUG();
	}

	ret = abort_lock(g, abort);
out:
	if (ret)
		while (g->nr)
			lock_graph_up(g);
	return ret;
}

static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans,
			      struct printbuf *cycle)
{
	struct btree_trans *orig_trans = g->g->trans;
	struct trans_waiting_for_lock *i;

	for (i = g->g; i < g->g + g->nr; i++)
		if (i->trans == trans) {
			closure_put(&trans->ref);
			return break_cycle(g, cycle);
		}

	if (g->nr == ARRAY_SIZE(g->g)) {
		closure_put(&trans->ref);

		if (orig_trans->lock_may_not_fail)
			return 0;

		while (g->nr)
			lock_graph_up(g);

		if (cycle)
			return 0;

		trace_and_count(trans->c, trans_restart_would_deadlock_recursion_limit, trans, _RET_IP_);
		return btree_trans_restart(orig_trans, BCH_ERR_transaction_restart_deadlock_recursion_limit);
	}

	__lock_graph_down(g, trans);
	return 0;
}

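/*
 * This relies on the six_lock_type enum ordering read (0) < intent (1) <
 * write (2): read+read and read+intent sum to <= 1 and don't conflict, while
 * intent+intent and anything involving write sum to > 1 and do.
 */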
static bool lock_type_conflicts(enum six_lock_type t1, enum six_lock_type t2)
{
	return t1 + t2 > 1;
}

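/*
 * Walk the "waits for" graph: starting from @trans, for every lock held by
 * the transaction on top of the graph, look at the transactions waiting on
 * that lock and descend into them. Finding a transaction that's already in
 * the graph means a cycle, which break_cycle() resolves by aborting one
 * member. If @cycle is non NULL we're only checking (for debugfs), and the
 * cycle is printed to it instead.
 */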
int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle)
{
	struct lock_graph g;
	struct trans_waiting_for_lock *top;
	struct btree_bkey_cached_common *b;
	btree_path_idx_t path_idx;
	int ret = 0;

	g.nr = 0;

	if (trans->lock_must_abort) {
		if (cycle)
			return -1;

		trace_would_deadlock(&g, trans);
		return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock);
	}

	lock_graph_down(&g, trans);

	/* trans->paths is rcu protected vs. freeing */
	rcu_read_lock();
	if (cycle)
		cycle->atomic++;
next:
	if (!g.nr)
		goto out;

	top = &g.g[g.nr - 1];

	struct btree_path *paths = rcu_dereference(top->trans->paths);
	if (!paths)
		goto up;

	unsigned long *paths_allocated = trans_paths_allocated(paths);

	trans_for_each_path_idx_from(paths_allocated, *trans_paths_nr(paths),
				     path_idx, top->path_idx) {
		struct btree_path *path = paths + path_idx;
		if (!path->nodes_locked)
			continue;

		if (path_idx != top->path_idx) {
			top->path_idx		= path_idx;
			top->level		= 0;
			top->lock_start_time	= 0;
		}

		for (;
		     top->level < BTREE_MAX_DEPTH;
		     top->level++, top->lock_start_time = 0) {
			int lock_held = btree_node_locked_type(path, top->level);

			if (lock_held == BTREE_NODE_UNLOCKED)
				continue;

			b = &READ_ONCE(path->l[top->level].b)->c;

			if (IS_ERR_OR_NULL(b)) {
				/*
				 * If we get here, it means we raced with the
				 * other thread updating its btree_path
				 * structures - which means it can't be blocked
				 * waiting on a lock:
				 */
				if (!lock_graph_remove_non_waiters(&g)) {
					/*
					 * If lock_graph_remove_non_waiters()
					 * didn't do anything, it must be
					 * because we're being called by debugfs
					 * checking for lock cycles, which
					 * invokes us on btree_transactions that
					 * aren't actually waiting on anything.
					 * Just bail out:
					 */
					lock_graph_pop_all(&g);
				}

				goto next;
			}

			if (list_empty_careful(&b->lock.wait_list))
				continue;

			raw_spin_lock(&b->lock.wait_lock);
			list_for_each_entry(trans, &b->lock.wait_list, locking_wait.list) {
				BUG_ON(b != trans->locking);

				if (top->lock_start_time &&
				    time_after_eq64(top->lock_start_time, trans->locking_wait.start_time))
					continue;

				top->lock_start_time = trans->locking_wait.start_time;

				/* Don't check for self deadlock: */
				if (trans == top->trans ||
				    !lock_type_conflicts(lock_held, trans->locking_wait.lock_want))
					continue;

				closure_get(&trans->ref);
				raw_spin_unlock(&b->lock.wait_lock);

				ret = lock_graph_descend(&g, trans, cycle);
				if (ret)
					goto out;
				goto next;
			}
			raw_spin_unlock(&b->lock.wait_lock);
		}
	}
up:
	if (g.nr > 1 && cycle)
		print_chain(cycle, &g);
	lock_graph_up(&g);
	goto next;
out:
	if (cycle)
		--cycle->atomic;
	rcu_read_unlock();
	return ret;
}

int bch2_six_check_for_deadlock(struct six_lock *lock, void *p)
{
	struct btree_trans *trans = p;

	return bch2_check_for_deadlock(trans, NULL);
}

int __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree_path *path,
				 struct btree_bkey_cached_common *b,
				 bool lock_may_not_fail)
{
	int readers = bch2_btree_node_lock_counts(trans, NULL, b, b->level).n[SIX_LOCK_read];
	int ret;

	/*
	 * Must drop our read locks before calling six_lock_write() -
	 * six_unlock() won't do wakeups until the reader count
	 * goes to 0, and it's safe because we have the node intent
	 * locked:
	 */
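	/*
	 * Illustration: if this transaction holds @b read locked via two other
	 * paths, readers == 2; we subtract those readers so six_lock_write()
	 * isn't left waiting on ourselves, then add them back afterwards.
	 */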
	six_lock_readers_add(&b->lock, -readers);
	ret = __btree_node_lock_nopath(trans, b, SIX_LOCK_write,
				       lock_may_not_fail, _RET_IP_);
	six_lock_readers_add(&b->lock, readers);

	if (ret)
		mark_btree_node_locked_noreset(path, b->level, BTREE_NODE_INTENT_LOCKED);

	return ret;
}

void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
				       struct btree_path *path,
				       struct btree_bkey_cached_common *b)
{
	int ret = __btree_node_lock_write(trans, path, b, true);
	BUG_ON(ret);
}

/* relock */

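/*
 * Relock (or, if @upgrade, upgrade to intent) every level of @path from
 * path->level up to but not including locks_want. On failure the whole path
 * is unlocked and the failed level and everything below it are poisoned with
 * an error pointer, so the next traverse has to walk back up to the node we
 * failed to relock. Returns whether the path ended up uptodate.
 */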
static inline bool btree_path_get_locks(struct btree_trans *trans,
					struct btree_path *path,
					bool upgrade,
					struct get_locks_fail *f)
{
	unsigned l = path->level;
	int fail_idx = -1;

	do {
		if (!btree_path_node(path, l))
			break;

		if (!(upgrade
		      ? bch2_btree_node_upgrade(trans, path, l)
		      : bch2_btree_node_relock(trans, path, l))) {
			fail_idx = l;

			if (f) {
				f->l = l;
				f->b = path->l[l].b;
			}
		}

		l++;
	} while (l < path->locks_want);

	/*
	 * When we fail to get a lock, we have to ensure that any child nodes
	 * can't be relocked so bch2_btree_path_traverse has to walk back up to
	 * the node that we failed to relock:
	 */
	if (fail_idx >= 0) {
		__bch2_btree_path_unlock(trans, path);
		btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);

		do {
			path->l[fail_idx].b = upgrade
				? ERR_PTR(-BCH_ERR_no_btree_node_upgrade)
				: ERR_PTR(-BCH_ERR_no_btree_node_relock);
			--fail_idx;
		} while (fail_idx >= 0);
	}

	if (path->uptodate == BTREE_ITER_NEED_RELOCK)
		path->uptodate = BTREE_ITER_UPTODATE;

	bch2_trans_verify_locks(trans);

	return path->uptodate < BTREE_ITER_NEED_RELOCK;
}

bool __bch2_btree_node_relock(struct btree_trans *trans,
			      struct btree_path *path, unsigned level,
			      bool trace)
{
	struct btree *b = btree_path_node(path, level);
	int want = __btree_lock_want(path, level);

	if (race_fault())
		goto fail;

	if (six_relock_type(&b->c.lock, want, path->l[level].lock_seq) ||
	    (btree_node_lock_seq_matches(path, b, level) &&
	     btree_node_lock_increment(trans, &b->c, level, want))) {
		mark_btree_node_locked(trans, path, level, want);
		return true;
	}
fail:
	if (trace && !trans->notrace_relock_fail)
		trace_and_count(trans->c, btree_path_relock_fail, trans, _RET_IP_, path, level);
	return false;
}

/* upgrade */

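/*
 * Try to go from a read lock (or no lock) to an intent lock on the node at
 * @level: first via six_lock_tryupgrade() (temporarily hiding read locks we
 * hold via other paths, as in __bch2_btree_node_lock_write()), then by
 * relocking as intent, and finally by bumping the lock count if another path
 * already holds the node intent locked.
 */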
bool bch2_btree_node_upgrade(struct btree_trans *trans,
			     struct btree_path *path, unsigned level)
{
	struct btree *b = path->l[level].b;
	struct six_lock_count count = bch2_btree_node_lock_counts(trans, path, &b->c, level);

	if (!is_btree_node(path, level))
		return false;

	switch (btree_lock_want(path, level)) {
	case BTREE_NODE_UNLOCKED:
		BUG_ON(btree_node_locked(path, level));
		return true;
	case BTREE_NODE_READ_LOCKED:
		BUG_ON(btree_node_intent_locked(path, level));
		return bch2_btree_node_relock(trans, path, level);
	case BTREE_NODE_INTENT_LOCKED:
		break;
	case BTREE_NODE_WRITE_LOCKED:
		BUG();
	}

	if (btree_node_intent_locked(path, level))
		return true;

	if (race_fault())
		return false;

	if (btree_node_locked(path, level)) {
		bool ret;

		six_lock_readers_add(&b->c.lock, -count.n[SIX_LOCK_read]);
		ret = six_lock_tryupgrade(&b->c.lock);
		six_lock_readers_add(&b->c.lock, count.n[SIX_LOCK_read]);

		if (ret)
			goto success;
	} else {
		if (six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq))
			goto success;
	}

	/*
	 * Do we already have an intent lock via another path? If so, just bump
	 * lock count:
	 */
	if (btree_node_lock_seq_matches(path, b, level) &&
	    btree_node_lock_increment(trans, &b->c, level, BTREE_NODE_INTENT_LOCKED)) {
		btree_node_unlock(trans, path, level);
		goto success;
	}

	trace_and_count(trans->c, btree_path_upgrade_fail, trans, _RET_IP_, path, level);
	return false;
success:
	mark_btree_node_locked_noreset(path, level, BTREE_NODE_INTENT_LOCKED);
	return true;
}

/* Btree path locking: */

/*
 * Only for btree_cache.c - only relocks intent locks
 */
int bch2_btree_path_relock_intent(struct btree_trans *trans,
				  struct btree_path *path)
{
	unsigned l;

	for (l = path->level;
	     l < path->locks_want && btree_path_node(path, l);
	     l++) {
		if (!bch2_btree_node_relock(trans, path, l)) {
			__bch2_btree_path_unlock(trans, path);
			btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
			trace_and_count(trans->c, trans_restart_relock_path_intent, trans, _RET_IP_, path);
			return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path_intent);
		}
	}

	return 0;
}

__flatten
bool bch2_btree_path_relock_norestart(struct btree_trans *trans, struct btree_path *path)
{
	struct get_locks_fail f;

	return btree_path_get_locks(trans, path, false, &f);
}

int __bch2_btree_path_relock(struct btree_trans *trans,
			     struct btree_path *path, unsigned long trace_ip)
{
	if (!bch2_btree_path_relock_norestart(trans, path)) {
		trace_and_count(trans->c, trans_restart_relock_path, trans, trace_ip, path);
		return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path);
	}

	return 0;
}

bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *trans,
					    struct btree_path *path,
					    unsigned new_locks_want,
					    struct get_locks_fail *f)
{
	EBUG_ON(path->locks_want >= new_locks_want);

	path->locks_want = new_locks_want;

	return btree_path_get_locks(trans, path, true, f);
}

bool __bch2_btree_path_upgrade(struct btree_trans *trans,
			       struct btree_path *path,
			       unsigned new_locks_want,
			       struct get_locks_fail *f)
{
	if (bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want, f))
		return true;

	/*
	 * XXX: this is ugly - we'd prefer to not be mucking with other
	 * iterators in the btree_trans here.
	 *
	 * On failure to upgrade the iterator, setting iter->locks_want and
	 * calling get_locks() is sufficient to make bch2_btree_path_traverse()
	 * get the locks we want on transaction restart.
	 *
	 * But if this iterator was a clone, on transaction restart what we did
	 * to this iterator isn't going to be preserved.
	 *
	 * Possibly we could add an iterator field for the parent iterator when
	 * an iterator is a copy - for now, we'll just upgrade any other
	 * iterators with the same btree id.
	 *
	 * The code below used to be needed to ensure ancestor nodes get locked
	 * before interior nodes - now that's handled by
	 * bch2_btree_path_traverse_all().
	 */
	if (!path->cached && !trans->in_traverse_all) {
		struct btree_path *linked;
		unsigned i;

		trans_for_each_path(trans, linked, i)
			if (linked != path &&
			    linked->cached == path->cached &&
			    linked->btree_id == path->btree_id &&
			    linked->locks_want < new_locks_want) {
				linked->locks_want = new_locks_want;
				btree_path_get_locks(trans, linked, true, NULL);
			}
	}

	return false;
}

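/*
 * Drop locks above the levels we still need (new_locks_want); at path->level
 * itself, downgrade an intent lock back to a read lock instead of dropping
 * it.
 */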
void __bch2_btree_path_downgrade(struct btree_trans *trans,
				 struct btree_path *path,
				 unsigned new_locks_want)
{
	unsigned l, old_locks_want = path->locks_want;

	if (trans->restarted)
		return;

	EBUG_ON(path->locks_want < new_locks_want);

	path->locks_want = new_locks_want;

	while (path->nodes_locked &&
	       (l = btree_path_highest_level_locked(path)) >= path->locks_want) {
		if (l > path->level) {
			btree_node_unlock(trans, path, l);
		} else {
			if (btree_node_intent_locked(path, l)) {
				six_lock_downgrade(&path->l[l].b->c.lock);
				mark_btree_node_locked_noreset(path, l, BTREE_NODE_READ_LOCKED);
			}
			break;
		}
	}

	bch2_btree_path_verify_locks(path);

	trace_path_downgrade(trans, _RET_IP_, path, old_locks_want);
}

/* Btree transaction locking: */

void bch2_trans_downgrade(struct btree_trans *trans)
{
	struct btree_path *path;
	unsigned i;

	if (trans->restarted)
		return;

	trans_for_each_path(trans, path, i)
		if (path->ref)
			bch2_btree_path_downgrade(trans, path);
}

int bch2_trans_relock(struct btree_trans *trans)
{
	struct btree_path *path;
	unsigned i;

	if (unlikely(trans->restarted))
		return -((int) trans->restarted);

	trans_for_each_path(trans, path, i) {
		struct get_locks_fail f;

		if (path->should_be_locked &&
		    !btree_path_get_locks(trans, path, false, &f)) {
			if (trace_trans_restart_relock_enabled()) {
				struct printbuf buf = PRINTBUF;

				bch2_bpos_to_text(&buf, path->pos);
				prt_printf(&buf, " l=%u seq=%u node seq=",
					   f.l, path->l[f.l].lock_seq);
				if (IS_ERR_OR_NULL(f.b)) {
					prt_str(&buf, bch2_err_str(PTR_ERR(f.b)));
				} else {
					prt_printf(&buf, "%u", f.b->c.lock.seq);

					struct six_lock_count c =
						bch2_btree_node_lock_counts(trans, NULL, &f.b->c, f.l);
					prt_printf(&buf, " self locked %u.%u.%u", c.n[0], c.n[1], c.n[2]);

					c = six_lock_counts(&f.b->c.lock);
					prt_printf(&buf, " total locked %u.%u.%u", c.n[0], c.n[1], c.n[2]);
				}

				trace_trans_restart_relock(trans, _RET_IP_, buf.buf);
				printbuf_exit(&buf);
			}

			count_event(trans->c, trans_restart_relock);
			return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock);
		}
	}

	return 0;
}

int bch2_trans_relock_notrace(struct btree_trans *trans)
{
	struct btree_path *path;
	unsigned i;

	if (unlikely(trans->restarted))
		return -((int) trans->restarted);

	trans_for_each_path(trans, path, i)
		if (path->should_be_locked &&
		    !bch2_btree_path_relock_norestart(trans, path)) {
			return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock);
		}
	return 0;
}

void bch2_trans_unlock_noassert(struct btree_trans *trans)
{
	struct btree_path *path;
	unsigned i;

	trans_for_each_path(trans, path, i)
		__bch2_btree_path_unlock(trans, path);
}

void bch2_trans_unlock(struct btree_trans *trans)
{
	struct btree_path *path;
	unsigned i;

	trans_for_each_path(trans, path, i)
		__bch2_btree_path_unlock(trans, path);
}

void bch2_trans_unlock_long(struct btree_trans *trans)
{
	bch2_trans_unlock(trans);
	bch2_trans_srcu_unlock(trans);
}

bool bch2_trans_locked(struct btree_trans *trans)
{
	struct btree_path *path;
	unsigned i;

	trans_for_each_path(trans, path, i)
		if (path->nodes_locked)
			return true;
	return false;
}

int __bch2_trans_mutex_lock(struct btree_trans *trans,
			    struct mutex *lock)
{
	int ret = drop_locks_do(trans, (mutex_lock(lock), 0));

	if (ret)
		mutex_unlock(lock);
	return ret;
}

/* Debug */

#ifdef CONFIG_BCACHEFS_DEBUG

void bch2_btree_path_verify_locks(struct btree_path *path)
{
	unsigned l;

	if (!path->nodes_locked) {
		BUG_ON(path->uptodate == BTREE_ITER_UPTODATE &&
		       btree_path_node(path, path->level));
		return;
	}

	for (l = 0; l < BTREE_MAX_DEPTH; l++) {
		int want = btree_lock_want(path, l);
		int have = btree_node_locked_type(path, l);

		BUG_ON(!is_btree_node(path, l) && have != BTREE_NODE_UNLOCKED);

		BUG_ON(is_btree_node(path, l) &&
		       (want == BTREE_NODE_UNLOCKED ||
			have != BTREE_NODE_WRITE_LOCKED) &&
		       want != have);
	}
}

void bch2_trans_verify_locks(struct btree_trans *trans)
{
	struct btree_path *path;
	unsigned i;

	trans_for_each_path(trans, path, i)
		bch2_btree_path_verify_locks(path);
}

#endif