// SPDX-License-Identifier: GPL-2.0

#include <linux/export.h>
#include <linux/log2.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/sched/rt.h>
#include <linux/sched/task.h>
#include <linux/slab.h>

#include <trace/events/lock.h>

#include "six.h"

#ifdef DEBUG
#define EBUG_ON(cond)		BUG_ON(cond)
#else
#define EBUG_ON(cond)		do {} while (0)
#endif

#define six_acquire(l, t, r, ip)	lock_acquire(l, 0, t, r, 1, NULL, ip)
#define six_release(l, ip)		lock_release(l, ip)

static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type);

#define SIX_LOCK_HELD_read_OFFSET	0
#define SIX_LOCK_HELD_read		~(~0U << 26)
#define SIX_LOCK_HELD_intent		(1U << 26)
#define SIX_LOCK_HELD_write		(1U << 27)
#define SIX_LOCK_WAITING_read		(1U << (28 + SIX_LOCK_read))
#define SIX_LOCK_WAITING_write		(1U << (28 + SIX_LOCK_write))
#define SIX_LOCK_NOSPIN			(1U << 31)

struct six_lock_vals {
	/* Value we add to the lock in order to take the lock: */
	u32			lock_val;

	/* If the lock has this value (used as a mask), taking the lock fails: */
	u32			lock_fail;

	/* Mask that indicates lock is held for this type: */
	u32			held_mask;

	/* Waitlist we wakeup when releasing the lock: */
	enum six_lock_type	unlock_wakeup;
};

static const struct six_lock_vals l[] = {
	[SIX_LOCK_read] = {
		.lock_val	= 1U << SIX_LOCK_HELD_read_OFFSET,
		.lock_fail	= SIX_LOCK_HELD_write,
		.held_mask	= SIX_LOCK_HELD_read,
		.unlock_wakeup	= SIX_LOCK_write,
	},
	[SIX_LOCK_intent] = {
		.lock_val	= SIX_LOCK_HELD_intent,
		.lock_fail	= SIX_LOCK_HELD_intent,
		.held_mask	= SIX_LOCK_HELD_intent,
		.unlock_wakeup	= SIX_LOCK_intent,
	},
	[SIX_LOCK_write] = {
		.lock_val	= SIX_LOCK_HELD_write,
		.lock_fail	= SIX_LOCK_HELD_read,
		.held_mask	= SIX_LOCK_HELD_write,
		.unlock_wakeup	= SIX_LOCK_read,
	},
};

static inline void six_set_bitmask(struct six_lock *lock, u32 mask)
{
	if ((atomic_read(&lock->state) & mask) != mask)
		atomic_or(mask, &lock->state);
}

static inline void six_clear_bitmask(struct six_lock *lock, u32 mask)
{
	if (atomic_read(&lock->state) & mask)
		atomic_and(~mask, &lock->state);
}

static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type,
				 u32 old, struct task_struct *owner)
{
	if (type != SIX_LOCK_intent)
		return;

	if (!(old & SIX_LOCK_HELD_intent)) {
		EBUG_ON(lock->owner);
		lock->owner = owner;
	} else {
		EBUG_ON(lock->owner != current);
	}
}

static inline unsigned pcpu_read_count(struct six_lock *lock)
{
	unsigned read_count = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		read_count += *per_cpu_ptr(lock->readers, cpu);
	return read_count;
}
/*
 * __do_six_trylock() - main trylock routine
 *
 * Returns 1 on success, 0 on failure
 *
 * In percpu reader mode, a failed trylock may cause a spurious trylock failure
 * for another thread taking the competing lock type, and we may have to do a
 * wakeup: when a wakeup is required, we return -1 - wakeup_type.
 */
static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type,
			    struct task_struct *task, bool try)
{
	int ret;
	u32 old;

	EBUG_ON(type == SIX_LOCK_write && lock->owner != task);
	EBUG_ON(type == SIX_LOCK_write &&
		(try != !(atomic_read(&lock->state) & SIX_LOCK_HELD_write)));

	/*
	 * Percpu reader mode:
	 *
	 * The basic idea behind this algorithm is that you can implement a lock
	 * between two threads without any atomics, just memory barriers:
	 *
	 * For two threads you'll need two variables, one variable for "thread a
	 * has the lock" and another for "thread b has the lock".
	 *
	 * To take the lock, a thread sets its variable indicating that it holds
	 * the lock, then issues a full memory barrier, then reads from the
	 * other thread's variable to check if the other thread thinks it has
	 * the lock. If we raced, we back off and retry/sleep.
	 *
	 * Failure to take the lock may cause a spurious trylock failure in
	 * another thread, because we temporarily set the lock to indicate that
	 * we held it. This would be a problem for a thread in six_lock(), when
	 * it is calling trylock after adding itself to the waitlist and
	 * prior to sleeping.
	 *
	 * Therefore, if we fail to get the lock, and there were waiters of the
	 * type we conflict with, we will have to issue a wakeup.
	 *
	 * Since we may be called under wait_lock (and by the wakeup code
	 * itself), we return that the wakeup has to be done instead of doing it
	 * here.
	 */
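	/*
	 * A minimal sketch of the two-variable scheme described above, from
	 * thread a's point of view (purely illustrative - not the code below;
	 * a_held and b_held are hypothetical per-thread flags):
	 *
	 *	WRITE_ONCE(a_held, true);	// signal that we hold the lock
	 *	smp_mb();			// order the store vs. the load
	 *	if (READ_ONCE(b_held)) {
	 *		// raced: back off and retry/sleep
	 *		WRITE_ONCE(a_held, false);
	 *	}
	 *
	 * Thread b does the same with the roles of a_held/b_held swapped.
	 */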
	if (type == SIX_LOCK_read && lock->readers) {
		preempt_disable();
		this_cpu_inc(*lock->readers); /* signal that we own lock */

		smp_mb();

		old = atomic_read(&lock->state);
		ret = !(old & l[type].lock_fail);

		this_cpu_sub(*lock->readers, !ret);
		preempt_enable();

		if (!ret) {
			smp_mb();
			if (atomic_read(&lock->state) & SIX_LOCK_WAITING_write)
				ret = -1 - SIX_LOCK_write;
		}
	} else if (type == SIX_LOCK_write && lock->readers) {
		if (try) {
			atomic_add(SIX_LOCK_HELD_write, &lock->state);
			smp_mb__after_atomic();
		}

		ret = !pcpu_read_count(lock);

		if (try && !ret) {
			old = atomic_sub_return(SIX_LOCK_HELD_write, &lock->state);
			if (old & SIX_LOCK_WAITING_read)
				ret = -1 - SIX_LOCK_read;
		}
	} else {
		old = atomic_read(&lock->state);
		do {
			ret = !(old & l[type].lock_fail);
			if (!ret || (type == SIX_LOCK_write && !try)) {
				smp_mb();
				break;
			}
		} while (!atomic_try_cmpxchg_acquire(&lock->state, &old, old + l[type].lock_val));

		EBUG_ON(ret && !(atomic_read(&lock->state) & l[type].held_mask));
	}

	if (ret > 0)
		six_set_owner(lock, type, old, task);

	EBUG_ON(type == SIX_LOCK_write && try && ret <= 0 &&
		(atomic_read(&lock->state) & SIX_LOCK_HELD_write));

	return ret;
}

static void __six_lock_wakeup(struct six_lock *lock, enum six_lock_type lock_type)
{
	struct six_lock_waiter *w, *next;
	struct task_struct *task;
	bool saw_one;
	int ret;
again:
	ret = 0;
	saw_one = false;
	raw_spin_lock(&lock->wait_lock);

	list_for_each_entry_safe(w, next, &lock->wait_list, list) {
		if (w->lock_want != lock_type)
			continue;

		if (saw_one && lock_type != SIX_LOCK_read)
			goto unlock;
		saw_one = true;

		ret = __do_six_trylock(lock, lock_type, w->task, false);
		if (ret <= 0)
			goto unlock;

		/*
		 * Similar to percpu_rwsem_wake_function(), we need to guard
		 * against the wakee noticing w->lock_acquired, returning, and
		 * then exiting before we do the wakeup:
		 */
		task = get_task_struct(w->task);
		__list_del(w->list.prev, w->list.next);
		/*
		 * The release barrier here ensures the ordering of the
		 * __list_del before setting w->lock_acquired; @w is on the
		 * stack of the thread doing the waiting and will be reused
		 * after it sees w->lock_acquired with no other locking:
		 * pairs with smp_load_acquire() in six_lock_slowpath()
		 */
		smp_store_release(&w->lock_acquired, true);
		wake_up_process(task);
		put_task_struct(task);
	}

	six_clear_bitmask(lock, SIX_LOCK_WAITING_read << lock_type);
unlock:
	raw_spin_unlock(&lock->wait_lock);

	if (ret < 0) {
		lock_type = -ret - 1;
		goto again;
	}
}

__always_inline
static void six_lock_wakeup(struct six_lock *lock, u32 state,
			    enum six_lock_type lock_type)
{
	if (lock_type == SIX_LOCK_write && (state & SIX_LOCK_HELD_read))
		return;

	if (!(state & (SIX_LOCK_WAITING_read << lock_type)))
		return;

	__six_lock_wakeup(lock, lock_type);
}

__always_inline
static bool do_six_trylock(struct six_lock *lock, enum six_lock_type type, bool try)
{
	int ret;

	ret = __do_six_trylock(lock, type, current, try);
	if (ret < 0)
		__six_lock_wakeup(lock, -ret - 1);

	return ret > 0;
}

/**
 * six_trylock_ip - attempt to take a six lock without blocking
 * @lock: lock to take
 * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_
 *
 * Return: true on success, false on failure.
 */
bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
{
	if (!do_six_trylock(lock, type, true))
		return false;

	if (type != SIX_LOCK_write)
		six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read, ip);
	return true;
}
EXPORT_SYMBOL_GPL(six_trylock_ip);
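
/*
 * Example usage (a sketch - @foo and its embedded lock are hypothetical):
 *
 *	if (six_trylock_ip(&foo->lock, SIX_LOCK_intent, _THIS_IP_)) {
 *		... foo is locked for intent ...
 *		six_unlock_ip(&foo->lock, SIX_LOCK_intent, _THIS_IP_);
 *	}
 */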

/**
 * six_relock_ip - attempt to re-take a lock that was held previously
 * @lock: lock to take
 * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 * @seq: lock sequence number obtained from six_lock_seq() while lock was
 *	 held previously
 * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_
 *
 * Return: true on success, false on failure.
 */
bool six_relock_ip(struct six_lock *lock, enum six_lock_type type,
		   unsigned seq, unsigned long ip)
{
	if (six_lock_seq(lock) != seq || !six_trylock_ip(lock, type, ip))
		return false;

	if (six_lock_seq(lock) != seq) {
		six_unlock_ip(lock, type, ip);
		return false;
	}

	return true;
}
EXPORT_SYMBOL_GPL(six_relock_ip);
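
/*
 * Typical unlock/relock pattern (a sketch - @foo is hypothetical). The relock
 * fails if the lock was taken for write in the interim, since the sequence
 * number is incremented on write unlock:
 *
 *	unsigned seq = six_lock_seq(&foo->lock);
 *
 *	six_unlock_ip(&foo->lock, SIX_LOCK_read, _THIS_IP_);
 *	... do work without the lock held ...
 *
 *	if (!six_relock_ip(&foo->lock, SIX_LOCK_read, seq, _THIS_IP_))
 *		... lock contents may have changed; retake and revalidate ...
 */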

#ifdef CONFIG_BCACHEFS_SIX_OPTIMISTIC_SPIN

static inline bool six_owner_running(struct six_lock *lock)
{
	/*
	 * When there's no owner, we might have preempted between the owner
	 * acquiring the lock and setting the owner field. If we're an RT task
	 * that will live-lock because we won't let the owner complete.
	 */
	rcu_read_lock();
	struct task_struct *owner = READ_ONCE(lock->owner);
	bool ret = owner ? owner_on_cpu(owner) : !rt_task(current);
	rcu_read_unlock();

	return ret;
}

static inline bool six_optimistic_spin(struct six_lock *lock,
				       struct six_lock_waiter *wait,
				       enum six_lock_type type)
{
	unsigned loop = 0;
	u64 end_time;

	if (type == SIX_LOCK_write)
		return false;

	if (lock->wait_list.next != &wait->list)
		return false;

	if (atomic_read(&lock->state) & SIX_LOCK_NOSPIN)
		return false;

	preempt_disable();
	end_time = sched_clock() + 10 * NSEC_PER_USEC;

	while (!need_resched() && six_owner_running(lock)) {
		/*
		 * Ensures that writes to the waitlist entry happen after we see
		 * wait->lock_acquired: pairs with the smp_store_release in
		 * __six_lock_wakeup
		 */
		if (smp_load_acquire(&wait->lock_acquired)) {
			preempt_enable();
			return true;
		}

		if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) {
			six_set_bitmask(lock, SIX_LOCK_NOSPIN);
			break;
		}

		/*
		 * The cpu_relax() call is a compiler barrier which forces
		 * everything in this loop to be re-loaded. We don't need
		 * memory barriers as we'll eventually observe the right
		 * values at the cost of a few extra spins.
		 */
		cpu_relax();
	}

	preempt_enable();
	return false;
}

#else /* CONFIG_BCACHEFS_SIX_OPTIMISTIC_SPIN */

static inline bool six_optimistic_spin(struct six_lock *lock,
				       struct six_lock_waiter *wait,
				       enum six_lock_type type)
{
	return false;
}

#endif

noinline
static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type,
			     struct six_lock_waiter *wait,
			     six_lock_should_sleep_fn should_sleep_fn, void *p,
			     unsigned long ip)
{
	int ret = 0;

	if (type == SIX_LOCK_write) {
		EBUG_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_write);
		atomic_add(SIX_LOCK_HELD_write, &lock->state);
		smp_mb__after_atomic();
	}

	trace_contention_begin(lock, 0);
	lock_contended(&lock->dep_map, ip);

	wait->task		= current;
	wait->lock_want		= type;
	wait->lock_acquired	= false;

	raw_spin_lock(&lock->wait_lock);
	six_set_bitmask(lock, SIX_LOCK_WAITING_read << type);
	/*
	 * Retry taking the lock after taking waitlist lock, in case we raced
	 * with an unlock:
	 */
	ret = __do_six_trylock(lock, type, current, false);
	if (ret <= 0) {
		wait->start_time = local_clock();

		if (!list_empty(&lock->wait_list)) {
			struct six_lock_waiter *last =
				list_last_entry(&lock->wait_list,
					struct six_lock_waiter, list);

			if (time_before_eq64(wait->start_time, last->start_time))
				wait->start_time = last->start_time + 1;
		}

		list_add_tail(&wait->list, &lock->wait_list);
	}
	raw_spin_unlock(&lock->wait_lock);

	if (unlikely(ret > 0)) {
		ret = 0;
		goto out;
	}

	if (unlikely(ret < 0)) {
		__six_lock_wakeup(lock, -ret - 1);
		ret = 0;
	}

	if (six_optimistic_spin(lock, wait, type))
		goto out;

	while (1) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		/*
		 * Ensures that writes to the waitlist entry happen after we see
		 * wait->lock_acquired: pairs with the smp_store_release in
		 * __six_lock_wakeup
		 */
		if (smp_load_acquire(&wait->lock_acquired))
			break;

		ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
		if (unlikely(ret)) {
			bool acquired;

			/*
			 * If should_sleep_fn() returns an error, we are
			 * required to return that error even if we already
			 * acquired the lock - should_sleep_fn() might have
			 * modified external state (e.g. when the deadlock cycle
			 * detector in bcachefs issued a transaction restart)
			 */
			raw_spin_lock(&lock->wait_lock);
			acquired = wait->lock_acquired;
			if (!acquired)
				list_del(&wait->list);
			raw_spin_unlock(&lock->wait_lock);

			if (unlikely(acquired))
				do_six_unlock_type(lock, type);
			break;
		}

		schedule();
	}

	__set_current_state(TASK_RUNNING);
out:
	if (ret && type == SIX_LOCK_write) {
		six_clear_bitmask(lock, SIX_LOCK_HELD_write);
		six_lock_wakeup(lock, atomic_read(&lock->state), SIX_LOCK_read);
	}
	trace_contention_end(lock, 0);

	return ret;
}

/**
 * six_lock_ip_waiter - take a lock, with full waitlist interface
 * @lock: lock to take
 * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 * @wait: pointer to wait object, which will be added to lock's waitlist
 * @should_sleep_fn: callback run after adding to waitlist, immediately prior
 *		     to scheduling
 * @p: passed through to @should_sleep_fn
 * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_
 *
 * This is the most general six_lock() variant, with parameters to support full
 * cycle detection for deadlock avoidance.
 *
 * The code calling this function must implement tracking of held locks, and the
 * @wait object should be embedded into the struct that tracks held locks -
 * which must also be accessible in a thread-safe way.
 *
 * @should_sleep_fn should invoke the cycle detector; it should walk each
 * lock's waiters, and for each waiter recursively walk their held locks.
 *
 * When this function must block, @wait will be added to @lock's waitlist before
 * calling trylock, and before calling @should_sleep_fn, and @wait will not be
 * removed from the lock waitlist until the lock has been successfully acquired,
 * or we abort.
 *
 * @wait.start_time will be monotonically increasing for any given waitlist, and
 * thus may be used as a loop cursor.
 *
 * Return: 0 on success, or the return code from @should_sleep_fn on failure.
 */
int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type,
		       struct six_lock_waiter *wait,
		       six_lock_should_sleep_fn should_sleep_fn, void *p,
		       unsigned long ip)
{
	int ret;

	wait->start_time = 0;

	if (type != SIX_LOCK_write)
		six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, ip);

	ret = do_six_trylock(lock, type, true) ? 0
		: six_lock_slowpath(lock, type, wait, should_sleep_fn, p, ip);

	if (ret && type != SIX_LOCK_write)
		six_release(&lock->dep_map, ip);
	if (!ret)
		lock_acquired(&lock->dep_map, ip);

	return ret;
}
EXPORT_SYMBOL_GPL(six_lock_ip_waiter);
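
/*
 * Sketch of a caller-supplied @should_sleep_fn (hypothetical names throughout;
 * a real cycle detector would walk @lock's waiters and, recursively, each
 * waiter's held locks, as described above):
 *
 *	static int my_should_sleep(struct six_lock *lock, void *p)
 *	{
 *		struct my_lock_tracker *tracker = p;
 *
 *		return my_cycle_detected(tracker, lock) ? -EDEADLK : 0;
 *	}
 *
 *	struct six_lock_waiter wait;
 *	int ret = six_lock_ip_waiter(&foo->lock, SIX_LOCK_intent, &wait,
 *				     my_should_sleep, tracker, _THIS_IP_);
 */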

__always_inline
static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type)
{
	u32 state;

	if (type == SIX_LOCK_intent)
		lock->owner = NULL;

	if (type == SIX_LOCK_read &&
	    lock->readers) {
		smp_mb(); /* unlock barrier */
		this_cpu_dec(*lock->readers);
		smp_mb(); /* between unlocking and checking for waiters */
		state = atomic_read(&lock->state);
	} else {
		u32 v = l[type].lock_val;

		if (type != SIX_LOCK_read)
			v += atomic_read(&lock->state) & SIX_LOCK_NOSPIN;

		EBUG_ON(!(atomic_read(&lock->state) & l[type].held_mask));
		state = atomic_sub_return_release(v, &lock->state);
	}

	six_lock_wakeup(lock, state, l[type].unlock_wakeup);
}

/**
 * six_unlock_ip - drop a six lock
 * @lock: lock to unlock
 * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_
 *
 * When a lock is held multiple times (because six_lock_increment() was used),
 * this decrements the 'lock held' counter by one.
 *
 * For example:
 * six_lock_read(&foo->lock);				read count 1
 * six_lock_increment(&foo->lock, SIX_LOCK_read);	read count 2
 * six_unlock_type(&foo->lock, SIX_LOCK_read);		read count 1
 * six_unlock_type(&foo->lock, SIX_LOCK_read);		read count 0
 */
void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
{
	EBUG_ON(type == SIX_LOCK_write &&
		!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
	EBUG_ON((type == SIX_LOCK_write ||
		 type == SIX_LOCK_intent) &&
		lock->owner != current);

	if (type != SIX_LOCK_write)
		six_release(&lock->dep_map, ip);
	else
		lock->seq++;

	if (type == SIX_LOCK_intent &&
	    lock->intent_lock_recurse) {
		--lock->intent_lock_recurse;
		return;
	}

	do_six_unlock_type(lock, type);
}
EXPORT_SYMBOL_GPL(six_unlock_ip);

/**
 * six_lock_downgrade - convert an intent lock to a read lock
 * @lock: lock to downgrade
 *
 * @lock will have read count incremented and intent count decremented
 */
void six_lock_downgrade(struct six_lock *lock)
{
	six_lock_increment(lock, SIX_LOCK_read);
	six_unlock_intent(lock);
}
EXPORT_SYMBOL_GPL(six_lock_downgrade);

/**
 * six_lock_tryupgrade - attempt to convert read lock to an intent lock
 * @lock: lock to upgrade
 *
 * On success, @lock will have intent count incremented and read count
 * decremented
 *
 * Return: true on success, false on failure
 */
bool six_lock_tryupgrade(struct six_lock *lock)
{
	u32 old = atomic_read(&lock->state), new;

	do {
		new = old;

		if (new & SIX_LOCK_HELD_intent)
			return false;

		if (!lock->readers) {
			EBUG_ON(!(new & SIX_LOCK_HELD_read));
			new -= l[SIX_LOCK_read].lock_val;
		}

		new |= SIX_LOCK_HELD_intent;
	} while (!atomic_try_cmpxchg_acquire(&lock->state, &old, new));

	if (lock->readers)
		this_cpu_dec(*lock->readers);

	six_set_owner(lock, SIX_LOCK_intent, old, current);

	return true;
}
EXPORT_SYMBOL_GPL(six_lock_tryupgrade);

/**
 * six_trylock_convert - attempt to convert a held lock from one type to another
 * @lock: lock to upgrade
 * @from: SIX_LOCK_read or SIX_LOCK_intent
 * @to: SIX_LOCK_read or SIX_LOCK_intent
 *
 * On success, @lock will have intent count incremented and read count
 * decremented
 *
 * Return: true on success, false on failure
 */
bool six_trylock_convert(struct six_lock *lock,
			 enum six_lock_type from,
			 enum six_lock_type to)
{
	EBUG_ON(to == SIX_LOCK_write || from == SIX_LOCK_write);

	if (to == from)
		return true;

	if (to == SIX_LOCK_read) {
		six_lock_downgrade(lock);
		return true;
	} else {
		return six_lock_tryupgrade(lock);
	}
}
EXPORT_SYMBOL_GPL(six_trylock_convert);

/**
 * six_lock_increment - increase held lock count on a lock that is already held
 * @lock: lock to increment
 * @type: SIX_LOCK_read or SIX_LOCK_intent
 *
 * @lock must already be held, with a lock type that is greater than or equal to
 * @type
 *
 * A corresponding six_unlock_type() call will be required for @lock to be fully
 * unlocked.
 */
void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
{
	six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, _RET_IP_);

	/* XXX: assert already locked, and that we don't overflow: */

	switch (type) {
	case SIX_LOCK_read:
		if (lock->readers) {
			this_cpu_inc(*lock->readers);
		} else {
			EBUG_ON(!(atomic_read(&lock->state) &
				  (SIX_LOCK_HELD_read|
				   SIX_LOCK_HELD_intent)));
			atomic_add(l[type].lock_val, &lock->state);
		}
		break;
	case SIX_LOCK_intent:
		EBUG_ON(!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
		lock->intent_lock_recurse++;
		break;
	case SIX_LOCK_write:
		BUG();
		break;
	}
}
EXPORT_SYMBOL_GPL(six_lock_increment);
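
/*
 * Example (a sketch - @foo is hypothetical): a thread that already holds an
 * intent lock may also take a read lock on the same six_lock without
 * blocking; each acquisition needs its own unlock:
 *
 *	six_lock_intent(&foo->lock);
 *	six_lock_increment(&foo->lock, SIX_LOCK_read);
 *	...
 *	six_unlock_read(&foo->lock);
 *	six_unlock_intent(&foo->lock);
 */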

/**
 * six_lock_wakeup_all - wake up all waiters on @lock
 * @lock: lock to wake up waiters for
 *
 * Waking up waiters will cause them to re-run should_sleep_fn, which may then
 * abort the lock operation.
 *
 * This function is never needed in a bug-free program; it's only useful in
 * debug code, e.g. to determine if a cycle detector is at fault.
 */
void six_lock_wakeup_all(struct six_lock *lock)
{
	u32 state = atomic_read(&lock->state);
	struct six_lock_waiter *w;

	six_lock_wakeup(lock, state, SIX_LOCK_read);
	six_lock_wakeup(lock, state, SIX_LOCK_intent);
	six_lock_wakeup(lock, state, SIX_LOCK_write);

	raw_spin_lock(&lock->wait_lock);
	list_for_each_entry(w, &lock->wait_list, list)
		wake_up_process(w->task);
	raw_spin_unlock(&lock->wait_lock);
}
EXPORT_SYMBOL_GPL(six_lock_wakeup_all);

/**
 * six_lock_counts - return held lock counts, for each lock type
 * @lock: lock to return counters for
 *
 * Return: the number of times a lock is held for read, intent and write.
 */
struct six_lock_count six_lock_counts(struct six_lock *lock)
{
	struct six_lock_count ret;

	ret.n[SIX_LOCK_read]   = !lock->readers
		? atomic_read(&lock->state) & SIX_LOCK_HELD_read
		: pcpu_read_count(lock);
	ret.n[SIX_LOCK_intent] = !!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent) +
		lock->intent_lock_recurse;
	ret.n[SIX_LOCK_write]  = !!(atomic_read(&lock->state) & SIX_LOCK_HELD_write);

	return ret;
}
EXPORT_SYMBOL_GPL(six_lock_counts);

/**
 * six_lock_readers_add - directly manipulate reader count of a lock
 * @lock: lock to add/subtract readers for
 * @nr: reader count to add/subtract
 *
 * When an upper layer is implementing lock reentrancy, we may have both read
 * and intent locks on the same lock.
 *
 * When we need to take a write lock, the read locks will cause self-deadlock,
 * because six locks themselves do not track which read locks are held by the
 * current thread and which are held by a different thread - they do no
 * per-thread tracking of held locks.
 *
 * The upper layer that is tracking held locks may however, if trylock() has
 * failed, count up its own read locks, subtract them, take the write lock, and
 * then re-add them.
 *
 * As in any other situation when taking a write lock, @lock must be held for
 * intent one (or more) times, so @lock will never be left unlocked.
 */
void six_lock_readers_add(struct six_lock *lock, int nr)
{
	if (lock->readers) {
		this_cpu_add(*lock->readers, nr);
	} else {
		EBUG_ON((int) (atomic_read(&lock->state) & SIX_LOCK_HELD_read) + nr < 0);
		/* reader count starts at bit 0 */
		atomic_add(nr, &lock->state);
	}
}
EXPORT_SYMBOL_GPL(six_lock_readers_add);
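
/*
 * Sketch of the pattern described above (illustrative - @nr_read is the
 * caller's own count of read locks it holds on @lock, and @lock is assumed
 * to also be held for intent, as required for taking a write lock):
 *
 *	six_lock_readers_add(&foo->lock, -nr_read);
 *	ret = six_lock_ip_waiter(&foo->lock, SIX_LOCK_write, &wait,
 *				 should_sleep_fn, p, _THIS_IP_);
 *	six_lock_readers_add(&foo->lock, nr_read);
 */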

/**
 * six_lock_exit - release resources held by a lock prior to freeing
 * @lock: lock to exit
 *
 * When a lock was initialized in percpu mode (SIX_LOCK_INIT_PCPU), this is
 * required to free the percpu read counts.
 */
void six_lock_exit(struct six_lock *lock)
{
	WARN_ON(lock->readers && pcpu_read_count(lock));
	WARN_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_read);

	free_percpu(lock->readers);
	lock->readers = NULL;
}
EXPORT_SYMBOL_GPL(six_lock_exit);

void __six_lock_init(struct six_lock *lock, const char *name,
		     struct lock_class_key *key, enum six_lock_init_flags flags)
{
	atomic_set(&lock->state, 0);
	raw_spin_lock_init(&lock->wait_lock);
	INIT_LIST_HEAD(&lock->wait_list);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	debug_check_no_locks_freed((void *) lock, sizeof(*lock));
	lockdep_init_map(&lock->dep_map, name, key, 0);
#endif

	/*
	 * Don't assume that we have real percpu variables available in
	 * userspace:
	 */
#ifdef __KERNEL__
	if (flags & SIX_LOCK_INIT_PCPU) {
		/*
		 * We don't return an error here on memory allocation failure
		 * since percpu is an optimization, and locks will work with the
		 * same semantics in non-percpu mode: callers can check for
		 * failure if they wish by checking lock->readers, but generally
		 * will not want to treat it as an error.
		 */
		lock->readers = alloc_percpu(unsigned);
	}
#endif
}
EXPORT_SYMBOL_GPL(__six_lock_init);