rtmutex.c source code [linux/kernel/locking/rtmutex.c]

1	// SPDX-License-Identifier: GPL-2.0-only
2	/*
3	* RT-Mutexes: simple blocking mutual exclusion locks with PI support
4	*
5	* started by Ingo Molnar and Thomas Gleixner.
6	*
7	* Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
8	* Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
9	* Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
10	* Copyright (C) 2006 Esben Nielsen
11	* Adaptive Spinlocks:
12	* Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
13	* and Peter Morreale,
14	* Adaptive Spinlocks simplification:
15	* Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
16	*
17	* See Documentation/locking/rt-mutex-design.rst for details.
18	*/
19	#include <linux/sched.h>
20	#include <linux/sched/debug.h>
21	#include <linux/sched/deadline.h>
22	#include <linux/sched/signal.h>
23	#include <linux/sched/rt.h>
24	#include <linux/sched/wake_q.h>
25	#include <linux/ww_mutex.h>
26
27	#include <trace/events/lock.h>
28
29	#include "rtmutex_common.h"
30
31	#ifndef WW_RT
32	# define build_ww_mutex() (false)
33	# define ww_container_of(rtm) NULL
34
35	static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter,
36	struct rt_mutex *lock,
37	struct ww_acquire_ctx *ww_ctx)
38	{
39	return `0`;
40	}
41
42	static inline void __ww_mutex_check_waiters(struct rt_mutex *lock,
43	struct ww_acquire_ctx *ww_ctx)
44	{
45	}
46
47	static inline void ww_mutex_lock_acquired(struct ww_mutex *lock,
48	struct ww_acquire_ctx *ww_ctx)
49	{
50	}
51
52	static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
53	struct rt_mutex_waiter *waiter,
54	struct ww_acquire_ctx *ww_ctx)
55	{
56	return `0`;
57	}
58
59	#else
60	# define build_ww_mutex() (true)
61	# define ww_container_of(rtm) container_of(rtm, struct ww_mutex, base)
62	# include "ww_mutex.h"
63	#endif
64
65	/*
66	* lock->owner state tracking:
67	*
68	* lock->owner holds the task_struct pointer of the owner. Bit 0
69	* is used to keep track of the "lock has waiters" state.
70	*
71	* owner bit0
72	* NULL 0 lock is free (fast acquire possible)
73	* NULL 1 lock is free and has waiters and the top waiter
74	* is going to take the lock*
75	* taskpointer 0 lock is held (fast release possible)
76	* taskpointer 1 lock is held and has waiters**
77	*
78	* The fast atomic compare exchange based acquire and release is only
79	* possible when bit 0 of lock->owner is 0.
80	*
81	* (*) It also can be a transitional state when grabbing the lock
82	* with ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
83	* we need to set the bit0 before looking at the lock, and the owner may be
84	* NULL in this small time, hence this can be a transitional state.
85	*
86	* (**) There is a small time when bit 0 is set but there are no
87	* waiters. This can happen when grabbing the lock in the slow path.
88	* To prevent a cmpxchg of the owner releasing the lock, we need to
89	* set this bit before looking at the lock.
90	*/
91
92	static __always_inline struct task_struct *
93	rt_mutex_owner_encode(struct rt_mutex_base lock, struct* task_struct *owner)
94	{
95	unsigned long val = (unsigned long)owner;
96
97	if (rt_mutex_has_waiters(lock))
98	val \|= RT_MUTEX_HAS_WAITERS;
99
100	return (struct task_struct *)val;
101	}
102
103	static __always_inline void
104	rt_mutex_set_owner(struct rt_mutex_base lock, struct* task_struct *owner)
105	{
106	/*
107	* lock->wait_lock is held but explicit acquire semantics are needed
108	* for a new lock owner so WRITE_ONCE is insufficient.
109	*/
110	xchg_acquire(&lock->owner, rt_mutex_owner_encode(lock, owner));
111	}
112
113	static __always_inline void rt_mutex_clear_owner(struct rt_mutex_base *lock)
114	{
115	/ lock->wait_lock is held so the unlock provides release semantics. /
116	WRITE_ONCE(lock->owner, rt_mutex_owner_encode(lock, NULL));
117	}
118
119	static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
120	{
121	lock->owner = (struct task_struct *)
122	((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
123	}
124
125	static __always_inline void
126	fixup_rt_mutex_waiters(struct rt_mutex_base *lock, bool acquire_lock)
127	{
128	unsigned long owner, p = (unsigned* long *) &lock->owner;
129
130	if (rt_mutex_has_waiters(lock))
131	return;
132
133	/*
134	* The rbtree has no waiters enqueued, now make sure that the
135	* lock->owner still has the waiters bit set, otherwise the
136	* following can happen:
137	*
138	* CPU 0 CPU 1 CPU2
139	* l->owner=T1
140	* rt_mutex_lock(l)
141	* lock(l->lock)
142	* l->owner = T1 \| HAS_WAITERS;
143	* enqueue(T2)
144	* boost()
145	* unlock(l->lock)
146	* block()
147	*
148	* rt_mutex_lock(l)
149	* lock(l->lock)
150	* l->owner = T1 \| HAS_WAITERS;
151	* enqueue(T3)
152	* boost()
153	* unlock(l->lock)
154	* block()
155	* signal(->T2) signal(->T3)
156	* lock(l->lock)
157	* dequeue(T2)
158	* deboost()
159	* unlock(l->lock)
160	* lock(l->lock)
161	* dequeue(T3)
162	* ==> wait list is empty
163	* deboost()
164	* unlock(l->lock)
165	* lock(l->lock)
166	* fixup_rt_mutex_waiters()
167	* if (wait_list_empty(l) {
168	* l->owner = owner
169	* owner = l->owner & ~HAS_WAITERS;
170	* ==> l->owner = T1
171	* }
172	* lock(l->lock)
173	* rt_mutex_unlock(l) fixup_rt_mutex_waiters()
174	* if (wait_list_empty(l) {
175	* owner = l->owner & ~HAS_WAITERS;
176	* cmpxchg(l->owner, T1, NULL)
177	* ===> Success (l->owner = NULL)
178	*
179	* l->owner = owner
180	* ==> l->owner = T1
181	* }
182	*
183	* With the check for the waiter bit in place T3 on CPU2 will not
184	* overwrite. All tasks fiddling with the waiters bit are
185	* serialized by l->lock, so nothing else can modify the waiters
186	* bit. If the bit is set then nothing can change l->owner either
187	* so the simple RMW is safe. The cmpxchg() will simply fail if it
188	* happens in the middle of the RMW because the waiters bit is
189	* still set.
190	*/
191	owner = READ_ONCE(*p);
192	if (owner & RT_MUTEX_HAS_WAITERS) {
193	/*
194	* See rt_mutex_set_owner() and rt_mutex_clear_owner() on
195	* why xchg_acquire() is used for updating owner for
196	* locking and WRITE_ONCE() for unlocking.
197	*
198	* WRITE_ONCE() would work for the acquire case too, but
199	* in case that the lock acquisition failed it might
200	* force other lockers into the slow path unnecessarily.
201	*/
202	if (acquire_lock)
203	xchg_acquire(p, owner & ~RT_MUTEX_HAS_WAITERS);
204	else
205	WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
206	}
207	}
208
209	/*
210	* We can speed up the acquire/release, if there's no debugging state to be
211	* set up.
212	*/
213	#ifndef CONFIG_DEBUG_RT_MUTEXES
214	static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
215	struct task_struct *old,
216	struct task_struct *new)
217	{
218	return try_cmpxchg_acquire(&lock->owner, &old, new);
219	}
220
221	static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock)
222	{
223	return rt_mutex_cmpxchg_acquire(lock, NULL, current);
224	}
225
226	static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
227	struct task_struct *old,
228	struct task_struct *new)
229	{
230	return try_cmpxchg_release(&lock->owner, &old, new);
231	}
232
233	/*
234	* Callers must hold the ->wait_lock -- which is the whole purpose as we force
235	* all future threads that attempt to [Rmw] the lock to the slowpath. As such
236	* relaxed semantics suffice.
237	*/
238	static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
239	{
240	unsigned long owner, p = (unsigned* long *) &lock->owner;
241
242	do {
243	owner = *p;
244	} while (cmpxchg_relaxed(p, owner,
245	owner \| RT_MUTEX_HAS_WAITERS) != owner);
246
247	/*
248	* The cmpxchg loop above is relaxed to avoid back-to-back ACQUIRE
249	* operations in the event of contention. Ensure the successful
250	* cmpxchg is visible.
251	*/
252	smp_mb__after_atomic();
253	}
254
255	/*
256	* Safe fastpath aware unlock:
257	* 1) Clear the waiters bit
258	* 2) Drop lock->wait_lock
259	* 3) Try to unlock the lock with cmpxchg
260	*/
261	static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
262	unsigned long flags)
263	__releases(lock->wait_lock)
264	{
265	struct task_struct *owner = rt_mutex_owner(lock);
266
267	clear_rt_mutex_waiters(lock);
268	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
269	/*
270	* If a new waiter comes in between the unlock and the cmpxchg
271	* we have two situations:
272	*
273	* unlock(wait_lock);
274	* lock(wait_lock);
275	* cmpxchg(p, owner, 0) == owner
276	* mark_rt_mutex_waiters(lock);
277	* acquire(lock);
278	* or:
279	*
280	* unlock(wait_lock);
281	* lock(wait_lock);
282	* mark_rt_mutex_waiters(lock);
283	*
284	* cmpxchg(p, owner, 0) != owner
285	* enqueue_waiter();
286	* unlock(wait_lock);
287	* lock(wait_lock);
288	* wake waiter();
289	* unlock(wait_lock);
290	* lock(wait_lock);
291	* acquire(lock);
292	*/
293	return rt_mutex_cmpxchg_release(lock, owner, NULL);
294	}
295
296	#else
297	static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
298	struct task_struct *old,
299	struct task_struct *new)
300	{
301	return false;
302
303	}
304
305	static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock);
306
307	static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock)
308	{
309	/*
310	* With debug enabled rt_mutex_cmpxchg trylock() will always fail.
311	*
312	* Avoid unconditionally taking the slow path by using
313	* rt_mutex_slow_trylock() which is covered by the debug code and can
314	* acquire a non-contended rtmutex.
315	*/
316	return rt_mutex_slowtrylock(lock);
317	}
318
319	static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
320	struct task_struct *old,
321	struct task_struct *new)
322	{
323	return false;
324	}
325
326	static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
327	{
328	lock->owner = (struct task_struct *)
329	((unsigned long)lock->owner \| RT_MUTEX_HAS_WAITERS);
330	}
331
332	/*
333	* Simple slow path only version: lock->owner is protected by lock->wait_lock.
334	*/
335	static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
336	unsigned long flags)
337	__releases(lock->wait_lock)
338	{
339	lock->owner = NULL;
340	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
341	return true;
342	}
343	#endif
344
345	static __always_inline int __waiter_prio(struct task_struct *task)
346	{
347	int prio = task->prio;
348
349	if (!rt_prio(prio))
350	return DEFAULT_PRIO;
351
352	return prio;
353	}
354
355	/*
356	* Update the waiter->tree copy of the sort keys.
357	*/
358	static __always_inline void
359	waiter_update_prio(struct rt_mutex_waiter waiter, struct* task_struct *task)
360	{
361	lockdep_assert_held(&waiter->lock->wait_lock);
362	lockdep_assert(RB_EMPTY_NODE(&waiter->tree.entry));
363
364	waiter->tree.prio = __waiter_prio(task);
365	waiter->tree.deadline = task->dl.deadline;
366	}
367
368	/*
369	* Update the waiter->pi_tree copy of the sort keys (from the tree copy).
370	*/
371	static __always_inline void
372	waiter_clone_prio(struct rt_mutex_waiter waiter, struct* task_struct *task)
373	{
374	lockdep_assert_held(&waiter->lock->wait_lock);
375	lockdep_assert_held(&task->pi_lock);
376	lockdep_assert(RB_EMPTY_NODE(&waiter->pi_tree.entry));
377
378	waiter->pi_tree.prio = waiter->tree.prio;
379	waiter->pi_tree.deadline = waiter->tree.deadline;
380	}
381
382	/*
383	* Only use with rt_waiter_node_{less,equal}()
384	*/
385	#define task_to_waiter_node(p) \
386	&(struct rt_waiter_node){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }
387	#define task_to_waiter(p) \
388	&(struct rt_mutex_waiter){ .tree = *task_to_waiter_node(p) }
389
390	static __always_inline int rt_waiter_node_less(struct rt_waiter_node *left,
391	struct rt_waiter_node *right)
392	{
393	if (left->prio < right->prio)
394	return `1`;
395
396	/*
397	* If both waiters have dl_prio(), we check the deadlines of the
398	* associated tasks.
399	* If left waiter has a dl_prio(), and we didn't return 1 above,
400	* then right waiter has a dl_prio() too.
401	*/
402	if (dl_prio(prio: left->prio))
403	return dl_time_before(a: left->deadline, b: right->deadline);
404
405	return `0`;
406	}
407
408	static __always_inline int rt_waiter_node_equal(struct rt_waiter_node *left,
409	struct rt_waiter_node *right)
410	{
411	if (left->prio != right->prio)
412	return `0`;
413
414	/*
415	* If both waiters have dl_prio(), we check the deadlines of the
416	* associated tasks.
417	* If left waiter has a dl_prio(), and we didn't return 0 above,
418	* then right waiter has a dl_prio() too.
419	*/
420	if (dl_prio(prio: left->prio))
421	return left->deadline == right->deadline;
422
423	return `1`;
424	}
425
426	static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
427	struct rt_mutex_waiter *top_waiter)
428	{
429	if (rt_waiter_node_less(left: &waiter->tree, right: &top_waiter->tree))
430	return true;
431
432	#ifdef RT_MUTEX_BUILD_SPINLOCKS
433	/*
434	* Note that RT tasks are excluded from same priority (lateral)
435	* steals to prevent the introduction of an unbounded latency.
436	*/
437	if (rt_prio(waiter->tree.prio) \|\| dl_prio(waiter->tree.prio))
438	return false;
439
440	return rt_waiter_node_equal(&waiter->tree, &top_waiter->tree);
441	#else
442	return false;
443	#endif
444	}
445
446	#define __node_2_waiter(node) \
447	rb_entry((node), struct rt_mutex_waiter, tree.entry)
448
449	static __always_inline bool __waiter_less(struct rb_node a, const* struct rb_node *b)
450	{
451	struct rt_mutex_waiter *aw = __node_2_waiter(a);
452	struct rt_mutex_waiter *bw = __node_2_waiter(b);
453
454	if (rt_waiter_node_less(left: &aw->tree, right: &bw->tree))
455	return `1`;
456
457	if (!build_ww_mutex())
458	return `0`;
459
460	if (rt_waiter_node_less(left: &bw->tree, right: &aw->tree))
461	return `0`;
462
463	/ NOTE: relies on waiter->ww_ctx being set before insertion /
464	if (aw->ww_ctx) {
465	if (!bw->ww_ctx)
466	return `1`;
467
468	return (signed long)(aw->ww_ctx->stamp -
469	bw->ww_ctx->stamp) < `0`;
470	}
471
472	return `0`;
473	}
474
475	static __always_inline void
476	rt_mutex_enqueue(struct rt_mutex_base lock, struct* rt_mutex_waiter *waiter)
477	{
478	lockdep_assert_held(&lock->wait_lock);
479
480	rb_add_cached(node: &waiter->tree.entry, tree: &lock->waiters, less: __waiter_less);
481	}
482
483	static __always_inline void
484	rt_mutex_dequeue(struct rt_mutex_base lock, struct* rt_mutex_waiter *waiter)
485	{
486	lockdep_assert_held(&lock->wait_lock);
487
488	if (RB_EMPTY_NODE(&waiter->tree.entry))
489	return;
490
491	rb_erase_cached(node: &waiter->tree.entry, root: &lock->waiters);
492	RB_CLEAR_NODE(&waiter->tree.entry);
493	}
494
495	#define __node_2_rt_node(node) \
496	rb_entry((node), struct rt_waiter_node, entry)
497
498	static __always_inline bool __pi_waiter_less(struct rb_node a, const* struct rb_node *b)
499	{
500	return rt_waiter_node_less(__node_2_rt_node(a), __node_2_rt_node(b));
501	}
502
503	static __always_inline void
504	rt_mutex_enqueue_pi(struct task_struct task, struct* rt_mutex_waiter *waiter)
505	{
506	lockdep_assert_held(&task->pi_lock);
507
508	rb_add_cached(node: &waiter->pi_tree.entry, tree: &task->pi_waiters, less: __pi_waiter_less);
509	}
510
511	static __always_inline void
512	rt_mutex_dequeue_pi(struct task_struct task, struct* rt_mutex_waiter *waiter)
513	{
514	lockdep_assert_held(&task->pi_lock);
515
516	if (RB_EMPTY_NODE(&waiter->pi_tree.entry))
517	return;
518
519	rb_erase_cached(node: &waiter->pi_tree.entry, root: &task->pi_waiters);
520	RB_CLEAR_NODE(&waiter->pi_tree.entry);
521	}
522
523	static __always_inline void rt_mutex_adjust_prio(struct rt_mutex_base *lock,
524	struct task_struct *p)
525	{
526	struct task_struct *pi_task = NULL;
527
528	lockdep_assert_held(&lock->wait_lock);
529	lockdep_assert(rt_mutex_owner(lock) == p);
530	lockdep_assert_held(&p->pi_lock);
531
532	if (task_has_pi_waiters(p))
533	pi_task = task_top_pi_waiter(p)->task;
534
535	rt_mutex_setprio(p, pi_task);
536	}
537
538	/ RT mutex specific wake_q wrappers /
539	static __always_inline void rt_mutex_wake_q_add_task(struct rt_wake_q_head *wqh,
540	struct task_struct *task,
541	unsigned int wake_state)
542	{
543	if (IS_ENABLED(CONFIG_PREEMPT_RT) && wake_state == TASK_RTLOCK_WAIT) {
544	if (IS_ENABLED(CONFIG_PROVE_LOCKING))
545	WARN_ON_ONCE(wqh->rtlock_task);
546	get_task_struct(t: task);
547	wqh->rtlock_task = task;
548	} else {
549	wake_q_add(head: &wqh->head, task);
550	}
551	}
552
553	static __always_inline void rt_mutex_wake_q_add(struct rt_wake_q_head *wqh,
554	struct rt_mutex_waiter *w)
555	{
556	rt_mutex_wake_q_add_task(wqh, task: w->task, wake_state: w->wake_state);
557	}
558
559	static __always_inline void rt_mutex_wake_up_q(struct rt_wake_q_head *wqh)
560	{
561	if (IS_ENABLED(CONFIG_PREEMPT_RT) && wqh->rtlock_task) {
562	wake_up_state(tsk: wqh->rtlock_task, TASK_RTLOCK_WAIT);
563	put_task_struct(t: wqh->rtlock_task);
564	wqh->rtlock_task = NULL;
565	}
566
567	if (!wake_q_empty(head: &wqh->head))
568	wake_up_q(head: &wqh->head);
569
570	/ Pairs with preempt_disable() in mark_wakeup_next_waiter() /
571	preempt_enable();
572	}
573
574	/*
575	* Deadlock detection is conditional:
576	*
577	* If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
578	* if the detect argument is == RT_MUTEX_FULL_CHAINWALK.
579	*
580	* If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always
581	* conducted independent of the detect argument.
582	*
583	* If the waiter argument is NULL this indicates the deboost path and
584	* deadlock detection is disabled independent of the detect argument
585	* and the config settings.
586	*/
587	static __always_inline bool
588	rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
589	enum rtmutex_chainwalk chwalk)
590	{
591	if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES))
592	return waiter != NULL;
593	return chwalk == RT_MUTEX_FULL_CHAINWALK;
594	}
595
596	static __always_inline struct rt_mutex_base task_blocked_on_lock(struct* task_struct *p)
597	{
598	return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
599	}
600
601	/*
602	* Adjust the priority chain. Also used for deadlock detection.
603	* Decreases task's usage by one - may thus free the task.
604	*
605	* @task: the task owning the mutex (owner) for which a chain walk is
606	* probably needed
607	* @chwalk: do we have to carry out deadlock detection?
608	* @orig_lock: the mutex (can be NULL if we are walking the chain to recheck
609	* things for a task that has just got its priority adjusted, and
610	* is waiting on a mutex)
611	* @next_lock: the mutex on which the owner of @orig_lock was blocked before
612	* we dropped its pi_lock. Is never dereferenced, only used for
613	* comparison to detect lock chain changes.
614	* @orig_waiter: rt_mutex_waiter struct for the task that has just donated
615	* its priority to the mutex owner (can be NULL in the case
616	* depicted above or if the top waiter is gone away and we are
617	* actually deboosting the owner)
618	* @top_task: the current top waiter
619	*
620	* Returns 0 or -EDEADLK.
621	*
622	* Chain walk basics and protection scope
623	*
624	* [R] refcount on task
625	* [Pn] task->pi_lock held
626	* [L] rtmutex->wait_lock held
627	*
628	* Normal locking order:
629	*
630	* rtmutex->wait_lock
631	* task->pi_lock
632	*
633	* Step Description Protected by
634	* function arguments:
635	* @task [R]
636	* @orig_lock if != NULL @top_task is blocked on it
637	* @next_lock Unprotected. Cannot be
638	* dereferenced. Only used for
639	* comparison.
640	* @orig_waiter if != NULL @top_task is blocked on it
641	* @top_task current, or in case of proxy
642	* locking protected by calling
643	* code
644	* again:
645	* loop_sanity_check();
646	* retry:
647	* [1] lock(task->pi_lock); [R] acquire [P1]
648	* [2] waiter = task->pi_blocked_on; [P1]
649	* [3] check_exit_conditions_1(); [P1]
650	* [4] lock = waiter->lock; [P1]
651	* [5] if (!try_lock(lock->wait_lock)) { [P1] try to acquire [L]
652	* unlock(task->pi_lock); release [P1]
653	* goto retry;
654	* }
655	* [6] check_exit_conditions_2(); [P1] + [L]
656	* [7] requeue_lock_waiter(lock, waiter); [P1] + [L]
657	* [8] unlock(task->pi_lock); release [P1]
658	* put_task_struct(task); release [R]
659	* [9] check_exit_conditions_3(); [L]
660	* [10] task = owner(lock); [L]
661	* get_task_struct(task); [L] acquire [R]
662	* lock(task->pi_lock); [L] acquire [P2]
663	* [11] requeue_pi_waiter(tsk, waiters(lock));[P2] + [L]
664	* [12] check_exit_conditions_4(); [P2] + [L]
665	* [13] unlock(task->pi_lock); release [P2]
666	* unlock(lock->wait_lock); release [L]
667	* goto again;
668	*
669	* Where P1 is the blocking task and P2 is the lock owner; going up one step
670	* the owner becomes the next blocked task etc..
671	*
672	*
673	*/
674	static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
675	enum rtmutex_chainwalk chwalk,
676	struct rt_mutex_base *orig_lock,
677	struct rt_mutex_base *next_lock,
678	struct rt_mutex_waiter *orig_waiter,
679	struct task_struct *top_task)
680	{
681	struct rt_mutex_waiter waiter, top_waiter = orig_waiter;
682	struct rt_mutex_waiter *prerequeue_top_waiter;
683	int ret = `0`, depth = `0`;
684	struct rt_mutex_base *lock;
685	bool detect_deadlock;
686	bool requeue = true;
687
688	detect_deadlock = rt_mutex_cond_detect_deadlock(waiter: orig_waiter, chwalk);
689
690	/*
691	* The (de)boosting is a step by step approach with a lot of
692	* pitfalls. We want this to be preemptible and we want hold a
693	* maximum of two locks per step. So we have to check
694	* carefully whether things change under us.
695	*/
696	again:
697	/*
698	* We limit the lock chain length for each invocation.
699	*/
700	if (++depth > max_lock_depth) {
701	static int prev_max;
702
703	/*
704	* Print this only once. If the admin changes the limit,
705	* print a new message when reaching the limit again.
706	*/
707	if (prev_max != max_lock_depth) {
708	prev_max = max_lock_depth;
709	printk(KERN_WARNING "Maximum lock depth %d reached "
710	"task: %s (%d)\n", max_lock_depth,
711	top_task->comm, task_pid_nr(top_task));
712	}
713	put_task_struct(t: task);
714
715	return -EDEADLK;
716	}
717
718	/*
719	* We are fully preemptible here and only hold the refcount on
720	* @task. So everything can have changed under us since the
721	* caller or our own code below (goto retry/again) dropped all
722	* locks.
723	*/
724	retry:
725	/*
726	* [1] Task cannot go away as we did a get_task() before !
727	*/
728	raw_spin_lock_irq(&task->pi_lock);
729
730	/*
731	* [2] Get the waiter on which @task is blocked on.
732	*/
733	waiter = task->pi_blocked_on;
734
735	/*
736	* [3] check_exit_conditions_1() protected by task->pi_lock.
737	*/
738
739	/*
740	* Check whether the end of the boosting chain has been
741	* reached or the state of the chain has changed while we
742	* dropped the locks.
743	*/
744	if (!waiter)
745	goto out_unlock_pi;
746
747	/*
748	* Check the orig_waiter state. After we dropped the locks,
749	* the previous owner of the lock might have released the lock.
750	*/
751	if (orig_waiter && !rt_mutex_owner(lock: orig_lock))
752	goto out_unlock_pi;
753
754	/*
755	* We dropped all locks after taking a refcount on @task, so
756	* the task might have moved on in the lock chain or even left
757	* the chain completely and blocks now on an unrelated lock or
758	* on @orig_lock.
759	*
760	* We stored the lock on which @task was blocked in @next_lock,
761	* so we can detect the chain change.
762	*/
763	if (next_lock != waiter->lock)
764	goto out_unlock_pi;
765
766	/*
767	* There could be 'spurious' loops in the lock graph due to ww_mutex,
768	* consider:
769	*
770	* P1: A, ww_A, ww_B
771	* P2: ww_B, ww_A
772	* P3: A
773	*
774	* P3 should not return -EDEADLK because it gets trapped in the cycle
775	* created by P1 and P2 (which will resolve -- and runs into
776	* max_lock_depth above). Therefore disable detect_deadlock such that
777	* the below termination condition can trigger once all relevant tasks
778	* are boosted.
779	*
780	* Even when we start with ww_mutex we can disable deadlock detection,
781	* since we would supress a ww_mutex induced deadlock at [6] anyway.
782	* Supressing it here however is not sufficient since we might still
783	* hit [6] due to adjustment driven iteration.
784	*
785	* NOTE: if someone were to create a deadlock between 2 ww_classes we'd
786	* utterly fail to report it; lockdep should.
787	*/
788	if (IS_ENABLED(CONFIG_PREEMPT_RT) && waiter->ww_ctx && detect_deadlock)
789	detect_deadlock = false;
790
791	/*
792	* Drop out, when the task has no waiters. Note,
793	* top_waiter can be NULL, when we are in the deboosting
794	* mode!
795	*/
796	if (top_waiter) {
797	if (!task_has_pi_waiters(p: task))
798	goto out_unlock_pi;
799	/*
800	* If deadlock detection is off, we stop here if we
801	* are not the top pi waiter of the task. If deadlock
802	* detection is enabled we continue, but stop the
803	* requeueing in the chain walk.
804	*/
805	if (top_waiter != task_top_pi_waiter(p: task)) {
806	if (!detect_deadlock)
807	goto out_unlock_pi;
808	else
809	requeue = false;
810	}
811	}
812
813	/*
814	* If the waiter priority is the same as the task priority
815	* then there is no further priority adjustment necessary. If
816	* deadlock detection is off, we stop the chain walk. If its
817	* enabled we continue, but stop the requeueing in the chain
818	* walk.
819	*/
820	if (rt_waiter_node_equal(left: &waiter->tree, task_to_waiter_node(task))) {
821	if (!detect_deadlock)
822	goto out_unlock_pi;
823	else
824	requeue = false;
825	}
826
827	/*
828	* [4] Get the next lock; per holding task->pi_lock we can't unblock
829	* and guarantee @lock's existence.
830	*/
831	lock = waiter->lock;
832	/*
833	* [5] We need to trylock here as we are holding task->pi_lock,
834	* which is the reverse lock order versus the other rtmutex
835	* operations.
836	*
837	* Per the above, holding task->pi_lock guarantees lock exists, so
838	* inverting this lock order is infeasible from a life-time
839	* perspective.
840	*/
841	if (!raw_spin_trylock(&lock->wait_lock)) {
842	raw_spin_unlock_irq(&task->pi_lock);
843	cpu_relax();
844	goto retry;
845	}
846
847	/*
848	* [6] check_exit_conditions_2() protected by task->pi_lock and
849	* lock->wait_lock.
850	*
851	* Deadlock detection. If the lock is the same as the original
852	* lock which caused us to walk the lock chain or if the
853	* current lock is owned by the task which initiated the chain
854	* walk, we detected a deadlock.
855	*/
856	if (lock == orig_lock \|\| rt_mutex_owner(lock) == top_task) {
857	ret = -EDEADLK;
858
859	/*
860	* When the deadlock is due to ww_mutex; also see above. Don't
861	* report the deadlock and instead let the ww_mutex wound/die
862	* logic pick which of the contending threads gets -EDEADLK.
863	*
864	* NOTE: assumes the cycle only contains a single ww_class; any
865	* other configuration and we fail to report; also, see
866	* lockdep.
867	*/
868	if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx)
869	ret = `0`;
870
871	raw_spin_unlock(&lock->wait_lock);
872	goto out_unlock_pi;
873	}
874
875	/*
876	* If we just follow the lock chain for deadlock detection, no
877	* need to do all the requeue operations. To avoid a truckload
878	* of conditionals around the various places below, just do the
879	* minimum chain walk checks.
880	*/
881	if (!requeue) {
882	/*
883	* No requeue[7] here. Just release @task [8]
884	*/
885	raw_spin_unlock(&task->pi_lock);
886	put_task_struct(t: task);
887
888	/*
889	* [9] check_exit_conditions_3 protected by lock->wait_lock.
890	* If there is no owner of the lock, end of chain.
891	*/
892	if (!rt_mutex_owner(lock)) {
893	raw_spin_unlock_irq(&lock->wait_lock);
894	return `0`;
895	}
896
897	/ [10] Grab the next task, i.e. owner of @lock /
898	task = get_task_struct(t: rt_mutex_owner(lock));
899	raw_spin_lock(&task->pi_lock);
900
901	/*
902	* No requeue [11] here. We just do deadlock detection.
903	*
904	* [12] Store whether owner is blocked
905	* itself. Decision is made after dropping the locks
906	*/
907	next_lock = task_blocked_on_lock(p: task);
908	/*
909	* Get the top waiter for the next iteration
910	*/
911	top_waiter = rt_mutex_top_waiter(lock);
912
913	/ [13] Drop locks /
914	raw_spin_unlock(&task->pi_lock);
915	raw_spin_unlock_irq(&lock->wait_lock);
916
917	/ If owner is not blocked, end of chain. /
918	if (!next_lock)
919	goto out_put_task;
920	goto again;
921	}
922
923	/*
924	* Store the current top waiter before doing the requeue
925	* operation on @lock. We need it for the boost/deboost
926	* decision below.
927	*/
928	prerequeue_top_waiter = rt_mutex_top_waiter(lock);
929
930	/ [7] Requeue the waiter in the lock waiter tree. /
931	rt_mutex_dequeue(lock, waiter);
932
933	/*
934	* Update the waiter prio fields now that we're dequeued.
935	*
936	* These values can have changed through either:
937	*
938	* sys_sched_set_scheduler() / sys_sched_setattr()
939	*
940	* or
941	*
942	* DL CBS enforcement advancing the effective deadline.
943	*/
944	waiter_update_prio(waiter, task);
945
946	rt_mutex_enqueue(lock, waiter);
947
948	/*
949	* [8] Release the (blocking) task in preparation for
950	* taking the owner task in [10].
951	*
952	* Since we hold lock->waiter_lock, task cannot unblock, even if we
953	* release task->pi_lock.
954	*/
955	raw_spin_unlock(&task->pi_lock);
956	put_task_struct(t: task);
957
958	/*
959	* [9] check_exit_conditions_3 protected by lock->wait_lock.
960	*
961	* We must abort the chain walk if there is no lock owner even
962	* in the dead lock detection case, as we have nothing to
963	* follow here. This is the end of the chain we are walking.
964	*/
965	if (!rt_mutex_owner(lock)) {
966	/*
967	* If the requeue [7] above changed the top waiter,
968	* then we need to wake the new top waiter up to try
969	* to get the lock.
970	*/
971	top_waiter = rt_mutex_top_waiter(lock);
972	if (prerequeue_top_waiter != top_waiter)
973	wake_up_state(tsk: top_waiter->task, state: top_waiter->wake_state);
974	raw_spin_unlock_irq(&lock->wait_lock);
975	return `0`;
976	}
977
978	/*
979	* [10] Grab the next task, i.e. the owner of @lock
980	*
981	* Per holding lock->wait_lock and checking for !owner above, there
982	* must be an owner and it cannot go away.
983	*/
984	task = get_task_struct(t: rt_mutex_owner(lock));
985	raw_spin_lock(&task->pi_lock);
986
987	/ [11] requeue the pi waiters if necessary /
988	if (waiter == rt_mutex_top_waiter(lock)) {
989	/*
990	* The waiter became the new top (highest priority)
991	* waiter on the lock. Replace the previous top waiter
992	* in the owner tasks pi waiters tree with this waiter
993	* and adjust the priority of the owner.
994	*/
995	rt_mutex_dequeue_pi(task, waiter: prerequeue_top_waiter);
996	waiter_clone_prio(waiter, task);
997	rt_mutex_enqueue_pi(task, waiter);
998	rt_mutex_adjust_prio(lock, p: task);
999
1000	} else if (prerequeue_top_waiter == waiter) {
1001	/*
1002	* The waiter was the top waiter on the lock, but is
1003	* no longer the top priority waiter. Replace waiter in
1004	* the owner tasks pi waiters tree with the new top
1005	* (highest priority) waiter and adjust the priority
1006	* of the owner.
1007	* The new top waiter is stored in @waiter so that
1008	* @waiter == @top_waiter evaluates to true below and
1009	* we continue to deboost the rest of the chain.
1010	*/
1011	rt_mutex_dequeue_pi(task, waiter);
1012	waiter = rt_mutex_top_waiter(lock);
1013	waiter_clone_prio(waiter, task);
1014	rt_mutex_enqueue_pi(task, waiter);
1015	rt_mutex_adjust_prio(lock, p: task);
1016	} else {
1017	/*
1018	* Nothing changed. No need to do any priority
1019	* adjustment.
1020	*/
1021	}
1022
1023	/*
1024	* [12] check_exit_conditions_4() protected by task->pi_lock
1025	* and lock->wait_lock. The actual decisions are made after we
1026	* dropped the locks.
1027	*
1028	* Check whether the task which owns the current lock is pi
1029	* blocked itself. If yes we store a pointer to the lock for
1030	* the lock chain change detection above. After we dropped
1031	* task->pi_lock next_lock cannot be dereferenced anymore.
1032	*/
1033	next_lock = task_blocked_on_lock(p: task);
1034	/*
1035	* Store the top waiter of @lock for the end of chain walk
1036	* decision below.
1037	*/
1038	top_waiter = rt_mutex_top_waiter(lock);
1039
1040	/ [13] Drop the locks /
1041	raw_spin_unlock(&task->pi_lock);
1042	raw_spin_unlock_irq(&lock->wait_lock);
1043
1044	/*
1045	* Make the actual exit decisions [12], based on the stored
1046	* values.
1047	*
1048	* We reached the end of the lock chain. Stop right here. No
1049	* point to go back just to figure that out.
1050	*/
1051	if (!next_lock)
1052	goto out_put_task;
1053
1054	/*
1055	* If the current waiter is not the top waiter on the lock,
1056	* then we can stop the chain walk here if we are not in full
1057	* deadlock detection mode.
1058	*/
1059	if (!detect_deadlock && waiter != top_waiter)
1060	goto out_put_task;
1061
1062	goto again;
1063
1064	out_unlock_pi:
1065	raw_spin_unlock_irq(&task->pi_lock);
1066	out_put_task:
1067	put_task_struct(t: task);
1068
1069	return ret;
1070	}
1071
1072	/*
1073	* Try to take an rt-mutex
1074	*
1075	* Must be called with lock->wait_lock held and interrupts disabled
1076	*
1077	* @lock: The lock to be acquired.
1078	* @task: The task which wants to acquire the lock
1079	* @waiter: The waiter that is queued to the lock's wait tree if the
1080	* callsite called task_blocked_on_lock(), otherwise NULL
1081	*/
1082	static int __sched
1083	try_to_take_rt_mutex(struct rt_mutex_base lock, struct* task_struct *task,
1084	struct rt_mutex_waiter *waiter)
1085	{
1086	lockdep_assert_held(&lock->wait_lock);
1087
1088	/*
1089	* Before testing whether we can acquire @lock, we set the
1090	* RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
1091	* other tasks which try to modify @lock into the slow path
1092	* and they serialize on @lock->wait_lock.
1093	*
1094	* The RT_MUTEX_HAS_WAITERS bit can have a transitional state
1095	* as explained at the top of this file if and only if:
1096	*
1097	* - There is a lock owner. The caller must fixup the
1098	* transient state if it does a trylock or leaves the lock
1099	* function due to a signal or timeout.
1100	*
1101	* - @task acquires the lock and there are no other
1102	* waiters. This is undone in rt_mutex_set_owner(@task) at
1103	* the end of this function.
1104	*/
1105	mark_rt_mutex_waiters(lock);
1106
1107	/*
1108	* If @lock has an owner, give up.
1109	*/
1110	if (rt_mutex_owner(lock))
1111	return `0`;
1112
1113	/*
1114	* If @waiter != NULL, @task has already enqueued the waiter
1115	* into @lock waiter tree. If @waiter == NULL then this is a
1116	* trylock attempt.
1117	*/
1118	if (waiter) {
1119	struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);
1120
1121	/*
1122	* If waiter is the highest priority waiter of @lock,
1123	* or allowed to steal it, take it over.
1124	*/
1125	if (waiter == top_waiter \|\| rt_mutex_steal(waiter, top_waiter)) {
1126	/*
1127	* We can acquire the lock. Remove the waiter from the
1128	* lock waiters tree.
1129	*/
1130	rt_mutex_dequeue(lock, waiter);
1131	} else {
1132	return `0`;
1133	}
1134	} else {
1135	/*
1136	* If the lock has waiters already we check whether @task is
1137	* eligible to take over the lock.
1138	*
1139	* If there are no other waiters, @task can acquire
1140	* the lock. @task->pi_blocked_on is NULL, so it does
1141	* not need to be dequeued.
1142	*/
1143	if (rt_mutex_has_waiters(lock)) {
1144	/ Check whether the trylock can steal it. /
1145	if (!rt_mutex_steal(task_to_waiter(task),
1146	top_waiter: rt_mutex_top_waiter(lock)))
1147	return `0`;
1148
1149	/*
1150	* The current top waiter stays enqueued. We
1151	* don't have to change anything in the lock
1152	* waiters order.
1153	*/
1154	} else {
1155	/*
1156	* No waiters. Take the lock without the
1157	* pi_lock dance.@task->pi_blocked_on is NULL
1158	* and we have no waiters to enqueue in @task
1159	* pi waiters tree.
1160	*/
1161	goto takeit;
1162	}
1163	}
1164
1165	/*
1166	* Clear @task->pi_blocked_on. Requires protection by
1167	* @task->pi_lock. Redundant operation for the @waiter == NULL
1168	* case, but conditionals are more expensive than a redundant
1169	* store.
1170	*/
1171	raw_spin_lock(&task->pi_lock);
1172	task->pi_blocked_on = NULL;
1173	/*
1174	* Finish the lock acquisition. @task is the new owner. If
1175	* other waiters exist we have to insert the highest priority
1176	* waiter into @task->pi_waiters tree.
1177	*/
1178	if (rt_mutex_has_waiters(lock))
1179	rt_mutex_enqueue_pi(task, waiter: rt_mutex_top_waiter(lock));
1180	raw_spin_unlock(&task->pi_lock);
1181
1182	takeit:
1183	/*
1184	* This either preserves the RT_MUTEX_HAS_WAITERS bit if there
1185	* are still waiters or clears it.
1186	*/
1187	rt_mutex_set_owner(lock, owner: task);
1188
1189	return `1`;
1190	}
1191
1192	/*
1193	* Task blocks on lock.
1194	*
1195	* Prepare waiter and propagate pi chain
1196	*
1197	* This must be called with lock->wait_lock held and interrupts disabled
1198	*/
1199	static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
1200	struct rt_mutex_waiter *waiter,
1201	struct task_struct *task,
1202	struct ww_acquire_ctx *ww_ctx,
1203	enum rtmutex_chainwalk chwalk)
1204	{
1205	struct task_struct *owner = rt_mutex_owner(lock);
1206	struct rt_mutex_waiter *top_waiter = waiter;
1207	struct rt_mutex_base *next_lock;
1208	int chain_walk = `0`, res;
1209
1210	lockdep_assert_held(&lock->wait_lock);
1211
1212	/*
1213	* Early deadlock detection. We really don't want the task to
1214	* enqueue on itself just to untangle the mess later. It's not
1215	* only an optimization. We drop the locks, so another waiter
1216	* can come in before the chain walk detects the deadlock. So
1217	* the other will detect the deadlock and return -EDEADLOCK,
1218	* which is wrong, as the other waiter is not in a deadlock
1219	* situation.
1220	*
1221	* Except for ww_mutex, in that case the chain walk must already deal
1222	* with spurious cycles, see the comments at [3] and [6].
1223	*/
1224	if (owner == task && !(build_ww_mutex() && ww_ctx))
1225	return -EDEADLK;
1226
1227	raw_spin_lock(&task->pi_lock);
1228	waiter->task = task;
1229	waiter->lock = lock;
1230	waiter_update_prio(waiter, task);
1231	waiter_clone_prio(waiter, task);
1232
1233	/ Get the top priority waiter on the lock /
1234	if (rt_mutex_has_waiters(lock))
1235	top_waiter = rt_mutex_top_waiter(lock);
1236	rt_mutex_enqueue(lock, waiter);
1237
1238	task->pi_blocked_on = waiter;
1239
1240	raw_spin_unlock(&task->pi_lock);
1241
1242	if (build_ww_mutex() && ww_ctx) {
1243	struct rt_mutex *rtm;
1244
1245	/ Check whether the waiter should back out immediately /
1246	rtm = container_of(lock, struct rt_mutex, rtmutex);
1247	res = __ww_mutex_add_waiter(waiter, lock: rtm, ww_ctx);
1248	if (res) {
1249	raw_spin_lock(&task->pi_lock);
1250	rt_mutex_dequeue(lock, waiter);
1251	task->pi_blocked_on = NULL;
1252	raw_spin_unlock(&task->pi_lock);
1253	return res;
1254	}
1255	}
1256
1257	if (!owner)
1258	return `0`;
1259
1260	raw_spin_lock(&owner->pi_lock);
1261	if (waiter == rt_mutex_top_waiter(lock)) {
1262	rt_mutex_dequeue_pi(task: owner, waiter: top_waiter);
1263	rt_mutex_enqueue_pi(task: owner, waiter);
1264
1265	rt_mutex_adjust_prio(lock, p: owner);
1266	if (owner->pi_blocked_on)
1267	chain_walk = `1`;
1268	} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
1269	chain_walk = `1`;
1270	}
1271
1272	/ Store the lock on which owner is blocked or NULL /
1273	next_lock = task_blocked_on_lock(p: owner);
1274
1275	raw_spin_unlock(&owner->pi_lock);
1276	/*
1277	* Even if full deadlock detection is on, if the owner is not
1278	* blocked itself, we can avoid finding this out in the chain
1279	* walk.
1280	*/
1281	if (!chain_walk \|\| !next_lock)
1282	return `0`;
1283
1284	/*
1285	* The owner can't disappear while holding a lock,
1286	* so the owner struct is protected by wait_lock.
1287	* Gets dropped in rt_mutex_adjust_prio_chain()!
1288	*/
1289	get_task_struct(t: owner);
1290
1291	raw_spin_unlock_irq(&lock->wait_lock);
1292
1293	res = rt_mutex_adjust_prio_chain(task: owner, chwalk, orig_lock: lock,
1294	next_lock, orig_waiter: waiter, top_task: task);
1295
1296	raw_spin_lock_irq(&lock->wait_lock);
1297
1298	return res;
1299	}
1300
1301	/*
1302	* Remove the top waiter from the current tasks pi waiter tree and
1303	* queue it up.
1304	*
1305	* Called with lock->wait_lock held and interrupts disabled.
1306	*/
1307	static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
1308	struct rt_mutex_base *lock)
1309	{
1310	struct rt_mutex_waiter *waiter;
1311
1312	lockdep_assert_held(&lock->wait_lock);
1313
1314	raw_spin_lock(&current->pi_lock);
1315
1316	waiter = rt_mutex_top_waiter(lock);
1317
1318	/*
1319	* Remove it from current->pi_waiters and deboost.
1320	*
1321	* We must in fact deboost here in order to ensure we call
1322	* rt_mutex_setprio() to update p->pi_top_task before the
1323	* task unblocks.
1324	*/
1325	rt_mutex_dequeue_pi(current, waiter);
1326	rt_mutex_adjust_prio(lock, current);
1327
1328	/*
1329	* As we are waking up the top waiter, and the waiter stays
1330	* queued on the lock until it gets the lock, this lock
1331	* obviously has waiters. Just set the bit here and this has
1332	* the added benefit of forcing all new tasks into the
1333	* slow path making sure no task of lower priority than
1334	* the top waiter can steal this lock.
1335	*/
1336	lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
1337
1338	/*
1339	* We deboosted before waking the top waiter task such that we don't
1340	* run two tasks with the 'same' priority (and ensure the
1341	* p->pi_top_task pointer points to a blocked task). This however can
1342	* lead to priority inversion if we would get preempted after the
1343	* deboost but before waking our donor task, hence the preempt_disable()
1344	* before unlock.
1345	*
1346	* Pairs with preempt_enable() in rt_mutex_wake_up_q();
1347	*/
1348	preempt_disable();
1349	rt_mutex_wake_q_add(wqh, w: waiter);
1350	raw_spin_unlock(&current->pi_lock);
1351	}
1352
1353	static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock)
1354	{
1355	int ret = try_to_take_rt_mutex(lock, current, NULL);
1356
1357	/*
1358	* try_to_take_rt_mutex() sets the lock waiters bit
1359	* unconditionally. Clean this up.
1360	*/
1361	fixup_rt_mutex_waiters(lock, acquire_lock: true);
1362
1363	return ret;
1364	}
1365
1366	/*
1367	* Slow path try-lock function:
1368	*/
1369	static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock)
1370	{
1371	unsigned long flags;
1372	int ret;
1373
1374	/*
1375	* If the lock already has an owner we fail to get the lock.
1376	* This can be done without taking the @lock->wait_lock as
1377	* it is only being read, and this is a trylock anyway.
1378	*/
1379	if (rt_mutex_owner(lock))
1380	return `0`;
1381
1382	/*
1383	* The mutex has currently no owner. Lock the wait lock and try to
1384	* acquire the lock. We use irqsave here to support early boot calls.
1385	*/
1386	raw_spin_lock_irqsave(&lock->wait_lock, flags);
1387
1388	ret = __rt_mutex_slowtrylock(lock);
1389
1390	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1391
1392	return ret;
1393	}
1394
1395	static __always_inline int __rt_mutex_trylock(struct rt_mutex_base *lock)
1396	{
1397	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
1398	return `1`;
1399
1400	return rt_mutex_slowtrylock(lock);
1401	}
1402
1403	/*
1404	* Slow path to release a rt-mutex.
1405	*/
1406	static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock)
1407	{
1408	DEFINE_RT_WAKE_Q(wqh);
1409	unsigned long flags;
1410
1411	/ irqsave required to support early boot calls /
1412	raw_spin_lock_irqsave(&lock->wait_lock, flags);
1413
1414	debug_rt_mutex_unlock(lock);
1415
1416	/*
1417	* We must be careful here if the fast path is enabled. If we
1418	* have no waiters queued we cannot set owner to NULL here
1419	* because of:
1420	*
1421	* foo->lock->owner = NULL;
1422	* rtmutex_lock(foo->lock); <- fast path
1423	* free = atomic_dec_and_test(foo->refcnt);
1424	* rtmutex_unlock(foo->lock); <- fast path
1425	* if (free)
1426	* kfree(foo);
1427	* raw_spin_unlock(foo->lock->wait_lock);
1428	*
1429	* So for the fastpath enabled kernel:
1430	*
1431	* Nothing can set the waiters bit as long as we hold
1432	* lock->wait_lock. So we do the following sequence:
1433	*
1434	* owner = rt_mutex_owner(lock);
1435	* clear_rt_mutex_waiters(lock);
1436	* raw_spin_unlock(&lock->wait_lock);
1437	* if (cmpxchg(&lock->owner, owner, 0) == owner)
1438	* return;
1439	* goto retry;
1440	*
1441	* The fastpath disabled variant is simple as all access to
1442	* lock->owner is serialized by lock->wait_lock:
1443	*
1444	* lock->owner = NULL;
1445	* raw_spin_unlock(&lock->wait_lock);
1446	*/
1447	while (!rt_mutex_has_waiters(lock)) {
1448	/ Drops lock->wait_lock ! /
1449	if (unlock_rt_mutex_safe(lock, flags) == true)
1450	return;
1451	/ Relock the rtmutex and try again /
1452	raw_spin_lock_irqsave(&lock->wait_lock, flags);
1453	}
1454
1455	/*
1456	* The wakeup next waiter path does not suffer from the above
1457	* race. See the comments there.
1458	*
1459	* Queue the next waiter for wakeup once we release the wait_lock.
1460	*/
1461	mark_wakeup_next_waiter(wqh: &wqh, lock);
1462	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1463
1464	rt_mutex_wake_up_q(wqh: &wqh);
1465	}
1466
1467	static __always_inline void __rt_mutex_unlock(struct rt_mutex_base *lock)
1468	{
1469	if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
1470	return;
1471
1472	rt_mutex_slowunlock(lock);
1473	}
1474
1475	#ifdef CONFIG_SMP
1476	static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
1477	struct rt_mutex_waiter *waiter,
1478	struct task_struct *owner)
1479	{
1480	bool res = true;
1481
1482	rcu_read_lock();
1483	for (;;) {
1484	/ If owner changed, trylock again. /
1485	if (owner != rt_mutex_owner(lock))
1486	break;
1487	/*
1488	* Ensure that @owner is dereferenced after checking that
1489	* the lock owner still matches @owner. If that fails,
1490	* @owner might point to freed memory. If it still matches,
1491	* the rcu_read_lock() ensures the memory stays valid.
1492	*/
1493	barrier();
1494	/*
1495	* Stop spinning when:
1496	* - the lock owner has been scheduled out
1497	* - current is not longer the top waiter
1498	* - current is requested to reschedule (redundant
1499	* for CONFIG_PREEMPT_RCU=y)
1500	* - the VCPU on which owner runs is preempted
1501	*/
1502	if (!owner_on_cpu(owner) \|\| need_resched() \|\|
1503	!rt_mutex_waiter_is_top_waiter(lock, waiter)) {
1504	res = false;
1505	break;
1506	}
1507	cpu_relax();
1508	}
1509	rcu_read_unlock();
1510	return res;
1511	}
1512	#else
1513	static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
1514	struct rt_mutex_waiter *waiter,
1515	struct task_struct *owner)
1516	{
1517	return false;
1518	}
1519	#endif
1520
1521	#ifdef RT_MUTEX_BUILD_MUTEX
1522	/*
1523	* Functions required for:
1524	* - rtmutex, futex on all kernels
1525	* - mutex and rwsem substitutions on RT kernels
1526	*/
1527
1528	/*
1529	* Remove a waiter from a lock and give up
1530	*
1531	* Must be called with lock->wait_lock held and interrupts disabled. It must
1532	* have just failed to try_to_take_rt_mutex().
1533	*/
1534	static void __sched remove_waiter(struct rt_mutex_base *lock,
1535	struct rt_mutex_waiter *waiter)
1536	{
1537	bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
1538	struct task_struct *owner = rt_mutex_owner(lock);
1539	struct rt_mutex_base *next_lock;
1540
1541	lockdep_assert_held(&lock->wait_lock);
1542
1543	raw_spin_lock(&current->pi_lock);
1544	rt_mutex_dequeue(lock, waiter);
1545	current->pi_blocked_on = NULL;
1546	raw_spin_unlock(&current->pi_lock);
1547
1548	/*
1549	* Only update priority if the waiter was the highest priority
1550	* waiter of the lock and there is an owner to update.
1551	*/
1552	if (!owner \|\| !is_top_waiter)
1553	return;
1554
1555	raw_spin_lock(&owner->pi_lock);
1556
1557	rt_mutex_dequeue_pi(owner, waiter);
1558
1559	if (rt_mutex_has_waiters(lock))
1560	rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
1561
1562	rt_mutex_adjust_prio(lock, owner);
1563
1564	/ Store the lock on which owner is blocked or NULL /
1565	next_lock = task_blocked_on_lock(owner);
1566
1567	raw_spin_unlock(&owner->pi_lock);
1568
1569	/*
1570	* Don't walk the chain, if the owner task is not blocked
1571	* itself.
1572	*/
1573	if (!next_lock)
1574	return;
1575
1576	/ gets dropped in rt_mutex_adjust_prio_chain()! /
1577	get_task_struct(owner);
1578
1579	raw_spin_unlock_irq(&lock->wait_lock);
1580
1581	rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
1582	next_lock, NULL, current);
1583
1584	raw_spin_lock_irq(&lock->wait_lock);
1585	}
1586
1587	/**
1588	* rt_mutex_slowlock_block() - Perform the wait-wake-try-to-take loop
1589	* @lock: the rt_mutex to take
1590	* @ww_ctx: WW mutex context pointer
1591	* @state: the state the task should block in (TASK_INTERRUPTIBLE
1592	* or TASK_UNINTERRUPTIBLE)
1593	* @timeout: the pre-initialized and started timer, or NULL for none
1594	* @waiter: the pre-initialized rt_mutex_waiter
1595	*
1596	* Must be called with lock->wait_lock held and interrupts disabled
1597	*/
1598	static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
1599	struct ww_acquire_ctx *ww_ctx,
1600	unsigned int state,
1601	struct hrtimer_sleeper *timeout,
1602	struct rt_mutex_waiter *waiter)
1603	{
1604	struct rt_mutex rtm = container_of(lock, struct* rt_mutex, rtmutex);
1605	struct task_struct *owner;
1606	int ret = `0`;
1607
1608	for (;;) {
1609	/ Try to acquire the lock: /
1610	if (try_to_take_rt_mutex(lock, current, waiter))
1611	break;
1612
1613	if (timeout && !timeout->task) {
1614	ret = -ETIMEDOUT;
1615	break;
1616	}
1617	if (signal_pending_state(state, current)) {
1618	ret = -EINTR;
1619	break;
1620	}
1621
1622	if (build_ww_mutex() && ww_ctx) {
1623	ret = __ww_mutex_check_kill(rtm, waiter, ww_ctx);
1624	if (ret)
1625	break;
1626	}
1627
1628	if (waiter == rt_mutex_top_waiter(lock))
1629	owner = rt_mutex_owner(lock);
1630	else
1631	owner = NULL;
1632	raw_spin_unlock_irq(&lock->wait_lock);
1633
1634	if (!owner \|\| !rtmutex_spin_on_owner(lock, waiter, owner))
1635	rt_mutex_schedule();
1636
1637	raw_spin_lock_irq(&lock->wait_lock);
1638	set_current_state(state);
1639	}
1640
1641	__set_current_state(TASK_RUNNING);
1642	return ret;
1643	}
1644
1645	static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
1646	struct rt_mutex_waiter *w)
1647	{
1648	/*
1649	* If the result is not -EDEADLOCK or the caller requested
1650	* deadlock detection, nothing to do here.
1651	*/
1652	if (res != -EDEADLOCK \|\| detect_deadlock)
1653	return;
1654
1655	if (build_ww_mutex() && w->ww_ctx)
1656	return;
1657
1658	/*
1659	* Yell loudly and stop the task right here.
1660	*/
1661	WARN(`1`, "rtmutex deadlock detected\n");
1662	while (`1`) {
1663	set_current_state(TASK_INTERRUPTIBLE);
1664	rt_mutex_schedule();
1665	}
1666	}
1667
1668	/**
1669	* __rt_mutex_slowlock - Locking slowpath invoked with lock::wait_lock held
1670	* @lock: The rtmutex to block lock
1671	* @ww_ctx: WW mutex context pointer
1672	* @state: The task state for sleeping
1673	* @chwalk: Indicator whether full or partial chainwalk is requested
1674	* @waiter: Initializer waiter for blocking
1675	*/
1676	static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
1677	struct ww_acquire_ctx *ww_ctx,
1678	unsigned int state,
1679	enum rtmutex_chainwalk chwalk,
1680	struct rt_mutex_waiter *waiter)
1681	{
1682	struct rt_mutex rtm = container_of(lock, struct* rt_mutex, rtmutex);
1683	struct ww_mutex *ww = ww_container_of(rtm);
1684	int ret;
1685
1686	lockdep_assert_held(&lock->wait_lock);
1687
1688	/ Try to acquire the lock again: /
1689	if (try_to_take_rt_mutex(lock, current, NULL)) {
1690	if (build_ww_mutex() && ww_ctx) {
1691	__ww_mutex_check_waiters(rtm, ww_ctx);
1692	ww_mutex_lock_acquired(ww, ww_ctx);
1693	}
1694	return `0`;
1695	}
1696
1697	set_current_state(state);
1698
1699	trace_contention_begin(lock, LCB_F_RT);
1700
1701	ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
1702	if (likely(!ret))
1703	ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);
1704
1705	if (likely(!ret)) {
1706	/ acquired the lock /
1707	if (build_ww_mutex() && ww_ctx) {
1708	if (!ww_ctx->is_wait_die)
1709	__ww_mutex_check_waiters(rtm, ww_ctx);
1710	ww_mutex_lock_acquired(ww, ww_ctx);
1711	}
1712	} else {
1713	__set_current_state(TASK_RUNNING);
1714	remove_waiter(lock, waiter);
1715	rt_mutex_handle_deadlock(ret, chwalk, waiter);
1716	}
1717
1718	/*
1719	* try_to_take_rt_mutex() sets the waiter bit
1720	* unconditionally. We might have to fix that up.
1721	*/
1722	fixup_rt_mutex_waiters(lock, true);
1723
1724	trace_contention_end(lock, ret);
1725
1726	return ret;
1727	}
1728
1729	static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
1730	struct ww_acquire_ctx *ww_ctx,
1731	unsigned int state)
1732	{
1733	struct rt_mutex_waiter waiter;
1734	int ret;
1735
1736	rt_mutex_init_waiter(&waiter);
1737	waiter.ww_ctx = ww_ctx;
1738
1739	ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK,
1740	&waiter);
1741
1742	debug_rt_mutex_free_waiter(&waiter);
1743	return ret;
1744	}
1745
1746	/*
1747	* rt_mutex_slowlock - Locking slowpath invoked when fast path fails
1748	* @lock: The rtmutex to block lock
1749	* @ww_ctx: WW mutex context pointer
1750	* @state: The task state for sleeping
1751	*/
1752	static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
1753	struct ww_acquire_ctx *ww_ctx,
1754	unsigned int state)
1755	{
1756	unsigned long flags;
1757	int ret;
1758
1759	/*
1760	* Do all pre-schedule work here, before we queue a waiter and invoke
1761	* PI -- any such work that trips on rtlock (PREEMPT_RT spinlock) would
1762	* otherwise recurse back into task_blocks_on_rt_mutex() through
1763	* rtlock_slowlock() and will then enqueue a second waiter for this
1764	* same task and things get really confusing real fast.
1765	*/
1766	rt_mutex_pre_schedule();
1767
1768	/*
1769	* Technically we could use raw_spin_[un]lock_irq() here, but this can
1770	* be called in early boot if the cmpxchg() fast path is disabled
1771	* (debug, no architecture support). In this case we will acquire the
1772	* rtmutex with lock->wait_lock held. But we cannot unconditionally
1773	* enable interrupts in that early boot case. So we need to use the
1774	* irqsave/restore variants.
1775	*/
1776	raw_spin_lock_irqsave(&lock->wait_lock, flags);
1777	ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state);
1778	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1779	rt_mutex_post_schedule();
1780
1781	return ret;
1782	}
1783
1784	static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
1785	unsigned int state)
1786	{
1787	lockdep_assert(!current->pi_blocked_on);
1788
1789	if (likely(rt_mutex_try_acquire(lock)))
1790	return `0`;
1791
1792	return rt_mutex_slowlock(lock, NULL, state);
1793	}
1794	#endif /* RT_MUTEX_BUILD_MUTEX */
1795
1796	#ifdef RT_MUTEX_BUILD_SPINLOCKS
1797	/*
1798	* Functions required for spin/rw_lock substitution on RT kernels
1799	*/
1800
1801	/**
1802	* rtlock_slowlock_locked - Slow path lock acquisition for RT locks
1803	* @lock: The underlying RT mutex
1804	*/
1805	static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
1806	{
1807	struct rt_mutex_waiter waiter;
1808	struct task_struct *owner;
1809
1810	lockdep_assert_held(&lock->wait_lock);
1811
1812	if (try_to_take_rt_mutex(lock, current, NULL))
1813	return;
1814
1815	rt_mutex_init_rtlock_waiter(&waiter);
1816
1817	/ Save current state and set state to TASK_RTLOCK_WAIT /
1818	current_save_and_set_rtlock_wait_state();
1819
1820	trace_contention_begin(lock, LCB_F_RT);
1821
1822	task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);
1823
1824	for (;;) {
1825	/ Try to acquire the lock again /
1826	if (try_to_take_rt_mutex(lock, current, &waiter))
1827	break;
1828
1829	if (&waiter == rt_mutex_top_waiter(lock))
1830	owner = rt_mutex_owner(lock);
1831	else
1832	owner = NULL;
1833	raw_spin_unlock_irq(&lock->wait_lock);
1834
1835	if (!owner \|\| !rtmutex_spin_on_owner(lock, &waiter, owner))
1836	schedule_rtlock();
1837
1838	raw_spin_lock_irq(&lock->wait_lock);
1839	set_current_state(TASK_RTLOCK_WAIT);
1840	}
1841
1842	/ Restore the task state /
1843	current_restore_rtlock_saved_state();
1844
1845	/*
1846	* try_to_take_rt_mutex() sets the waiter bit unconditionally.
1847	* We might have to fix that up:
1848	*/
1849	fixup_rt_mutex_waiters(lock, true);
1850	debug_rt_mutex_free_waiter(&waiter);
1851
1852	trace_contention_end(lock, `0`);
1853	}
1854
1855	static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
1856	{
1857	unsigned long flags;
1858
1859	raw_spin_lock_irqsave(&lock->wait_lock, flags);
1860	rtlock_slowlock_locked(lock);
1861	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1862	}
1863
1864	#endif /* RT_MUTEX_BUILD_SPINLOCKS */
1865

source code of linux/kernel/locking/rtmutex.c