percpu-rwsem.c source code [linux/kernel/locking/percpu-rwsem.c]

1	// SPDX-License-Identifier: GPL-2.0-only
2	#include <linux/atomic.h>
3	#include <linux/percpu.h>
4	#include <linux/wait.h>
5	#include <linux/lockdep.h>
6	#include <linux/percpu-rwsem.h>
7	#include <linux/rcupdate.h>
8	#include <linux/sched.h>
9	#include <linux/sched/task.h>
10	#include <linux/sched/debug.h>
11	#include <linux/errno.h>
12	#include <trace/events/lock.h>
13
14	int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
15	const char name, struct* lock_class_key *key)
16	{
17	sem->read_count = alloc_percpu(int);
18	if (unlikely(!sem->read_count))
19	return -ENOMEM;
20
21	rcu_sync_init(&sem->rss);
22	rcuwait_init(w: &sem->writer);
23	init_waitqueue_head(&sem->waiters);
24	atomic_set(v: &sem->block, i: `0`);
25	#ifdef CONFIG_DEBUG_LOCK_ALLOC
26	debug_check_no_locks_freed(from: (void )sem, len: sizeof(sem));
27	lockdep_init_map(lock: &sem->dep_map, name, key, subclass: `0`);
28	#endif
29	return `0`;
30	}
31	EXPORT_SYMBOL_GPL(__percpu_init_rwsem);
32
33	void percpu_free_rwsem(struct percpu_rw_semaphore *sem)
34	{
35	/*
36	* XXX: temporary kludge. The error path in alloc_super()
37	* assumes that percpu_free_rwsem() is safe after kzalloc().
38	*/
39	if (!sem->read_count)
40	return;
41
42	rcu_sync_dtor(&sem->rss);
43	free_percpu(pdata: sem->read_count);
44	sem->read_count = NULL; / catch use after free bugs /
45	}
46	EXPORT_SYMBOL_GPL(percpu_free_rwsem);
47
48	static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
49	{
50	this_cpu_inc(*sem->read_count);
51
52	/*
53	* Due to having preemption disabled the decrement happens on
54	* the same CPU as the increment, avoiding the
55	* increment-on-one-CPU-and-decrement-on-another problem.
56	*
57	* If the reader misses the writer's assignment of sem->block, then the
58	* writer is guaranteed to see the reader's increment.
59	*
60	* Conversely, any readers that increment their sem->read_count after
61	* the writer looks are guaranteed to see the sem->block value, which
62	* in turn means that they are guaranteed to immediately decrement
63	* their sem->read_count, so that it doesn't matter that the writer
64	* missed them.
65	*/
66
67	smp_mb(); / A matches D /
68
69	/*
70	* If !sem->block the critical section starts here, matched by the
71	* release in percpu_up_write().
72	*/
73	if (likely(!atomic_read_acquire(&sem->block)))
74	return true;
75
76	this_cpu_dec(*sem->read_count);
77
78	/ Prod writer to re-evaluate readers_active_check() /
79	rcuwait_wake_up(w: &sem->writer);
80
81	return false;
82	}
83
84	static inline bool __percpu_down_write_trylock(struct percpu_rw_semaphore *sem)
85	{
86	if (atomic_read(v: &sem->block))
87	return false;
88
89	return atomic_xchg(v: &sem->block, new: `1`) == `0`;
90	}
91
92	static bool __percpu_rwsem_trylock(struct percpu_rw_semaphore *sem, bool reader)
93	{
94	if (reader) {
95	bool ret;
96
97	preempt_disable();
98	ret = __percpu_down_read_trylock(sem);
99	preempt_enable();
100
101	return ret;
102	}
103	return __percpu_down_write_trylock(sem);
104	}
105
106	/*
107	* The return value of wait_queue_entry::func means:
108	*
109	* <0 - error, wakeup is terminated and the error is returned
110	* 0 - no wakeup, a next waiter is tried
111	* >0 - woken, if EXCLUSIVE, counted towards @nr_exclusive.
112	*
113	* We use EXCLUSIVE for both readers and writers to preserve FIFO order,
114	* and play games with the return value to allow waking multiple readers.
115	*
116	* Specifically, we wake readers until we've woken a single writer, or until a
117	* trylock fails.
118	*/
119	static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
120	unsigned int mode, int wake_flags,
121	void *key)
122	{
123	bool reader = wq_entry->flags & WQ_FLAG_CUSTOM;
124	struct percpu_rw_semaphore *sem = key;
125	struct task_struct *p;
126
127	/ concurrent against percpu_down_write(), can get stolen /
128	if (!__percpu_rwsem_trylock(sem, reader))
129	return `1`;
130
131	p = get_task_struct(t: wq_entry->private);
132	list_del_init(entry: &wq_entry->entry);
133	smp_store_release(&wq_entry->private, NULL);
134
135	wake_up_process(tsk: p);
136	put_task_struct(t: p);
137
138	return !reader; / wake (readers until) 1 writer /
139	}
140
141	static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
142	{
143	DEFINE_WAIT_FUNC(wq_entry, percpu_rwsem_wake_function);
144	bool wait;
145
146	spin_lock_irq(lock: &sem->waiters.lock);
147	/*
148	* Serialize against the wakeup in percpu_up_write(), if we fail
149	* the trylock, the wakeup must see us on the list.
150	*/
151	wait = !__percpu_rwsem_trylock(sem, reader);
152	if (wait) {
153	wq_entry.flags \|= WQ_FLAG_EXCLUSIVE \| reader * WQ_FLAG_CUSTOM;
154	__add_wait_queue_entry_tail(wq_head: &sem->waiters, wq_entry: &wq_entry);
155	}
156	spin_unlock_irq(lock: &sem->waiters.lock);
157
158	while (wait) {
159	set_current_state(TASK_UNINTERRUPTIBLE);
160	if (!smp_load_acquire(&wq_entry.private))
161	break;
162	schedule();
163	}
164	__set_current_state(TASK_RUNNING);
165	}
166
167	bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
168	{
169	if (__percpu_down_read_trylock(sem))
170	return true;
171
172	if (try)
173	return false;
174
175	trace_contention_begin(lock: sem, LCB_F_PERCPU \| LCB_F_READ);
176	preempt_enable();
177	percpu_rwsem_wait(sem, / .reader = / true);
178	preempt_disable();
179	trace_contention_end(lock: sem, ret: `0`);
180
181	return true;
182	}
183	EXPORT_SYMBOL_GPL(__percpu_down_read);
184
/*
 * Evaluate to the sum of the per-CPU variable @var over every possible CPU.
 * The accumulator has the type of @var and is subject to a compile-time
 * type check.
 */
#define per_cpu_sum(var)						\
({									\
	typeof(var) __total = 0;					\
	int cpu;							\
	compiletime_assert_atomic_type(__total);			\
	for_each_possible_cpu(cpu)					\
		__total += per_cpu(var, cpu);				\
	__total;							\
})
194
195	bool percpu_is_read_locked(struct percpu_rw_semaphore *sem)
196	{
197	return per_cpu_sum(*sem->read_count) != `0` && !atomic_read(v: &sem->block);
198	}
199	EXPORT_SYMBOL_GPL(percpu_is_read_locked);
200
201	/*
202	* Return true if the modular sum of the sem->read_count per-CPU variable is
203	* zero. If this sum is zero, then it is stable due to the fact that if any
204	* newly arriving readers increment a given counter, they will immediately
205	* decrement that same counter.
206	*
207	* Assumes sem->block is set.
208	*/
209	static bool readers_active_check(struct percpu_rw_semaphore *sem)
210	{
211	if (per_cpu_sum(*sem->read_count) != `0`)
212	return false;
213
214	/*
215	* If we observed the decrement; ensure we see the entire critical
216	* section.
217	*/
218
219	smp_mb(); / C matches B /
220
221	return true;
222	}
223
224	void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
225	{
226	might_sleep();
227	rwsem_acquire(&sem->dep_map, `0`, `0`, _RET_IP_);
228	trace_contention_begin(lock: sem, LCB_F_PERCPU \| LCB_F_WRITE);
229
230	/ Notify readers to take the slow path. /
231	rcu_sync_enter(&sem->rss);
232
233	/*
234	* Try set sem->block; this provides writer-writer exclusion.
235	* Having sem->block set makes new readers block.
236	*/
237	if (!__percpu_down_write_trylock(sem))
238	percpu_rwsem_wait(sem, / .reader = / false);
239
240	/ smp_mb() implied by __percpu_down_write_trylock() on success -- D matches A /
241
242	/*
243	* If they don't see our store of sem->block, then we are guaranteed to
244	* see their sem->read_count increment, and therefore will wait for
245	* them.
246	*/
247
248	/ Wait for all active readers to complete. /
249	rcuwait_wait_event(&sem->writer, readers_active_check(sem), TASK_UNINTERRUPTIBLE);
250	trace_contention_end(lock: sem, ret: `0`);
251	}
252	EXPORT_SYMBOL_GPL(percpu_down_write);
253
254	void percpu_up_write(struct percpu_rw_semaphore *sem)
255	{
256	rwsem_release(&sem->dep_map, _RET_IP_);
257
258	/*
259	* Signal the writer is done, no fast path yet.
260	*
261	* One reason that we cannot just immediately flip to readers_fast is
262	* that new readers might fail to see the results of this writer's
263	* critical section.
264	*
265	* Therefore we force it through the slow path which guarantees an
266	* acquire and thereby guarantees the critical section's consistency.
267	*/
268	atomic_set_release(v: &sem->block, i: `0`);
269
270	/*
271	* Prod any pending reader/writer to make progress.
272	*/
273	__wake_up(wq_head: &sem->waiters, TASK_NORMAL, nr: `1`, key: sem);
274
275	/*
276	* Once this completes (at least one RCU-sched grace period hence) the
277	* reader fast path will be available again. Safe to use outside the
278	* exclusive write lock because its counting.
279	*/
280	rcu_sync_exit(&sem->rss);
281	}
282	EXPORT_SYMBOL_GPL(percpu_up_write);
283

source code of linux/kernel/locking/percpu-rwsem.c