1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | |
3 | #ifndef _LINUX_SIX_H |
4 | #define _LINUX_SIX_H |
5 | |
6 | /** |
7 | * DOC: SIX locks overview |
8 | * |
9 | * Shared/intent/exclusive locks: sleepable read/write locks, like rw semaphores |
10 | * but with an additional state: read/shared, intent, exclusive/write |
11 | * |
12 | * The purpose of the intent state is to allow for greater concurrency on tree |
13 | * structures without deadlocking. In general, a read can't be upgraded to a |
14 | * write lock without deadlocking, so an operation that updates multiple nodes |
15 | * will have to take write locks for the full duration of the operation. |
16 | * |
17 | * But by adding an intent state, which is exclusive with other intent locks but |
18 | * not with readers, we can take intent locks at the start of the operation, |
19 | * and then take write locks only for the actual update to each individual |
20 | * nodes, without deadlocking. |
21 | * |
22 | * Example usage: |
23 | * six_lock_read(&foo->lock); |
24 | * six_unlock_read(&foo->lock); |
25 | * |
26 | * An intent lock must be held before taking a write lock: |
27 | * six_lock_intent(&foo->lock); |
28 | * six_lock_write(&foo->lock); |
29 | * six_unlock_write(&foo->lock); |
30 | * six_unlock_intent(&foo->lock); |
31 | * |
32 | * Other operations: |
33 | * six_trylock_read() |
34 | * six_trylock_intent() |
35 | * six_trylock_write() |
36 | * |
37 | * six_lock_downgrade() convert from intent to read |
38 | * six_lock_tryupgrade() attempt to convert from read to intent, may fail |
39 | * |
40 | * There are also interfaces that take the lock type as an enum: |
41 | * |
42 | * six_lock_type(&foo->lock, SIX_LOCK_read); |
43 | * six_trylock_convert(&foo->lock, SIX_LOCK_read, SIX_LOCK_intent) |
44 | * six_lock_type(&foo->lock, SIX_LOCK_write); |
45 | * six_unlock_type(&foo->lock, SIX_LOCK_write); |
46 | * six_unlock_type(&foo->lock, SIX_LOCK_intent); |
47 | * |
48 | * Lock sequence numbers - unlock(), relock(): |
49 | * |
50 | * Locks embed sequences numbers, which are incremented on write lock/unlock. |
51 | * This allows locks to be dropped and the retaken iff the state they protect |
52 | * hasn't changed; this makes it much easier to avoid holding locks while e.g. |
53 | * doing IO or allocating memory. |
54 | * |
55 | * Example usage: |
56 | * six_lock_read(&foo->lock); |
57 | * u32 seq = six_lock_seq(&foo->lock); |
58 | * six_unlock_read(&foo->lock); |
59 | * |
60 | * some_operation_that_may_block(); |
61 | * |
62 | * if (six_relock_read(&foo->lock, seq)) { ... } |
63 | * |
64 | * If the relock operation succeeds, it is as if the lock was never unlocked. |
65 | * |
66 | * Reentrancy: |
67 | * |
68 | * Six locks are not by themselves reentrant, but have counters for both the |
69 | * read and intent states that can be used to provide reentrancy by an upper |
70 | * layer that tracks held locks. If a lock is known to already be held in the |
71 | * read or intent state, six_lock_increment() can be used to bump the "lock |
72 | * held in this state" counter, increasing the number of unlock calls that |
73 | * will be required to fully unlock it. |
74 | * |
75 | * Example usage: |
76 | * six_lock_read(&foo->lock); |
77 | * six_lock_increment(&foo->lock, SIX_LOCK_read); |
78 | * six_unlock_read(&foo->lock); |
79 | * six_unlock_read(&foo->lock); |
80 | * foo->lock is now fully unlocked. |
81 | * |
82 | * Since the intent state supercedes read, it's legal to increment the read |
83 | * counter when holding an intent lock, but not the reverse. |
84 | * |
85 | * A lock may only be held once for write: six_lock_increment(.., SIX_LOCK_write) |
86 | * is not legal. |
87 | * |
88 | * should_sleep_fn: |
89 | * |
90 | * There is a six_lock() variant that takes a function pointer that is called |
91 | * immediately prior to schedule() when blocking, and may return an error to |
92 | * abort. |
93 | * |
94 | * One possible use for this feature is when objects being locked are part of |
95 | * a cache and may reused, and lock ordering is based on a property of the |
96 | * object that will change when the object is reused - i.e. logical key order. |
97 | * |
98 | * If looking up an object in the cache may race with object reuse, and lock |
99 | * ordering is required to prevent deadlock, object reuse may change the |
100 | * correct lock order for that object and cause a deadlock. should_sleep_fn |
101 | * can be used to check if the object is still the object we want and avoid |
102 | * this deadlock. |
103 | * |
104 | * Wait list entry interface: |
105 | * |
106 | * There is a six_lock() variant, six_lock_waiter(), that takes a pointer to a |
107 | * wait list entry. By embedding six_lock_waiter into another object, and by |
108 | * traversing lock waitlists, it is then possible for an upper layer to |
109 | * implement full cycle detection for deadlock avoidance. |
110 | * |
111 | * should_sleep_fn should be used for invoking the cycle detector, walking the |
112 | * graph of held locks to check for a deadlock. The upper layer must track |
113 | * held locks for each thread, and each thread's held locks must be reachable |
114 | * from its six_lock_waiter object. |
115 | * |
116 | * six_lock_waiter() will add the wait object to the waitlist re-trying taking |
117 | * the lock, and before calling should_sleep_fn, and the wait object will not |
118 | * be removed from the waitlist until either the lock has been successfully |
119 | * acquired, or we aborted because should_sleep_fn returned an error. |
120 | * |
121 | * Also, six_lock_waiter contains a timestamp, and waiters on a waitlist will |
122 | * have timestamps in strictly ascending order - this is so the timestamp can |
123 | * be used as a cursor for lock graph traverse. |
124 | */ |
125 | |
126 | #include <linux/lockdep.h> |
127 | #include <linux/sched.h> |
128 | #include <linux/types.h> |
129 | |
130 | enum six_lock_type { |
131 | SIX_LOCK_read, |
132 | SIX_LOCK_intent, |
133 | SIX_LOCK_write, |
134 | }; |
135 | |
136 | struct six_lock { |
137 | atomic_t state; |
138 | u32 seq; |
139 | unsigned intent_lock_recurse; |
140 | struct task_struct *owner; |
141 | unsigned __percpu *readers; |
142 | raw_spinlock_t wait_lock; |
143 | struct list_head wait_list; |
144 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
145 | struct lockdep_map dep_map; |
146 | #endif |
147 | }; |
148 | |
149 | struct six_lock_waiter { |
150 | struct list_head list; |
151 | struct task_struct *task; |
152 | enum six_lock_type lock_want; |
153 | bool lock_acquired; |
154 | u64 start_time; |
155 | }; |
156 | |
157 | typedef int (*six_lock_should_sleep_fn)(struct six_lock *lock, void *); |
158 | |
159 | void six_lock_exit(struct six_lock *lock); |
160 | |
161 | enum six_lock_init_flags { |
162 | SIX_LOCK_INIT_PCPU = 1U << 0, |
163 | }; |
164 | |
165 | void __six_lock_init(struct six_lock *lock, const char *name, |
166 | struct lock_class_key *key, enum six_lock_init_flags flags); |
167 | |
168 | /** |
169 | * six_lock_init - initialize a six lock |
170 | * @lock: lock to initialize |
171 | * @flags: optional flags, i.e. SIX_LOCK_INIT_PCPU |
172 | */ |
173 | #define six_lock_init(lock, flags) \ |
174 | do { \ |
175 | static struct lock_class_key __key; \ |
176 | \ |
177 | __six_lock_init((lock), #lock, &__key, flags); \ |
178 | } while (0) |
179 | |
180 | /** |
181 | * six_lock_seq - obtain current lock sequence number |
182 | * @lock: six_lock to obtain sequence number for |
183 | * |
184 | * @lock should be held for read or intent, and not write |
185 | * |
186 | * By saving the lock sequence number, we can unlock @lock and then (typically |
187 | * after some blocking operation) attempt to relock it: the relock will succeed |
188 | * if the sequence number hasn't changed, meaning no write locks have been taken |
189 | * and state corresponding to what @lock protects is still valid. |
190 | */ |
191 | static inline u32 six_lock_seq(const struct six_lock *lock) |
192 | { |
193 | return lock->seq; |
194 | } |
195 | |
196 | bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip); |
197 | |
198 | /** |
199 | * six_trylock_type - attempt to take a six lock without blocking |
200 | * @lock: lock to take |
201 | * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write |
202 | * |
203 | * Return: true on success, false on failure. |
204 | */ |
205 | static inline bool six_trylock_type(struct six_lock *lock, enum six_lock_type type) |
206 | { |
207 | return six_trylock_ip(lock, type, _THIS_IP_); |
208 | } |
209 | |
210 | int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type, |
211 | struct six_lock_waiter *wait, |
212 | six_lock_should_sleep_fn should_sleep_fn, void *p, |
213 | unsigned long ip); |
214 | |
215 | /** |
216 | * six_lock_waiter - take a lock, with full waitlist interface |
217 | * @lock: lock to take |
218 | * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write |
219 | * @wait: pointer to wait object, which will be added to lock's waitlist |
220 | * @should_sleep_fn: callback run after adding to waitlist, immediately prior |
221 | * to scheduling |
222 | * @p: passed through to @should_sleep_fn |
223 | * |
224 | * This is a convenience wrapper around six_lock_ip_waiter(), see that function |
225 | * for full documentation. |
226 | * |
227 | * Return: 0 on success, or the return code from @should_sleep_fn on failure. |
228 | */ |
229 | static inline int six_lock_waiter(struct six_lock *lock, enum six_lock_type type, |
230 | struct six_lock_waiter *wait, |
231 | six_lock_should_sleep_fn should_sleep_fn, void *p) |
232 | { |
233 | return six_lock_ip_waiter(lock, type, wait, should_sleep_fn, p, _THIS_IP_); |
234 | } |
235 | |
236 | /** |
237 | * six_lock_ip - take a six lock lock |
238 | * @lock: lock to take |
239 | * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write |
240 | * @should_sleep_fn: callback run after adding to waitlist, immediately prior |
241 | * to scheduling |
242 | * @p: passed through to @should_sleep_fn |
243 | * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_ |
244 | * |
245 | * Return: 0 on success, or the return code from @should_sleep_fn on failure. |
246 | */ |
247 | static inline int six_lock_ip(struct six_lock *lock, enum six_lock_type type, |
248 | six_lock_should_sleep_fn should_sleep_fn, void *p, |
249 | unsigned long ip) |
250 | { |
251 | struct six_lock_waiter wait; |
252 | |
253 | return six_lock_ip_waiter(lock, type, wait: &wait, should_sleep_fn, p, ip); |
254 | } |
255 | |
256 | /** |
257 | * six_lock_type - take a six lock lock |
258 | * @lock: lock to take |
259 | * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write |
260 | * @should_sleep_fn: callback run after adding to waitlist, immediately prior |
261 | * to scheduling |
262 | * @p: passed through to @should_sleep_fn |
263 | * |
264 | * Return: 0 on success, or the return code from @should_sleep_fn on failure. |
265 | */ |
266 | static inline int six_lock_type(struct six_lock *lock, enum six_lock_type type, |
267 | six_lock_should_sleep_fn should_sleep_fn, void *p) |
268 | { |
269 | struct six_lock_waiter wait; |
270 | |
271 | return six_lock_ip_waiter(lock, type, wait: &wait, should_sleep_fn, p, _THIS_IP_); |
272 | } |
273 | |
274 | bool six_relock_ip(struct six_lock *lock, enum six_lock_type type, |
275 | unsigned seq, unsigned long ip); |
276 | |
277 | /** |
278 | * six_relock_type - attempt to re-take a lock that was held previously |
279 | * @lock: lock to take |
280 | * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write |
281 | * @seq: lock sequence number obtained from six_lock_seq() while lock was |
282 | * held previously |
283 | * |
284 | * Return: true on success, false on failure. |
285 | */ |
286 | static inline bool six_relock_type(struct six_lock *lock, enum six_lock_type type, |
287 | unsigned seq) |
288 | { |
289 | return six_relock_ip(lock, type, seq, _THIS_IP_); |
290 | } |
291 | |
292 | void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip); |
293 | |
294 | /** |
295 | * six_unlock_type - drop a six lock |
296 | * @lock: lock to unlock |
297 | * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write |
298 | * |
299 | * When a lock is held multiple times (because six_lock_incement()) was used), |
300 | * this decrements the 'lock held' counter by one. |
301 | * |
302 | * For example: |
303 | * six_lock_read(&foo->lock); read count 1 |
304 | * six_lock_increment(&foo->lock, SIX_LOCK_read); read count 2 |
305 | * six_lock_unlock(&foo->lock, SIX_LOCK_read); read count 1 |
306 | * six_lock_unlock(&foo->lock, SIX_LOCK_read); read count 0 |
307 | */ |
308 | static inline void six_unlock_type(struct six_lock *lock, enum six_lock_type type) |
309 | { |
310 | six_unlock_ip(lock, type, _THIS_IP_); |
311 | } |
312 | |
313 | #define __SIX_LOCK(type) \ |
314 | static inline bool six_trylock_ip_##type(struct six_lock *lock, unsigned long ip)\ |
315 | { \ |
316 | return six_trylock_ip(lock, SIX_LOCK_##type, ip); \ |
317 | } \ |
318 | \ |
319 | static inline bool six_trylock_##type(struct six_lock *lock) \ |
320 | { \ |
321 | return six_trylock_ip(lock, SIX_LOCK_##type, _THIS_IP_); \ |
322 | } \ |
323 | \ |
324 | static inline int six_lock_ip_waiter_##type(struct six_lock *lock, \ |
325 | struct six_lock_waiter *wait, \ |
326 | six_lock_should_sleep_fn should_sleep_fn, void *p,\ |
327 | unsigned long ip) \ |
328 | { \ |
329 | return six_lock_ip_waiter(lock, SIX_LOCK_##type, wait, should_sleep_fn, p, ip);\ |
330 | } \ |
331 | \ |
332 | static inline int six_lock_ip_##type(struct six_lock *lock, \ |
333 | six_lock_should_sleep_fn should_sleep_fn, void *p, \ |
334 | unsigned long ip) \ |
335 | { \ |
336 | return six_lock_ip(lock, SIX_LOCK_##type, should_sleep_fn, p, ip);\ |
337 | } \ |
338 | \ |
339 | static inline bool six_relock_ip_##type(struct six_lock *lock, u32 seq, unsigned long ip)\ |
340 | { \ |
341 | return six_relock_ip(lock, SIX_LOCK_##type, seq, ip); \ |
342 | } \ |
343 | \ |
344 | static inline bool six_relock_##type(struct six_lock *lock, u32 seq) \ |
345 | { \ |
346 | return six_relock_ip(lock, SIX_LOCK_##type, seq, _THIS_IP_); \ |
347 | } \ |
348 | \ |
349 | static inline int six_lock_##type(struct six_lock *lock, \ |
350 | six_lock_should_sleep_fn fn, void *p)\ |
351 | { \ |
352 | return six_lock_ip_##type(lock, fn, p, _THIS_IP_); \ |
353 | } \ |
354 | \ |
355 | static inline void six_unlock_ip_##type(struct six_lock *lock, unsigned long ip) \ |
356 | { \ |
357 | six_unlock_ip(lock, SIX_LOCK_##type, ip); \ |
358 | } \ |
359 | \ |
360 | static inline void six_unlock_##type(struct six_lock *lock) \ |
361 | { \ |
362 | six_unlock_ip(lock, SIX_LOCK_##type, _THIS_IP_); \ |
363 | } |
364 | |
365 | __SIX_LOCK(read) |
366 | __SIX_LOCK(intent) |
367 | __SIX_LOCK(write) |
368 | #undef __SIX_LOCK |
369 | |
370 | void six_lock_downgrade(struct six_lock *); |
371 | bool six_lock_tryupgrade(struct six_lock *); |
372 | bool six_trylock_convert(struct six_lock *, enum six_lock_type, |
373 | enum six_lock_type); |
374 | |
375 | void six_lock_increment(struct six_lock *, enum six_lock_type); |
376 | |
377 | void six_lock_wakeup_all(struct six_lock *); |
378 | |
379 | struct six_lock_count { |
380 | unsigned n[3]; |
381 | }; |
382 | |
383 | struct six_lock_count six_lock_counts(struct six_lock *); |
384 | void six_lock_readers_add(struct six_lock *, int); |
385 | |
386 | #endif /* _LINUX_SIX_H */ |
387 | |