// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Oracle. All rights reserved.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <asm/bug.h>
#include <trace/events/btrfs.h>
#include "misc.h"
#include "ctree.h"
#include "extent_io.h"
#include "locking.h"
#include "accessors.h"

/*
 * Lockdep class keys for extent_buffer->lock, per tree root. For a given
 * eb, the lockdep key is determined by the btrfs_root it belongs to and
 * the level the eb occupies in the tree.
 *
 * Different roots are used for different purposes, may nest inside each
 * other, and therefore require separate keysets. As lockdep keys should be
 * static, assign keysets according to the purpose of the root as indicated
 * by btrfs_root->root_key.objectid. This ensures that all special purpose
 * roots have separate keysets.
 *
 * Lock-nesting across peer nodes is always done with the immediate parent
 * node locked, thus preventing deadlock. As lockdep doesn't know this, use
 * subclass to avoid triggering lockdep warnings in such cases.
 *
 * The key is set by the readpage_end_io_hook after the buffer has passed
 * csum validation but before the pages are unlocked. It is also set by
 * btrfs_init_new_buffer on freshly allocated blocks.
 *
 * We also add a check to make sure the highest level of the tree is the
 * same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code
 * needs to be updated as well.
 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
#if BTRFS_MAX_LEVEL != 8
#error "DEFINE_NAME() and btrfs_lockdep_keysets below assume BTRFS_MAX_LEVEL == 8"
#endif

#define DEFINE_LEVEL(stem, level)					\
	.names[level] = "btrfs-" stem "-0" #level,

#define DEFINE_NAME(stem)						\
	DEFINE_LEVEL(stem, 0)						\
	DEFINE_LEVEL(stem, 1)						\
	DEFINE_LEVEL(stem, 2)						\
	DEFINE_LEVEL(stem, 3)						\
	DEFINE_LEVEL(stem, 4)						\
	DEFINE_LEVEL(stem, 5)						\
	DEFINE_LEVEL(stem, 6)						\
	DEFINE_LEVEL(stem, 7)
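
/*
 * For illustration, DEFINE_NAME("root") expands to the designated
 * initializers
 *
 *	.names[0] = "btrfs-root-00",
 *	.names[1] = "btrfs-root-01",
 *	...
 *	.names[7] = "btrfs-root-07",
 *
 * so every tree level of every keyset gets its own lockdep class name.
 */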

static struct btrfs_lockdep_keyset {
	u64			id;		/* root objectid */
	/* Longest entry: btrfs-block-group-00 */
	char			names[BTRFS_MAX_LEVEL][24];
	struct lock_class_key	keys[BTRFS_MAX_LEVEL];
} btrfs_lockdep_keysets[] = {
	{ .id = BTRFS_ROOT_TREE_OBJECTID,	 DEFINE_NAME("root")	    },
	{ .id = BTRFS_EXTENT_TREE_OBJECTID,	 DEFINE_NAME("extent")	    },
	{ .id = BTRFS_CHUNK_TREE_OBJECTID,	 DEFINE_NAME("chunk")	    },
	{ .id = BTRFS_DEV_TREE_OBJECTID,	 DEFINE_NAME("dev")	    },
	{ .id = BTRFS_CSUM_TREE_OBJECTID,	 DEFINE_NAME("csum")	    },
	{ .id = BTRFS_QUOTA_TREE_OBJECTID,	 DEFINE_NAME("quota")	    },
	{ .id = BTRFS_TREE_LOG_OBJECTID,	 DEFINE_NAME("log")	    },
	{ .id = BTRFS_TREE_RELOC_OBJECTID,	 DEFINE_NAME("treloc")	    },
	{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID,	 DEFINE_NAME("dreloc")	    },
	{ .id = BTRFS_UUID_TREE_OBJECTID,	 DEFINE_NAME("uuid")	    },
	{ .id = BTRFS_FREE_SPACE_TREE_OBJECTID,	 DEFINE_NAME("free-space")  },
	{ .id = BTRFS_BLOCK_GROUP_TREE_OBJECTID, DEFINE_NAME("block-group") },
	{ .id = BTRFS_RAID_STRIPE_TREE_OBJECTID, DEFINE_NAME("raid-stripe") },
	{ .id = 0,				 DEFINE_NAME("tree")	    },
};

#undef DEFINE_LEVEL
#undef DEFINE_NAME

void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, int level)
{
	struct btrfs_lockdep_keyset *ks;

	BUG_ON(level >= ARRAY_SIZE(ks->keys));

	/* Find the matching keyset, id 0 is the default entry */
	for (ks = btrfs_lockdep_keysets; ks->id; ks++)
		if (ks->id == objectid)
			break;

	lockdep_set_class_and_name(&eb->lock, &ks->keys[level], ks->names[level]);
}

void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root, struct extent_buffer *eb)
{
	if (test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state))
		btrfs_set_buffer_lockdep_class(root->root_key.objectid,
					       eb, btrfs_header_level(eb));
}

#endif

#ifdef CONFIG_BTRFS_DEBUG
static void btrfs_set_eb_lock_owner(struct extent_buffer *eb, pid_t owner)
{
	eb->lock_owner = owner;
}
#else
static void btrfs_set_eb_lock_owner(struct extent_buffer *eb, pid_t owner) { }
#endif

/*
 * Extent buffer locking
 * =====================
 *
 * We use a rw_semaphore for tree locking, and the semantics are exactly the
 * same:
 *
 * - reader/writer exclusion
 * - writer/writer exclusion
 * - reader/reader sharing
 * - try-lock semantics for readers and writers
 *
 * The rwsem implementation does opportunistic spinning, which reduces the
 * number of times the locking task needs to sleep.
 */
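
/*
 * Illustrative sketch (hypothetical caller, not part of this file): the
 * lock/unlock helpers below map 1:1 onto the rwsem primitives, so typical
 * read-side and write-side critical sections look like
 *
 *	btrfs_tree_read_lock(eb);	// down_read(&eb->lock)
 *	... read-only access to eb ...
 *	btrfs_tree_read_unlock(eb);	// up_read(&eb->lock)
 *
 *	btrfs_tree_lock(eb);		// down_write(&eb->lock)
 *	... modify eb ...
 *	btrfs_tree_unlock(eb);		// up_write(&eb->lock)
 */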

/*
 * __btrfs_tree_read_lock - lock extent buffer for read
 * @eb:   the eb to be locked
 * @nest: the nesting level to be used for lockdep
 *
 * This takes the read lock on the extent buffer, using the specified nesting
 * level for lockdep purposes.
 */
void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
{
	u64 start_ns = 0;

	if (trace_btrfs_tree_read_lock_enabled())
		start_ns = ktime_get_ns();

	down_read_nested(&eb->lock, nest);
	trace_btrfs_tree_read_lock(eb, start_ns);
}

void btrfs_tree_read_lock(struct extent_buffer *eb)
{
	__btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL);
}

/*
 * Try-lock for read.
 *
 * Return 1 if the rwsem has been taken, 0 otherwise.
 */
int btrfs_try_tree_read_lock(struct extent_buffer *eb)
{
	if (down_read_trylock(&eb->lock)) {
		trace_btrfs_try_tree_read_lock(eb);
		return 1;
	}
	return 0;
}

/*
 * Try-lock for write.
 *
 * Return 1 if the rwsem has been taken, 0 otherwise.
 */
int btrfs_try_tree_write_lock(struct extent_buffer *eb)
{
	if (down_write_trylock(&eb->lock)) {
		btrfs_set_eb_lock_owner(eb, current->pid);
		trace_btrfs_try_tree_write_lock(eb);
		return 1;
	}
	return 0;
}

/*
 * Release the read lock.
 */
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
	trace_btrfs_tree_read_unlock(eb);
	up_read(&eb->lock);
}

/*
 * Lock eb for write.
 *
 * @eb:   the eb to lock
 * @nest: the nesting to use for the lock
 *
 * Returns with the eb->lock write locked.
 */
void __btrfs_tree_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
	__acquires(&eb->lock)
{
	u64 start_ns = 0;

	if (trace_btrfs_tree_lock_enabled())
		start_ns = ktime_get_ns();

	down_write_nested(&eb->lock, nest);
	btrfs_set_eb_lock_owner(eb, current->pid);
	trace_btrfs_tree_lock(eb, start_ns);
}

void btrfs_tree_lock(struct extent_buffer *eb)
{
	__btrfs_tree_lock(eb, BTRFS_NESTING_NORMAL);
}

/*
 * Release the write lock.
 */
void btrfs_tree_unlock(struct extent_buffer *eb)
{
	trace_btrfs_tree_unlock(eb);
	btrfs_set_eb_lock_owner(eb, 0);
	up_write(&eb->lock);
}

/*
 * This releases any locks held in the path starting at level and going all the
 * way up to the root.
 *
 * btrfs_search_slot will keep the lock held on higher nodes in a few corner
 * cases, such as COW of the block at slot zero in the node. This ignores
 * those rules, and it should only be called when there are no more updates to
 * be done higher up in the tree.
 */
void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
{
	int i;

	if (path->keep_locks)
		return;

	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
		if (!path->nodes[i])
			continue;
		if (!path->locks[i])
			continue;
		btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
		path->locks[i] = 0;
	}
}
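
/*
 * Illustrative sketch (hypothetical caller, not part of this file): once all
 * modifications below a given level are finished, a caller can drop every
 * lock above it in one call:
 *
 *	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
 *	... modify the leaf at path->nodes[0] ...
 *	btrfs_unlock_up_safe(path, 1);	// drop locks on levels 1 and up
 */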

/*
 * Loop around taking references on and locking the root node of the tree until
 * we end up with a lock on the root node.
 *
 * Return: root extent buffer with write lock held
 */
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while (1) {
		eb = btrfs_root_node(root);

		btrfs_maybe_reset_lockdep_class(root, eb);
		btrfs_tree_lock(eb);
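		/*
		 * While we slept waiting for the lock, a concurrent COW may
		 * have replaced the root, in which case root->node no longer
		 * points at eb: unlock, drop our reference and retry.
		 */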
		if (eb == root->node)
			break;
		btrfs_tree_unlock(eb);
		free_extent_buffer(eb);
	}
	return eb;
}

/*
 * Loop around taking references on and locking the root node of the tree until
 * we end up with a lock on the root node.
 *
 * Return: root extent buffer with read lock held
 */
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while (1) {
		eb = btrfs_root_node(root);

		btrfs_maybe_reset_lockdep_class(root, eb);
		btrfs_tree_read_lock(eb);
		if (eb == root->node)
			break;
		btrfs_tree_read_unlock(eb);
		free_extent_buffer(eb);
	}
	return eb;
}

/*
 * Loop around taking references on and locking the root node of the tree in
 * nowait mode until we either end up with a lock on the root node, or give up
 * and return early to avoid blocking.
 *
 * Return: root extent buffer with read lock held, or ERR_PTR(-EAGAIN)
 */
struct extent_buffer *btrfs_try_read_lock_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while (1) {
		eb = btrfs_root_node(root);
		if (!btrfs_try_tree_read_lock(eb)) {
			free_extent_buffer(eb);
			return ERR_PTR(-EAGAIN);
		}
		if (eb == root->node)
			break;
		btrfs_tree_read_unlock(eb);
		free_extent_buffer(eb);
	}
	return eb;
}

/*
 * DREW locks
 * ==========
 *
 * DREW stands for double-reader-writer-exclusion lock. It's used in
 * situations where you want to provide A-B exclusion but not AA or BB.
 *
 * The current implementation gives more priority to readers. If a reader and
 * a writer both race to acquire their respective sides of the lock, the
 * writer yields as soon as it detects a concurrent reader. Additionally, if
 * there are pending readers, no new writers are allowed to come in and
 * acquire the lock.
 */
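
/*
 * Illustrative sketch (hypothetical users, not part of this file): any
 * number of tasks may hold the writer side at once, and any number may hold
 * the reader side at once, but the two sides exclude each other:
 *
 *	// "A" side				// "B" side
 *	btrfs_drew_write_lock(&lock);		btrfs_drew_read_lock(&lock);
 *	... excluded from all readers ...	... excluded from all writers ...
 *	btrfs_drew_write_unlock(&lock);		btrfs_drew_read_unlock(&lock);
 */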

void btrfs_drew_lock_init(struct btrfs_drew_lock *lock)
{
	atomic_set(&lock->readers, 0);
	atomic_set(&lock->writers, 0);
	init_waitqueue_head(&lock->pending_readers);
	init_waitqueue_head(&lock->pending_writers);
}

/* Return true if acquisition is successful, false otherwise */
bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock)
{
	if (atomic_read(&lock->readers))
		return false;

	atomic_inc(&lock->writers);

	/*
	 * Ensure the writers count is updated before we check for pending
	 * readers; pairs with the barrier in btrfs_drew_read_lock.
	 */
	smp_mb__after_atomic();
	if (atomic_read(&lock->readers)) {
		btrfs_drew_write_unlock(lock);
		return false;
	}

	return true;
}

void btrfs_drew_write_lock(struct btrfs_drew_lock *lock)
{
	while (true) {
		if (btrfs_drew_try_write_lock(lock))
			return;
		wait_event(lock->pending_writers, !atomic_read(&lock->readers));
	}
}

void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock)
{
	atomic_dec(&lock->writers);
	cond_wake_up(&lock->pending_readers);
}

void btrfs_drew_read_lock(struct btrfs_drew_lock *lock)
{
	atomic_inc(&lock->readers);

	/*
	 * Ensure the pending reader count is perceived BEFORE this reader
	 * goes to sleep in case of active writers. This guarantees new writers
	 * won't be allowed and that the current reader will be woken up when
	 * the last active writer finishes its job.
	 */
	smp_mb__after_atomic();

	wait_event(lock->pending_readers, atomic_read(&lock->writers) == 0);
}

void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock)
{
	/*
	 * atomic_dec_and_test() implies a full barrier, so woken up writers
	 * are guaranteed to see the decrement.
	 */
	if (atomic_dec_and_test(&lock->readers))
		wake_up(&lock->pending_writers);
}