locks.c source code [linux/fs/locks.c]

1	// SPDX-License-Identifier: GPL-2.0-only
2	/*
3	* linux/fs/locks.c
4	*
5	* We implement four types of file locks: BSD locks, posix locks, open
6	* file description locks, and leases. For details about BSD locks,
7	* see the flock(2) man page; for details about the other three, see
8	* fcntl(2).
9	*
10	*
11	* Locking conflicts and dependencies:
12	* If multiple threads attempt to lock the same byte (or flock the same file)
 * only one can be granted the lock, and the others must wait their turn.
14	* The first lock has been "applied" or "granted", the others are "waiting"
 * and are "blocked" by the "applied" lock.
16	*
17	* Waiting and applied locks are all kept in trees whose properties are:
18	*
19	* - the root of a tree may be an applied or waiting lock.
20	* - every other node in the tree is a waiting lock that
21	* conflicts with every ancestor of that node.
22	*
23	* Every such tree begins life as a waiting singleton which obviously
24	* satisfies the above properties.
25	*
26	* The only ways we modify trees preserve these properties:
27	*
28	* 1. We may add a new leaf node, but only after first verifying that it
29	* conflicts with all of its ancestors.
30	* 2. We may remove the root of a tree, creating a new singleton
31	* tree from the root and N new trees rooted in the immediate
32	* children.
33	* 3. If the root of a tree is not currently an applied lock, we may
34	* apply it (if possible).
35	* 4. We may upgrade the root of the tree (either extend its range,
36	* or upgrade its entire range from read to write).
37	*
38	* When an applied lock is modified in a way that reduces or downgrades any
39	* part of its range, we remove all its children (2 above). This particularly
40	* happens when a lock is unlocked.
41	*
42	* For each of those child trees we "wake up" the thread which is
43	* waiting for the lock so it can continue handling as follows: if the
44	* root of the tree applies, we do so (3). If it doesn't, it must
45	* conflict with some applied lock. We remove (wake up) all of its children
 * (2), and add it as a new leaf to the tree rooted in the applied
47	* lock (1). We then repeat the process recursively with those
48	* children.
49	*
50	*/
51	#include <linux/capability.h>
52	#include <linux/file.h>
53	#include <linux/fdtable.h>
54	#include <linux/filelock.h>
55	#include <linux/fs.h>
56	#include <linux/init.h>
57	#include <linux/security.h>
58	#include <linux/slab.h>
59	#include <linux/syscalls.h>
60	#include <linux/time.h>
61	#include <linux/rcupdate.h>
62	#include <linux/pid_namespace.h>
63	#include <linux/hashtable.h>
64	#include <linux/percpu.h>
65	#include <linux/sysctl.h>
66
67	#define CREATE_TRACE_POINTS
68	#include <trace/events/filelock.h>
69
70	#include <linux/uaccess.h>
71
72	static struct file_lock file_lock(struct* file_lock_core *flc)
73	{
74	return container_of(flc, struct file_lock, c);
75	}
76
77	static struct file_lease file_lease(struct* file_lock_core *flc)
78	{
79	return container_of(flc, struct file_lease, c);
80	}
81
82	static bool lease_breaking(struct file_lease *fl)
83	{
84	return fl->c.flc_flags & (FL_UNLOCK_PENDING \| FL_DOWNGRADE_PENDING);
85	}
86
87	static int target_leasetype(struct file_lease *fl)
88	{
89	if (fl->c.flc_flags & FL_UNLOCK_PENDING)
90	return F_UNLCK;
91	if (fl->c.flc_flags & FL_DOWNGRADE_PENDING)
92	return F_RDLCK;
93	return fl->c.flc_type;
94	}
95
/* Value exported through the "leases-enable" sysctl entry; starts at 1. */
static int leases_enable = 1;
/* Value exported through the "lease-break-time" sysctl entry; starts at 45. */
static int lease_break_time = 45;
98
#ifdef CONFIG_SYSCTL
/*
 * Integer sysctl entries for the lease tunables.  They are registered under
 * "fs" by init_fs_locks_sysctls(); "lease-break-time" is only present on
 * CONFIG_MMU builds.
 */
static struct ctl_table locks_sysctls[] = {
	{
		.procname	= "leases-enable",
		.data		= &leases_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#ifdef CONFIG_MMU
	{
		.procname	= "lease-break-time",
		.data		= &lease_break_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif /* CONFIG_MMU */
};

/* Register the table above at early-initcall time; always reports success. */
static int __init init_fs_locks_sysctls(void)
{
	register_sysctl_init("fs", locks_sysctls);
	return 0;
}
early_initcall(init_fs_locks_sysctls);
#endif /* CONFIG_SYSCTL */
126
/*
 * The global file_lock_list is only used for displaying /proc/locks, so we
 * keep a list on each CPU, with each list protected by its own spinlock.
 * Global serialization is done using file_rwsem.
 *
 * Note that alterations to the list also require that the relevant flc_lock is
 * held.
 */
struct file_lock_list_struct {
	spinlock_t		lock;	/* taken around every add/remove on @hlist */
	struct hlist_head	hlist;	/* locks linked in through flc_link */
};
/* One list per CPU; an entry records its CPU in flc_link_cpu for later removal. */
static DEFINE_PER_CPU(struct file_lock_list_struct, file_lock_list);
DEFINE_STATIC_PERCPU_RWSEM(file_rwsem);
141
142
/*
 * The blocked_hash is used to find POSIX lock loops for deadlock detection.
 * It is protected by blocked_lock_lock.
 *
 * We hash locks by lockowner in order to optimize searching for the lock a
 * particular lockowner is waiting on.
 *
 * FIXME: make this value scale via some heuristic? We generally will want more
 * buckets when we have more lockowners holding locks, but that's a little
 * difficult to determine without knowing what the workload will look like.
 */
#define BLOCKED_HASH_BITS	7
/* Waiters are linked in through flc_link, keyed by posix_owner_key(). */
static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS);
156
/*
 * This lock protects the blocked_hash. Generally, if you're accessing it, you
 * want to be holding this lock.
 *
 * In addition, it also protects the flc_blocked_requests list, and the
 * flc_blocker pointer for file_lock structures that are acting as lock
 * requests (in contrast to those that are acting as records of acquired locks).
 *
 * Note that when we acquire this lock in order to change the above fields,
 * we often hold the flc_lock as well. In certain cases, when reading the fields
 * protected by this lock, we can skip acquiring it iff we already hold the
 * flc_lock.
 */
static DEFINE_SPINLOCK(blocked_lock_lock);
171
/* Slab caches backing the three object kinds allocated in this file. */
static struct kmem_cache *flctx_cache __ro_after_init;		/* struct file_lock_context */
static struct kmem_cache *filelock_cache __ro_after_init;	/* struct file_lock */
static struct kmem_cache *filelease_cache __ro_after_init;	/* struct file_lease */
175
176	static struct file_lock_context *
177	locks_get_lock_context(struct inode inode, int* type)
178	{
179	struct file_lock_context *ctx;
180
181	/ paired with cmpxchg() below /
182	ctx = locks_inode_context(inode);
183	if (likely(ctx) \|\| type == F_UNLCK)
184	goto out;
185
186	ctx = kmem_cache_alloc(cachep: flctx_cache, GFP_KERNEL);
187	if (!ctx)
188	goto out;
189
190	spin_lock_init(&ctx->flc_lock);
191	INIT_LIST_HEAD(list: &ctx->flc_flock);
192	INIT_LIST_HEAD(list: &ctx->flc_posix);
193	INIT_LIST_HEAD(list: &ctx->flc_lease);
194
195	/*
196	* Assign the pointer if it's not already assigned. If it is, then
197	* free the context we just allocated.
198	*/
199	if (cmpxchg(&inode->i_flctx, NULL, ctx)) {
200	kmem_cache_free(s: flctx_cache, objp: ctx);
201	ctx = locks_inode_context(inode);
202	}
203	out:
204	trace_locks_get_lock_context(inode, type, ctx);
205	return ctx;
206	}
207
208	static void
209	locks_dump_ctx_list(struct list_head list, char* *list_type)
210	{
211	struct file_lock_core *flc;
212
213	list_for_each_entry(flc, list, flc_list)
214	pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n",
215	list_type, flc->flc_owner, flc->flc_flags,
216	flc->flc_type, flc->flc_pid);
217	}
218
219	static void
220	locks_check_ctx_lists(struct inode *inode)
221	{
222	struct file_lock_context *ctx = inode->i_flctx;
223
224	if (unlikely(!list_empty(&ctx->flc_flock) \|\|
225	!list_empty(&ctx->flc_posix) \|\|
226	!list_empty(&ctx->flc_lease))) {
227	pr_warn("Leaked locks on dev=0x%x:0x%x ino=0x%lx:\n",
228	MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
229	inode->i_ino);
230	locks_dump_ctx_list(list: &ctx->flc_flock, list_type: "FLOCK");
231	locks_dump_ctx_list(list: &ctx->flc_posix, list_type: "POSIX");
232	locks_dump_ctx_list(list: &ctx->flc_lease, list_type: "LEASE");
233	}
234	}
235
236	static void
237	locks_check_ctx_file_list(struct file filp, struct* list_head list, char* *list_type)
238	{
239	struct file_lock_core *flc;
240	struct inode *inode = file_inode(f: filp);
241
242	list_for_each_entry(flc, list, flc_list)
243	if (flc->flc_file == filp)
244	pr_warn("Leaked %s lock on dev=0x%x:0x%x ino=0x%lx "
245	" fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n",
246	list_type, MAJOR(inode->i_sb->s_dev),
247	MINOR(inode->i_sb->s_dev), inode->i_ino,
248	flc->flc_owner, flc->flc_flags,
249	flc->flc_type, flc->flc_pid);
250	}
251
252	void
253	locks_free_lock_context(struct inode *inode)
254	{
255	struct file_lock_context *ctx = locks_inode_context(inode);
256
257	if (unlikely(ctx)) {
258	locks_check_ctx_lists(inode);
259	kmem_cache_free(s: flctx_cache, objp: ctx);
260	}
261	}
262
263	static void locks_init_lock_heads(struct file_lock_core *flc)
264	{
265	INIT_HLIST_NODE(h: &flc->flc_link);
266	INIT_LIST_HEAD(list: &flc->flc_list);
267	INIT_LIST_HEAD(list: &flc->flc_blocked_requests);
268	INIT_LIST_HEAD(list: &flc->flc_blocked_member);
269	init_waitqueue_head(&flc->flc_wait);
270	}
271
272	/ Allocate an empty lock structure. /
273	struct file_lock locks_alloc_lock(void*)
274	{
275	struct file_lock *fl = kmem_cache_zalloc(k: filelock_cache, GFP_KERNEL);
276
277	if (fl)
278	locks_init_lock_heads(flc: &fl->c);
279
280	return fl;
281	}
282	EXPORT_SYMBOL_GPL(locks_alloc_lock);
283
284	/ Allocate an empty lock structure. /
285	struct file_lease locks_alloc_lease(void*)
286	{
287	struct file_lease *fl = kmem_cache_zalloc(k: filelease_cache, GFP_KERNEL);
288
289	if (fl)
290	locks_init_lock_heads(flc: &fl->c);
291
292	return fl;
293	}
294	EXPORT_SYMBOL_GPL(locks_alloc_lease);
295
296	void locks_release_private(struct file_lock *fl)
297	{
298	struct file_lock_core *flc = &fl->c;
299
300	BUG_ON(waitqueue_active(&flc->flc_wait));
301	BUG_ON(!list_empty(&flc->flc_list));
302	BUG_ON(!list_empty(&flc->flc_blocked_requests));
303	BUG_ON(!list_empty(&flc->flc_blocked_member));
304	BUG_ON(!hlist_unhashed(&flc->flc_link));
305
306	if (fl->fl_ops) {
307	if (fl->fl_ops->fl_release_private)
308	fl->fl_ops->fl_release_private(fl);
309	fl->fl_ops = NULL;
310	}
311
312	if (fl->fl_lmops) {
313	if (fl->fl_lmops->lm_put_owner) {
314	fl->fl_lmops->lm_put_owner(flc->flc_owner);
315	flc->flc_owner = NULL;
316	}
317	fl->fl_lmops = NULL;
318	}
319	}
320	EXPORT_SYMBOL_GPL(locks_release_private);
321
322	/**
323	* locks_owner_has_blockers - Check for blocking lock requests
324	* @flctx: file lock context
325	* @owner: lock owner
326	*
327	* Return values:
328	* %true: @owner has at least one blocker
329	* %false: @owner has no blockers
330	*/
331	bool locks_owner_has_blockers(struct file_lock_context *flctx, fl_owner_t owner)
332	{
333	struct file_lock_core *flc;
334
335	spin_lock(lock: &flctx->flc_lock);
336	list_for_each_entry(flc, &flctx->flc_posix, flc_list) {
337	if (flc->flc_owner != owner)
338	continue;
339	if (!list_empty(head: &flc->flc_blocked_requests)) {
340	spin_unlock(lock: &flctx->flc_lock);
341	return true;
342	}
343	}
344	spin_unlock(lock: &flctx->flc_lock);
345	return false;
346	}
347	EXPORT_SYMBOL_GPL(locks_owner_has_blockers);
348
349	/ Free a lock which is not in use. /
350	void locks_free_lock(struct file_lock *fl)
351	{
352	locks_release_private(fl);
353	kmem_cache_free(s: filelock_cache, objp: fl);
354	}
355	EXPORT_SYMBOL(locks_free_lock);
356
357	/ Free a lease which is not in use. /
358	void locks_free_lease(struct file_lease *fl)
359	{
360	kmem_cache_free(s: filelease_cache, objp: fl);
361	}
362	EXPORT_SYMBOL(locks_free_lease);
363
364	static void
365	locks_dispose_list(struct list_head *dispose)
366	{
367	struct file_lock_core *flc;
368
369	while (!list_empty(head: dispose)) {
370	flc = list_first_entry(dispose, struct file_lock_core, flc_list);
371	list_del_init(entry: &flc->flc_list);
372	if (flc->flc_flags & (FL_LEASE\|FL_DELEG\|FL_LAYOUT))
373	locks_free_lease(file_lease(flc));
374	else
375	locks_free_lock(file_lock(flc));
376	}
377	}
378
379	void locks_init_lock(struct file_lock *fl)
380	{
381	memset(fl, `0`, sizeof(struct file_lock));
382	locks_init_lock_heads(flc: &fl->c);
383	}
384	EXPORT_SYMBOL(locks_init_lock);
385
386	void locks_init_lease(struct file_lease *fl)
387	{
388	memset(fl, `0`, sizeof(*fl));
389	locks_init_lock_heads(flc: &fl->c);
390	}
391	EXPORT_SYMBOL(locks_init_lease);
392
393	/*
394	* Initialize a new lock from an existing file_lock structure.
395	*/
396	void locks_copy_conflock(struct file_lock new, struct* file_lock *fl)
397	{
398	new->c.flc_owner = fl->c.flc_owner;
399	new->c.flc_pid = fl->c.flc_pid;
400	new->c.flc_file = NULL;
401	new->c.flc_flags = fl->c.flc_flags;
402	new->c.flc_type = fl->c.flc_type;
403	new->fl_start = fl->fl_start;
404	new->fl_end = fl->fl_end;
405	new->fl_lmops = fl->fl_lmops;
406	new->fl_ops = NULL;
407
408	if (fl->fl_lmops) {
409	if (fl->fl_lmops->lm_get_owner)
410	fl->fl_lmops->lm_get_owner(fl->c.flc_owner);
411	}
412	}
413	EXPORT_SYMBOL(locks_copy_conflock);
414
415	void locks_copy_lock(struct file_lock new, struct* file_lock *fl)
416	{
417	/ "new" must be a freshly-initialized lock /
418	WARN_ON_ONCE(new->fl_ops);
419
420	locks_copy_conflock(new, fl);
421
422	new->c.flc_file = fl->c.flc_file;
423	new->fl_ops = fl->fl_ops;
424
425	if (fl->fl_ops) {
426	if (fl->fl_ops->fl_copy_lock)
427	fl->fl_ops->fl_copy_lock(new, fl);
428	}
429	}
430	EXPORT_SYMBOL(locks_copy_lock);
431
432	static void locks_move_blocks(struct file_lock new, struct* file_lock *fl)
433	{
434	struct file_lock *f;
435
436	/*
437	* As ctx->flc_lock is held, new requests cannot be added to
438	* ->flc_blocked_requests, so we don't need a lock to check if it
439	* is empty.
440	*/
441	if (list_empty(head: &fl->c.flc_blocked_requests))
442	return;
443	spin_lock(lock: &blocked_lock_lock);
444	list_splice_init(list: &fl->c.flc_blocked_requests,
445	head: &new->c.flc_blocked_requests);
446	list_for_each_entry(f, &new->c.flc_blocked_requests,
447	c.flc_blocked_member)
448	f->c.flc_blocker = &new->c;
449	spin_unlock(lock: &blocked_lock_lock);
450	}
451
/*
 * Translate a flock() operation into the matching lock type:
 * LOCK_SH -> F_RDLCK, LOCK_EX -> F_WRLCK, LOCK_UN -> F_UNLCK.
 * Any other value yields -EINVAL.
 */
static inline int flock_translate_cmd(int cmd)
{
	if (cmd == LOCK_SH)
		return F_RDLCK;
	if (cmd == LOCK_EX)
		return F_WRLCK;
	if (cmd == LOCK_UN)
		return F_UNLCK;

	return -EINVAL;
}
463
464	/ Fill in a file_lock structure with an appropriate FLOCK lock. /
465	static void flock_make_lock(struct file filp, struct* file_lock fl, int* type)
466	{
467	locks_init_lock(fl);
468
469	fl->c.flc_file = filp;
470	fl->c.flc_owner = filp;
471	fl->c.flc_pid = current->tgid;
472	fl->c.flc_flags = FL_FLOCK;
473	fl->c.flc_type = type;
474	fl->fl_end = OFFSET_MAX;
475	}
476
477	static int assign_type(struct file_lock_core flc, int* type)
478	{
479	switch (type) {
480	case F_RDLCK:
481	case F_WRLCK:
482	case F_UNLCK:
483	flc->flc_type = type;
484	break;
485	default:
486	return -EINVAL;
487	}
488	return `0`;
489	}
490
491	static int flock64_to_posix_lock(struct file filp, struct* file_lock *fl,
492	struct flock64 *l)
493	{
494	switch (l->l_whence) {
495	case SEEK_SET:
496	fl->fl_start = `0`;
497	break;
498	case SEEK_CUR:
499	fl->fl_start = filp->f_pos;
500	break;
501	case SEEK_END:
502	fl->fl_start = i_size_read(inode: file_inode(f: filp));
503	break;
504	default:
505	return -EINVAL;
506	}
507	if (l->l_start > OFFSET_MAX - fl->fl_start)
508	return -EOVERFLOW;
509	fl->fl_start += l->l_start;
510	if (fl->fl_start < `0`)
511	return -EINVAL;
512
513	/ POSIX-1996 leaves the case l->l_len < 0 undefined;*
514	POSIX-2001 defines it. /*
515	if (l->l_len > `0`) {
516	if (l->l_len - `1` > OFFSET_MAX - fl->fl_start)
517	return -EOVERFLOW;
518	fl->fl_end = fl->fl_start + (l->l_len - `1`);
519
520	} else if (l->l_len < `0`) {
521	if (fl->fl_start + l->l_len < `0`)
522	return -EINVAL;
523	fl->fl_end = fl->fl_start - `1`;
524	fl->fl_start += l->l_len;
525	} else
526	fl->fl_end = OFFSET_MAX;
527
528	fl->c.flc_owner = current->files;
529	fl->c.flc_pid = current->tgid;
530	fl->c.flc_file = filp;
531	fl->c.flc_flags = FL_POSIX;
532	fl->fl_ops = NULL;
533	fl->fl_lmops = NULL;
534
535	return assign_type(flc: &fl->c, type: l->l_type);
536	}
537
538	/ Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX*
539	* style lock.
540	*/
541	static int flock_to_posix_lock(struct file filp, struct* file_lock *fl,
542	struct flock *l)
543	{
544	struct flock64 ll = {
545	.l_type = l->l_type,
546	.l_whence = l->l_whence,
547	.l_start = l->l_start,
548	.l_len = l->l_len,
549	};
550
551	return flock64_to_posix_lock(filp, fl, l: &ll);
552	}
553
554	/ default lease lock manager operations /
555	static bool
556	lease_break_callback(struct file_lease *fl)
557	{
558	kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG);
559	return false;
560	}
561
562	static void
563	lease_setup(struct file_lease fl, void* **priv)
564	{
565	struct file *filp = fl->c.flc_file;
566	struct fasync_struct fa = priv;
567
568	/*
569	* fasync_insert_entry() returns the old entry if any. If there was no
570	* old entry, then it used "priv" and inserted it into the fasync list.
571	* Clear the pointer to indicate that it shouldn't be freed.
572	*/
573	if (!fasync_insert_entry(fa->fa_fd, filp, &fl->fl_fasync, fa))
574	*priv = NULL;
575
576	__f_setown(filp, task_pid(current), PIDTYPE_TGID, force: `0`);
577	}
578
/* Operations installed on leases created through lease_init(). */
static const struct lease_manager_operations lease_manager_ops = {
	.lm_break = lease_break_callback,	/* signal the fasync list */
	.lm_change = lease_modify,
	.lm_setup = lease_setup,
};
584
585	/*
586	* Initialize a lease, use the default lock manager operations
587	*/
588	static int lease_init(struct file filp, int* type, struct file_lease *fl)
589	{
590	if (assign_type(flc: &fl->c, type) != `0`)
591	return -EINVAL;
592
593	fl->c.flc_owner = filp;
594	fl->c.flc_pid = current->tgid;
595
596	fl->c.flc_file = filp;
597	fl->c.flc_flags = FL_LEASE;
598	fl->fl_lmops = &lease_manager_ops;
599	return `0`;
600	}
601
602	/ Allocate a file_lock initialised to this type of lease /
603	static struct file_lease lease_alloc(struct* file filp, int* type)
604	{
605	struct file_lease *fl = locks_alloc_lease();
606	int error = -ENOMEM;
607
608	if (fl == NULL)
609	return ERR_PTR(error);
610
611	error = lease_init(filp, type, fl);
612	if (error) {
613	locks_free_lease(fl);
614	return ERR_PTR(error);
615	}
616	return fl;
617	}
618
619	/ Check if two locks overlap each other.*
620	*/
621	static inline int locks_overlap(struct file_lock fl1, struct* file_lock *fl2)
622	{
623	return ((fl1->fl_end >= fl2->fl_start) &&
624	(fl2->fl_end >= fl1->fl_start));
625	}
626
627	/*
628	* Check whether two locks have the same owner.
629	*/
630	static int posix_same_owner(struct file_lock_core fl1, struct* file_lock_core *fl2)
631	{
632	return fl1->flc_owner == fl2->flc_owner;
633	}
634
635	/ Must be called with the flc_lock held! /
636	static void locks_insert_global_locks(struct file_lock_core *flc)
637	{
638	struct file_lock_list_struct *fll = this_cpu_ptr(&file_lock_list);
639
640	percpu_rwsem_assert_held(&file_rwsem);
641
642	spin_lock(lock: &fll->lock);
643	flc->flc_link_cpu = smp_processor_id();
644	hlist_add_head(n: &flc->flc_link, h: &fll->hlist);
645	spin_unlock(lock: &fll->lock);
646	}
647
648	/ Must be called with the flc_lock held! /
649	static void locks_delete_global_locks(struct file_lock_core *flc)
650	{
651	struct file_lock_list_struct *fll;
652
653	percpu_rwsem_assert_held(&file_rwsem);
654
655	/*
656	* Avoid taking lock if already unhashed. This is safe since this check
657	* is done while holding the flc_lock, and new insertions into the list
658	* also require that it be held.
659	*/
660	if (hlist_unhashed(h: &flc->flc_link))
661	return;
662
663	fll = per_cpu_ptr(&file_lock_list, flc->flc_link_cpu);
664	spin_lock(lock: &fll->lock);
665	hlist_del_init(n: &flc->flc_link);
666	spin_unlock(lock: &fll->lock);
667	}
668
669	static unsigned long
670	posix_owner_key(struct file_lock_core *flc)
671	{
672	return (unsigned long) flc->flc_owner;
673	}
674
675	static void locks_insert_global_blocked(struct file_lock_core *waiter)
676	{
677	lockdep_assert_held(&blocked_lock_lock);
678
679	hash_add(blocked_hash, &waiter->flc_link, posix_owner_key(waiter));
680	}
681
682	static void locks_delete_global_blocked(struct file_lock_core *waiter)
683	{
684	lockdep_assert_held(&blocked_lock_lock);
685
686	hash_del(node: &waiter->flc_link);
687	}
688
689	/ Remove waiter from blocker's block list.*
690	* When blocker ends up pointing to itself then the list is empty.
691	*
692	* Must be called with blocked_lock_lock held.
693	*/
694	static void __locks_unlink_block(struct file_lock_core *waiter)
695	{
696	locks_delete_global_blocked(waiter);
697	list_del_init(entry: &waiter->flc_blocked_member);
698	}
699
700	static void __locks_wake_up_blocks(struct file_lock_core *blocker)
701	{
702	while (!list_empty(head: &blocker->flc_blocked_requests)) {
703	struct file_lock_core *waiter;
704	struct file_lock *fl;
705
706	waiter = list_first_entry(&blocker->flc_blocked_requests,
707	struct file_lock_core, flc_blocked_member);
708
709	fl = file_lock(flc: waiter);
710	__locks_unlink_block(waiter);
711	if ((waiter->flc_flags & (FL_POSIX \| FL_FLOCK)) &&
712	fl->fl_lmops && fl->fl_lmops->lm_notify)
713	fl->fl_lmops->lm_notify(fl);
714	else
715	locks_wake_up(fl);
716
717	/*
718	* The setting of flc_blocker to NULL marks the "done"
719	* point in deleting a block. Paired with acquire at the top
720	* of locks_delete_block().
721	*/
722	smp_store_release(&waiter->flc_blocker, NULL);
723	}
724	}
725
726	static int __locks_delete_block(struct file_lock_core *waiter)
727	{
728	int status = -ENOENT;
729
730	/*
731	* If fl_blocker is NULL, it won't be set again as this thread "owns"
732	* the lock and is the only one that might try to claim the lock.
733	*
734	* We use acquire/release to manage fl_blocker so that we can
735	* optimize away taking the blocked_lock_lock in many cases.
736	*
737	* The smp_load_acquire guarantees two things:
738	*
739	* 1/ that fl_blocked_requests can be tested locklessly. If something
740	* was recently added to that list it must have been in a locked region
741	* before the locked region when fl_blocker was set to NULL.
742	*
743	* 2/ that no other thread is accessing 'waiter', so it is safe to free
744	* it. __locks_wake_up_blocks is careful not to touch waiter after
745	* fl_blocker is released.
746	*
747	* If a lockless check of fl_blocker shows it to be NULL, we know that
748	* no new locks can be inserted into its fl_blocked_requests list, and
749	* can avoid doing anything further if the list is empty.
750	*/
751	if (!smp_load_acquire(&waiter->flc_blocker) &&
752	list_empty(head: &waiter->flc_blocked_requests))
753	return status;
754
755	spin_lock(lock: &blocked_lock_lock);
756	if (waiter->flc_blocker)
757	status = `0`;
758	__locks_wake_up_blocks(blocker: waiter);
759	__locks_unlink_block(waiter);
760
761	/*
762	* The setting of fl_blocker to NULL marks the "done" point in deleting
763	* a block. Paired with acquire at the top of this function.
764	*/
765	smp_store_release(&waiter->flc_blocker, NULL);
766	spin_unlock(lock: &blocked_lock_lock);
767	return status;
768	}
769
770	/**
771	* locks_delete_block - stop waiting for a file lock
772	* @waiter: the lock which was waiting
773	*
774	* lockd/nfsd need to disconnect the lock while working on it.
775	*/
776	int locks_delete_block(struct file_lock *waiter)
777	{
778	return __locks_delete_block(waiter: &waiter->c);
779	}
780	EXPORT_SYMBOL(locks_delete_block);
781
782	/ Insert waiter into blocker's block list.*
783	* We use a circular list so that processes can be easily woken up in
784	* the order they blocked. The documentation doesn't require this but
785	* it seems like the reasonable thing to do.
786	*
787	* Must be called with both the flc_lock and blocked_lock_lock held. The
788	* fl_blocked_requests list itself is protected by the blocked_lock_lock,
789	* but by ensuring that the flc_lock is also held on insertions we can avoid
790	* taking the blocked_lock_lock in some cases when we see that the
791	* fl_blocked_requests list is empty.
792	*
793	* Rather than just adding to the list, we check for conflicts with any existing
794	* waiters, and add beneath any waiter that blocks the new waiter.
795	* Thus wakeups don't happen until needed.
796	*/
797	static void __locks_insert_block(struct file_lock_core *blocker,
798	struct file_lock_core *waiter,
799	bool conflict(struct file_lock_core *,
800	struct file_lock_core *))
801	{
802	struct file_lock_core *flc;
803
804	BUG_ON(!list_empty(&waiter->flc_blocked_member));
805	new_blocker:
806	list_for_each_entry(flc, &blocker->flc_blocked_requests, flc_blocked_member)
807	if (conflict(flc, waiter)) {
808	blocker = flc;
809	goto new_blocker;
810	}
811	waiter->flc_blocker = blocker;
812	list_add_tail(new: &waiter->flc_blocked_member,
813	head: &blocker->flc_blocked_requests);
814
815	if ((blocker->flc_flags & (FL_POSIX\|FL_OFDLCK)) == FL_POSIX)
816	locks_insert_global_blocked(waiter);
817
818	/ The requests in waiter->flc_blocked are known to conflict with*
819	* waiter, but might not conflict with blocker, or the requests
820	* and lock which block it. So they all need to be woken.
821	*/
822	__locks_wake_up_blocks(blocker: waiter);
823	}
824
825	/ Must be called with flc_lock held. /
826	static void locks_insert_block(struct file_lock_core *blocker,
827	struct file_lock_core *waiter,
828	bool conflict(struct file_lock_core *,
829	struct file_lock_core *))
830	{
831	spin_lock(lock: &blocked_lock_lock);
832	__locks_insert_block(blocker, waiter, conflict);
833	spin_unlock(lock: &blocked_lock_lock);
834	}
835
836	/*
837	* Wake up processes blocked waiting for blocker.
838	*
839	* Must be called with the inode->flc_lock held!
840	*/
841	static void locks_wake_up_blocks(struct file_lock_core *blocker)
842	{
843	/*
844	* Avoid taking global lock if list is empty. This is safe since new
845	* blocked requests are only added to the list under the flc_lock, and
846	* the flc_lock is always held here. Note that removal from the
847	* fl_blocked_requests list does not require the flc_lock, so we must
848	* recheck list_empty() after acquiring the blocked_lock_lock.
849	*/
850	if (list_empty(head: &blocker->flc_blocked_requests))
851	return;
852
853	spin_lock(lock: &blocked_lock_lock);
854	__locks_wake_up_blocks(blocker);
855	spin_unlock(lock: &blocked_lock_lock);
856	}
857
858	static void
859	locks_insert_lock_ctx(struct file_lock_core fl, struct* list_head *before)
860	{
861	list_add_tail(new: &fl->flc_list, head: before);
862	locks_insert_global_locks(flc: fl);
863	}
864
865	static void
866	locks_unlink_lock_ctx(struct file_lock_core *fl)
867	{
868	locks_delete_global_locks(flc: fl);
869	list_del_init(entry: &fl->flc_list);
870	locks_wake_up_blocks(blocker: fl);
871	}
872
873	static void
874	locks_delete_lock_ctx(struct file_lock_core fl, struct* list_head *dispose)
875	{
876	locks_unlink_lock_ctx(fl);
877	if (dispose)
878	list_add(new: &fl->flc_list, head: dispose);
879	else
880	locks_free_lock(file_lock(flc: fl));
881	}
882
883	/ Determine if lock sys_fl blocks lock caller_fl. Common functionality*
884	* checks for shared/exclusive status of overlapping locks.
885	*/
886	static bool locks_conflict(struct file_lock_core *caller_flc,
887	struct file_lock_core *sys_flc)
888	{
889	if (sys_flc->flc_type == F_WRLCK)
890	return true;
891	if (caller_flc->flc_type == F_WRLCK)
892	return true;
893	return false;
894	}
895
896	/ Determine if lock sys_fl blocks lock caller_fl. POSIX specific*
897	* checking before calling the locks_conflict().
898	*/
899	static bool posix_locks_conflict(struct file_lock_core *caller_flc,
900	struct file_lock_core *sys_flc)
901	{
902	struct file_lock *caller_fl = file_lock(flc: caller_flc);
903	struct file_lock *sys_fl = file_lock(flc: sys_flc);
904
905	/ POSIX locks owned by the same process do not conflict with*
906	* each other.
907	*/
908	if (posix_same_owner(fl1: caller_flc, fl2: sys_flc))
909	return false;
910
911	/ Check whether they overlap /
912	if (!locks_overlap(fl1: caller_fl, fl2: sys_fl))
913	return false;
914
915	return locks_conflict(caller_flc, sys_flc);
916	}
917
918	/ Determine if lock sys_fl blocks lock caller_fl. Used on xx_GETLK*
919	* path so checks for additional GETLK-specific things like F_UNLCK.
920	*/
921	static bool posix_test_locks_conflict(struct file_lock *caller_fl,
922	struct file_lock *sys_fl)
923	{
924	struct file_lock_core *caller = &caller_fl->c;
925	struct file_lock_core *sys = &sys_fl->c;
926
927	/ F_UNLCK checks any locks on the same fd. /
928	if (lock_is_unlock(fl: caller_fl)) {
929	if (!posix_same_owner(fl1: caller, fl2: sys))
930	return false;
931	return locks_overlap(fl1: caller_fl, fl2: sys_fl);
932	}
933	return posix_locks_conflict(caller_flc: caller, sys_flc: sys);
934	}
935
936	/ Determine if lock sys_fl blocks lock caller_fl. FLOCK specific*
937	* checking before calling the locks_conflict().
938	*/
939	static bool flock_locks_conflict(struct file_lock_core *caller_flc,
940	struct file_lock_core *sys_flc)
941	{
942	/ FLOCK locks referring to the same filp do not conflict with*
943	* each other.
944	*/
945	if (caller_flc->flc_file == sys_flc->flc_file)
946	return false;
947
948	return locks_conflict(caller_flc, sys_flc);
949	}
950
951	void
952	posix_test_lock(struct file filp, struct* file_lock *fl)
953	{
954	struct file_lock *cfl;
955	struct file_lock_context *ctx;
956	struct inode *inode = file_inode(f: filp);
957	void *owner;
958	void (func)(void*);
959
960	ctx = locks_inode_context(inode);
961	if (!ctx \|\| list_empty_careful(head: &ctx->flc_posix)) {
962	fl->c.flc_type = F_UNLCK;
963	return;
964	}
965
966	retry:
967	spin_lock(lock: &ctx->flc_lock);
968	list_for_each_entry(cfl, &ctx->flc_posix, c.flc_list) {
969	if (!posix_test_locks_conflict(caller_fl: fl, sys_fl: cfl))
970	continue;
971	if (cfl->fl_lmops && cfl->fl_lmops->lm_lock_expirable
972	&& (*cfl->fl_lmops->lm_lock_expirable)(cfl)) {
973	owner = cfl->fl_lmops->lm_mod_owner;
974	func = cfl->fl_lmops->lm_expire_lock;
975	__module_get(module: owner);
976	spin_unlock(lock: &ctx->flc_lock);
977	(*func)();
978	module_put(module: owner);
979	goto retry;
980	}
981	locks_copy_conflock(fl, cfl);
982	goto out;
983	}
984	fl->c.flc_type = F_UNLCK;
985	out:
986	spin_unlock(lock: &ctx->flc_lock);
987	return;
988	}
989	EXPORT_SYMBOL(posix_test_lock);
990
991	/*
992	* Deadlock detection:
993	*
994	* We attempt to detect deadlocks that are due purely to posix file
995	* locks.
996	*
997	* We assume that a task can be waiting for at most one lock at a time.
998	* So for any acquired lock, the process holding that lock may be
999	* waiting on at most one other lock. That lock in turns may be held by
1000	* someone waiting for at most one other lock. Given a requested lock
1001	* caller_fl which is about to wait for a conflicting lock block_fl, we
1002	* follow this chain of waiters to ensure we are not about to create a
1003	* cycle.
1004	*
1005	* Since we do this before we ever put a process to sleep on a lock, we
1006	* are ensured that there is never a cycle; that is what guarantees that
1007	* the while() loop in posix_locks_deadlock() eventually completes.
1008	*
1009	* Note: the above assumption may not be true when handling lock
1010	* requests from a broken NFS client. It may also fail in the presence
1011	* of tasks (such as posix threads) sharing the same open file table.
1012	* To handle those cases, we just bail out after a few iterations.
1013	*
1014	* For FL_OFDLCK locks, the owner is the filp, not the files_struct.
1015	* Because the owner is not even nominally tied to a thread of
1016	* execution, the deadlock detection below can't reasonably work well. Just
1017	* skip it for those.
1018	*
1019	* In principle, we could do a more limited deadlock detection on FL_OFDLCK
1020	* locks that just checks for the case where two tasks are attempting to
1021	* upgrade from read to write locks on the same inode.
1022	*/
1023
/* Cap on the number of waiter-chain hops posix_locks_deadlock() follows. */
#define MAX_DEADLK_ITERATIONS 10
1025
1026	/ Find a lock that the owner of the given @blocker is blocking on. /
1027	static struct file_lock_core what_owner_is_waiting_for(struct* file_lock_core *blocker)
1028	{
1029	struct file_lock_core *flc;
1030
1031	hash_for_each_possible(blocked_hash, flc, flc_link, posix_owner_key(blocker)) {
1032	if (posix_same_owner(fl1: flc, fl2: blocker)) {
1033	while (flc->flc_blocker)
1034	flc = flc->flc_blocker;
1035	return flc;
1036	}
1037	}
1038	return NULL;
1039	}
1040
1041	/ Must be called with the blocked_lock_lock held! /
1042	static bool posix_locks_deadlock(struct file_lock *caller_fl,
1043	struct file_lock *block_fl)
1044	{
1045	struct file_lock_core *caller = &caller_fl->c;
1046	struct file_lock_core *blocker = &block_fl->c;
1047	int i = `0`;
1048
1049	lockdep_assert_held(&blocked_lock_lock);
1050
1051	/*
1052	* This deadlock detector can't reasonably detect deadlocks with
1053	* FL_OFDLCK locks, since they aren't owned by a process, per-se.
1054	*/
1055	if (caller->flc_flags & FL_OFDLCK)
1056	return false;
1057
1058	while ((blocker = what_owner_is_waiting_for(blocker))) {
1059	if (i++ > MAX_DEADLK_ITERATIONS)
1060	return false;
1061	if (posix_same_owner(fl1: caller, fl2: blocker))
1062	return true;
1063	}
1064	return false;
1065	}
1066
1067	/ Try to create a FLOCK lock on filp. We always insert new FLOCK locks*
1068	* after any leases, but before any posix locks.
1069	*
1070	* Note that if called with an FL_EXISTS argument, the caller may determine
1071	* whether or not a lock was successfully freed by testing the return
1072	* value for -ENOENT.
1073	*/
1074	static int flock_lock_inode(struct inode inode, struct* file_lock *request)
1075	{
1076	struct file_lock *new_fl = NULL;
1077	struct file_lock *fl;
1078	struct file_lock_context *ctx;
1079	int error = `0`;
1080	bool found = false;
1081	LIST_HEAD(dispose);
1082
1083	ctx = locks_get_lock_context(inode, type: request->c.flc_type);
1084	if (!ctx) {
1085	if (request->c.flc_type != F_UNLCK)
1086	return -ENOMEM;
1087	return (request->c.flc_flags & FL_EXISTS) ? -ENOENT : `0`;
1088	}
1089
1090	if (!(request->c.flc_flags & FL_ACCESS) && (request->c.flc_type != F_UNLCK)) {
1091	new_fl = locks_alloc_lock();
1092	if (!new_fl)
1093	return -ENOMEM;
1094	}
1095
1096	percpu_down_read(sem: &file_rwsem);
1097	spin_lock(lock: &ctx->flc_lock);
1098	if (request->c.flc_flags & FL_ACCESS)
1099	goto find_conflict;
1100
1101	list_for_each_entry(fl, &ctx->flc_flock, c.flc_list) {
1102	if (request->c.flc_file != fl->c.flc_file)
1103	continue;
1104	if (request->c.flc_type == fl->c.flc_type)
1105	goto out;
1106	found = true;
1107	locks_delete_lock_ctx(fl: &fl->c, dispose: &dispose);
1108	break;
1109	}
1110
1111	if (lock_is_unlock(fl: request)) {
1112	if ((request->c.flc_flags & FL_EXISTS) && !found)
1113	error = -ENOENT;
1114	goto out;
1115	}
1116
1117	find_conflict:
1118	list_for_each_entry(fl, &ctx->flc_flock, c.flc_list) {
1119	if (!flock_locks_conflict(caller_flc: &request->c, sys_flc: &fl->c))
1120	continue;
1121	error = -EAGAIN;
1122	if (!(request->c.flc_flags & FL_SLEEP))
1123	goto out;
1124	error = FILE_LOCK_DEFERRED;
1125	locks_insert_block(blocker: &fl->c, waiter: &request->c, conflict: flock_locks_conflict);
1126	goto out;
1127	}
1128	if (request->c.flc_flags & FL_ACCESS)
1129	goto out;
1130	locks_copy_lock(new_fl, request);
1131	locks_move_blocks(new: new_fl, fl: request);
1132	locks_insert_lock_ctx(fl: &new_fl->c, before: &ctx->flc_flock);
1133	new_fl = NULL;
1134	error = `0`;
1135
1136	out:
1137	spin_unlock(lock: &ctx->flc_lock);
1138	percpu_up_read(sem: &file_rwsem);
1139	if (new_fl)
1140	locks_free_lock(new_fl);
1141	locks_dispose_list(dispose: &dispose);
1142	trace_flock_lock_inode(inode, fl: request, ret: error);
1143	return error;
1144	}
1145
1146	static int posix_lock_inode(struct inode inode, struct* file_lock *request,
1147	struct file_lock *conflock)
1148	{
1149	struct file_lock fl, tmp;
1150	struct file_lock *new_fl = NULL;
1151	struct file_lock *new_fl2 = NULL;
1152	struct file_lock *left = NULL;
1153	struct file_lock *right = NULL;
1154	struct file_lock_context *ctx;
1155	int error;
1156	bool added = false;
1157	LIST_HEAD(dispose);
1158	void *owner;
1159	void (func)(void*);
1160
1161	ctx = locks_get_lock_context(inode, type: request->c.flc_type);
1162	if (!ctx)
1163	return lock_is_unlock(fl: request) ? `0` : -ENOMEM;
1164
1165	/*
1166	* We may need two file_lock structures for this operation,
1167	* so we get them in advance to avoid races.
1168	*
1169	* In some cases we can be sure, that no new locks will be needed
1170	*/
1171	if (!(request->c.flc_flags & FL_ACCESS) &&
1172	(request->c.flc_type != F_UNLCK \|\|
1173	request->fl_start != `0` \|\| request->fl_end != OFFSET_MAX)) {
1174	new_fl = locks_alloc_lock();
1175	new_fl2 = locks_alloc_lock();
1176	}
1177
1178	retry:
1179	percpu_down_read(sem: &file_rwsem);
1180	spin_lock(lock: &ctx->flc_lock);
1181	/*
1182	* New lock request. Walk all POSIX locks and look for conflicts. If
1183	* there are any, either return error or put the request on the
1184	* blocker's list of waiters and the global blocked_hash.
1185	*/
1186	if (request->c.flc_type != F_UNLCK) {
1187	list_for_each_entry(fl, &ctx->flc_posix, c.flc_list) {
1188	if (!posix_locks_conflict(caller_flc: &request->c, sys_flc: &fl->c))
1189	continue;
1190	if (fl->fl_lmops && fl->fl_lmops->lm_lock_expirable
1191	&& (*fl->fl_lmops->lm_lock_expirable)(fl)) {
1192	owner = fl->fl_lmops->lm_mod_owner;
1193	func = fl->fl_lmops->lm_expire_lock;
1194	__module_get(module: owner);
1195	spin_unlock(lock: &ctx->flc_lock);
1196	percpu_up_read(sem: &file_rwsem);
1197	(*func)();
1198	module_put(module: owner);
1199	goto retry;
1200	}
1201	if (conflock)
1202	locks_copy_conflock(conflock, fl);
1203	error = -EAGAIN;
1204	if (!(request->c.flc_flags & FL_SLEEP))
1205	goto out;
1206	/*
1207	* Deadlock detection and insertion into the blocked
1208	* locks list must be done while holding the same lock!
1209	*/
1210	error = -EDEADLK;
1211	spin_lock(lock: &blocked_lock_lock);
1212	/*
1213	* Ensure that we don't find any locks blocked on this
1214	* request during deadlock detection.
1215	*/
1216	__locks_wake_up_blocks(blocker: &request->c);
1217	if (likely(!posix_locks_deadlock(request, fl))) {
1218	error = FILE_LOCK_DEFERRED;
1219	__locks_insert_block(blocker: &fl->c, waiter: &request->c,
1220	conflict: posix_locks_conflict);
1221	}
1222	spin_unlock(lock: &blocked_lock_lock);
1223	goto out;
1224	}
1225	}
1226
1227	/ If we're just looking for a conflict, we're done. /
1228	error = `0`;
1229	if (request->c.flc_flags & FL_ACCESS)
1230	goto out;
1231
1232	/ Find the first old lock with the same owner as the new lock /
1233	list_for_each_entry(fl, &ctx->flc_posix, c.flc_list) {
1234	if (posix_same_owner(fl1: &request->c, fl2: &fl->c))
1235	break;
1236	}
1237
1238	/ Process locks with this owner. /
1239	list_for_each_entry_safe_from(fl, tmp, &ctx->flc_posix, c.flc_list) {
1240	if (!posix_same_owner(fl1: &request->c, fl2: &fl->c))
1241	break;
1242
1243	/ Detect adjacent or overlapping regions (if same lock type) /
1244	if (request->c.flc_type == fl->c.flc_type) {
1245	/ In all comparisons of start vs end, use*
1246	* "start - 1" rather than "end + 1". If end
1247	* is OFFSET_MAX, end + 1 will become negative.
1248	*/
1249	if (fl->fl_end < request->fl_start - `1`)
1250	continue;
1251	/ If the next lock in the list has entirely bigger*
1252	* addresses than the new one, insert the lock here.
1253	*/
1254	if (fl->fl_start - `1` > request->fl_end)
1255	break;
1256
1257	/ If we come here, the new and old lock are of the*
1258	* same type and adjacent or overlapping. Make one
1259	* lock yielding from the lower start address of both
1260	* locks to the higher end address.
1261	*/
1262	if (fl->fl_start > request->fl_start)
1263	fl->fl_start = request->fl_start;
1264	else
1265	request->fl_start = fl->fl_start;
1266	if (fl->fl_end < request->fl_end)
1267	fl->fl_end = request->fl_end;
1268	else
1269	request->fl_end = fl->fl_end;
1270	if (added) {
1271	locks_delete_lock_ctx(fl: &fl->c, dispose: &dispose);
1272	continue;
1273	}
1274	request = fl;
1275	added = true;
1276	} else {
1277	/ Processing for different lock types is a bit*
1278	* more complex.
1279	*/
1280	if (fl->fl_end < request->fl_start)
1281	continue;
1282	if (fl->fl_start > request->fl_end)
1283	break;
1284	if (lock_is_unlock(fl: request))
1285	added = true;
1286	if (fl->fl_start < request->fl_start)
1287	left = fl;
1288	/ If the next lock in the list has a higher end*
1289	* address than the new one, insert the new one here.
1290	*/
1291	if (fl->fl_end > request->fl_end) {
1292	right = fl;
1293	break;
1294	}
1295	if (fl->fl_start >= request->fl_start) {
1296	/ The new lock completely replaces an old*
1297	* one (This may happen several times).
1298	*/
1299	if (added) {
1300	locks_delete_lock_ctx(fl: &fl->c, dispose: &dispose);
1301	continue;
1302	}
1303	/*
1304	* Replace the old lock with new_fl, and
1305	* remove the old one. It's safe to do the
1306	* insert here since we know that we won't be
1307	* using new_fl later, and that the lock is
1308	* just replacing an existing lock.
1309	*/
1310	error = -ENOLCK;
1311	if (!new_fl)
1312	goto out;
1313	locks_copy_lock(new_fl, request);
1314	locks_move_blocks(new: new_fl, fl: request);
1315	request = new_fl;
1316	new_fl = NULL;
1317	locks_insert_lock_ctx(fl: &request->c,
1318	before: &fl->c.flc_list);
1319	locks_delete_lock_ctx(fl: &fl->c, dispose: &dispose);
1320	added = true;
1321	}
1322	}
1323	}
1324
1325	/*
1326	* The above code only modifies existing locks in case of merging or
1327	* replacing. If new lock(s) need to be inserted all modifications are
1328	* done below this, so it's safe yet to bail out.
1329	*/
1330	error = -ENOLCK; / "no luck" /
1331	if (right && left == right && !new_fl2)
1332	goto out;
1333
1334	error = `0`;
1335	if (!added) {
1336	if (lock_is_unlock(fl: request)) {
1337	if (request->c.flc_flags & FL_EXISTS)
1338	error = -ENOENT;
1339	goto out;
1340	}
1341
1342	if (!new_fl) {
1343	error = -ENOLCK;
1344	goto out;
1345	}
1346	locks_copy_lock(new_fl, request);
1347	locks_move_blocks(new: new_fl, fl: request);
1348	locks_insert_lock_ctx(fl: &new_fl->c, before: &fl->c.flc_list);
1349	fl = new_fl;
1350	new_fl = NULL;
1351	}
1352	if (right) {
1353	if (left == right) {
1354	/ The new lock breaks the old one in two pieces,*
1355	* so we have to use the second new lock.
1356	*/
1357	left = new_fl2;
1358	new_fl2 = NULL;
1359	locks_copy_lock(left, right);
1360	locks_insert_lock_ctx(fl: &left->c, before: &fl->c.flc_list);
1361	}
1362	right->fl_start = request->fl_end + `1`;
1363	locks_wake_up_blocks(blocker: &right->c);
1364	}
1365	if (left) {
1366	left->fl_end = request->fl_start - `1`;
1367	locks_wake_up_blocks(blocker: &left->c);
1368	}
1369	out:
1370	spin_unlock(lock: &ctx->flc_lock);
1371	percpu_up_read(sem: &file_rwsem);
1372	trace_posix_lock_inode(inode, fl: request, ret: error);
1373	/*
1374	* Free any unused locks.
1375	*/
1376	if (new_fl)
1377	locks_free_lock(new_fl);
1378	if (new_fl2)
1379	locks_free_lock(new_fl2);
1380	locks_dispose_list(dispose: &dispose);
1381
1382	return error;
1383	}
1384
/**
 * posix_lock_file - Apply a POSIX-style lock to a file
 * @filp: The file to apply the lock to
 * @fl: The lock to be applied
 * @conflock: Place to return a copy of the conflicting lock, if found.
 *
 * Add a POSIX style lock to a file.
 * We merge adjacent & overlapping locks whenever possible.
 * POSIX locks are sorted by owner task, then by starting address
 *
 * Note that if called with an FL_EXISTS argument, the caller may determine
 * whether or not a lock was successfully freed by testing the return
 * value for -ENOENT.
 *
 * Return: whatever posix_lock_inode() returns for the inode behind @filp.
 */
int posix_lock_file(struct file *filp, struct file_lock *fl,
			struct file_lock *conflock)
{
	return posix_lock_inode(file_inode(filp), fl, conflock);
}
EXPORT_SYMBOL(posix_lock_file);
1405
1406	/**
1407	* posix_lock_inode_wait - Apply a POSIX-style lock to a file
1408	* @inode: inode of file to which lock request should be applied
1409	* @fl: The lock to be applied
1410	*
1411	* Apply a POSIX style lock request to an inode.
1412	*/
1413	static int posix_lock_inode_wait(struct inode inode, struct* file_lock *fl)
1414	{
1415	int error;
1416	might_sleep ();
1417	for (;;) {
1418	error = posix_lock_inode(inode, request: fl, NULL);
1419	if (error != FILE_LOCK_DEFERRED)
1420	break;
1421	error = wait_event_interruptible(fl->c.flc_wait,
1422	list_empty(&fl->c.flc_blocked_member));
1423	if (error)
1424	break;
1425	}
1426	locks_delete_block(fl);
1427	return error;
1428	}
1429
1430	static void lease_clear_pending(struct file_lease fl, int* arg)
1431	{
1432	switch (arg) {
1433	case F_UNLCK:
1434	fl->c.flc_flags &= ~FL_UNLOCK_PENDING;
1435	fallthrough;
1436	case F_RDLCK:
1437	fl->c.flc_flags &= ~FL_DOWNGRADE_PENDING;
1438	}
1439	}
1440
1441	/ We already had a lease on this file; just change its type /
1442	int lease_modify(struct file_lease fl, int* arg, struct list_head *dispose)
1443	{
1444	int error = assign_type(flc: &fl->c, type: arg);
1445
1446	if (error)
1447	return error;
1448	lease_clear_pending(fl, arg);
1449	locks_wake_up_blocks(blocker: &fl->c);
1450	if (arg == F_UNLCK) {
1451	struct file *filp = fl->c.flc_file;
1452
1453	f_delown(filp);
1454	filp->f_owner.signum = `0`;
1455	fasync_helper(`0`, fl->c.flc_file, `0`, &fl->fl_fasync);
1456	if (fl->fl_fasync != NULL) {
1457	printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync);
1458	fl->fl_fasync = NULL;
1459	}
1460	locks_delete_lock_ctx(fl: &fl->c, dispose);
1461	}
1462	return `0`;
1463	}
1464	EXPORT_SYMBOL(lease_modify);
1465
1466	static bool past_time(unsigned long then)
1467	{
1468	if (!then)
1469	/ 0 is a special value meaning "this never expires": /
1470	return false;
1471	return time_after(jiffies, then);
1472	}
1473
1474	static void time_out_leases(struct inode inode, struct* list_head *dispose)
1475	{
1476	struct file_lock_context *ctx = inode->i_flctx;
1477	struct file_lease fl, tmp;
1478
1479	lockdep_assert_held(&ctx->flc_lock);
1480
1481	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) {
1482	trace_time_out_leases(inode, fl);
1483	if (past_time(then: fl->fl_downgrade_time))
1484	lease_modify(fl, F_RDLCK, dispose);
1485	if (past_time(then: fl->fl_break_time))
1486	lease_modify(fl, F_UNLCK, dispose);
1487	}
1488	}
1489
1490	static bool leases_conflict(struct file_lock_core lc, struct* file_lock_core *bc)
1491	{
1492	bool rc;
1493	struct file_lease *lease = file_lease(flc: lc);
1494	struct file_lease *breaker = file_lease(flc: bc);
1495
1496	if (lease->fl_lmops->lm_breaker_owns_lease
1497	&& lease->fl_lmops->lm_breaker_owns_lease(lease))
1498	return false;
1499	if ((bc->flc_flags & FL_LAYOUT) != (lc->flc_flags & FL_LAYOUT)) {
1500	rc = false;
1501	goto trace;
1502	}
1503	if ((bc->flc_flags & FL_DELEG) && (lc->flc_flags & FL_LEASE)) {
1504	rc = false;
1505	goto trace;
1506	}
1507
1508	rc = locks_conflict(caller_flc: bc, sys_flc: lc);
1509	trace:
1510	trace_leases_conflict(conflict: rc, lease, breaker);
1511	return rc;
1512	}
1513
1514	static bool
1515	any_leases_conflict(struct inode inode, struct* file_lease *breaker)
1516	{
1517	struct file_lock_context *ctx = inode->i_flctx;
1518	struct file_lock_core *flc;
1519
1520	lockdep_assert_held(&ctx->flc_lock);
1521
1522	list_for_each_entry(flc, &ctx->flc_lease, flc_list) {
1523	if (leases_conflict(lc: flc, bc: &breaker->c))
1524	return true;
1525	}
1526	return false;
1527	}
1528
1529	/**
1530	* __break_lease - revoke all outstanding leases on file
1531	* @inode: the inode of the file to return
1532	* @mode: O_RDONLY: break only write leases; O_WRONLY or O_RDWR:
1533	* break all leases
1534	* @type: FL_LEASE: break leases and delegations; FL_DELEG: break
1535	* only delegations
1536	*
1537	* break_lease (inlined for speed) has checked there already is at least
1538	* some kind of lock (maybe a lease) on this file. Leases are broken on
1539	* a call to open() or truncate(). This function can sleep unless you
1540	* specified %O_NONBLOCK to your open().
1541	*/
1542	int __break_lease(struct inode inode, unsigned* int mode, unsigned int type)
1543	{
1544	int error = `0`;
1545	struct file_lock_context *ctx;
1546	struct file_lease new_fl, fl, *tmp;
1547	unsigned long break_time;
1548	int want_write = (mode & O_ACCMODE) != O_RDONLY;
1549	LIST_HEAD(dispose);
1550
1551	new_fl = lease_alloc(NULL, type: want_write ? F_WRLCK : F_RDLCK);
1552	if (IS_ERR(ptr: new_fl))
1553	return PTR_ERR(ptr: new_fl);
1554	new_fl->c.flc_flags = type;
1555
1556	/ typically we will check that ctx is non-NULL before calling /
1557	ctx = locks_inode_context(inode);
1558	if (!ctx) {
1559	WARN_ON_ONCE(`1`);
1560	goto free_lock;
1561	}
1562
1563	percpu_down_read(sem: &file_rwsem);
1564	spin_lock(lock: &ctx->flc_lock);
1565
1566	time_out_leases(inode, dispose: &dispose);
1567
1568	if (!any_leases_conflict(inode, breaker: new_fl))
1569	goto out;
1570
1571	break_time = `0`;
1572	if (lease_break_time > `0`) {
1573	break_time = jiffies + lease_break_time * HZ;
1574	if (break_time == `0`)
1575	break_time++; / so that 0 means no break time /
1576	}
1577
1578	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) {
1579	if (!leases_conflict(lc: &fl->c, bc: &new_fl->c))
1580	continue;
1581	if (want_write) {
1582	if (fl->c.flc_flags & FL_UNLOCK_PENDING)
1583	continue;
1584	fl->c.flc_flags \|= FL_UNLOCK_PENDING;
1585	fl->fl_break_time = break_time;
1586	} else {
1587	if (lease_breaking(fl))
1588	continue;
1589	fl->c.flc_flags \|= FL_DOWNGRADE_PENDING;
1590	fl->fl_downgrade_time = break_time;
1591	}
1592	if (fl->fl_lmops->lm_break(fl))
1593	locks_delete_lock_ctx(fl: &fl->c, dispose: &dispose);
1594	}
1595
1596	if (list_empty(head: &ctx->flc_lease))
1597	goto out;
1598
1599	if (mode & O_NONBLOCK) {
1600	trace_break_lease_noblock(inode, fl: new_fl);
1601	error = -EWOULDBLOCK;
1602	goto out;
1603	}
1604
1605	restart:
1606	fl = list_first_entry(&ctx->flc_lease, struct file_lease, c.flc_list);
1607	break_time = fl->fl_break_time;
1608	if (break_time != `0`)
1609	break_time -= jiffies;
1610	if (break_time == `0`)
1611	break_time++;
1612	locks_insert_block(blocker: &fl->c, waiter: &new_fl->c, conflict: leases_conflict);
1613	trace_break_lease_block(inode, fl: new_fl);
1614	spin_unlock(lock: &ctx->flc_lock);
1615	percpu_up_read(sem: &file_rwsem);
1616
1617	locks_dispose_list(dispose: &dispose);
1618	error = wait_event_interruptible_timeout(new_fl->c.flc_wait,
1619	list_empty(&new_fl->c.flc_blocked_member),
1620	break_time);
1621
1622	percpu_down_read(sem: &file_rwsem);
1623	spin_lock(lock: &ctx->flc_lock);
1624	trace_break_lease_unblock(inode, fl: new_fl);
1625	__locks_delete_block(waiter: &new_fl->c);
1626	if (error >= `0`) {
1627	/*
1628	* Wait for the next conflicting lease that has not been
1629	* broken yet
1630	*/
1631	if (error == `0`)
1632	time_out_leases(inode, dispose: &dispose);
1633	if (any_leases_conflict(inode, breaker: new_fl))
1634	goto restart;
1635	error = `0`;
1636	}
1637	out:
1638	spin_unlock(lock: &ctx->flc_lock);
1639	percpu_up_read(sem: &file_rwsem);
1640	locks_dispose_list(dispose: &dispose);
1641	free_lock:
1642	locks_free_lease(new_fl);
1643	return error;
1644	}
1645	EXPORT_SYMBOL(__break_lease);
1646
1647	/**
1648	* lease_get_mtime - update modified time of an inode with exclusive lease
1649	* @inode: the inode
1650	* @time: pointer to a timespec which contains the last modified time
1651	*
1652	* This is to force NFS clients to flush their caches for files with
1653	* exclusive leases. The justification is that if someone has an
1654	* exclusive lease, then they could be modifying it.
1655	*/
1656	void lease_get_mtime(struct inode inode, struct* timespec64 *time)
1657	{
1658	bool has_lease = false;
1659	struct file_lock_context *ctx;
1660	struct file_lock_core *flc;
1661
1662	ctx = locks_inode_context(inode);
1663	if (ctx && !list_empty_careful(head: &ctx->flc_lease)) {
1664	spin_lock(lock: &ctx->flc_lock);
1665	flc = list_first_entry_or_null(&ctx->flc_lease,
1666	struct file_lock_core, flc_list);
1667	if (flc && flc->flc_type == F_WRLCK)
1668	has_lease = true;
1669	spin_unlock(lock: &ctx->flc_lock);
1670	}
1671
1672	if (has_lease)
1673	*time = current_time(inode);
1674	}
1675	EXPORT_SYMBOL(lease_get_mtime);
1676
1677	/**
1678	* fcntl_getlease - Enquire what lease is currently active
1679	* @filp: the file
1680	*
1681	* The value returned by this function will be one of
1682	* (if no lease break is pending):
1683	*
1684	* %F_RDLCK to indicate a shared lease is held.
1685	*
1686	* %F_WRLCK to indicate an exclusive lease is held.
1687	*
1688	* %F_UNLCK to indicate no lease is held.
1689	*
1690	* (if a lease break is pending):
1691	*
1692	* %F_RDLCK to indicate an exclusive lease needs to be
1693	* changed to a shared lease (or removed).
1694	*
1695	* %F_UNLCK to indicate the lease needs to be removed.
1696	*
1697	* XXX: sfr & willy disagree over whether F_INPROGRESS
1698	* should be returned to userspace.
1699	*/
1700	int fcntl_getlease(struct file *filp)
1701	{
1702	struct file_lease *fl;
1703	struct inode *inode = file_inode(f: filp);
1704	struct file_lock_context *ctx;
1705	int type = F_UNLCK;
1706	LIST_HEAD(dispose);
1707
1708	ctx = locks_inode_context(inode);
1709	if (ctx && !list_empty_careful(head: &ctx->flc_lease)) {
1710	percpu_down_read(sem: &file_rwsem);
1711	spin_lock(lock: &ctx->flc_lock);
1712	time_out_leases(inode, dispose: &dispose);
1713	list_for_each_entry(fl, &ctx->flc_lease, c.flc_list) {
1714	if (fl->c.flc_file != filp)
1715	continue;
1716	type = target_leasetype(fl);
1717	break;
1718	}
1719	spin_unlock(lock: &ctx->flc_lock);
1720	percpu_up_read(sem: &file_rwsem);
1721
1722	locks_dispose_list(dispose: &dispose);
1723	}
1724	return type;
1725	}
1726
1727	/**
1728	* check_conflicting_open - see if the given file points to an inode that has
1729	* an existing open that would conflict with the
1730	* desired lease.
1731	* @filp: file to check
1732	* @arg: type of lease that we're trying to acquire
1733	* @flags: current lock flags
1734	*
1735	* Check to see if there's an existing open fd on this file that would
1736	* conflict with the lease we're trying to set.
1737	*/
1738	static int
1739	check_conflicting_open(struct file filp, const* int arg, int flags)
1740	{
1741	struct inode *inode = file_inode(f: filp);
1742	int self_wcount = `0`, self_rcount = `0`;
1743
1744	if (flags & FL_LAYOUT)
1745	return `0`;
1746	if (flags & FL_DELEG)
1747	/ We leave these checks to the caller /
1748	return `0`;
1749
1750	if (arg == F_RDLCK)
1751	return inode_is_open_for_write(inode) ? -EAGAIN : `0`;
1752	else if (arg != F_WRLCK)
1753	return `0`;
1754
1755	/*
1756	* Make sure that only read/write count is from lease requestor.
1757	* Note that this will result in denying write leases when i_writecount
1758	* is negative, which is what we want. (We shouldn't grant write leases
1759	* on files open for execution.)
1760	*/
1761	if (filp->f_mode & FMODE_WRITE)
1762	self_wcount = `1`;
1763	else if (filp->f_mode & FMODE_READ)
1764	self_rcount = `1`;
1765
1766	if (atomic_read(v: &inode->i_writecount) != self_wcount \|\|
1767	atomic_read(v: &inode->i_readcount) != self_rcount)
1768	return -EAGAIN;
1769
1770	return `0`;
1771	}
1772
1773	static int
1774	generic_add_lease(struct file filp, int* arg, struct file_lease *flp, void* **priv)
1775	{
1776	struct file_lease fl, my_fl = NULL, *lease;
1777	struct inode *inode = file_inode(f: filp);
1778	struct file_lock_context *ctx;
1779	bool is_deleg = (*flp)->c.flc_flags & FL_DELEG;
1780	int error;
1781	LIST_HEAD(dispose);
1782
1783	lease = *flp;
1784	trace_generic_add_lease(inode, fl: lease);
1785
1786	/ Note that arg is never F_UNLCK here /
1787	ctx = locks_get_lock_context(inode, type: arg);
1788	if (!ctx)
1789	return -ENOMEM;
1790
1791	/*
1792	* In the delegation case we need mutual exclusion with
1793	* a number of operations that take the i_mutex. We trylock
1794	* because delegations are an optional optimization, and if
1795	* there's some chance of a conflict--we'd rather not
1796	* bother, maybe that's a sign this just isn't a good file to
1797	* hand out a delegation on.
1798	*/
1799	if (is_deleg && !inode_trylock(inode))
1800	return -EAGAIN;
1801
1802	percpu_down_read(sem: &file_rwsem);
1803	spin_lock(lock: &ctx->flc_lock);
1804	time_out_leases(inode, dispose: &dispose);
1805	error = check_conflicting_open(filp, arg, flags: lease->c.flc_flags);
1806	if (error)
1807	goto out;
1808
1809	/*
1810	* At this point, we know that if there is an exclusive
1811	* lease on this file, then we hold it on this filp
1812	* (otherwise our open of this file would have blocked).
1813	* And if we are trying to acquire an exclusive lease,
1814	* then the file is not open by anyone (including us)
1815	* except for this filp.
1816	*/
1817	error = -EAGAIN;
1818	list_for_each_entry(fl, &ctx->flc_lease, c.flc_list) {
1819	if (fl->c.flc_file == filp &&
1820	fl->c.flc_owner == lease->c.flc_owner) {
1821	my_fl = fl;
1822	continue;
1823	}
1824
1825	/*
1826	* No exclusive leases if someone else has a lease on
1827	* this file:
1828	*/
1829	if (arg == F_WRLCK)
1830	goto out;
1831	/*
1832	* Modifying our existing lease is OK, but no getting a
1833	* new lease if someone else is opening for write:
1834	*/
1835	if (fl->c.flc_flags & FL_UNLOCK_PENDING)
1836	goto out;
1837	}
1838
1839	if (my_fl != NULL) {
1840	lease = my_fl;
1841	error = lease->fl_lmops->lm_change(lease, arg, &dispose);
1842	if (error)
1843	goto out;
1844	goto out_setup;
1845	}
1846
1847	error = -EINVAL;
1848	if (!leases_enable)
1849	goto out;
1850
1851	locks_insert_lock_ctx(fl: &lease->c, before: &ctx->flc_lease);
1852	/*
1853	* The check in break_lease() is lockless. It's possible for another
1854	* open to race in after we did the earlier check for a conflicting
1855	* open but before the lease was inserted. Check again for a
1856	* conflicting open and cancel the lease if there is one.
1857	*
1858	* We also add a barrier here to ensure that the insertion of the lock
1859	* precedes these checks.
1860	*/
1861	smp_mb();
1862	error = check_conflicting_open(filp, arg, flags: lease->c.flc_flags);
1863	if (error) {
1864	locks_unlink_lock_ctx(fl: &lease->c);
1865	goto out;
1866	}
1867
1868	out_setup:
1869	if (lease->fl_lmops->lm_setup)
1870	lease->fl_lmops->lm_setup(lease, priv);
1871	out:
1872	spin_unlock(lock: &ctx->flc_lock);
1873	percpu_up_read(sem: &file_rwsem);
1874	locks_dispose_list(dispose: &dispose);
1875	if (is_deleg)
1876	inode_unlock(inode);
1877	if (!error && !my_fl)
1878	*flp = NULL;
1879	return error;
1880	}
1881
1882	static int generic_delete_lease(struct file filp, void* *owner)
1883	{
1884	int error = -EAGAIN;
1885	struct file_lease fl, victim = NULL;
1886	struct inode *inode = file_inode(f: filp);
1887	struct file_lock_context *ctx;
1888	LIST_HEAD(dispose);
1889
1890	ctx = locks_inode_context(inode);
1891	if (!ctx) {
1892	trace_generic_delete_lease(inode, NULL);
1893	return error;
1894	}
1895
1896	percpu_down_read(sem: &file_rwsem);
1897	spin_lock(lock: &ctx->flc_lock);
1898	list_for_each_entry(fl, &ctx->flc_lease, c.flc_list) {
1899	if (fl->c.flc_file == filp &&
1900	fl->c.flc_owner == owner) {
1901	victim = fl;
1902	break;
1903	}
1904	}
1905	trace_generic_delete_lease(inode, fl: victim);
1906	if (victim)
1907	error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
1908	spin_unlock(lock: &ctx->flc_lock);
1909	percpu_up_read(sem: &file_rwsem);
1910	locks_dispose_list(dispose: &dispose);
1911	return error;
1912	}
1913
1914	/**
1915	* generic_setlease - sets a lease on an open file
1916	* @filp: file pointer
1917	* @arg: type of lease to obtain
1918	* @flp: input - file_lock to use, output - file_lock inserted
1919	* @priv: private data for lm_setup (may be NULL if lm_setup
1920	* doesn't require it)
1921	*
1922	* The (input) flp->fl_lmops->lm_break function is required
1923	* by break_lease().
1924	*/
1925	int generic_setlease(struct file filp, int* arg, struct file_lease **flp,
1926	void **priv)
1927	{
1928	switch (arg) {
1929	case F_UNLCK:
1930	return generic_delete_lease(filp, owner: *priv);
1931	case F_RDLCK:
1932	case F_WRLCK:
1933	if (!(*flp)->fl_lmops->lm_break) {
1934	WARN_ON_ONCE(`1`);
1935	return -ENOLCK;
1936	}
1937
1938	return generic_add_lease(filp, arg, flp, priv);
1939	default:
1940	return -EINVAL;
1941	}
1942	}
1943	EXPORT_SYMBOL(generic_setlease);
1944
1945	/*
1946	* Kernel subsystems can register to be notified on any attempt to set
1947	* a new lease with the lease_notifier_chain. This is used by (e.g.) nfsd
1948	* to close files that it may have cached when there is an attempt to set a
1949	* conflicting lease.
1950	*/
1951	static struct srcu_notifier_head lease_notifier_chain;
1952
1953	static inline void
1954	lease_notifier_chain_init(void)
1955	{
1956	srcu_init_notifier_head(nh: &lease_notifier_chain);
1957	}
1958
1959	static inline void
1960	setlease_notifier(int arg, struct file_lease *lease)
1961	{
1962	if (arg != F_UNLCK)
1963	srcu_notifier_call_chain(nh: &lease_notifier_chain, val: arg, v: lease);
1964	}
1965
1966	int lease_register_notifier(struct notifier_block *nb)
1967	{
1968	return srcu_notifier_chain_register(nh: &lease_notifier_chain, nb);
1969	}
1970	EXPORT_SYMBOL_GPL(lease_register_notifier);
1971
1972	void lease_unregister_notifier(struct notifier_block *nb)
1973	{
1974	srcu_notifier_chain_unregister(nh: &lease_notifier_chain, nb);
1975	}
1976	EXPORT_SYMBOL_GPL(lease_unregister_notifier);
1977
1978
1979	int
1980	kernel_setlease(struct file filp, int* arg, struct file_lease *lease, void* **priv)
1981	{
1982	if (lease)
1983	setlease_notifier(arg, lease: *lease);
1984	if (filp->f_op->setlease)
1985	return filp->f_op->setlease(filp, arg, lease, priv);
1986	else
1987	return generic_setlease(filp, arg, lease, priv);
1988	}
1989	EXPORT_SYMBOL_GPL(kernel_setlease);
1990
1991	/**
1992	* vfs_setlease - sets a lease on an open file
1993	* @filp: file pointer
1994	* @arg: type of lease to obtain
1995	* @lease: file_lock to use when adding a lease
1996	* @priv: private info for lm_setup when adding a lease (may be
1997	* NULL if lm_setup doesn't require it)
1998	*
1999	* Call this to establish a lease on the file. The "lease" argument is not
2000	* used for F_UNLCK requests and may be NULL. For commands that set or alter
2001	* an existing lease, the ``(*lease)->fl_lmops->lm_break`` operation must be
2002	* set; if not, this function will return -ENOLCK (and generate a scary-looking
2003	* stack trace).
2004	*
2005	* The "priv" pointer is passed directly to the lm_setup function as-is. It
2006	* may be NULL if the lm_setup operation doesn't require it.
2007	*/
2008	int
2009	vfs_setlease(struct file filp, int* arg, struct file_lease *lease, void* **priv)
2010	{
2011	struct inode *inode = file_inode(f: filp);
2012	vfsuid_t vfsuid = i_uid_into_vfsuid(idmap: file_mnt_idmap(file: filp), inode);
2013	int error;
2014
2015	if ((!vfsuid_eq_kuid(vfsuid, current_fsuid())) && !capable(CAP_LEASE))
2016	return -EACCES;
2017	if (!S_ISREG(inode->i_mode))
2018	return -EINVAL;
2019	error = security_file_lock(file: filp, cmd: arg);
2020	if (error)
2021	return error;
2022	return kernel_setlease(filp, arg, lease, priv);
2023	}
2024	EXPORT_SYMBOL_GPL(vfs_setlease);
2025
2026	static int do_fcntl_add_lease(unsigned int fd, struct file filp, int* arg)
2027	{
2028	struct file_lease *fl;
2029	struct fasync_struct *new;
2030	int error;
2031
2032	fl = lease_alloc(filp, type: arg);
2033	if (IS_ERR(ptr: fl))
2034	return PTR_ERR(ptr: fl);
2035
2036	new = fasync_alloc();
2037	if (!new) {
2038	locks_free_lease(fl);
2039	return -ENOMEM;
2040	}
2041	new->fa_fd = fd;
2042
2043	error = vfs_setlease(filp, arg, &fl, (void **)&new);
2044	if (fl)
2045	locks_free_lease(fl);
2046	if (new)
2047	fasync_free(new);
2048	return error;
2049	}
2050
2051	/**
2052	* fcntl_setlease - sets a lease on an open file
2053	* @fd: open file descriptor
2054	* @filp: file pointer
2055	* @arg: type of lease to obtain
2056	*
2057	* Call this fcntl to establish a lease on the file.
2058	* Note that you also need to call %F_SETSIG to
2059	* receive a signal when the lease is broken.
2060	*/
2061	int fcntl_setlease(unsigned int fd, struct file filp, int* arg)
2062	{
2063	if (arg == F_UNLCK)
2064	return vfs_setlease(filp, F_UNLCK, NULL, (void **)&filp);
2065	return do_fcntl_add_lease(fd, filp, arg);
2066	}
2067
2068	/**
2069	* flock_lock_inode_wait - Apply a FLOCK-style lock to a file
2070	* @inode: inode of the file to apply to
2071	* @fl: The lock to be applied
2072	*
2073	* Apply a FLOCK style lock request to an inode.
2074	*/
2075	static int flock_lock_inode_wait(struct inode inode, struct* file_lock *fl)
2076	{
2077	int error;
2078	might_sleep();
2079	for (;;) {
2080	error = flock_lock_inode(inode, request: fl);
2081	if (error != FILE_LOCK_DEFERRED)
2082	break;
2083	error = wait_event_interruptible(fl->c.flc_wait,
2084	list_empty(&fl->c.flc_blocked_member));
2085	if (error)
2086	break;
2087	}
2088	locks_delete_block(fl);
2089	return error;
2090	}
2091
2092	/**
2093	* locks_lock_inode_wait - Apply a lock to an inode
2094	* @inode: inode of the file to apply to
2095	* @fl: The lock to be applied
2096	*
2097	* Apply a POSIX or FLOCK style lock request to an inode.
2098	*/
2099	int locks_lock_inode_wait(struct inode inode, struct* file_lock *fl)
2100	{
2101	int res = `0`;
2102	switch (fl->c.flc_flags & (FL_POSIX\|FL_FLOCK)) {
2103	case FL_POSIX:
2104	res = posix_lock_inode_wait(inode, fl);
2105	break;
2106	case FL_FLOCK:
2107	res = flock_lock_inode_wait(inode, fl);
2108	break;
2109	default:
2110	BUG();
2111	}
2112	return res;
2113	}
2114	EXPORT_SYMBOL(locks_lock_inode_wait);
2115
2116	/**
2117	* sys_flock: - flock() system call.
2118	* @fd: the file descriptor to lock.
2119	* @cmd: the type of lock to apply.
2120	*
2121	* Apply a %FL_FLOCK style lock to an open file descriptor.
2122	* The @cmd can be one of:
2123	*
2124	* - %LOCK_SH -- a shared lock.
2125	* - %LOCK_EX -- an exclusive lock.
2126	* - %LOCK_UN -- remove an existing lock.
2127	* - %LOCK_MAND -- a 'mandatory' flock. (DEPRECATED)
2128	*
2129	* %LOCK_MAND support has been removed from the kernel.
2130	*/
2131	SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
2132	{
2133	int can_sleep, error, type;
2134	struct file_lock fl;
2135	struct fd f;
2136
2137	/*
2138	* LOCK_MAND locks were broken for a long time in that they never
2139	* conflicted with one another and didn't prevent any sort of open,
2140	* read or write activity.
2141	*
2142	* Just ignore these requests now, to preserve legacy behavior, but
2143	* throw a warning to let people know that they don't actually work.
2144	*/
2145	if (cmd & LOCK_MAND) {
2146	pr_warn_once("%s(%d): Attempt to set a LOCK_MAND lock via flock(2). This support has been removed and the request ignored.\n", current->comm, current->pid);
2147	return `0`;
2148	}
2149
2150	type = flock_translate_cmd(cmd: cmd & ~LOCK_NB);
2151	if (type < `0`)
2152	return type;
2153
2154	error = -EBADF;
2155	f = fdget(fd);
2156	if (!f.file)
2157	return error;
2158
2159	if (type != F_UNLCK && !(f.file->f_mode & (FMODE_READ \| FMODE_WRITE)))
2160	goto out_putf;
2161
2162	flock_make_lock(filp: f.file, fl: &fl, type);
2163
2164	error = security_file_lock(file: f.file, cmd: fl.c.flc_type);
2165	if (error)
2166	goto out_putf;
2167
2168	can_sleep = !(cmd & LOCK_NB);
2169	if (can_sleep)
2170	fl.c.flc_flags \|= FL_SLEEP;
2171
2172	if (f.file->f_op->flock)
2173	error = f.file->f_op->flock(f.file,
2174	(can_sleep) ? F_SETLKW : F_SETLK,
2175	&fl);
2176	else
2177	error = locks_lock_file_wait(filp: f.file, fl: &fl);
2178
2179	locks_release_private(&fl);
2180	out_putf:
2181	fdput(fd: f);
2182
2183	return error;
2184	}
2185
2186	/**
2187	* vfs_test_lock - test file byte range lock
2188	* @filp: The file to test lock for
2189	* @fl: The lock to test; also used to hold result
2190	*
2191	* Returns -ERRNO on failure. Indicates presence of conflicting lock by
2192	* setting conf->fl_type to something other than F_UNLCK.
2193	*/
2194	int vfs_test_lock(struct file filp, struct* file_lock *fl)
2195	{
2196	WARN_ON_ONCE(filp != fl->c.flc_file);
2197	if (filp->f_op->lock)
2198	return filp->f_op->lock(filp, F_GETLK, fl);
2199	posix_test_lock(filp, fl);
2200	return `0`;
2201	}
2202	EXPORT_SYMBOL_GPL(vfs_test_lock);
2203
2204	/**
2205	* locks_translate_pid - translate a file_lock's fl_pid number into a namespace
2206	* @fl: The file_lock who's fl_pid should be translated
2207	* @ns: The namespace into which the pid should be translated
2208	*
2209	* Used to translate a fl_pid into a namespace virtual pid number
2210	*/
2211	static pid_t locks_translate_pid(struct file_lock_core fl, struct* pid_namespace *ns)
2212	{
2213	pid_t vnr;
2214	struct pid *pid;
2215
2216	if (fl->flc_flags & FL_OFDLCK)
2217	return -`1`;
2218
2219	/ Remote locks report a negative pid value /
2220	if (fl->flc_pid <= `0`)
2221	return fl->flc_pid;
2222
2223	/*
2224	* If the flock owner process is dead and its pid has been already
2225	* freed, the translation below won't work, but we still want to show
2226	* flock owner pid number in init pidns.
2227	*/
2228	if (ns == &init_pid_ns)
2229	return (pid_t) fl->flc_pid;
2230
2231	rcu_read_lock();
2232	pid = find_pid_ns(nr: fl->flc_pid, ns: &init_pid_ns);
2233	vnr = pid_nr_ns(pid, ns);
2234	rcu_read_unlock();
2235	return vnr;
2236	}
2237
2238	static int posix_lock_to_flock(struct flock flock, struct* file_lock *fl)
2239	{
2240	flock->l_pid = locks_translate_pid(fl: &fl->c, ns: task_active_pid_ns(current));
2241	#if BITS_PER_LONG == 32
2242	/*
2243	* Make sure we can represent the posix lock via
2244	* legacy 32bit flock.
2245	*/
2246	if (fl->fl_start > OFFT_OFFSET_MAX)
2247	return -EOVERFLOW;
2248	if (fl->fl_end != OFFSET_MAX && fl->fl_end > OFFT_OFFSET_MAX)
2249	return -EOVERFLOW;
2250	#endif
2251	flock->l_start = fl->fl_start;
2252	flock->l_len = fl->fl_end == OFFSET_MAX ? `0` :
2253	fl->fl_end - fl->fl_start + `1`;
2254	flock->l_whence = `0`;
2255	flock->l_type = fl->c.flc_type;
2256	return `0`;
2257	}
2258
#if BITS_PER_LONG == 32
/*
 * struct flock64 counterpart of posix_lock_to_flock(): same field mapping
 * (pid translated into the caller's active pid namespace, l_len == 0 for a
 * lock ending at OFFSET_MAX, l_whence == 0), but with no overflow check.
 */
static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)
{
	flock->l_pid = locks_translate_pid(&fl->c, task_active_pid_ns(current));
	flock->l_start = fl->fl_start;
	flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
		fl->fl_end - fl->fl_start + 1;
	flock->l_whence = 0;
	flock->l_type = fl->c.flc_type;
}
#endif
2270
2271	/ Report the first existing lock that would conflict with l.*
2272	* This implements the F_GETLK command of fcntl().
2273	*/
2274	int fcntl_getlk(struct file filp, unsigned* int cmd, struct flock *flock)
2275	{
2276	struct file_lock *fl;
2277	int error;
2278
2279	fl = locks_alloc_lock();
2280	if (fl == NULL)
2281	return -ENOMEM;
2282	error = -EINVAL;
2283	if (cmd != F_OFD_GETLK && flock->l_type != F_RDLCK
2284	&& flock->l_type != F_WRLCK)
2285	goto out;
2286
2287	error = flock_to_posix_lock(filp, fl, l: flock);
2288	if (error)
2289	goto out;
2290
2291	if (cmd == F_OFD_GETLK) {
2292	error = -EINVAL;
2293	if (flock->l_pid != `0`)
2294	goto out;
2295
2296	fl->c.flc_flags \|= FL_OFDLCK;
2297	fl->c.flc_owner = filp;
2298	}
2299
2300	error = vfs_test_lock(filp, fl);
2301	if (error)
2302	goto out;
2303
2304	flock->l_type = fl->c.flc_type;
2305	if (fl->c.flc_type != F_UNLCK) {
2306	error = posix_lock_to_flock(flock, fl);
2307	if (error)
2308	goto out;
2309	}
2310	out:
2311	locks_free_lock(fl);
2312	return error;
2313	}
2314
2315	/**
2316	* vfs_lock_file - file byte range lock
2317	* @filp: The file to apply the lock to
2318	* @cmd: type of locking operation (F_SETLK, F_GETLK, etc.)
2319	* @fl: The lock to be applied
2320	* @conf: Place to return a copy of the conflicting lock, if found.
2321	*
2322	* A caller that doesn't care about the conflicting lock may pass NULL
2323	* as the final argument.
2324	*
2325	* If the filesystem defines a private ->lock() method, then @conf will
2326	* be left unchanged; so a caller that cares should initialize it to
2327	* some acceptable default.
2328	*
2329	* To avoid blocking kernel daemons, such as lockd, that need to acquire POSIX
2330	* locks, the ->lock() interface may return asynchronously, before the lock has
2331	* been granted or denied by the underlying filesystem, if (and only if)
2332	* lm_grant is set. Additionally EXPORT_OP_ASYNC_LOCK in export_operations
2333	* flags need to be set.
2334	*
2335	* Callers expecting ->lock() to return asynchronously will only use F_SETLK,
2336	* not F_SETLKW; they will set FL_SLEEP if (and only if) the request is for a
2337	* blocking lock. When ->lock() does return asynchronously, it must return
2338	* FILE_LOCK_DEFERRED, and call ->lm_grant() when the lock request completes.
2339	* If the request is for non-blocking lock the file system should return
2340	* FILE_LOCK_DEFERRED then try to get the lock and call the callback routine
2341	* with the result. If the request timed out the callback routine will return a
2342	* nonzero return code and the file system should release the lock. The file
2343	* system is also responsible to keep a corresponding posix lock when it
2344	* grants a lock so the VFS can find out which locks are locally held and do
2345	* the correct lock cleanup when required.
2346	* The underlying filesystem must not drop the kernel lock or call
2347	* ->lm_grant() before returning to the caller with a FILE_LOCK_DEFERRED
2348	* return code.
2349	*/
2350	int vfs_lock_file(struct file filp, unsigned* int cmd, struct file_lock fl, struct* file_lock *conf)
2351	{
2352	WARN_ON_ONCE(filp != fl->c.flc_file);
2353	if (filp->f_op->lock)
2354	return filp->f_op->lock(filp, cmd, fl);
2355	else
2356	return posix_lock_file(filp, fl, conf);
2357	}
2358	EXPORT_SYMBOL_GPL(vfs_lock_file);
2359
2360	static int do_lock_file_wait(struct file filp, unsigned* int cmd,
2361	struct file_lock *fl)
2362	{
2363	int error;
2364
2365	error = security_file_lock(file: filp, cmd: fl->c.flc_type);
2366	if (error)
2367	return error;
2368
2369	for (;;) {
2370	error = vfs_lock_file(filp, cmd, fl, NULL);
2371	if (error != FILE_LOCK_DEFERRED)
2372	break;
2373	error = wait_event_interruptible(fl->c.flc_wait,
2374	list_empty(&fl->c.flc_blocked_member));
2375	if (error)
2376	break;
2377	}
2378	locks_delete_block(fl);
2379
2380	return error;
2381	}
2382
2383	/ Ensure that fl->fl_file has compatible f_mode for F_SETLK calls /
2384	static int
2385	check_fmode_for_setlk(struct file_lock *fl)
2386	{
2387	switch (fl->c.flc_type) {
2388	case F_RDLCK:
2389	if (!(fl->c.flc_file->f_mode & FMODE_READ))
2390	return -EBADF;
2391	break;
2392	case F_WRLCK:
2393	if (!(fl->c.flc_file->f_mode & FMODE_WRITE))
2394	return -EBADF;
2395	}
2396	return `0`;
2397	}
2398
2399	/ Apply the lock described by l to an open file descriptor.*
2400	* This implements both the F_SETLK and F_SETLKW commands of fcntl().
2401	*/
2402	int fcntl_setlk(unsigned int fd, struct file filp, unsigned* int cmd,
2403	struct flock *flock)
2404	{
2405	struct file_lock *file_lock = locks_alloc_lock();
2406	struct inode *inode = file_inode(f: filp);
2407	struct file *f;
2408	int error;
2409
2410	if (file_lock == NULL)
2411	return -ENOLCK;
2412
2413	error = flock_to_posix_lock(filp, fl: file_lock, l: flock);
2414	if (error)
2415	goto out;
2416
2417	error = check_fmode_for_setlk(fl: file_lock);
2418	if (error)
2419	goto out;
2420
2421	/*
2422	* If the cmd is requesting file-private locks, then set the
2423	* FL_OFDLCK flag and override the owner.
2424	*/
2425	switch (cmd) {
2426	case F_OFD_SETLK:
2427	error = -EINVAL;
2428	if (flock->l_pid != `0`)
2429	goto out;
2430
2431	cmd = F_SETLK;
2432	file_lock->c.flc_flags \|= FL_OFDLCK;
2433	file_lock->c.flc_owner = filp;
2434	break;
2435	case F_OFD_SETLKW:
2436	error = -EINVAL;
2437	if (flock->l_pid != `0`)
2438	goto out;
2439
2440	cmd = F_SETLKW;
2441	file_lock->c.flc_flags \|= FL_OFDLCK;
2442	file_lock->c.flc_owner = filp;
2443	fallthrough;
2444	case F_SETLKW:
2445	file_lock->c.flc_flags \|= FL_SLEEP;
2446	}
2447
2448	error = do_lock_file_wait(filp, cmd, fl: file_lock);
2449
2450	/*
2451	* Attempt to detect a close/fcntl race and recover by releasing the
2452	* lock that was just acquired. There is no need to do that when we're
2453	* unlocking though, or for OFD locks.
2454	*/
2455	if (!error && file_lock->c.flc_type != F_UNLCK &&
2456	!(file_lock->c.flc_flags & FL_OFDLCK)) {
2457	struct files_struct *files = current->files;
2458	/*
2459	* We need that spin_lock here - it prevents reordering between
2460	* update of i_flctx->flc_posix and check for it done in
2461	* close(). rcu_read_lock() wouldn't do.
2462	*/
2463	spin_lock(lock: &files->file_lock);
2464	f = files_lookup_fd_locked(files, fd);
2465	spin_unlock(lock: &files->file_lock);
2466	if (f != filp) {
2467	file_lock->c.flc_type = F_UNLCK;
2468	error = do_lock_file_wait(filp, cmd, fl: file_lock);
2469	WARN_ON_ONCE(error);
2470	error = -EBADF;
2471	}
2472	}
2473	out:
2474	trace_fcntl_setlk(inode, fl: file_lock, ret: error);
2475	locks_free_lock(file_lock);
2476	return error;
2477	}
2478
2479	#if BITS_PER_LONG == 32
2480	/ Report the first existing lock that would conflict with l.*
2481	* This implements the F_GETLK command of fcntl().
2482	*/
2483	int fcntl_getlk64(struct file filp, unsigned* int cmd, struct flock64 *flock)
2484	{
2485	struct file_lock *fl;
2486	int error;
2487
2488	fl = locks_alloc_lock();
2489	if (fl == NULL)
2490	return -ENOMEM;
2491
2492	error = -EINVAL;
2493	if (cmd != F_OFD_GETLK && flock->l_type != F_RDLCK
2494	&& flock->l_type != F_WRLCK)
2495	goto out;
2496
2497	error = flock64_to_posix_lock(filp, fl, flock);
2498	if (error)
2499	goto out;
2500
2501	if (cmd == F_OFD_GETLK) {
2502	error = -EINVAL;
2503	if (flock->l_pid != `0`)
2504	goto out;
2505
2506	fl->c.flc_flags \|= FL_OFDLCK;
2507	fl->c.flc_owner = filp;
2508	}
2509
2510	error = vfs_test_lock(filp, fl);
2511	if (error)
2512	goto out;
2513
2514	flock->l_type = fl->c.flc_type;
2515	if (fl->c.flc_type != F_UNLCK)
2516	posix_lock_to_flock64(flock, fl);
2517
2518	out:
2519	locks_free_lock(fl);
2520	return error;
2521	}
2522
2523	/ Apply the lock described by l to an open file descriptor.*
2524	* This implements both the F_SETLK and F_SETLKW commands of fcntl().
2525	*/
2526	int fcntl_setlk64(unsigned int fd, struct file filp, unsigned* int cmd,
2527	struct flock64 *flock)
2528	{
2529	struct file_lock *file_lock = locks_alloc_lock();
2530	struct file *f;
2531	int error;
2532
2533	if (file_lock == NULL)
2534	return -ENOLCK;
2535
2536	error = flock64_to_posix_lock(filp, file_lock, flock);
2537	if (error)
2538	goto out;
2539
2540	error = check_fmode_for_setlk(file_lock);
2541	if (error)
2542	goto out;
2543
2544	/*
2545	* If the cmd is requesting file-private locks, then set the
2546	* FL_OFDLCK flag and override the owner.
2547	*/
2548	switch (cmd) {
2549	case F_OFD_SETLK:
2550	error = -EINVAL;
2551	if (flock->l_pid != `0`)
2552	goto out;
2553
2554	cmd = F_SETLK64;
2555	file_lock->c.flc_flags \|= FL_OFDLCK;
2556	file_lock->c.flc_owner = filp;
2557	break;
2558	case F_OFD_SETLKW:
2559	error = -EINVAL;
2560	if (flock->l_pid != `0`)
2561	goto out;
2562
2563	cmd = F_SETLKW64;
2564	file_lock->c.flc_flags \|= FL_OFDLCK;
2565	file_lock->c.flc_owner = filp;
2566	fallthrough;
2567	case F_SETLKW64:
2568	file_lock->c.flc_flags \|= FL_SLEEP;
2569	}
2570
2571	error = do_lock_file_wait(filp, cmd, file_lock);
2572
2573	/*
2574	* Attempt to detect a close/fcntl race and recover by releasing the
2575	* lock that was just acquired. There is no need to do that when we're
2576	* unlocking though, or for OFD locks.
2577	*/
2578	if (!error && file_lock->c.flc_type != F_UNLCK &&
2579	!(file_lock->c.flc_flags & FL_OFDLCK)) {
2580	struct files_struct *files = current->files;
2581	/*
2582	* We need that spin_lock here - it prevents reordering between
2583	* update of i_flctx->flc_posix and check for it done in
2584	* close(). rcu_read_lock() wouldn't do.
2585	*/
2586	spin_lock(&files->file_lock);
2587	f = files_lookup_fd_locked(files, fd);
2588	spin_unlock(&files->file_lock);
2589	if (f != filp) {
2590	file_lock->c.flc_type = F_UNLCK;
2591	error = do_lock_file_wait(filp, cmd, file_lock);
2592	WARN_ON_ONCE(error);
2593	error = -EBADF;
2594	}
2595	}
2596	out:
2597	locks_free_lock(file_lock);
2598	return error;
2599	}
2600	#endif /* BITS_PER_LONG == 32 */
2601
2602	/*
2603	* This function is called when the file is being removed
2604	* from the task's fd array. POSIX locks belonging to this task
2605	* are deleted at this time.
2606	*/
2607	void locks_remove_posix(struct file *filp, fl_owner_t owner)
2608	{
2609	int error;
2610	struct inode *inode = file_inode(f: filp);
2611	struct file_lock lock;
2612	struct file_lock_context *ctx;
2613
2614	/*
2615	* If there are no locks held on this file, we don't need to call
2616	* posix_lock_file(). Another process could be setting a lock on this
2617	* file at the same time, but we wouldn't remove that lock anyway.
2618	*/
2619	ctx = locks_inode_context(inode);
2620	if (!ctx \|\| list_empty(head: &ctx->flc_posix))
2621	return;
2622
2623	locks_init_lock(&lock);
2624	lock.c.flc_type = F_UNLCK;
2625	lock.c.flc_flags = FL_POSIX \| FL_CLOSE;
2626	lock.fl_start = `0`;
2627	lock.fl_end = OFFSET_MAX;
2628	lock.c.flc_owner = owner;
2629	lock.c.flc_pid = current->tgid;
2630	lock.c.flc_file = filp;
2631	lock.fl_ops = NULL;
2632	lock.fl_lmops = NULL;
2633
2634	error = vfs_lock_file(filp, F_SETLK, &lock, NULL);
2635
2636	if (lock.fl_ops && lock.fl_ops->fl_release_private)
2637	lock.fl_ops->fl_release_private(&lock);
2638	trace_locks_remove_posix(inode, fl: &lock, ret: error);
2639	}
2640	EXPORT_SYMBOL(locks_remove_posix);
2641
2642	/ The i_flctx must be valid when calling into here /
2643	static void
2644	locks_remove_flock(struct file filp, struct* file_lock_context *flctx)
2645	{
2646	struct file_lock fl;
2647	struct inode *inode = file_inode(f: filp);
2648
2649	if (list_empty(head: &flctx->flc_flock))
2650	return;
2651
2652	flock_make_lock(filp, fl: &fl, F_UNLCK);
2653	fl.c.flc_flags \|= FL_CLOSE;
2654
2655	if (filp->f_op->flock)
2656	filp->f_op->flock(filp, F_SETLKW, &fl);
2657	else
2658	flock_lock_inode(inode, request: &fl);
2659
2660	if (fl.fl_ops && fl.fl_ops->fl_release_private)
2661	fl.fl_ops->fl_release_private(&fl);
2662	}
2663
2664	/ The i_flctx must be valid when calling into here /
2665	static void
2666	locks_remove_lease(struct file filp, struct* file_lock_context *ctx)
2667	{
2668	struct file_lease fl, tmp;
2669	LIST_HEAD(dispose);
2670
2671	if (list_empty(head: &ctx->flc_lease))
2672	return;
2673
2674	percpu_down_read(sem: &file_rwsem);
2675	spin_lock(lock: &ctx->flc_lock);
2676	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list)
2677	if (filp == fl->c.flc_file)
2678	lease_modify(fl, F_UNLCK, &dispose);
2679	spin_unlock(lock: &ctx->flc_lock);
2680	percpu_up_read(sem: &file_rwsem);
2681
2682	locks_dispose_list(dispose: &dispose);
2683	}
2684
2685	/*
2686	* This function is called on the last close of an open file.
2687	*/
2688	void locks_remove_file(struct file *filp)
2689	{
2690	struct file_lock_context *ctx;
2691
2692	ctx = locks_inode_context(inode: file_inode(f: filp));
2693	if (!ctx)
2694	return;
2695
2696	/ remove any OFD locks /
2697	locks_remove_posix(filp, filp);
2698
2699	/ remove flock locks /
2700	locks_remove_flock(filp, flctx: ctx);
2701
2702	/ remove any leases /
2703	locks_remove_lease(filp, ctx);
2704
2705	spin_lock(lock: &ctx->flc_lock);
2706	locks_check_ctx_file_list(filp, list: &ctx->flc_posix, list_type: "POSIX");
2707	locks_check_ctx_file_list(filp, list: &ctx->flc_flock, list_type: "FLOCK");
2708	locks_check_ctx_file_list(filp, list: &ctx->flc_lease, list_type: "LEASE");
2709	spin_unlock(lock: &ctx->flc_lock);
2710	}
2711
2712	/**
2713	* vfs_cancel_lock - file byte range unblock lock
2714	* @filp: The file to apply the unblock to
2715	* @fl: The lock to be unblocked
2716	*
2717	* Used by lock managers to cancel blocked requests
2718	*/
2719	int vfs_cancel_lock(struct file filp, struct* file_lock *fl)
2720	{
2721	WARN_ON_ONCE(filp != fl->c.flc_file);
2722	if (filp->f_op->lock)
2723	return filp->f_op->lock(filp, F_CANCELLK, fl);
2724	return `0`;
2725	}
2726	EXPORT_SYMBOL_GPL(vfs_cancel_lock);
2727
2728	/**
2729	* vfs_inode_has_locks - are any file locks held on @inode?
2730	* @inode: inode to check for locks
2731	*
2732	* Return true if there are any FL_POSIX or FL_FLOCK locks currently
2733	* set on @inode.
2734	*/
2735	bool vfs_inode_has_locks(struct inode *inode)
2736	{
2737	struct file_lock_context *ctx;
2738	bool ret;
2739
2740	ctx = locks_inode_context(inode);
2741	if (!ctx)
2742	return false;
2743
2744	spin_lock(lock: &ctx->flc_lock);
2745	ret = !list_empty(head: &ctx->flc_posix) \|\| !list_empty(head: &ctx->flc_flock);
2746	spin_unlock(lock: &ctx->flc_lock);
2747	return ret;
2748	}
2749	EXPORT_SYMBOL_GPL(vfs_inode_has_locks);
2750
2751	#ifdef CONFIG_PROC_FS
2752	#include <linux/proc_fs.h>
2753	#include <linux/seq_file.h>
2754
2755	struct locks_iterator {
2756	int li_cpu;
2757	loff_t li_pos;
2758	};
2759
2760	static void lock_get_status(struct seq_file f, struct* file_lock_core *flc,
2761	loff_t id, char pfx, int* repeat)
2762	{
2763	struct inode *inode = NULL;
2764	unsigned int pid;
2765	struct pid_namespace *proc_pidns = proc_pid_ns(sb: file_inode(f: f->file)->i_sb);
2766	int type = flc->flc_type;
2767	struct file_lock *fl = file_lock(flc);
2768
2769	pid = locks_translate_pid(fl: flc, ns: proc_pidns);
2770
2771	/*
2772	* If lock owner is dead (and pid is freed) or not visible in current
2773	* pidns, zero is shown as a pid value. Check lock info from
2774	* init_pid_ns to get saved lock pid value.
2775	*/
2776	if (flc->flc_file != NULL)
2777	inode = file_inode(f: flc->flc_file);
2778
2779	seq_printf(m: f, fmt: "%lld: ", id);
2780
2781	if (repeat)
2782	seq_printf(m: f, fmt: "%s", repeat - `1` + (int*)strlen(pfx), pfx);
2783
2784	if (flc->flc_flags & FL_POSIX) {
2785	if (flc->flc_flags & FL_ACCESS)
2786	seq_puts(m: f, s: "ACCESS");
2787	else if (flc->flc_flags & FL_OFDLCK)
2788	seq_puts(m: f, s: "OFDLCK");
2789	else
2790	seq_puts(m: f, s: "POSIX ");
2791
2792	seq_printf(m: f, fmt: " %s ",
2793	(inode == NULL) ? "NOINODE" : "ADVISORY ");
2794	} else if (flc->flc_flags & FL_FLOCK) {
2795	seq_puts(m: f, s: "FLOCK ADVISORY ");
2796	} else if (flc->flc_flags & (FL_LEASE\|FL_DELEG\|FL_LAYOUT)) {
2797	struct file_lease *lease = file_lease(flc);
2798
2799	type = target_leasetype(fl: lease);
2800
2801	if (flc->flc_flags & FL_DELEG)
2802	seq_puts(m: f, s: "DELEG ");
2803	else
2804	seq_puts(m: f, s: "LEASE ");
2805
2806	if (lease_breaking(fl: lease))
2807	seq_puts(m: f, s: "BREAKING ");
2808	else if (flc->flc_file)
2809	seq_puts(m: f, s: "ACTIVE ");
2810	else
2811	seq_puts(m: f, s: "BREAKER ");
2812	} else {
2813	seq_puts(m: f, s: "UNKNOWN UNKNOWN ");
2814	}
2815
2816	seq_printf(m: f, fmt: "%s ", (type == F_WRLCK) ? "WRITE" :
2817	(type == F_RDLCK) ? "READ" : "UNLCK");
2818	if (inode) {
2819	/ userspace relies on this representation of dev_t /
2820	seq_printf(m: f, fmt: "%d %02x:%02x:%lu ", pid,
2821	MAJOR(inode->i_sb->s_dev),
2822	MINOR(inode->i_sb->s_dev), inode->i_ino);
2823	} else {
2824	seq_printf(m: f, fmt: "%d <none>:0 ", pid);
2825	}
2826	if (flc->flc_flags & FL_POSIX) {
2827	if (fl->fl_end == OFFSET_MAX)
2828	seq_printf(m: f, fmt: "%Ld EOF\n", fl->fl_start);
2829	else
2830	seq_printf(m: f, fmt: "%Ld %Ld\n", fl->fl_start, fl->fl_end);
2831	} else {
2832	seq_puts(m: f, s: "0 EOF\n");
2833	}
2834	}
2835
2836	static struct file_lock_core get_next_blocked_member(struct* file_lock_core *node)
2837	{
2838	struct file_lock_core *tmp;
2839
2840	/ NULL node or root node /
2841	if (node == NULL \|\| node->flc_blocker == NULL)
2842	return NULL;
2843
2844	/ Next member in the linked list could be itself /
2845	tmp = list_next_entry(node, flc_blocked_member);
2846	if (list_entry_is_head(tmp, &node->flc_blocker->flc_blocked_requests,
2847	flc_blocked_member)
2848	\|\| tmp == node) {
2849	return NULL;
2850	}
2851
2852	return tmp;
2853	}
2854
2855	static int locks_show(struct seq_file f, void* *v)
2856	{
2857	struct locks_iterator *iter = f->private;
2858	struct file_lock_core cur, tmp;
2859	struct pid_namespace *proc_pidns = proc_pid_ns(sb: file_inode(f: f->file)->i_sb);
2860	int level = `0`;
2861
2862	cur = hlist_entry(v, struct file_lock_core, flc_link);
2863
2864	if (locks_translate_pid(fl: cur, ns: proc_pidns) == `0`)
2865	return `0`;
2866
2867	/ View this crossed linked list as a binary tree, the first member of flc_blocked_requests*
2868	* is the left child of current node, the next silibing in flc_blocked_member is the
2869	* right child, we can alse get the parent of current node from flc_blocker, so this
2870	* question becomes traversal of a binary tree
2871	*/
2872	while (cur != NULL) {
2873	if (level)
2874	lock_get_status(f, flc: cur, id: iter->li_pos, pfx: "-> ", repeat: level);
2875	else
2876	lock_get_status(f, flc: cur, id: iter->li_pos, pfx: "", repeat: level);
2877
2878	if (!list_empty(head: &cur->flc_blocked_requests)) {
2879	/ Turn left /
2880	cur = list_first_entry_or_null(&cur->flc_blocked_requests,
2881	struct file_lock_core,
2882	flc_blocked_member);
2883	level++;
2884	} else {
2885	/ Turn right /
2886	tmp = get_next_blocked_member(node: cur);
2887	/ Fall back to parent node /
2888	while (tmp == NULL && cur->flc_blocker != NULL) {
2889	cur = cur->flc_blocker;
2890	level--;
2891	tmp = get_next_blocked_member(node: cur);
2892	}
2893	cur = tmp;
2894	}
2895	}
2896
2897	return `0`;
2898	}
2899
2900	static void __show_fd_locks(struct seq_file *f,
2901	struct list_head head, int* *id,
2902	struct file filp, struct* files_struct *files)
2903	{
2904	struct file_lock_core *fl;
2905
2906	list_for_each_entry(fl, head, flc_list) {
2907
2908	if (filp != fl->flc_file)
2909	continue;
2910	if (fl->flc_owner != files && fl->flc_owner != filp)
2911	continue;
2912
2913	(*id)++;
2914	seq_puts(m: f, s: "lock:\t");
2915	lock_get_status(f, flc: fl, id: *id, pfx: "", repeat: `0`);
2916	}
2917	}
2918
2919	void show_fd_locks(struct seq_file *f,
2920	struct file filp, struct* files_struct *files)
2921	{
2922	struct inode *inode = file_inode(f: filp);
2923	struct file_lock_context *ctx;
2924	int id = `0`;
2925
2926	ctx = locks_inode_context(inode);
2927	if (!ctx)
2928	return;
2929
2930	spin_lock(lock: &ctx->flc_lock);
2931	__show_fd_locks(f, head: &ctx->flc_flock, id: &id, filp, files);
2932	__show_fd_locks(f, head: &ctx->flc_posix, id: &id, filp, files);
2933	__show_fd_locks(f, head: &ctx->flc_lease, id: &id, filp, files);
2934	spin_unlock(lock: &ctx->flc_lock);
2935	}
2936
2937	static void locks_start(struct* seq_file f, loff_t pos)
2938	__acquires(&blocked_lock_lock)
2939	{
2940	struct locks_iterator *iter = f->private;
2941
2942	iter->li_pos = *pos + `1`;
2943	percpu_down_write(&file_rwsem);
2944	spin_lock(lock: &blocked_lock_lock);
2945	return seq_hlist_start_percpu(head: &file_lock_list.hlist, cpu: &iter->li_cpu, pos: *pos);
2946	}
2947
2948	static void locks_next(struct* seq_file f, void* v, loff_t pos)
2949	{
2950	struct locks_iterator *iter = f->private;
2951
2952	++iter->li_pos;
2953	return seq_hlist_next_percpu(v, head: &file_lock_list.hlist, cpu: &iter->li_cpu, pos);
2954	}
2955
2956	static void locks_stop(struct seq_file f, void* *v)
2957	__releases(&blocked_lock_lock)
2958	{
2959	spin_unlock(lock: &blocked_lock_lock);
2960	percpu_up_write(&file_rwsem);
2961	}
2962
2963	static const struct seq_operations locks_seq_operations = {
2964	.start = locks_start,
2965	.next = locks_next,
2966	.stop = locks_stop,
2967	.show = locks_show,
2968	};
2969
2970	static int __init proc_locks_init(void)
2971	{
2972	proc_create_seq_private(name: "locks", mode: `0`, NULL, ops: &locks_seq_operations,
2973	state_size: sizeof(struct locks_iterator), NULL);
2974	return `0`;
2975	}
2976	fs_initcall(proc_locks_init);
2977	#endif
2978
2979	static int __init filelock_init(void)
2980	{
2981	int i;
2982
2983	flctx_cache = kmem_cache_create(name: "file_lock_ctx",
2984	size: sizeof(struct file_lock_context), align: `0`, SLAB_PANIC, NULL);
2985
2986	filelock_cache = kmem_cache_create(name: "file_lock_cache",
2987	size: sizeof(struct file_lock), align: `0`, SLAB_PANIC, NULL);
2988
2989	filelease_cache = kmem_cache_create(name: "file_lock_cache",
2990	size: sizeof(struct file_lease), align: `0`, SLAB_PANIC, NULL);
2991
2992	for_each_possible_cpu(i) {
2993	struct file_lock_list_struct *fll = per_cpu_ptr(&file_lock_list, i);
2994
2995	spin_lock_init(&fll->lock);
2996	INIT_HLIST_HEAD(&fll->hlist);
2997	}
2998
2999	lease_notifier_chain_init();
3000	return `0`;
3001	}
3002	core_initcall(filelock_init);
3003

source code of linux/fs/locks.c