fcntl.c source code [linux/fs/fcntl.c]

// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/fcntl.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */
7
8	#include <linux/syscalls.h>
9	#include <linux/init.h>
10	#include <linux/mm.h>
11	#include <linux/sched/task.h>
12	#include <linux/fs.h>
13	#include <linux/filelock.h>
14	#include <linux/file.h>
15	#include <linux/fdtable.h>
16	#include <linux/capability.h>
17	#include <linux/dnotify.h>
18	#include <linux/slab.h>
19	#include <linux/module.h>
20	#include <linux/pipe_fs_i.h>
21	#include <linux/security.h>
22	#include <linux/ptrace.h>
23	#include <linux/signal.h>
24	#include <linux/rcupdate.h>
25	#include <linux/pid_namespace.h>
26	#include <linux/user_namespace.h>
27	#include <linux/memfd.h>
28	#include <linux/compat.h>
29	#include <linux/mount.h>
30	#include <linux/rw_hint.h>
31
32	#include <linux/poll.h>
33	#include <asm/siginfo.h>
34	#include <linux/uaccess.h>
35
/* The f_flags bits that setfl() takes from the caller-supplied argument. */
#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
37
38	static int setfl(int fd, struct file * filp, unsigned int arg)
39	{
40	struct inode * inode = file_inode(f: filp);
41	int error = `0`;
42
43	/*
44	* O_APPEND cannot be cleared if the file is marked as append-only
45	* and the file is open for write.
46	*/
47	if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
48	return -EPERM;
49
50	/ O_NOATIME can only be set by the owner or superuser /
51	if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
52	if (!inode_owner_or_capable(idmap: file_mnt_idmap(file: filp), inode))
53	return -EPERM;
54
55	/ required for strict SunOS emulation /
56	if (O_NONBLOCK != O_NDELAY)
57	if (arg & O_NDELAY)
58	arg \|= O_NONBLOCK;
59
60	/ Pipe packetized mode is controlled by O_DIRECT flag /
61	if (!S_ISFIFO(inode->i_mode) &&
62	(arg & O_DIRECT) &&
63	!(filp->f_mode & FMODE_CAN_ODIRECT))
64	return -EINVAL;
65
66	if (filp->f_op->check_flags)
67	error = filp->f_op->check_flags(arg);
68	if (error)
69	return error;
70
71	/*
72	* ->fasync() is responsible for setting the FASYNC bit.
73	*/
74	if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) {
75	error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != `0`);
76	if (error < `0`)
77	goto out;
78	if (error > `0`)
79	error = `0`;
80	}
81	spin_lock(lock: &filp->f_lock);
82	filp->f_flags = (arg & SETFL_MASK) \| (filp->f_flags & ~SETFL_MASK);
83	filp->f_iocb_flags = iocb_flags(file: filp);
84	spin_unlock(lock: &filp->f_lock);
85
86	out:
87	return error;
88	}
89
90	static void f_modown(struct file filp, struct* pid pid, enum* pid_type type,
91	int force)
92	{
93	write_lock_irq(&filp->f_owner.lock);
94	if (force \|\| !filp->f_owner.pid) {
95	put_pid(pid: filp->f_owner.pid);
96	filp->f_owner.pid = get_pid(pid);
97	filp->f_owner.pid_type = type;
98
99	if (pid) {
100	const struct cred *cred = current_cred();
101	filp->f_owner.uid = cred->uid;
102	filp->f_owner.euid = cred->euid;
103	}
104	}
105	write_unlock_irq(&filp->f_owner.lock);
106	}
107
108	void __f_setown(struct file filp, struct* pid pid, enum* pid_type type,
109	int force)
110	{
111	security_file_set_fowner(file: filp);
112	f_modown(filp, pid, type, force);
113	}
114	EXPORT_SYMBOL(__f_setown);
115
116	int f_setown(struct file filp, int* who, int force)
117	{
118	enum pid_type type;
119	struct pid *pid = NULL;
120	int ret = `0`;
121
122	type = PIDTYPE_TGID;
123	if (who < `0`) {
124	/ avoid overflow below /
125	if (who == INT_MIN)
126	return -EINVAL;
127
128	type = PIDTYPE_PGID;
129	who = -who;
130	}
131
132	rcu_read_lock();
133	if (who) {
134	pid = find_vpid(nr: who);
135	if (!pid)
136	ret = -ESRCH;
137	}
138
139	if (!ret)
140	__f_setown(filp, pid, type, force);
141	rcu_read_unlock();
142
143	return ret;
144	}
145	EXPORT_SYMBOL(f_setown);
146
147	void f_delown(struct file *filp)
148	{
149	f_modown(filp, NULL, type: PIDTYPE_TGID, force: `1`);
150	}
151
152	pid_t f_getown(struct file *filp)
153	{
154	pid_t pid = `0`;
155
156	read_lock_irq(&filp->f_owner.lock);
157	rcu_read_lock();
158	if (pid_task(pid: filp->f_owner.pid, filp->f_owner.pid_type)) {
159	pid = pid_vnr(pid: filp->f_owner.pid);
160	if (filp->f_owner.pid_type == PIDTYPE_PGID)
161	pid = -pid;
162	}
163	rcu_read_unlock();
164	read_unlock_irq(&filp->f_owner.lock);
165	return pid;
166	}
167
168	static int f_setown_ex(struct file filp, unsigned* long arg)
169	{
170	struct f_owner_ex __user owner_p = (void* __user *)arg;
171	struct f_owner_ex owner;
172	struct pid *pid;
173	int type;
174	int ret;
175
176	ret = copy_from_user(to: &owner, from: owner_p, n: sizeof(owner));
177	if (ret)
178	return -EFAULT;
179
180	switch (owner.type) {
181	case F_OWNER_TID:
182	type = PIDTYPE_PID;
183	break;
184
185	case F_OWNER_PID:
186	type = PIDTYPE_TGID;
187	break;
188
189	case F_OWNER_PGRP:
190	type = PIDTYPE_PGID;
191	break;
192
193	default:
194	return -EINVAL;
195	}
196
197	rcu_read_lock();
198	pid = find_vpid(nr: owner.pid);
199	if (owner.pid && !pid)
200	ret = -ESRCH;
201	else
202	__f_setown(filp, pid, type, `1`);
203	rcu_read_unlock();
204
205	return ret;
206	}
207
208	static int f_getown_ex(struct file filp, unsigned* long arg)
209	{
210	struct f_owner_ex __user owner_p = (void* __user *)arg;
211	struct f_owner_ex owner = {};
212	int ret = `0`;
213
214	read_lock_irq(&filp->f_owner.lock);
215	rcu_read_lock();
216	if (pid_task(pid: filp->f_owner.pid, filp->f_owner.pid_type))
217	owner.pid = pid_vnr(pid: filp->f_owner.pid);
218	rcu_read_unlock();
219	switch (filp->f_owner.pid_type) {
220	case PIDTYPE_PID:
221	owner.type = F_OWNER_TID;
222	break;
223
224	case PIDTYPE_TGID:
225	owner.type = F_OWNER_PID;
226	break;
227
228	case PIDTYPE_PGID:
229	owner.type = F_OWNER_PGRP;
230	break;
231
232	default:
233	WARN_ON(`1`);
234	ret = -EINVAL;
235	break;
236	}
237	read_unlock_irq(&filp->f_owner.lock);
238
239	if (!ret) {
240	ret = copy_to_user(to: owner_p, from: &owner, n: sizeof(owner));
241	if (ret)
242	ret = -EFAULT;
243	}
244	return ret;
245	}
246
247	#ifdef CONFIG_CHECKPOINT_RESTORE
248	static int f_getowner_uids(struct file filp, unsigned* long arg)
249	{
250	struct user_namespace *user_ns = current_user_ns();
251	uid_t __user dst = (void* __user *)arg;
252	uid_t src[`2`];
253	int err;
254
255	read_lock_irq(&filp->f_owner.lock);
256	src[`0`] = from_kuid(to: user_ns, uid: filp->f_owner.uid);
257	src[`1`] = from_kuid(to: user_ns, uid: filp->f_owner.euid);
258	read_unlock_irq(&filp->f_owner.lock);
259
260	err = put_user(src[`0`], &dst[`0`]);
261	err \|= put_user(src[`1`], &dst[`1`]);
262
263	return err;
264	}
265	#else
266	static int f_getowner_uids(struct file filp, unsigned* long arg)
267	{
268	return -EINVAL;
269	}
270	#endif
271
272	static bool rw_hint_valid(u64 hint)
273	{
274	BUILD_BUG_ON(WRITE_LIFE_NOT_SET != RWH_WRITE_LIFE_NOT_SET);
275	BUILD_BUG_ON(WRITE_LIFE_NONE != RWH_WRITE_LIFE_NONE);
276	BUILD_BUG_ON(WRITE_LIFE_SHORT != RWH_WRITE_LIFE_SHORT);
277	BUILD_BUG_ON(WRITE_LIFE_MEDIUM != RWH_WRITE_LIFE_MEDIUM);
278	BUILD_BUG_ON(WRITE_LIFE_LONG != RWH_WRITE_LIFE_LONG);
279	BUILD_BUG_ON(WRITE_LIFE_EXTREME != RWH_WRITE_LIFE_EXTREME);
280
281	switch (hint) {
282	case RWH_WRITE_LIFE_NOT_SET:
283	case RWH_WRITE_LIFE_NONE:
284	case RWH_WRITE_LIFE_SHORT:
285	case RWH_WRITE_LIFE_MEDIUM:
286	case RWH_WRITE_LIFE_LONG:
287	case RWH_WRITE_LIFE_EXTREME:
288	return true;
289	default:
290	return false;
291	}
292	}
293
294	static long fcntl_get_rw_hint(struct file file, unsigned* int cmd,
295	unsigned long arg)
296	{
297	struct inode *inode = file_inode(f: file);
298	u64 __user argp = (u64 __user )arg;
299	u64 hint = READ_ONCE(inode->i_write_hint);
300
301	if (copy_to_user(to: argp, from: &hint, n: sizeof(*argp)))
302	return -EFAULT;
303	return `0`;
304	}
305
306	static long fcntl_set_rw_hint(struct file file, unsigned* int cmd,
307	unsigned long arg)
308	{
309	struct inode *inode = file_inode(f: file);
310	u64 __user argp = (u64 __user )arg;
311	u64 hint;
312
313	if (copy_from_user(to: &hint, from: argp, n: sizeof(hint)))
314	return -EFAULT;
315	if (!rw_hint_valid(hint))
316	return -EINVAL;
317
318	WRITE_ONCE(inode->i_write_hint, hint);
319
320	/*
321	* file->f_mapping->host may differ from inode. As an example,
322	* blkdev_open() modifies file->f_mapping.
323	*/
324	if (file->f_mapping->host != inode)
325	WRITE_ONCE(file->f_mapping->host->i_write_hint, hint);
326
327	return `0`;
328	}
329
330	static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
331	struct file *filp)
332	{
333	void __user argp = (void* __user *)arg;
334	int argi = (int)arg;
335	struct flock flock;
336	long err = -EINVAL;
337
338	switch (cmd) {
339	case F_DUPFD:
340	err = f_dupfd(from: argi, file: filp, flags: `0`);
341	break;
342	case F_DUPFD_CLOEXEC:
343	err = f_dupfd(from: argi, file: filp, O_CLOEXEC);
344	break;
345	case F_GETFD:
346	err = get_close_on_exec(fd) ? FD_CLOEXEC : `0`;
347	break;
348	case F_SETFD:
349	err = `0`;
350	set_close_on_exec(fd, flag: argi & FD_CLOEXEC);
351	break;
352	case F_GETFL:
353	err = filp->f_flags;
354	break;
355	case F_SETFL:
356	err = setfl(fd, filp, arg: argi);
357	break;
358	#if BITS_PER_LONG != 32
359	/ 32-bit arches must use fcntl64() /
360	case F_OFD_GETLK:
361	#endif
362	case F_GETLK:
363	if (copy_from_user(to: &flock, from: argp, n: sizeof(flock)))
364	return -EFAULT;
365	err = fcntl_getlk(filp, cmd, &flock);
366	if (!err && copy_to_user(to: argp, from: &flock, n: sizeof(flock)))
367	return -EFAULT;
368	break;
369	#if BITS_PER_LONG != 32
370	/ 32-bit arches must use fcntl64() /
371	case F_OFD_SETLK:
372	case F_OFD_SETLKW:
373	fallthrough;
374	#endif
375	case F_SETLK:
376	case F_SETLKW:
377	if (copy_from_user(to: &flock, from: argp, n: sizeof(flock)))
378	return -EFAULT;
379	err = fcntl_setlk(fd, filp, cmd, &flock);
380	break;
381	case F_GETOWN:
382	/*
383	* XXX If f_owner is a process group, the
384	* negative return value will get converted
385	* into an error. Oops. If we keep the
386	* current syscall conventions, the only way
387	* to fix this will be in libc.
388	*/
389	err = f_getown(filp);
390	force_successful_syscall_return();
391	break;
392	case F_SETOWN:
393	err = f_setown(filp, argi, `1`);
394	break;
395	case F_GETOWN_EX:
396	err = f_getown_ex(filp, arg);
397	break;
398	case F_SETOWN_EX:
399	err = f_setown_ex(filp, arg);
400	break;
401	case F_GETOWNER_UIDS:
402	err = f_getowner_uids(filp, arg);
403	break;
404	case F_GETSIG:
405	err = filp->f_owner.signum;
406	break;
407	case F_SETSIG:
408	/ arg == 0 restores default behaviour. /
409	if (!valid_signal(sig: argi)) {
410	break;
411	}
412	err = `0`;
413	filp->f_owner.signum = argi;
414	break;
415	case F_GETLEASE:
416	err = fcntl_getlease(filp);
417	break;
418	case F_SETLEASE:
419	err = fcntl_setlease(fd, filp, arg: argi);
420	break;
421	case F_NOTIFY:
422	err = fcntl_dirnotify(fd, filp, argi);
423	break;
424	case F_SETPIPE_SZ:
425	case F_GETPIPE_SZ:
426	err = pipe_fcntl(filp, cmd, arg: argi);
427	break;
428	case F_ADD_SEALS:
429	case F_GET_SEALS:
430	err = memfd_fcntl(file: filp, cmd, arg: argi);
431	break;
432	case F_GET_RW_HINT:
433	err = fcntl_get_rw_hint(file: filp, cmd, arg);
434	break;
435	case F_SET_RW_HINT:
436	err = fcntl_set_rw_hint(file: filp, cmd, arg);
437	break;
438	default:
439	break;
440	}
441	return err;
442	}
443
/*
 * Return 1 when @cmd is one of the commands the fcntl entry points let
 * through for files with FMODE_PATH set, 0 otherwise.
 */
static int check_fcntl_cmd(unsigned cmd)
{
	switch (cmd) {
	case F_DUPFD:
	case F_DUPFD_CLOEXEC:
	case F_GETFD:
	case F_SETFD:
	case F_GETFL:
		return 1;
	default:
		return 0;
	}
}
456
457	SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
458	{
459	struct fd f = fdget_raw(fd);
460	long err = -EBADF;
461
462	if (!f.file)
463	goto out;
464
465	if (unlikely(f.file->f_mode & FMODE_PATH)) {
466	if (!check_fcntl_cmd(cmd))
467	goto out1;
468	}
469
470	err = security_file_fcntl(file: f.file, cmd, arg);
471	if (!err)
472	err = do_fcntl(fd, cmd, arg, filp: f.file);
473
474	out1:
475	fdput(fd: f);
476	out:
477	return err;
478	}
479
#if BITS_PER_LONG == 32
/*
 * fcntl64() system call entry point, built only when BITS_PER_LONG == 32.
 *
 * Handles the struct flock64 based lock commands itself and defers every
 * other command to do_fcntl().
 */
SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
		unsigned long, arg)
{
	void __user *argp = (void __user *)arg;
	struct fd f = fdget_raw(fd);
	struct flock64 flock;
	long err = -EBADF;

	if (!f.file)
		goto out;

	if (unlikely(f.file->f_mode & FMODE_PATH)) {
		if (!check_fcntl_cmd(cmd))
			goto out1;
	}

	err = security_file_fcntl(f.file, cmd, arg);
	if (err)
		goto out1;

	switch (cmd) {
	case F_GETLK64:
	case F_OFD_GETLK:
		err = -EFAULT;
		if (copy_from_user(&flock, argp, sizeof(flock)))
			break;
		err = fcntl_getlk64(f.file, cmd, &flock);
		if (!err && copy_to_user(argp, &flock, sizeof(flock)))
			err = -EFAULT;
		break;
	case F_SETLK64:
	case F_SETLKW64:
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
		err = -EFAULT;
		if (copy_from_user(&flock, argp, sizeof(flock)))
			break;
		err = fcntl_setlk64(fd, f.file, cmd, &flock);
		break;
	default:
		err = do_fcntl(fd, cmd, arg, f.file);
		break;
	}
out1:
	fdput(f);
out:
	return err;
}
#endif
530
531	#ifdef CONFIG_COMPAT
532	/ careful - don't use anywhere else /
533	#define copy_flock_fields(dst, src) \
534	(dst)->l_type = (src)->l_type; \
535	(dst)->l_whence = (src)->l_whence; \
536	(dst)->l_start = (src)->l_start; \
537	(dst)->l_len = (src)->l_len; \
538	(dst)->l_pid = (src)->l_pid;
539
540	static int get_compat_flock(struct flock kfl, const* struct compat_flock __user *ufl)
541	{
542	struct compat_flock fl;
543
544	if (copy_from_user(to: &fl, from: ufl, n: sizeof(struct compat_flock)))
545	return -EFAULT;
546	copy_flock_fields(kfl, &fl);
547	return `0`;
548	}
549
550	static int get_compat_flock64(struct flock kfl, const* struct compat_flock64 __user *ufl)
551	{
552	struct compat_flock64 fl;
553
554	if (copy_from_user(to: &fl, from: ufl, n: sizeof(struct compat_flock64)))
555	return -EFAULT;
556	copy_flock_fields(kfl, &fl);
557	return `0`;
558	}
559
560	static int put_compat_flock(const struct flock kfl, struct* compat_flock __user *ufl)
561	{
562	struct compat_flock fl;
563
564	memset(&fl, `0`, sizeof(struct compat_flock));
565	copy_flock_fields(&fl, kfl);
566	if (copy_to_user(to: ufl, from: &fl, n: sizeof(struct compat_flock)))
567	return -EFAULT;
568	return `0`;
569	}
570
571	static int put_compat_flock64(const struct flock kfl, struct* compat_flock64 __user *ufl)
572	{
573	struct compat_flock64 fl;
574
575	BUILD_BUG_ON(sizeof(kfl->l_start) > sizeof(ufl->l_start));
576	BUILD_BUG_ON(sizeof(kfl->l_len) > sizeof(ufl->l_len));
577
578	memset(&fl, `0`, sizeof(struct compat_flock64));
579	copy_flock_fields(&fl, kfl);
580	if (copy_to_user(to: ufl, from: &fl, n: sizeof(struct compat_flock64)))
581	return -EFAULT;
582	return `0`;
583	}
584	#undef copy_flock_fields
585
/*
 * Map the F_*LK64 lock commands onto their plain F_*LK equivalents;
 * every other command is returned unchanged.
 */
static unsigned int
convert_fcntl_cmd(unsigned int cmd)
{
	switch (cmd) {
	case F_GETLK64:
		return F_GETLK;
	case F_SETLK64:
		return F_SETLK;
	case F_SETLKW64:
		return F_SETLKW;
	default:
		return cmd;
	}
}
600
601	/*
602	* GETLK was successful and we need to return the data, but it needs to fit in
603	* the compat structure.
604	* l_start shouldn't be too big, unless the original start + end is greater than
605	* COMPAT_OFF_T_MAX, in which case the app was asking for trouble, so we return
606	* -EOVERFLOW in that case. l_len could be too big, in which case we just
607	* truncate it, and only allow the app to see that part of the conflicting lock
608	* that might make sense to it anyway
609	*/
610	static int fixup_compat_flock(struct flock *flock)
611	{
612	if (flock->l_start > COMPAT_OFF_T_MAX)
613	return -EOVERFLOW;
614	if (flock->l_len > COMPAT_OFF_T_MAX)
615	flock->l_len = COMPAT_OFF_T_MAX;
616	return `0`;
617	}
618
619	static long do_compat_fcntl64(unsigned int fd, unsigned int cmd,
620	compat_ulong_t arg)
621	{
622	struct fd f = fdget_raw(fd);
623	struct flock flock;
624	long err = -EBADF;
625
626	if (!f.file)
627	return err;
628
629	if (unlikely(f.file->f_mode & FMODE_PATH)) {
630	if (!check_fcntl_cmd(cmd))
631	goto out_put;
632	}
633
634	err = security_file_fcntl(file: f.file, cmd, arg);
635	if (err)
636	goto out_put;
637
638	switch (cmd) {
639	case F_GETLK:
640	err = get_compat_flock(kfl: &flock, ufl: compat_ptr(uptr: arg));
641	if (err)
642	break;
643	err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
644	if (err)
645	break;
646	err = fixup_compat_flock(flock: &flock);
647	if (!err)
648	err = put_compat_flock(kfl: &flock, ufl: compat_ptr(uptr: arg));
649	break;
650	case F_GETLK64:
651	case F_OFD_GETLK:
652	err = get_compat_flock64(kfl: &flock, ufl: compat_ptr(uptr: arg));
653	if (err)
654	break;
655	err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
656	if (!err)
657	err = put_compat_flock64(kfl: &flock, ufl: compat_ptr(uptr: arg));
658	break;
659	case F_SETLK:
660	case F_SETLKW:
661	err = get_compat_flock(kfl: &flock, ufl: compat_ptr(uptr: arg));
662	if (err)
663	break;
664	err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
665	break;
666	case F_SETLK64:
667	case F_SETLKW64:
668	case F_OFD_SETLK:
669	case F_OFD_SETLKW:
670	err = get_compat_flock64(kfl: &flock, ufl: compat_ptr(uptr: arg));
671	if (err)
672	break;
673	err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
674	break;
675	default:
676	err = do_fcntl(fd, cmd, arg, filp: f.file);
677	break;
678	}
679	out_put:
680	fdput(fd: f);
681	return err;
682	}
683
684	COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
685	compat_ulong_t, arg)
686	{
687	return do_compat_fcntl64(fd, cmd, arg);
688	}
689
690	COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
691	compat_ulong_t, arg)
692	{
693	switch (cmd) {
694	case F_GETLK64:
695	case F_SETLK64:
696	case F_SETLKW64:
697	case F_OFD_GETLK:
698	case F_OFD_SETLK:
699	case F_OFD_SETLKW:
700	return -EINVAL;
701	}
702	return do_compat_fcntl64(fd, cmd, arg);
703	}
704	#endif
705
706	/ Table to convert sigio signal codes into poll band bitmaps /
707
708	static const __poll_t band_table[NSIGPOLL] = {
709	EPOLLIN \| EPOLLRDNORM, / POLL_IN /
710	EPOLLOUT \| EPOLLWRNORM \| EPOLLWRBAND, / POLL_OUT /
711	EPOLLIN \| EPOLLRDNORM \| EPOLLMSG, / POLL_MSG /
712	EPOLLERR, / POLL_ERR /
713	EPOLLPRI \| EPOLLRDBAND, / POLL_PRI /
714	EPOLLHUP \| EPOLLERR / POLL_HUP /
715	};
716
717	static inline int sigio_perm(struct task_struct *p,
718	struct fown_struct fown, int* sig)
719	{
720	const struct cred *cred;
721	int ret;
722
723	rcu_read_lock();
724	cred = __task_cred(p);
725	ret = ((uid_eq(left: fown->euid, GLOBAL_ROOT_UID) \|\|
726	uid_eq(fown->euid, cred->suid) \|\| uid_eq(fown->euid, cred->uid) \|\|
727	uid_eq(fown->uid, cred->suid) \|\| uid_eq(fown->uid, cred->uid)) &&
728	!security_file_send_sigiotask(p, fown, sig));
729	rcu_read_unlock();
730	return ret;
731	}
732
733	static void send_sigio_to_task(struct task_struct *p,
734	struct fown_struct *fown,
735	int fd, int reason, enum pid_type type)
736	{
737	/*
738	* F_SETSIG can change ->signum lockless in parallel, make
739	* sure we read it once and use the same value throughout.
740	*/
741	int signum = READ_ONCE(fown->signum);
742
743	if (!sigio_perm(p, fown, sig: signum))
744	return;
745
746	switch (signum) {
747	default: {
748	kernel_siginfo_t si;
749
750	/ Queue a rt signal with the appropriate fd as its*
751	value. We use SI_SIGIO as the source, not
752	SI_KERNEL, since kernel signals always get
753	delivered even if we can't queue. Failure to
754	queue in this case _should_ be reported; we fall
755	back to SIGIO in that case. --sct /*
756	clear_siginfo(info: &si);
757	si.si_signo = signum;
758	si.si_errno = `0`;
759	si.si_code = reason;
760	/*
761	* Posix definies POLL_IN and friends to be signal
762	* specific si_codes for SIG_POLL. Linux extended
763	* these si_codes to other signals in a way that is
764	* ambiguous if other signals also have signal
765	* specific si_codes. In that case use SI_SIGIO instead
766	* to remove the ambiguity.
767	*/
768	if ((signum != SIGPOLL) && sig_specific_sicodes(signum))
769	si.si_code = SI_SIGIO;
770
771	/* Make sure we are called with one of the POLL_*
772	reasons, otherwise we could leak kernel stack into
773	userspace. /*
774	BUG_ON((reason < POLL_IN) \|\| ((reason - POLL_IN) >= NSIGPOLL));
775	if (reason - POLL_IN >= NSIGPOLL)
776	si.si_band = ~`0L`;
777	else
778	si.si_band = mangle_poll(val: band_table[reason - POLL_IN]);
779	si.si_fd = fd;
780	if (!do_send_sig_info(sig: signum, info: &si, p, type))
781	break;
782	}
783	fallthrough; / fall back on the old plain SIGIO signal /
784	case `0`:
785	do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type);
786	}
787	}
788
789	void send_sigio(struct fown_struct fown, int* fd, int band)
790	{
791	struct task_struct *p;
792	enum pid_type type;
793	unsigned long flags;
794	struct pid *pid;
795
796	read_lock_irqsave(&fown->lock, flags);
797
798	type = fown->pid_type;
799	pid = fown->pid;
800	if (!pid)
801	goto out_unlock_fown;
802
803	if (type <= PIDTYPE_TGID) {
804	rcu_read_lock();
805	p = pid_task(pid, PIDTYPE_PID);
806	if (p)
807	send_sigio_to_task(p, fown, fd, reason: band, type);
808	rcu_read_unlock();
809	} else {
810	read_lock(&tasklist_lock);
811	do_each_pid_task(pid, type, p) {
812	send_sigio_to_task(p, fown, fd, reason: band, type);
813	} while_each_pid_task(pid, type, p);
814	read_unlock(&tasklist_lock);
815	}
816	out_unlock_fown:
817	read_unlock_irqrestore(&fown->lock, flags);
818	}
819
820	static void send_sigurg_to_task(struct task_struct *p,
821	struct fown_struct fown, enum* pid_type type)
822	{
823	if (sigio_perm(p, fown, SIGURG))
824	do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, type);
825	}
826
827	int send_sigurg(struct fown_struct *fown)
828	{
829	struct task_struct *p;
830	enum pid_type type;
831	struct pid *pid;
832	unsigned long flags;
833	int ret = `0`;
834
835	read_lock_irqsave(&fown->lock, flags);
836
837	type = fown->pid_type;
838	pid = fown->pid;
839	if (!pid)
840	goto out_unlock_fown;
841
842	ret = `1`;
843
844	if (type <= PIDTYPE_TGID) {
845	rcu_read_lock();
846	p = pid_task(pid, PIDTYPE_PID);
847	if (p)
848	send_sigurg_to_task(p, fown, type);
849	rcu_read_unlock();
850	} else {
851	read_lock(&tasklist_lock);
852	do_each_pid_task(pid, type, p) {
853	send_sigurg_to_task(p, fown, type);
854	} while_each_pid_task(pid, type, p);
855	read_unlock(&tasklist_lock);
856	}
857	out_unlock_fown:
858	read_unlock_irqrestore(&fown->lock, flags);
859	return ret;
860	}
861
862	static DEFINE_SPINLOCK(fasync_lock);
863	static struct kmem_cache *fasync_cache __ro_after_init;
864
865	/*
866	* Remove a fasync entry. If successfully removed, return
867	* positive and clear the FASYNC flag. If no entry exists,
868	* do nothing and return 0.
869	*
870	* NOTE! It is very important that the FASYNC flag always
871	* match the state "is the filp on a fasync list".
872	*
873	*/
874	int fasync_remove_entry(struct file filp, struct* fasync_struct **fapp)
875	{
876	struct fasync_struct fa, *fp;
877	int result = `0`;
878
879	spin_lock(lock: &filp->f_lock);
880	spin_lock(lock: &fasync_lock);
881	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
882	if (fa->fa_file != filp)
883	continue;
884
885	write_lock_irq(&fa->fa_lock);
886	fa->fa_file = NULL;
887	write_unlock_irq(&fa->fa_lock);
888
889	*fp = fa->fa_next;
890	kfree_rcu(fa, fa_rcu);
891	filp->f_flags &= ~FASYNC;
892	result = `1`;
893	break;
894	}
895	spin_unlock(lock: &fasync_lock);
896	spin_unlock(lock: &filp->f_lock);
897	return result;
898	}
899
900	struct fasync_struct fasync_alloc(void*)
901	{
902	return kmem_cache_alloc(cachep: fasync_cache, GFP_KERNEL);
903	}
904
905	/*
906	* NOTE! This can be used only for unused fasync entries:
907	* entries that actually got inserted on the fasync list
908	* need to be released by rcu - see fasync_remove_entry.
909	*/
910	void fasync_free(struct fasync_struct *new)
911	{
912	kmem_cache_free(s: fasync_cache, objp: new);
913	}
914
915	/*
916	* Insert a new entry into the fasync list. Return the pointer to the
917	* old one if we didn't use the new one.
918	*
919	* NOTE! It is very important that the FASYNC flag always
920	* match the state "is the filp on a fasync list".
921	*/
922	struct fasync_struct fasync_insert_entry(int* fd, struct file filp, struct* fasync_struct fapp, struct** fasync_struct *new)
923	{
924	struct fasync_struct fa, *fp;
925
926	spin_lock(lock: &filp->f_lock);
927	spin_lock(lock: &fasync_lock);
928	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
929	if (fa->fa_file != filp)
930	continue;
931
932	write_lock_irq(&fa->fa_lock);
933	fa->fa_fd = fd;
934	write_unlock_irq(&fa->fa_lock);
935	goto out;
936	}
937
938	rwlock_init(&new->fa_lock);
939	new->magic = FASYNC_MAGIC;
940	new->fa_file = filp;
941	new->fa_fd = fd;
942	new->fa_next = *fapp;
943	rcu_assign_pointer(*fapp, new);
944	filp->f_flags \|= FASYNC;
945
946	out:
947	spin_unlock(lock: &fasync_lock);
948	spin_unlock(lock: &filp->f_lock);
949	return fa;
950	}
951
952	/*
953	* Add a fasync entry. Return negative on error, positive if
954	* added, and zero if did nothing but change an existing one.
955	*/
956	static int fasync_add_entry(int fd, struct file filp, struct* fasync_struct **fapp)
957	{
958	struct fasync_struct *new;
959
960	new = fasync_alloc();
961	if (!new)
962	return -ENOMEM;
963
964	/*
965	* fasync_insert_entry() returns the old (update) entry if
966	* it existed.
967	*
968	* So free the (unused) new entry and return 0 to let the
969	* caller know that we didn't add any new fasync entries.
970	*/
971	if (fasync_insert_entry(fd, filp, fapp, new)) {
972	fasync_free(new);
973	return `0`;
974	}
975
976	return `1`;
977	}
978
/*
 * fasync_helper() is used by almost all character device drivers
 * to set up the fasync queue, and for regular files by the file
 * lease code. It returns negative on error, 0 if it did no changes
 * and positive if it added/deleted the entry.
 *
 * A zero @on removes @filp from *@fapp; a non-zero @on adds it (or
 * updates the fd of an existing entry).
 */
int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
{
	if (!on)
		return fasync_remove_entry(filp, fapp);
	return fasync_add_entry(fd, filp, fapp);
}

EXPORT_SYMBOL(fasync_helper);
993
994	/*
995	* rcu_read_lock() is held
996	*/
997	static void kill_fasync_rcu(struct fasync_struct fa, int* sig, int band)
998	{
999	while (fa) {
1000	struct fown_struct *fown;
1001	unsigned long flags;
1002
1003	if (fa->magic != FASYNC_MAGIC) {
1004	printk(KERN_ERR "kill_fasync: bad magic number in "
1005	"fasync_struct!\n");
1006	return;
1007	}
1008	read_lock_irqsave(&fa->fa_lock, flags);
1009	if (fa->fa_file) {
1010	fown = &fa->fa_file->f_owner;
1011	/ Don't send SIGURG to processes which have not set a*
1012	queued signum: SIGURG has its own default signalling
1013	mechanism. /*
1014	if (!(sig == SIGURG && fown->signum == `0`))
1015	send_sigio(fown, fd: fa->fa_fd, band);
1016	}
1017	read_unlock_irqrestore(&fa->fa_lock, flags);
1018	fa = rcu_dereference(fa->fa_next);
1019	}
1020	}
1021
/*
 * Notify every entry on the fasync list *@fp, passing @sig and @band on
 * to kill_fasync_rcu() under rcu_read_lock().
 */
void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
	/* First a quick test without locking: usually
	 * the list is empty.
	 */
	if (*fp) {
		rcu_read_lock();
		kill_fasync_rcu(rcu_dereference(*fp), sig, band);
		rcu_read_unlock();
	}
}
EXPORT_SYMBOL(kill_fasync);
1034
1035	static int __init fcntl_init(void)
1036	{
1037	/*
1038	* Please add new bits here to ensure allocation uniqueness.
1039	* Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
1040	* is defined as O_NONBLOCK on some platforms and not on others.
1041	*/
1042	BUILD_BUG_ON(`21` - `1` / for O_RDONLY being 0 / !=
1043	HWEIGHT32(
1044	(VALID_OPEN_FLAGS & ~(O_NONBLOCK \| O_NDELAY)) \|
1045	__FMODE_EXEC \| __FMODE_NONOTIFY));
1046
1047	fasync_cache = kmem_cache_create(name: "fasync_cache",
1048	size: sizeof(struct fasync_struct), align: `0`,
1049	SLAB_PANIC \| SLAB_ACCOUNT, NULL);
1050	return `0`;
1051	}
1052
1053	module_init(fcntl_init)
1054

source code of linux/fs/fcntl.c