file.c source code [linux/fs/file.c]

1	// SPDX-License-Identifier: GPL-2.0
2	/*
3	* linux/fs/file.c
4	*
5	* Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes
6	*
7	* Manage the dynamic fd arrays in the process files_struct.
8	*/
9
10	#include <linux/syscalls.h>
11	#include <linux/export.h>
12	#include <linux/fs.h>
13	#include <linux/kernel.h>
14	#include <linux/mm.h>
15	#include <linux/sched/signal.h>
16	#include <linux/slab.h>
17	#include <linux/file.h>
18	#include <linux/fdtable.h>
19	#include <linux/bitops.h>
20	#include <linux/spinlock.h>
21	#include <linux/rcupdate.h>
22	#include <linux/close_range.h>
23	#include <net/sock.h>
24
25	#include "internal.h"
26
27	unsigned int sysctl_nr_open __read_mostly = `1024`*`1024`;
28	unsigned int sysctl_nr_open_min = BITS_PER_LONG;
29	/ our min() is unusable in constant expressions ;-/ /
30	#define __const_min(x, y) ((x) < (y) ? (x) : (y))
31	unsigned int sysctl_nr_open_max =
32	__const_min(INT_MAX, ~(size_t)`0`/sizeof(void *)) & -BITS_PER_LONG;
33
34	static void __free_fdtable(struct fdtable *fdt)
35	{
36	kvfree(addr: fdt->fd);
37	kvfree(addr: fdt->open_fds);
38	kfree(objp: fdt);
39	}
40
41	static void free_fdtable_rcu(struct rcu_head *rcu)
42	{
43	__free_fdtable(container_of(rcu, struct fdtable, rcu));
44	}
45
/*
 * full_fds_bits keeps one bit per 'unsigned long' word of open_fds (see
 * __set_open_fd()).  BITBIT_NR(nr) is the number of longs that second-level
 * bitmap needs for nr descriptors; BITBIT_SIZE(nr) is the same in bytes.
 */
#define BITBIT_NR(nr)	BITS_TO_LONGS(BITS_TO_LONGS(nr))
#define BITBIT_SIZE(nr)	(BITBIT_NR(nr) * sizeof(long))
48
49	/*
50	* Copy 'count' fd bits from the old table to the new table and clear the extra
51	* space if any. This does not copy the file pointers. Called with the files
52	* spinlock held for write.
53	*/
54	static void copy_fd_bitmaps(struct fdtable nfdt, struct* fdtable *ofdt,
55	unsigned int count)
56	{
57	unsigned int cpy, set;
58
59	cpy = count / BITS_PER_BYTE;
60	set = (nfdt->max_fds - count) / BITS_PER_BYTE;
61	memcpy(nfdt->open_fds, ofdt->open_fds, cpy);
62	memset((char *)nfdt->open_fds + cpy, `0`, set);
63	memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy);
64	memset((char *)nfdt->close_on_exec + cpy, `0`, set);
65
66	cpy = BITBIT_SIZE(count);
67	set = BITBIT_SIZE(nfdt->max_fds) - cpy;
68	memcpy(nfdt->full_fds_bits, ofdt->full_fds_bits, cpy);
69	memset((char *)nfdt->full_fds_bits + cpy, `0`, set);
70	}
71
72	/*
73	* Copy all file descriptors from the old table to the new, expanded table and
74	* clear the extra space. Called with the files spinlock held for write.
75	*/
76	static void copy_fdtable(struct fdtable nfdt, struct* fdtable *ofdt)
77	{
78	size_t cpy, set;
79
80	BUG_ON(nfdt->max_fds < ofdt->max_fds);
81
82	cpy = ofdt->max_fds * sizeof(struct file *);
83	set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *);
84	memcpy(nfdt->fd, ofdt->fd, cpy);
85	memset((char *)nfdt->fd + cpy, `0`, set);
86
87	copy_fd_bitmaps(nfdt, ofdt, count: ofdt->max_fds);
88	}
89
90	/*
91	* Note how the fdtable bitmap allocations very much have to be a multiple of
92	* BITS_PER_LONG. This is not only because we walk those things in chunks of
93	* 'unsigned long' in some places, but simply because that is how the Linux
94	* kernel bitmaps are defined to work: they are not "bits in an array of bytes",
95	* they are very much "bits in an array of unsigned long".
96	*
97	* The ALIGN(nr, BITS_PER_LONG) here is for clarity: since we just multiplied
98	* by that "1024/sizeof(ptr)" before, we already know there are sufficient
99	* clear low bits. Clang seems to realize that, gcc ends up being confused.
100	*
101	* On a 128-bit machine, the ALIGN() would actually matter. In the meantime,
102	* let's consider it documentation (and maybe a test-case for gcc to improve
103	* its code generation ;)
104	*/
105	static struct fdtable * alloc_fdtable(unsigned int nr)
106	{
107	struct fdtable *fdt;
108	void *data;
109
110	/*
111	* Figure out how many fds we actually want to support in this fdtable.
112	* Allocation steps are keyed to the size of the fdarray, since it
113	* grows far faster than any of the other dynamic data. We try to fit
114	* the fdarray into comfortable page-tuned chunks: starting at 1024B
115	* and growing in powers of two from there on.
116	*/
117	nr /= (`1024` / sizeof(struct file *));
118	nr = roundup_pow_of_two(nr + `1`);
119	nr = (`1024` / sizeof(struct* file *));
120	nr = ALIGN(nr, BITS_PER_LONG);
121	/*
122	* Note that this can drive nr below what we had passed if sysctl_nr_open
123	* had been set lower between the check in expand_files() and here. Deal
124	* with that in caller, it's cheaper that way.
125	*
126	* We make sure that nr remains a multiple of BITS_PER_LONG - otherwise
127	* bitmaps handling below becomes unpleasant, to put it mildly...
128	*/
129	if (unlikely(nr > sysctl_nr_open))
130	nr = ((sysctl_nr_open - `1`) \| (BITS_PER_LONG - `1`)) + `1`;
131
132	fdt = kmalloc(size: sizeof(struct fdtable), GFP_KERNEL_ACCOUNT);
133	if (!fdt)
134	goto out;
135	fdt->max_fds = nr;
136	data = kvmalloc_array(n: nr, size: sizeof(struct file *), GFP_KERNEL_ACCOUNT);
137	if (!data)
138	goto out_fdt;
139	fdt->fd = data;
140
141	data = kvmalloc(max_t(size_t,
142	`2` * nr / BITS_PER_BYTE + BITBIT_SIZE(nr), L1_CACHE_BYTES),
143	GFP_KERNEL_ACCOUNT);
144	if (!data)
145	goto out_arr;
146	fdt->open_fds = data;
147	data += nr / BITS_PER_BYTE;
148	fdt->close_on_exec = data;
149	data += nr / BITS_PER_BYTE;
150	fdt->full_fds_bits = data;
151
152	return fdt;
153
154	out_arr:
155	kvfree(addr: fdt->fd);
156	out_fdt:
157	kfree(objp: fdt);
158	out:
159	return NULL;
160	}
161
162	/*
163	* Expand the file descriptor table.
164	* This function will allocate a new fdtable and both fd array and fdset, of
165	* the given size.
166	* Return <0 error code on error; 1 on successful completion.
167	* The files->file_lock should be held on entry, and will be held on exit.
168	*/
169	static int expand_fdtable(struct files_struct files, unsigned* int nr)
170	__releases(files->file_lock)
171	__acquires(files->file_lock)
172	{
173	struct fdtable new_fdt, cur_fdt;
174
175	spin_unlock(lock: &files->file_lock);
176	new_fdt = alloc_fdtable(nr);
177
178	/ make sure all fd_install() have seen resize_in_progress*
179	* or have finished their rcu_read_lock_sched() section.
180	*/
181	if (atomic_read(v: &files->count) > `1`)
182	synchronize_rcu();
183
184	spin_lock(lock: &files->file_lock);
185	if (!new_fdt)
186	return -ENOMEM;
187	/*
188	* extremely unlikely race - sysctl_nr_open decreased between the check in
189	* caller and alloc_fdtable(). Cheaper to catch it here...
190	*/
191	if (unlikely(new_fdt->max_fds <= nr)) {
192	__free_fdtable(fdt: new_fdt);
193	return -EMFILE;
194	}
195	cur_fdt = files_fdtable(files);
196	BUG_ON(nr < cur_fdt->max_fds);
197	copy_fdtable(nfdt: new_fdt, ofdt: cur_fdt);
198	rcu_assign_pointer(files->fdt, new_fdt);
199	if (cur_fdt != &files->fdtab)
200	call_rcu(head: &cur_fdt->rcu, func: free_fdtable_rcu);
201	/ coupled with smp_rmb() in fd_install() /
202	smp_wmb();
203	return `1`;
204	}
205
206	/*
207	* Expand files.
208	* This function will expand the file structures, if the requested size exceeds
209	* the current capacity and there is room for expansion.
210	* Return <0 error code on error; 0 when nothing done; 1 when files were
211	* expanded and execution may have blocked.
212	* The files->file_lock should be held on entry, and will be held on exit.
213	*/
214	static int expand_files(struct files_struct files, unsigned* int nr)
215	__releases(files->file_lock)
216	__acquires(files->file_lock)
217	{
218	struct fdtable *fdt;
219	int expanded = `0`;
220
221	repeat:
222	fdt = files_fdtable(files);
223
224	/ Do we need to expand? /
225	if (nr < fdt->max_fds)
226	return expanded;
227
228	/ Can we expand? /
229	if (nr >= sysctl_nr_open)
230	return -EMFILE;
231
232	if (unlikely(files->resize_in_progress)) {
233	spin_unlock(lock: &files->file_lock);
234	expanded = `1`;
235	wait_event(files->resize_wait, !files->resize_in_progress);
236	spin_lock(lock: &files->file_lock);
237	goto repeat;
238	}
239
240	/ All good, so we try /
241	files->resize_in_progress = true;
242	expanded = expand_fdtable(files, nr);
243	files->resize_in_progress = false;
244
245	wake_up_all(&files->resize_wait);
246	return expanded;
247	}
248
249	static inline void __set_close_on_exec(unsigned int fd, struct fdtable *fdt)
250	{
251	__set_bit(fd, fdt->close_on_exec);
252	}
253
254	static inline void __clear_close_on_exec(unsigned int fd, struct fdtable *fdt)
255	{
256	if (test_bit(fd, fdt->close_on_exec))
257	__clear_bit(fd, fdt->close_on_exec);
258	}
259
260	static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt)
261	{
262	__set_bit(fd, fdt->open_fds);
263	fd /= BITS_PER_LONG;
264	if (!~fdt->open_fds[fd])
265	__set_bit(fd, fdt->full_fds_bits);
266	}
267
268	static inline void __clear_open_fd(unsigned int fd, struct fdtable *fdt)
269	{
270	__clear_bit(fd, fdt->open_fds);
271	__clear_bit(fd / BITS_PER_LONG, fdt->full_fds_bits);
272	}
273
274	static unsigned int count_open_files(struct fdtable *fdt)
275	{
276	unsigned int size = fdt->max_fds;
277	unsigned int i;
278
279	/ Find the last open fd /
280	for (i = size / BITS_PER_LONG; i > `0`; ) {
281	if (fdt->open_fds[--i])
282	break;
283	}
284	i = (i + `1`) * BITS_PER_LONG;
285	return i;
286	}
287
288	/*
289	* Note that a sane fdtable size always has to be a multiple of
290	* BITS_PER_LONG, since we have bitmaps that are sized by this.
291	*
292	* 'max_fds' will normally already be properly aligned, but it
293	* turns out that in the close_range() -> __close_range() ->
294	* unshare_fd() -> dup_fd() -> sane_fdtable_size() we can end
295	* up having a 'max_fds' value that isn't already aligned.
296	*
297	* Rather than make close_range() have to worry about this,
298	* just make that BITS_PER_LONG alignment be part of a sane
299	* fdtable size. Becuase that's really what it is.
300	*/
301	static unsigned int sane_fdtable_size(struct fdtable fdt, unsigned* int max_fds)
302	{
303	unsigned int count;
304
305	count = count_open_files(fdt);
306	if (max_fds < NR_OPEN_DEFAULT)
307	max_fds = NR_OPEN_DEFAULT;
308	return ALIGN(min(count, max_fds), BITS_PER_LONG);
309	}
310
311	/*
312	* Allocate a new files structure and copy contents from the
313	* passed in files structure.
314	* errorp will be valid only when the returned files_struct is NULL.
315	*/
316	struct files_struct dup_fd(struct* files_struct oldf, unsigned* int max_fds, int *errorp)
317	{
318	struct files_struct *newf;
319	struct file old_fds, new_fds;
320	unsigned int open_files, i;
321	struct fdtable old_fdt, new_fdt;
322
323	*errorp = -ENOMEM;
324	newf = kmem_cache_alloc(cachep: files_cachep, GFP_KERNEL);
325	if (!newf)
326	goto out;
327
328	atomic_set(v: &newf->count, i: `1`);
329
330	spin_lock_init(&newf->file_lock);
331	newf->resize_in_progress = false;
332	init_waitqueue_head(&newf->resize_wait);
333	newf->next_fd = `0`;
334	new_fdt = &newf->fdtab;
335	new_fdt->max_fds = NR_OPEN_DEFAULT;
336	new_fdt->close_on_exec = newf->close_on_exec_init;
337	new_fdt->open_fds = newf->open_fds_init;
338	new_fdt->full_fds_bits = newf->full_fds_bits_init;
339	new_fdt->fd = &newf->fd_array[`0`];
340
341	spin_lock(lock: &oldf->file_lock);
342	old_fdt = files_fdtable(oldf);
343	open_files = sane_fdtable_size(fdt: old_fdt, max_fds);
344
345	/*
346	* Check whether we need to allocate a larger fd array and fd set.
347	*/
348	while (unlikely(open_files > new_fdt->max_fds)) {
349	spin_unlock(lock: &oldf->file_lock);
350
351	if (new_fdt != &newf->fdtab)
352	__free_fdtable(fdt: new_fdt);
353
354	new_fdt = alloc_fdtable(nr: open_files - `1`);
355	if (!new_fdt) {
356	*errorp = -ENOMEM;
357	goto out_release;
358	}
359
360	/ beyond sysctl_nr_open; nothing to do /
361	if (unlikely(new_fdt->max_fds < open_files)) {
362	__free_fdtable(fdt: new_fdt);
363	*errorp = -EMFILE;
364	goto out_release;
365	}
366
367	/*
368	* Reacquire the oldf lock and a pointer to its fd table
369	* who knows it may have a new bigger fd table. We need
370	* the latest pointer.
371	*/
372	spin_lock(lock: &oldf->file_lock);
373	old_fdt = files_fdtable(oldf);
374	open_files = sane_fdtable_size(fdt: old_fdt, max_fds);
375	}
376
377	copy_fd_bitmaps(nfdt: new_fdt, ofdt: old_fdt, count: open_files);
378
379	old_fds = old_fdt->fd;
380	new_fds = new_fdt->fd;
381
382	for (i = open_files; i != `0`; i--) {
383	struct file f = old_fds++;
384	if (f) {
385	get_file(f);
386	} else {
387	/*
388	* The fd may be claimed in the fd bitmap but not yet
389	* instantiated in the files array if a sibling thread
390	* is partway through open(). So make sure that this
391	* fd is available to the new process.
392	*/
393	__clear_open_fd(fd: open_files - i, fdt: new_fdt);
394	}
395	rcu_assign_pointer(*new_fds++, f);
396	}
397	spin_unlock(lock: &oldf->file_lock);
398
399	/ clear the remainder /
400	memset(new_fds, `0`, (new_fdt->max_fds - open_files) * sizeof(struct file *));
401
402	rcu_assign_pointer(newf->fdt, new_fdt);
403
404	return newf;
405
406	out_release:
407	kmem_cache_free(s: files_cachep, objp: newf);
408	out:
409	return NULL;
410	}
411
412	static struct fdtable close_files(struct* files_struct * files)
413	{
414	/*
415	* It is safe to dereference the fd table without RCU or
416	* ->file_lock because this is the last reference to the
417	* files structure.
418	*/
419	struct fdtable *fdt = rcu_dereference_raw(files->fdt);
420	unsigned int i, j = `0`;
421
422	for (;;) {
423	unsigned long set;
424	i = j * BITS_PER_LONG;
425	if (i >= fdt->max_fds)
426	break;
427	set = fdt->open_fds[j++];
428	while (set) {
429	if (set & `1`) {
430	struct file * file = xchg(&fdt->fd[i], NULL);
431	if (file) {
432	filp_close(file, id: files);
433	cond_resched();
434	}
435	}
436	i++;
437	set >>= `1`;
438	}
439	}
440
441	return fdt;
442	}
443
444	void put_files_struct(struct files_struct *files)
445	{
446	if (atomic_dec_and_test(v: &files->count)) {
447	struct fdtable *fdt = close_files(files);
448
449	/ free the arrays if they are not embedded /
450	if (fdt != &files->fdtab)
451	__free_fdtable(fdt);
452	kmem_cache_free(s: files_cachep, objp: files);
453	}
454	}
455
456	void exit_files(struct task_struct *tsk)
457	{
458	struct files_struct * files = tsk->files;
459
460	if (files) {
461	task_lock(p: tsk);
462	tsk->files = NULL;
463	task_unlock(p: tsk);
464	put_files_struct(files);
465	}
466	}
467
468	struct files_struct init_files = {
469	.count = ATOMIC_INIT(`1`),
470	.fdt = &init_files.fdtab,
471	.fdtab = {
472	.max_fds = NR_OPEN_DEFAULT,
473	.fd = &init_files.fd_array[`0`],
474	.close_on_exec = init_files.close_on_exec_init,
475	.open_fds = init_files.open_fds_init,
476	.full_fds_bits = init_files.full_fds_bits_init,
477	},
478	.file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock),
479	.resize_wait = __WAIT_QUEUE_HEAD_INITIALIZER(init_files.resize_wait),
480	};
481
482	static unsigned int find_next_fd(struct fdtable fdt, unsigned* int start)
483	{
484	unsigned int maxfd = fdt->max_fds;
485	unsigned int maxbit = maxfd / BITS_PER_LONG;
486	unsigned int bitbit = start / BITS_PER_LONG;
487
488	bitbit = find_next_zero_bit(addr: fdt->full_fds_bits, size: maxbit, offset: bitbit) * BITS_PER_LONG;
489	if (bitbit > maxfd)
490	return maxfd;
491	if (bitbit > start)
492	start = bitbit;
493	return find_next_zero_bit(addr: fdt->open_fds, size: maxfd, offset: start);
494	}
495
496	/*
497	* allocate a file descriptor, mark it busy.
498	*/
499	static int alloc_fd(unsigned start, unsigned end, unsigned flags)
500	{
501	struct files_struct *files = current->files;
502	unsigned int fd;
503	int error;
504	struct fdtable *fdt;
505
506	spin_lock(lock: &files->file_lock);
507	repeat:
508	fdt = files_fdtable(files);
509	fd = start;
510	if (fd < files->next_fd)
511	fd = files->next_fd;
512
513	if (fd < fdt->max_fds)
514	fd = find_next_fd(fdt, start: fd);
515
516	/*
517	* N.B. For clone tasks sharing a files structure, this test
518	* will limit the total number of files that can be opened.
519	*/
520	error = -EMFILE;
521	if (fd >= end)
522	goto out;
523
524	error = expand_files(files, nr: fd);
525	if (error < `0`)
526	goto out;
527
528	/*
529	* If we needed to expand the fs array we
530	* might have blocked - try again.
531	*/
532	if (error)
533	goto repeat;
534
535	if (start <= files->next_fd)
536	files->next_fd = fd + `1`;
537
538	__set_open_fd(fd, fdt);
539	if (flags & O_CLOEXEC)
540	__set_close_on_exec(fd, fdt);
541	else
542	__clear_close_on_exec(fd, fdt);
543	error = fd;
544	#if 1
545	/ Sanity check /
546	if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
547	printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
548	rcu_assign_pointer(fdt->fd[fd], NULL);
549	}
550	#endif
551
552	out:
553	spin_unlock(lock: &files->file_lock);
554	return error;
555	}
556
/*
 * Allocate the lowest free descriptor below @nofile in current->files.
 * @flags is passed through to alloc_fd().  Returns the fd or a negative errno.
 */
int __get_unused_fd_flags(unsigned flags, unsigned long nofile)
{
	return alloc_fd(0, nofile, flags);
}
561
562	int get_unused_fd_flags(unsigned flags)
563	{
564	return __get_unused_fd_flags(flags, nofile: rlimit(RLIMIT_NOFILE));
565	}
566	EXPORT_SYMBOL(get_unused_fd_flags);
567
568	static void __put_unused_fd(struct files_struct files, unsigned* int fd)
569	{
570	struct fdtable *fdt = files_fdtable(files);
571	__clear_open_fd(fd, fdt);
572	if (fd < files->next_fd)
573	files->next_fd = fd;
574	}
575
576	void put_unused_fd(unsigned int fd)
577	{
578	struct files_struct *files = current->files;
579	spin_lock(lock: &files->file_lock);
580	__put_unused_fd(files, fd);
581	spin_unlock(lock: &files->file_lock);
582	}
583
584	EXPORT_SYMBOL(put_unused_fd);
585
586	/*
587	* Install a file pointer in the fd array.
588	*
589	* The VFS is full of places where we drop the files lock between
590	* setting the open_fds bitmap and installing the file in the file
591	* array. At any such point, we are vulnerable to a dup2() race
592	* installing a file in the array before us. We need to detect this and
593	* fput() the struct file we are about to overwrite in this case.
594	*
595	* It should never happen - if we allow dup2() do it, _really_ bad things
596	* will follow.
597	*
598	* This consumes the "file" refcount, so callers should treat it
599	* as if they had called fput(file).
600	*/
601
602	void fd_install(unsigned int fd, struct file *file)
603	{
604	struct files_struct *files = current->files;
605	struct fdtable *fdt;
606
607	if (WARN_ON_ONCE(unlikely(file->f_mode & FMODE_BACKING)))
608	return;
609
610	rcu_read_lock_sched();
611
612	if (unlikely(files->resize_in_progress)) {
613	rcu_read_unlock_sched();
614	spin_lock(lock: &files->file_lock);
615	fdt = files_fdtable(files);
616	BUG_ON(fdt->fd[fd] != NULL);
617	rcu_assign_pointer(fdt->fd[fd], file);
618	spin_unlock(lock: &files->file_lock);
619	return;
620	}
621	/ coupled with smp_wmb() in expand_fdtable() /
622	smp_rmb();
623	fdt = rcu_dereference_sched(files->fdt);
624	BUG_ON(fdt->fd[fd] != NULL);
625	rcu_assign_pointer(fdt->fd[fd], file);
626	rcu_read_unlock_sched();
627	}
628
629	EXPORT_SYMBOL(fd_install);
630
631	/**
632	* file_close_fd_locked - return file associated with fd
633	* @files: file struct to retrieve file from
634	* @fd: file descriptor to retrieve file for
635	*
636	* Doesn't take a separate reference count.
637	*
638	* Context: files_lock must be held.
639	*
640	* Returns: The file associated with @fd (NULL if @fd is not open)
641	*/
642	struct file file_close_fd_locked(struct* files_struct files, unsigned* fd)
643	{
644	struct fdtable *fdt = files_fdtable(files);
645	struct file *file;
646
647	lockdep_assert_held(&files->file_lock);
648
649	if (fd >= fdt->max_fds)
650	return NULL;
651
652	fd = array_index_nospec(fd, fdt->max_fds);
653	file = fdt->fd[fd];
654	if (file) {
655	rcu_assign_pointer(fdt->fd[fd], NULL);
656	__put_unused_fd(files, fd);
657	}
658	return file;
659	}
660
661	int close_fd(unsigned fd)
662	{
663	struct files_struct *files = current->files;
664	struct file *file;
665
666	spin_lock(lock: &files->file_lock);
667	file = file_close_fd_locked(files, fd);
668	spin_unlock(lock: &files->file_lock);
669	if (!file)
670	return -EBADF;
671
672	return filp_close(file, id: files);
673	}
674	EXPORT_SYMBOL(close_fd); / for ksys_close() /
675
676	/**
677	* last_fd - return last valid index into fd table
678	* @fdt: File descriptor table.
679	*
680	* Context: Either rcu read lock or files_lock must be held.
681	*
682	* Returns: Last valid index into fdtable.
683	*/
684	static inline unsigned last_fd(struct fdtable *fdt)
685	{
686	return fdt->max_fds - `1`;
687	}
688
689	static inline void __range_cloexec(struct files_struct *cur_fds,
690	unsigned int fd, unsigned int max_fd)
691	{
692	struct fdtable *fdt;
693
694	/ make sure we're using the correct maximum value /
695	spin_lock(lock: &cur_fds->file_lock);
696	fdt = files_fdtable(cur_fds);
697	max_fd = min(last_fd(fdt), max_fd);
698	if (fd <= max_fd)
699	bitmap_set(map: fdt->close_on_exec, start: fd, nbits: max_fd - fd + `1`);
700	spin_unlock(lock: &cur_fds->file_lock);
701	}
702
703	static inline void __range_close(struct files_struct files, unsigned* int fd,
704	unsigned int max_fd)
705	{
706	struct file *file;
707	unsigned n;
708
709	spin_lock(lock: &files->file_lock);
710	n = last_fd(files_fdtable(files));
711	max_fd = min(max_fd, n);
712
713	for (; fd <= max_fd; fd++) {
714	file = file_close_fd_locked(files, fd);
715	if (file) {
716	spin_unlock(lock: &files->file_lock);
717	filp_close(file, id: files);
718	cond_resched();
719	spin_lock(lock: &files->file_lock);
720	} else if (need_resched()) {
721	spin_unlock(lock: &files->file_lock);
722	cond_resched();
723	spin_lock(lock: &files->file_lock);
724	}
725	}
726	spin_unlock(lock: &files->file_lock);
727	}
728
729	/**
730	* __close_range() - Close all file descriptors in a given range.
731	*
732	* @fd: starting file descriptor to close
733	* @max_fd: last file descriptor to close
734	* @flags: CLOSE_RANGE flags.
735	*
736	* This closes a range of file descriptors. All file descriptors
737	* from @fd up to and including @max_fd are closed.
738	*/
739	int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
740	{
741	struct task_struct *me = current;
742	struct files_struct cur_fds = me->files, fds = NULL;
743
744	if (flags & ~(CLOSE_RANGE_UNSHARE \| CLOSE_RANGE_CLOEXEC))
745	return -EINVAL;
746
747	if (fd > max_fd)
748	return -EINVAL;
749
750	if (flags & CLOSE_RANGE_UNSHARE) {
751	int ret;
752	unsigned int max_unshare_fds = NR_OPEN_MAX;
753
754	/*
755	* If the caller requested all fds to be made cloexec we always
756	* copy all of the file descriptors since they still want to
757	* use them.
758	*/
759	if (!(flags & CLOSE_RANGE_CLOEXEC)) {
760	/*
761	* If the requested range is greater than the current
762	* maximum, we're closing everything so only copy all
763	* file descriptors beneath the lowest file descriptor.
764	*/
765	rcu_read_lock();
766	if (max_fd >= last_fd(files_fdtable(cur_fds)))
767	max_unshare_fds = fd;
768	rcu_read_unlock();
769	}
770
771	ret = unshare_fd(CLONE_FILES, max_fds: max_unshare_fds, new_fdp: &fds);
772	if (ret)
773	return ret;
774
775	/*
776	* We used to share our file descriptor table, and have now
777	* created a private one, make sure we're using it below.
778	*/
779	if (fds)
780	swap(cur_fds, fds);
781	}
782
783	if (flags & CLOSE_RANGE_CLOEXEC)
784	__range_cloexec(cur_fds, fd, max_fd);
785	else
786	__range_close(files: cur_fds, fd, max_fd);
787
788	if (fds) {
789	/*
790	* We're done closing the files we were supposed to. Time to install
791	* the new file descriptor table and drop the old one.
792	*/
793	task_lock(p: me);
794	me->files = cur_fds;
795	task_unlock(p: me);
796	put_files_struct(files: fds);
797	}
798
799	return `0`;
800	}
801
802	/**
803	* file_close_fd - return file associated with fd
804	* @fd: file descriptor to retrieve file for
805	*
806	* Doesn't take a separate reference count.
807	*
808	* Returns: The file associated with @fd (NULL if @fd is not open)
809	*/
810	struct file file_close_fd(unsigned* int fd)
811	{
812	struct files_struct *files = current->files;
813	struct file *file;
814
815	spin_lock(lock: &files->file_lock);
816	file = file_close_fd_locked(files, fd);
817	spin_unlock(lock: &files->file_lock);
818
819	return file;
820	}
821
822	void do_close_on_exec(struct files_struct *files)
823	{
824	unsigned i;
825	struct fdtable *fdt;
826
827	/ exec unshares first /
828	spin_lock(lock: &files->file_lock);
829	for (i = `0`; ; i++) {
830	unsigned long set;
831	unsigned fd = i * BITS_PER_LONG;
832	fdt = files_fdtable(files);
833	if (fd >= fdt->max_fds)
834	break;
835	set = fdt->close_on_exec[i];
836	if (!set)
837	continue;
838	fdt->close_on_exec[i] = `0`;
839	for ( ; set ; fd++, set >>= `1`) {
840	struct file *file;
841	if (!(set & `1`))
842	continue;
843	file = fdt->fd[fd];
844	if (!file)
845	continue;
846	rcu_assign_pointer(fdt->fd[fd], NULL);
847	__put_unused_fd(files, fd);
848	spin_unlock(lock: &files->file_lock);
849	filp_close(file, id: files);
850	cond_resched();
851	spin_lock(lock: &files->file_lock);
852	}
853
854	}
855	spin_unlock(lock: &files->file_lock);
856	}
857
858	static struct file __get_file_rcu(struct* file __rcu **f)
859	{
860	struct file __rcu *file;
861	struct file __rcu *file_reloaded;
862	struct file __rcu *file_reloaded_cmp;
863
864	file = rcu_dereference_raw(*f);
865	if (!file)
866	return NULL;
867
868	if (unlikely(!atomic_long_inc_not_zero(&file->f_count)))
869	return ERR_PTR(error: -EAGAIN);
870
871	file_reloaded = rcu_dereference_raw(*f);
872
873	/*
874	* Ensure that all accesses have a dependency on the load from
875	* rcu_dereference_raw() above so we get correct ordering
876	* between reuse/allocation and the pointer check below.
877	*/
878	file_reloaded_cmp = file_reloaded;
879	OPTIMIZER_HIDE_VAR(file_reloaded_cmp);
880
881	/*
882	* atomic_long_inc_not_zero() above provided a full memory
883	* barrier when we acquired a reference.
884	*
885	* This is paired with the write barrier from assigning to the
886	* __rcu protected file pointer so that if that pointer still
887	* matches the current file, we know we have successfully
888	* acquired a reference to the right file.
889	*
890	* If the pointers don't match the file has been reallocated by
891	* SLAB_TYPESAFE_BY_RCU.
892	*/
893	if (file == file_reloaded_cmp)
894	return file_reloaded;
895
896	fput(file);
897	return ERR_PTR(error: -EAGAIN);
898	}
899
900	/**
901	* get_file_rcu - try go get a reference to a file under rcu
902	* @f: the file to get a reference on
903	*
904	* This function tries to get a reference on @f carefully verifying that
905	* @f hasn't been reused.
906	*
907	* This function should rarely have to be used and only by users who
908	* understand the implications of SLAB_TYPESAFE_BY_RCU. Try to avoid it.
909	*
910	* Return: Returns @f with the reference count increased or NULL.
911	*/
912	struct file get_file_rcu(struct* file __rcu **f)
913	{
914	for (;;) {
915	struct file __rcu *file;
916
917	file = __get_file_rcu(f);
918	if (unlikely(!file))
919	return NULL;
920
921	if (unlikely(IS_ERR(file)))
922	continue;
923
924	return file;
925	}
926	}
927	EXPORT_SYMBOL_GPL(get_file_rcu);
928
929	/**
930	* get_file_active - try go get a reference to a file
931	* @f: the file to get a reference on
932	*
933	* In contast to get_file_rcu() the pointer itself isn't part of the
934	* reference counting.
935	*
936	* This function should rarely have to be used and only by users who
937	* understand the implications of SLAB_TYPESAFE_BY_RCU. Try to avoid it.
938	*
939	* Return: Returns @f with the reference count increased or NULL.
940	*/
941	struct file get_file_active(struct* file **f)
942	{
943	struct file __rcu *file;
944
945	rcu_read_lock();
946	file = __get_file_rcu(f);
947	rcu_read_unlock();
948	if (IS_ERR(ptr: file))
949	file = NULL;
950	return file;
951	}
952	EXPORT_SYMBOL_GPL(get_file_active);
953
954	static inline struct file __fget_files_rcu(struct* files_struct *files,
955	unsigned int fd, fmode_t mask)
956	{
957	for (;;) {
958	struct file *file;
959	struct fdtable *fdt = rcu_dereference_raw(files->fdt);
960	struct file __rcu **fdentry;
961	unsigned long nospec_mask;
962
963	/ Mask is a 0 for invalid fd's, ~0 for valid ones /
964	nospec_mask = array_index_mask_nospec(index: fd, size: fdt->max_fds);
965
966	/*
967	* fdentry points to the 'fd' offset, or fdt->fd[0].
968	* Loading from fdt->fd[0] is always safe, because the
969	* array always exists.
970	*/
971	fdentry = fdt->fd + (fd & nospec_mask);
972
973	/ Do the load, then mask any invalid result /
974	file = rcu_dereference_raw(*fdentry);
975	file = (void )(nospec_mask & (unsigned* long)file);
976	if (unlikely(!file))
977	return NULL;
978
979	/*
980	* Ok, we have a file pointer that was valid at
981	* some point, but it might have become stale since.
982	*
983	* We need to confirm it by incrementing the refcount
984	* and then check the lookup again.
985	*
986	* atomic_long_inc_not_zero() gives us a full memory
987	* barrier. We only really need an 'acquire' one to
988	* protect the loads below, but we don't have that.
989	*/
990	if (unlikely(!atomic_long_inc_not_zero(&file->f_count)))
991	continue;
992
993	/*
994	* Such a race can take two forms:
995	*
996	* (a) the file ref already went down to zero and the
997	* file hasn't been reused yet or the file count
998	* isn't zero but the file has already been reused.
999	*
1000	* (b) the file table entry has changed under us.
1001	* Note that we don't need to re-check the 'fdt->fd'
1002	* pointer having changed, because it always goes
1003	* hand-in-hand with 'fdt'.
1004	*
1005	* If so, we need to put our ref and try again.
1006	*/
1007	if (unlikely(file != rcu_dereference_raw(*fdentry)) \|\|
1008	unlikely(rcu_dereference_raw(files->fdt) != fdt)) {
1009	fput(file);
1010	continue;
1011	}
1012
1013	/*
1014	* This isn't the file we're looking for or we're not
1015	* allowed to get a reference to it.
1016	*/
1017	if (unlikely(file->f_mode & mask)) {
1018	fput(file);
1019	return NULL;
1020	}
1021
1022	/*
1023	* Ok, we have a ref to the file, and checked that it
1024	* still exists.
1025	*/
1026	return file;
1027	}
1028	}
1029
1030	static struct file __fget_files(struct* files_struct files, unsigned* int fd,
1031	fmode_t mask)
1032	{
1033	struct file *file;
1034
1035	rcu_read_lock();
1036	file = __fget_files_rcu(files, fd, mask);
1037	rcu_read_unlock();
1038
1039	return file;
1040	}
1041
1042	static inline struct file __fget(unsigned* int fd, fmode_t mask)
1043	{
1044	return __fget_files(current->files, fd, mask);
1045	}
1046
1047	struct file fget(unsigned* int fd)
1048	{
1049	return __fget(fd, FMODE_PATH);
1050	}
1051	EXPORT_SYMBOL(fget);
1052
/*
 * Like fget(), but with an empty mode mask: no file is rejected because of
 * its f_mode.  Returns NULL only if @fd is not open.
 */
struct file *fget_raw(unsigned int fd)
{
	return __fget(fd, 0);
}
EXPORT_SYMBOL(fget_raw);
1058
1059	struct file fget_task(struct* task_struct task, unsigned* int fd)
1060	{
1061	struct file *file = NULL;
1062
1063	task_lock(p: task);
1064	if (task->files)
1065	file = __fget_files(files: task->files, fd, mask: `0`);
1066	task_unlock(p: task);
1067
1068	return file;
1069	}
1070
1071	struct file lookup_fdget_rcu(unsigned* int fd)
1072	{
1073	return __fget_files_rcu(current->files, fd, mask: `0`);
1074
1075	}
1076	EXPORT_SYMBOL_GPL(lookup_fdget_rcu);
1077
1078	struct file task_lookup_fdget_rcu(struct* task_struct task, unsigned* int fd)
1079	{
1080	/ Must be called with rcu_read_lock held /
1081	struct files_struct *files;
1082	struct file *file = NULL;
1083
1084	task_lock(p: task);
1085	files = task->files;
1086	if (files)
1087	file = __fget_files_rcu(files, fd, mask: `0`);
1088	task_unlock(p: task);
1089
1090	return file;
1091	}
1092
1093	struct file task_lookup_next_fdget_rcu(struct* task_struct task, unsigned* int *ret_fd)
1094	{
1095	/ Must be called with rcu_read_lock held /
1096	struct files_struct *files;
1097	unsigned int fd = *ret_fd;
1098	struct file *file = NULL;
1099
1100	task_lock(p: task);
1101	files = task->files;
1102	if (files) {
1103	for (; fd < files_fdtable(files)->max_fds; fd++) {
1104	file = __fget_files_rcu(files, fd, mask: `0`);
1105	if (file)
1106	break;
1107	}
1108	}
1109	task_unlock(p: task);
1110	*ret_fd = fd;
1111	return file;
1112	}
1113	EXPORT_SYMBOL(task_lookup_next_fdget_rcu);
1114
1115	/*
1116	* Lightweight file lookup - no refcnt increment if fd table isn't shared.
1117	*
1118	* You can use this instead of fget if you satisfy all of the following
1119	* conditions:
1120	* 1) You must call fput_light before exiting the syscall and returning control
1121	* to userspace (i.e. you cannot remember the returned struct file * after
1122	* returning to userspace).
1123	* 2) You must not call filp_close on the returned struct file * in between
1124	* calls to fget_light and fput_light.
1125	* 3) You must not clone the current task in between the calls to fget_light
1126	* and fput_light.
1127	*
1128	* The fput_needed flag returned by fget_light should be passed to the
1129	* corresponding fput_light.
1130	*/
1131	static unsigned long __fget_light(unsigned int fd, fmode_t mask)
1132	{
1133	struct files_struct *files = current->files;
1134	struct file *file;
1135
1136	/*
1137	* If another thread is concurrently calling close_fd() followed
1138	* by put_files_struct(), we must not observe the old table
1139	* entry combined with the new refcount - otherwise we could
1140	* return a file that is concurrently being freed.
1141	*
1142	* atomic_read_acquire() pairs with atomic_dec_and_test() in
1143	* put_files_struct().
1144	*/
1145	if (likely(atomic_read_acquire(&files->count) == `1`)) {
1146	file = files_lookup_fd_raw(files, fd);
1147	if (!file \|\| unlikely(file->f_mode & mask))
1148	return `0`;
1149	return (unsigned long)file;
1150	} else {
1151	file = __fget_files(files, fd, mask);
1152	if (!file)
1153	return `0`;
1154	return FDPUT_FPUT \| (unsigned long)file;
1155	}
1156	}
1157	unsigned long __fdget(unsigned int fd)
1158	{
1159	return __fget_light(fd, FMODE_PATH);
1160	}
1161	EXPORT_SYMBOL(__fdget);
1162
/*
 * __fdget_raw - lightweight lookup of @fd with an empty mask, so the
 * f_mode filter in __fget_light() never rejects a file.
 */
unsigned long __fdget_raw(unsigned int fd)
{
	return __fget_light(fd, 0);
}
1167
1168	/*
1169	* Try to avoid f_pos locking. We only need it if the
1170	* file is marked for FMODE_ATOMIC_POS, and it can be
1171	* accessed multiple ways.
1172	*
1173	* Always do it for directories, because pidfd_getfd()
1174	* can make a file accessible even if it otherwise would
1175	* not be, and for directories this is a correctness
1176	* issue, not a "POSIX requirement".
1177	*/
1178	static inline bool file_needs_f_pos_lock(struct file *file)
1179	{
1180	return (file->f_mode & FMODE_ATOMIC_POS) &&
1181	(file_count(file) > `1` \|\| file->f_op->iterate_shared);
1182	}
1183
1184	unsigned long __fdget_pos(unsigned int fd)
1185	{
1186	unsigned long v = __fdget(fd);
1187	struct file file = (struct* file *)(v & ~`3`);
1188
1189	if (file && file_needs_f_pos_lock(file)) {
1190	v \|= FDPUT_POS_UNLOCK;
1191	mutex_lock(&file->f_pos_lock);
1192	}
1193	return v;
1194	}
1195
1196	void __f_unlock_pos(struct file *f)
1197	{
1198	mutex_unlock(lock: &f->f_pos_lock);
1199	}
1200
/*
 * We only lock f_pos if we have threads or if the file might be
 * shared with another process. In both cases we'll have an elevated
 * file count (done either by fdget() or by fork()).
 *
 * (This note explains the file_count() test in file_needs_f_pos_lock()
 * above; it does not describe set_close_on_exec() below.)
 */
1206
1207	void set_close_on_exec(unsigned int fd, int flag)
1208	{
1209	struct files_struct *files = current->files;
1210	struct fdtable *fdt;
1211	spin_lock(lock: &files->file_lock);
1212	fdt = files_fdtable(files);
1213	if (flag)
1214	__set_close_on_exec(fd, fdt);
1215	else
1216	__clear_close_on_exec(fd, fdt);
1217	spin_unlock(lock: &files->file_lock);
1218	}
1219
1220	bool get_close_on_exec(unsigned int fd)
1221	{
1222	struct files_struct *files = current->files;
1223	struct fdtable *fdt;
1224	bool res;
1225	rcu_read_lock();
1226	fdt = files_fdtable(files);
1227	res = close_on_exec(fd, fdt);
1228	rcu_read_unlock();
1229	return res;
1230	}
1231
1232	static int do_dup2(struct files_struct *files,
1233	struct file file, unsigned* fd, unsigned flags)
1234	__releases(&files->file_lock)
1235	{
1236	struct file *tofree;
1237	struct fdtable *fdt;
1238
1239	/*
1240	* We need to detect attempts to do dup2() over allocated but still
1241	* not finished descriptor. NB: OpenBSD avoids that at the price of
1242	* extra work in their equivalent of fget() - they insert struct
1243	* file immediately after grabbing descriptor, mark it larval if
1244	* more work (e.g. actual opening) is needed and make sure that
1245	* fget() treats larval files as absent. Potentially interesting,
1246	* but while extra work in fget() is trivial, locking implications
1247	* and amount of surgery on open()-related paths in VFS are not.
1248	* FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
1249	* deadlocks in rather amusing ways, AFAICS. All of that is out of
1250	* scope of POSIX or SUS, since neither considers shared descriptor
1251	* tables and this condition does not arise without those.
1252	*/
1253	fdt = files_fdtable(files);
1254	tofree = fdt->fd[fd];
1255	if (!tofree && fd_is_open(fd, fdt))
1256	goto Ebusy;
1257	get_file(f: file);
1258	rcu_assign_pointer(fdt->fd[fd], file);
1259	__set_open_fd(fd, fdt);
1260	if (flags & O_CLOEXEC)
1261	__set_close_on_exec(fd, fdt);
1262	else
1263	__clear_close_on_exec(fd, fdt);
1264	spin_unlock(lock: &files->file_lock);
1265
1266	if (tofree)
1267	filp_close(tofree, id: files);
1268
1269	return fd;
1270
1271	Ebusy:
1272	spin_unlock(lock: &files->file_lock);
1273	return -EBUSY;
1274	}
1275
1276	int replace_fd(unsigned fd, struct file file, unsigned* flags)
1277	{
1278	int err;
1279	struct files_struct *files = current->files;
1280
1281	if (!file)
1282	return close_fd(fd);
1283
1284	if (fd >= rlimit(RLIMIT_NOFILE))
1285	return -EBADF;
1286
1287	spin_lock(lock: &files->file_lock);
1288	err = expand_files(files, nr: fd);
1289	if (unlikely(err < `0`))
1290	goto out_unlock;
1291	return do_dup2(files, file, fd, flags);
1292
1293	out_unlock:
1294	spin_unlock(lock: &files->file_lock);
1295	return err;
1296	}
1297
1298	/**
1299	* receive_fd() - Install received file into file descriptor table
1300	* @file: struct file that was received from another process
1301	* @ufd: __user pointer to write new fd number to
1302	* @o_flags: the O_* flags to apply to the new fd entry
1303	*
1304	* Installs a received file into the file descriptor table, with appropriate
1305	* checks and count updates. Optionally writes the fd number to userspace, if
1306	* @ufd is non-NULL.
1307	*
1308	* This helper handles its own reference counting of the incoming
1309	* struct file.
1310	*
1311	* Returns newly install fd or -ve on error.
1312	*/
1313	int receive_fd(struct file file, int* __user ufd, unsigned* int o_flags)
1314	{
1315	int new_fd;
1316	int error;
1317
1318	error = security_file_receive(file);
1319	if (error)
1320	return error;
1321
1322	new_fd = get_unused_fd_flags(o_flags);
1323	if (new_fd < `0`)
1324	return new_fd;
1325
1326	if (ufd) {
1327	error = put_user(new_fd, ufd);
1328	if (error) {
1329	put_unused_fd(new_fd);
1330	return error;
1331	}
1332	}
1333
1334	fd_install(new_fd, get_file(f: file));
1335	__receive_sock(file);
1336	return new_fd;
1337	}
1338	EXPORT_SYMBOL_GPL(receive_fd);
1339
/*
 * receive_fd_replace - install a received @file at a caller-chosen
 * descriptor @new_fd, replacing whatever was there.
 *
 * Returns @new_fd on success, or a negative error from
 * security_file_receive() or replace_fd().
 */
int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags)
{
	int error;

	error = security_file_receive(file);
	if (error)
		return error;
	error = replace_fd(new_fd, file, o_flags);
	/*
	 * replace_fd() hands back do_dup2()'s result, which is the descriptor
	 * number on success.  Only negative values are failures; testing for
	 * "non-zero" would bail out for every fd other than 0 and skip the
	 * __receive_sock() call below.
	 */
	if (error < 0)
		return error;
	__receive_sock(file);
	return new_fd;
}
1353
1354	static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
1355	{
1356	int err = -EBADF;
1357	struct file *file;
1358	struct files_struct *files = current->files;
1359
1360	if ((flags & ~O_CLOEXEC) != `0`)
1361	return -EINVAL;
1362
1363	if (unlikely(oldfd == newfd))
1364	return -EINVAL;
1365
1366	if (newfd >= rlimit(RLIMIT_NOFILE))
1367	return -EBADF;
1368
1369	spin_lock(lock: &files->file_lock);
1370	err = expand_files(files, nr: newfd);
1371	file = files_lookup_fd_locked(files, fd: oldfd);
1372	if (unlikely(!file))
1373	goto Ebadf;
1374	if (unlikely(err < `0`)) {
1375	if (err == -EMFILE)
1376	goto Ebadf;
1377	goto out_unlock;
1378	}
1379	return do_dup2(files, file, fd: newfd, flags);
1380
1381	Ebadf:
1382	err = -EBADF;
1383	out_unlock:
1384	spin_unlock(lock: &files->file_lock);
1385	return err;
1386	}
1387
1388	SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
1389	{
1390	return ksys_dup3(oldfd, newfd, flags);
1391	}
1392
1393	SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
1394	{
1395	if (unlikely(newfd == oldfd)) { / corner case /
1396	struct files_struct *files = current->files;
1397	struct file *f;
1398	int retval = oldfd;
1399
1400	rcu_read_lock();
1401	f = __fget_files_rcu(files, fd: oldfd, mask: `0`);
1402	if (!f)
1403	retval = -EBADF;
1404	rcu_read_unlock();
1405	if (f)
1406	fput(f);
1407	return retval;
1408	}
1409	return ksys_dup3(oldfd, newfd, flags: `0`);
1410	}
1411
1412	SYSCALL_DEFINE1(dup, unsigned int, fildes)
1413	{
1414	int ret = -EBADF;
1415	struct file *file = fget_raw(fildes);
1416
1417	if (file) {
1418	ret = get_unused_fd_flags(`0`);
1419	if (ret >= `0`)
1420	fd_install(ret, file);
1421	else
1422	fput(file);
1423	}
1424	return ret;
1425	}
1426
1427	int f_dupfd(unsigned int from, struct file file, unsigned* flags)
1428	{
1429	unsigned long nofile = rlimit(RLIMIT_NOFILE);
1430	int err;
1431	if (from >= nofile)
1432	return -EINVAL;
1433	err = alloc_fd(start: from, end: nofile, flags);
1434	if (err >= `0`) {
1435	get_file(f: file);
1436	fd_install(err, file);
1437	}
1438	return err;
1439	}
1440
1441	int iterate_fd(struct files_struct files, unsigned* n,
1442	int (f)(const* void , struct* file , unsigned*),
1443	const void *p)
1444	{
1445	struct fdtable *fdt;
1446	int res = `0`;
1447	if (!files)
1448	return `0`;
1449	spin_lock(lock: &files->file_lock);
1450	for (fdt = files_fdtable(files); n < fdt->max_fds; n++) {
1451	struct file *file;
1452	file = rcu_dereference_check_fdtable(files, fdt->fd[n]);
1453	if (!file)
1454	continue;
1455	res = f(p, file, n);
1456	if (res)
1457	break;
1458	}
1459	spin_unlock(lock: &files->file_lock);
1460	return res;
1461	}
1462	EXPORT_SYMBOL(iterate_fd);
1463

source code of linux/fs/file.c