expfs.c source code [linux/fs/exportfs/expfs.c]

1	// SPDX-License-Identifier: GPL-2.0-only
2	/*
3	* Copyright (C) Neil Brown 2002
4	* Copyright (C) Christoph Hellwig 2007
5	*
6	* This file contains the code mapping from inodes to NFS file handles,
7	* and for mapping back from file handles to dentries.
8	*
9	* For details on why we do all the strange and hairy things in here
10	* take a look at Documentation/filesystems/nfs/exporting.rst.
11	*/
12	#include <linux/exportfs.h>
13	#include <linux/fs.h>
14	#include <linux/file.h>
15	#include <linux/module.h>
16	#include <linux/mount.h>
17	#include <linux/namei.h>
18	#include <linux/sched.h>
19	#include <linux/cred.h>
20
/* Debug print helper: forwards the format string and arguments to pr_debug(). */
#define dprintk(fmt, ...) pr_debug(fmt, ##__VA_ARGS__)
22
23
/* Forward declaration: generic readdir-based fallback, defined below. */
static int get_name(const struct path *path, char *name, struct dentry *child);
25
26
27	static int exportfs_get_name(struct vfsmount mnt, struct* dentry *dir,
28	char name, struct* dentry *child)
29	{
30	const struct export_operations *nop = dir->d_sb->s_export_op;
31	struct path path = {.mnt = mnt, .dentry = dir};
32
33	if (nop->get_name)
34	return nop->get_name(dir, name, child);
35	else
36	return get_name(path: &path, name, child);
37	}
38
39	/*
40	* Check if the dentry or any of it's aliases is acceptable.
41	*/
42	static struct dentry *
43	find_acceptable_alias(struct dentry *result,
44	int (acceptable)(void* context, struct* dentry *dentry),
45	void *context)
46	{
47	struct dentry dentry, toput = NULL;
48	struct inode *inode;
49
50	if (acceptable(context, result))
51	return result;
52
53	inode = result->d_inode;
54	spin_lock(lock: &inode->i_lock);
55	hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
56	dget(dentry);
57	spin_unlock(lock: &inode->i_lock);
58	if (toput)
59	dput(toput);
60	if (dentry != result && acceptable(context, dentry)) {
61	dput(result);
62	return dentry;
63	}
64	spin_lock(lock: &inode->i_lock);
65	toput = dentry;
66	}
67	spin_unlock(lock: &inode->i_lock);
68
69	if (toput)
70	dput(toput);
71	return NULL;
72	}
73
74	static bool dentry_connected(struct dentry *dentry)
75	{
76	dget(dentry);
77	while (dentry->d_flags & DCACHE_DISCONNECTED) {
78	struct dentry *parent = dget_parent(dentry);
79
80	dput(dentry);
81	if (dentry == parent) {
82	dput(parent);
83	return false;
84	}
85	dentry = parent;
86	}
87	dput(dentry);
88	return true;
89	}
90
91	static void clear_disconnected(struct dentry *dentry)
92	{
93	dget(dentry);
94	while (dentry->d_flags & DCACHE_DISCONNECTED) {
95	struct dentry *parent = dget_parent(dentry);
96
97	WARN_ON_ONCE(IS_ROOT(dentry));
98
99	spin_lock(lock: &dentry->d_lock);
100	dentry->d_flags &= ~DCACHE_DISCONNECTED;
101	spin_unlock(lock: &dentry->d_lock);
102
103	dput(dentry);
104	dentry = parent;
105	}
106	dput(dentry);
107	}
108
109	/*
110	* Reconnect a directory dentry with its parent.
111	*
112	* This can return a dentry, or NULL, or an error.
113	*
114	* In the first case the returned dentry is the parent of the given
115	* dentry, and may itself need to be reconnected to its parent.
116	*
117	* In the NULL case, a concurrent VFS operation has either renamed or
118	* removed this directory. The concurrent operation has reconnected our
119	* dentry, so we no longer need to.
120	*/
121	static struct dentry reconnect_one(struct* vfsmount *mnt,
122	struct dentry dentry, char* *nbuf)
123	{
124	struct dentry *parent;
125	struct dentry *tmp;
126	int err;
127
128	parent = ERR_PTR(error: -EACCES);
129	inode_lock(inode: dentry->d_inode);
130	if (mnt->mnt_sb->s_export_op->get_parent)
131	parent = mnt->mnt_sb->s_export_op->get_parent(dentry);
132	inode_unlock(inode: dentry->d_inode);
133
134	if (IS_ERR(ptr: parent)) {
135	dprintk("get_parent of %lu failed, err %ld\n",
136	dentry->d_inode->i_ino, PTR_ERR(parent));
137	return parent;
138	}
139
140	dprintk("%s: find name of %lu in %lu\n", __func__,
141	dentry->d_inode->i_ino, parent->d_inode->i_ino);
142	err = exportfs_get_name(mnt, dir: parent, name: nbuf, child: dentry);
143	if (err == -ENOENT)
144	goto out_reconnected;
145	if (err)
146	goto out_err;
147	dprintk("%s: found name: %s\n", __func__, nbuf);
148	tmp = lookup_one_unlocked(idmap: mnt_idmap(mnt), name: nbuf, base: parent, strlen(nbuf));
149	if (IS_ERR(ptr: tmp)) {
150	dprintk("lookup failed: %ld\n", PTR_ERR(tmp));
151	err = PTR_ERR(ptr: tmp);
152	goto out_err;
153	}
154	if (tmp != dentry) {
155	/*
156	* Somebody has renamed it since exportfs_get_name();
157	* great, since it could've only been renamed if it
158	* got looked up and thus connected, and it would
159	* remain connected afterwards. We are done.
160	*/
161	dput(tmp);
162	goto out_reconnected;
163	}
164	dput(tmp);
165	if (IS_ROOT(dentry)) {
166	err = -ESTALE;
167	goto out_err;
168	}
169	return parent;
170
171	out_err:
172	dput(parent);
173	return ERR_PTR(error: err);
174	out_reconnected:
175	dput(parent);
176	/*
177	* Someone must have renamed our entry into another parent, in
178	* which case it has been reconnected by the rename.
179	*
180	* Or someone removed it entirely, in which case filehandle
181	* lookup will succeed but the directory is now IS_DEAD and
182	* subsequent operations on it will fail.
183	*
184	* Alternatively, maybe there was no race at all, and the
185	* filesystem is just corrupt and gave us a parent that doesn't
186	* actually contain any entry pointing to this inode. So,
187	* double check that this worked and return -ESTALE if not:
188	*/
189	if (!dentry_connected(dentry))
190	return ERR_PTR(error: -ESTALE);
191	return NULL;
192	}
193
194	/*
195	* Make sure target_dir is fully connected to the dentry tree.
196	*
197	* On successful return, DCACHE_DISCONNECTED will be cleared on
198	* target_dir, and target_dir->d_parent->...->d_parent will reach the
199	* root of the filesystem.
200	*
201	* Whenever DCACHE_DISCONNECTED is unset, target_dir is fully connected.
202	* But the converse is not true: target_dir may have DCACHE_DISCONNECTED
203	* set but already be connected. In that case we'll verify the
204	* connection to root and then clear the flag.
205	*
206	* Note that target_dir could be removed by a concurrent operation. In
207	* that case reconnect_path may still succeed with target_dir fully
208	* connected, but further operations using the filehandle will fail when
209	* necessary (due to S_DEAD being set on the directory).
210	*/
211	static int
212	reconnect_path(struct vfsmount mnt, struct* dentry target_dir, char* *nbuf)
213	{
214	struct dentry dentry, parent;
215
216	dentry = dget(dentry: target_dir);
217
218	while (dentry->d_flags & DCACHE_DISCONNECTED) {
219	BUG_ON(dentry == mnt->mnt_sb->s_root);
220
221	if (IS_ROOT(dentry))
222	parent = reconnect_one(mnt, dentry, nbuf);
223	else
224	parent = dget_parent(dentry);
225
226	if (!parent)
227	break;
228	dput(dentry);
229	if (IS_ERR(ptr: parent))
230	return PTR_ERR(ptr: parent);
231	dentry = parent;
232	}
233	dput(dentry);
234	clear_disconnected(dentry: target_dir);
235	return `0`;
236	}
237
238	struct getdents_callback {
239	struct dir_context ctx;
240	char name; /* name that was found. It already points to a*
241	buffer NAME_MAX+1 is size /*
242	u64 ino; / the inum we are looking for /
243	int found; / inode matched? /
244	int sequence; / sequence counter /
245	};
246
247	/*
248	* A rather strange filldir function to capture
249	* the name matching the specified inode number.
250	*/
251	static bool filldir_one(struct dir_context ctx, const* char name, int* len,
252	loff_t pos, u64 ino, unsigned int d_type)
253	{
254	struct getdents_callback *buf =
255	container_of(ctx, struct getdents_callback, ctx);
256
257	buf->sequence++;
258	if (buf->ino == ino && len <= NAME_MAX && !is_dot_dotdot(name, len)) {
259	memcpy(buf->name, name, len);
260	buf->name[len] = `'\0'`;
261	buf->found = `1`;
262	return false; // no more
263	}
264	return true;
265	}
266
267	/**
268	* get_name - default export_operations->get_name function
269	* @path: the directory in which to find a name
270	* @name: a pointer to a %NAME_MAX+1 char buffer to store the name
271	* @child: the dentry for the child directory.
272	*
273	* calls readdir on the parent until it finds an entry with
274	* the same inode number as the child, and returns that.
275	*/
276	static int get_name(const struct path path, char* name, struct* dentry *child)
277	{
278	const struct cred *cred = current_cred();
279	struct inode *dir = path->dentry->d_inode;
280	int error;
281	struct file *file;
282	struct kstat stat;
283	struct path child_path = {
284	.mnt = path->mnt,
285	.dentry = child,
286	};
287	struct getdents_callback buffer = {
288	.ctx.actor = filldir_one,
289	.name = name,
290	};
291
292	error = -ENOTDIR;
293	if (!dir \|\| !S_ISDIR(dir->i_mode))
294	goto out;
295	error = -EINVAL;
296	if (!dir->i_fop)
297	goto out;
298	/*
299	* inode->i_ino is unsigned long, kstat->ino is u64, so the
300	* former would be insufficient on 32-bit hosts when the
301	* filesystem supports 64-bit inode numbers. So we need to
302	* actually call ->getattr, not just read i_ino:
303	*/
304	error = vfs_getattr_nosec(&child_path, &stat,
305	STATX_INO, AT_STATX_SYNC_AS_STAT);
306	if (error)
307	return error;
308	buffer.ino = stat.ino;
309	/*
310	* Open the directory ...
311	*/
312	file = dentry_open(path, O_RDONLY, creds: cred);
313	error = PTR_ERR(ptr: file);
314	if (IS_ERR(ptr: file))
315	goto out;
316
317	error = -EINVAL;
318	if (!file->f_op->iterate_shared)
319	goto out_close;
320
321	buffer.sequence = `0`;
322	while (`1`) {
323	int old_seq = buffer.sequence;
324
325	error = iterate_dir(file, &buffer.ctx);
326	if (buffer.found) {
327	error = `0`;
328	break;
329	}
330
331	if (error < `0`)
332	break;
333
334	error = -ENOENT;
335	if (old_seq == buffer.sequence)
336	break;
337	}
338
339	out_close:
340	fput(file);
341	out:
342	return error;
343	}
344
345	#define FILEID_INO64_GEN_LEN 3
346
347	/**
348	* exportfs_encode_ino64_fid - encode non-decodeable 64bit ino file id
349	* @inode: the object to encode
350	* @fid: where to store the file handle fragment
351	* @max_len: maximum length to store there (in 4 byte units)
352	*
353	* This generic function is used to encode a non-decodeable file id for
354	* fanotify for filesystems that do not support NFS export.
355	*/
356	static int exportfs_encode_ino64_fid(struct inode inode, struct* fid *fid,
357	int *max_len)
358	{
359	if (*max_len < FILEID_INO64_GEN_LEN) {
360	*max_len = FILEID_INO64_GEN_LEN;
361	return FILEID_INVALID;
362	}
363
364	fid->i64.ino = inode->i_ino;
365	fid->i64.gen = inode->i_generation;
366	*max_len = FILEID_INO64_GEN_LEN;
367
368	return FILEID_INO64_GEN;
369	}
370
371	/**
372	* exportfs_encode_inode_fh - encode a file handle from inode
373	* @inode: the object to encode
374	* @fid: where to store the file handle fragment
375	* @max_len: maximum length to store there
376	* @parent: parent directory inode, if wanted
377	* @flags: properties of the requested file handle
378	*
379	* Returns an enum fid_type or a negative errno.
380	*/
381	int exportfs_encode_inode_fh(struct inode inode, struct* fid *fid,
382	int max_len, struct* inode parent, int* flags)
383	{
384	const struct export_operations *nop = inode->i_sb->s_export_op;
385
386	if (!exportfs_can_encode_fh(nop, fh_flags: flags))
387	return -EOPNOTSUPP;
388
389	if (!nop && (flags & EXPORT_FH_FID))
390	return exportfs_encode_ino64_fid(inode, fid, max_len);
391
392	return nop->encode_fh(inode, fid->raw, max_len, parent);
393	}
394	EXPORT_SYMBOL_GPL(exportfs_encode_inode_fh);
395
396	/**
397	* exportfs_encode_fh - encode a file handle from dentry
398	* @dentry: the object to encode
399	* @fid: where to store the file handle fragment
400	* @max_len: maximum length to store there
401	* @flags: properties of the requested file handle
402	*
403	* Returns an enum fid_type or a negative errno.
404	*/
405	int exportfs_encode_fh(struct dentry dentry, struct* fid fid, int* *max_len,
406	int flags)
407	{
408	int error;
409	struct dentry *p = NULL;
410	struct inode inode = dentry->d_inode, parent = NULL;
411
412	if ((flags & EXPORT_FH_CONNECTABLE) && !S_ISDIR(inode->i_mode)) {
413	p = dget_parent(dentry);
414	/*
415	* note that while p might've ceased to be our parent already,
416	* it's still pinned by and still positive.
417	*/
418	parent = p->d_inode;
419	}
420
421	error = exportfs_encode_inode_fh(inode, fid, max_len, parent, flags);
422	dput(p);
423
424	return error;
425	}
426	EXPORT_SYMBOL_GPL(exportfs_encode_fh);
427
428	struct dentry *
429	exportfs_decode_fh_raw(struct vfsmount mnt, struct* fid fid, int* fh_len,
430	int fileid_type,
431	int (acceptable)(void* , struct* dentry *),
432	void *context)
433	{
434	const struct export_operations *nop = mnt->mnt_sb->s_export_op;
435	struct dentry result, alias;
436	char nbuf[NAME_MAX+`1`];
437	int err;
438
439	/*
440	* Try to get any dentry for the given file handle from the filesystem.
441	*/
442	if (!exportfs_can_decode_fh(nop))
443	return ERR_PTR(error: -ESTALE);
444	result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type);
445	if (IS_ERR_OR_NULL(ptr: result))
446	return result;
447
448	/*
449	* If no acceptance criteria was specified by caller, a disconnected
450	* dentry is also accepatable. Callers may use this mode to query if
451	* file handle is stale or to get a reference to an inode without
452	* risking the high overhead caused by directory reconnect.
453	*/
454	if (!acceptable)
455	return result;
456
457	if (d_is_dir(dentry: result)) {
458	/*
459	* This request is for a directory.
460	*
461	* On the positive side there is only one dentry for each
462	* directory inode. On the negative side this implies that we
463	* to ensure our dentry is connected all the way up to the
464	* filesystem root.
465	*/
466	if (result->d_flags & DCACHE_DISCONNECTED) {
467	err = reconnect_path(mnt, target_dir: result, nbuf);
468	if (err)
469	goto err_result;
470	}
471
472	if (!acceptable(context, result)) {
473	err = -EACCES;
474	goto err_result;
475	}
476
477	return result;
478	} else {
479	/*
480	* It's not a directory. Life is a little more complicated.
481	*/
482	struct dentry target_dir, nresult;
483
484	/*
485	* See if either the dentry we just got from the filesystem
486	* or any alias for it is acceptable. This is always true
487	* if this filesystem is exported without the subtreecheck
488	* option. If the filesystem is exported with the subtree
489	* check option there's a fair chance we need to look at
490	* the parent directory in the file handle and make sure
491	* it's connected to the filesystem root.
492	*/
493	alias = find_acceptable_alias(result, acceptable, context);
494	if (alias)
495	return alias;
496
497	/*
498	* Try to extract a dentry for the parent directory from the
499	* file handle. If this fails we'll have to give up.
500	*/
501	err = -ESTALE;
502	if (!nop->fh_to_parent)
503	goto err_result;
504
505	target_dir = nop->fh_to_parent(mnt->mnt_sb, fid,
506	fh_len, fileid_type);
507	if (!target_dir)
508	goto err_result;
509	err = PTR_ERR(ptr: target_dir);
510	if (IS_ERR(ptr: target_dir))
511	goto err_result;
512
513	/*
514	* And as usual we need to make sure the parent directory is
515	* connected to the filesystem root. The VFS really doesn't
516	* like disconnected directories..
517	*/
518	err = reconnect_path(mnt, target_dir, nbuf);
519	if (err) {
520	dput(target_dir);
521	goto err_result;
522	}
523
524	/*
525	* Now that we've got both a well-connected parent and a
526	* dentry for the inode we're after, make sure that our
527	* inode is actually connected to the parent.
528	*/
529	err = exportfs_get_name(mnt, dir: target_dir, name: nbuf, child: result);
530	if (err) {
531	dput(target_dir);
532	goto err_result;
533	}
534
535	inode_lock(inode: target_dir->d_inode);
536	nresult = lookup_one(mnt_idmap(mnt), nbuf,
537	target_dir, strlen(nbuf));
538	if (!IS_ERR(ptr: nresult)) {
539	if (unlikely(nresult->d_inode != result->d_inode)) {
540	dput(nresult);
541	nresult = ERR_PTR(error: -ESTALE);
542	}
543	}
544	inode_unlock(inode: target_dir->d_inode);
545	/*
546	* At this point we are done with the parent, but it's pinned
547	* by the child dentry anyway.
548	*/
549	dput(target_dir);
550
551	if (IS_ERR(ptr: nresult)) {
552	err = PTR_ERR(ptr: nresult);
553	goto err_result;
554	}
555	dput(result);
556	result = nresult;
557
558	/*
559	* And finally make sure the dentry is actually acceptable
560	* to NFSD.
561	*/
562	alias = find_acceptable_alias(result, acceptable, context);
563	if (!alias) {
564	err = -EACCES;
565	goto err_result;
566	}
567
568	return alias;
569	}
570
571	err_result:
572	dput(result);
573	return ERR_PTR(error: err);
574	}
575	EXPORT_SYMBOL_GPL(exportfs_decode_fh_raw);
576
577	struct dentry exportfs_decode_fh(struct* vfsmount mnt, struct* fid *fid,
578	int fh_len, int fileid_type,
579	int (acceptable)(void* , struct* dentry *),
580	void *context)
581	{
582	struct dentry *ret;
583
584	ret = exportfs_decode_fh_raw(mnt, fid, fh_len, fileid_type,
585	acceptable, context);
586	if (IS_ERR_OR_NULL(ptr: ret)) {
587	if (ret == ERR_PTR(error: -ENOMEM))
588	return ret;
589	return ERR_PTR(error: -ESTALE);
590	}
591	return ret;
592	}
593	EXPORT_SYMBOL_GPL(exportfs_decode_fh);
594
595	MODULE_LICENSE("GPL");
596

source code of linux/fs/exportfs/expfs.c