1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright (C) Neil Brown 2002 |
4 | * Copyright (C) Christoph Hellwig 2007 |
5 | * |
6 | * This file contains the code mapping from inodes to NFS file handles, |
7 | * and for mapping back from file handles to dentries. |
8 | * |
9 | * For details on why we do all the strange and hairy things in here |
10 | * take a look at Documentation/filesystems/nfs/exporting.rst. |
11 | */ |
12 | #include <linux/exportfs.h> |
13 | #include <linux/fs.h> |
14 | #include <linux/file.h> |
15 | #include <linux/module.h> |
16 | #include <linux/mount.h> |
17 | #include <linux/namei.h> |
18 | #include <linux/sched.h> |
19 | #include <linux/cred.h> |
20 | |
/* Debug tracing for this file; forwards straight to pr_debug(). */
#define dprintk(fmt, ...) pr_debug(fmt, ##__VA_ARGS__)

/*
 * Forward declaration: the readdir-based fallback called from
 * exportfs_get_name() is defined further down in this file.
 */
static int get_name(const struct path *path, char *name,
		    struct dentry *child);
25 | |
26 | |
27 | static int exportfs_get_name(struct vfsmount *mnt, struct dentry *dir, |
28 | char *name, struct dentry *child) |
29 | { |
30 | const struct export_operations *nop = dir->d_sb->s_export_op; |
31 | struct path path = {.mnt = mnt, .dentry = dir}; |
32 | |
33 | if (nop->get_name) |
34 | return nop->get_name(dir, name, child); |
35 | else |
36 | return get_name(path: &path, name, child); |
37 | } |
38 | |
39 | /* |
40 | * Check if the dentry or any of it's aliases is acceptable. |
41 | */ |
42 | static struct dentry * |
43 | find_acceptable_alias(struct dentry *result, |
44 | int (*acceptable)(void *context, struct dentry *dentry), |
45 | void *context) |
46 | { |
47 | struct dentry *dentry, *toput = NULL; |
48 | struct inode *inode; |
49 | |
50 | if (acceptable(context, result)) |
51 | return result; |
52 | |
53 | inode = result->d_inode; |
54 | spin_lock(lock: &inode->i_lock); |
55 | hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { |
56 | dget(dentry); |
57 | spin_unlock(lock: &inode->i_lock); |
58 | if (toput) |
59 | dput(toput); |
60 | if (dentry != result && acceptable(context, dentry)) { |
61 | dput(result); |
62 | return dentry; |
63 | } |
64 | spin_lock(lock: &inode->i_lock); |
65 | toput = dentry; |
66 | } |
67 | spin_unlock(lock: &inode->i_lock); |
68 | |
69 | if (toput) |
70 | dput(toput); |
71 | return NULL; |
72 | } |
73 | |
74 | static bool dentry_connected(struct dentry *dentry) |
75 | { |
76 | dget(dentry); |
77 | while (dentry->d_flags & DCACHE_DISCONNECTED) { |
78 | struct dentry *parent = dget_parent(dentry); |
79 | |
80 | dput(dentry); |
81 | if (dentry == parent) { |
82 | dput(parent); |
83 | return false; |
84 | } |
85 | dentry = parent; |
86 | } |
87 | dput(dentry); |
88 | return true; |
89 | } |
90 | |
91 | static void clear_disconnected(struct dentry *dentry) |
92 | { |
93 | dget(dentry); |
94 | while (dentry->d_flags & DCACHE_DISCONNECTED) { |
95 | struct dentry *parent = dget_parent(dentry); |
96 | |
97 | WARN_ON_ONCE(IS_ROOT(dentry)); |
98 | |
99 | spin_lock(lock: &dentry->d_lock); |
100 | dentry->d_flags &= ~DCACHE_DISCONNECTED; |
101 | spin_unlock(lock: &dentry->d_lock); |
102 | |
103 | dput(dentry); |
104 | dentry = parent; |
105 | } |
106 | dput(dentry); |
107 | } |
108 | |
109 | /* |
110 | * Reconnect a directory dentry with its parent. |
111 | * |
112 | * This can return a dentry, or NULL, or an error. |
113 | * |
114 | * In the first case the returned dentry is the parent of the given |
115 | * dentry, and may itself need to be reconnected to its parent. |
116 | * |
117 | * In the NULL case, a concurrent VFS operation has either renamed or |
118 | * removed this directory. The concurrent operation has reconnected our |
119 | * dentry, so we no longer need to. |
120 | */ |
121 | static struct dentry *reconnect_one(struct vfsmount *mnt, |
122 | struct dentry *dentry, char *nbuf) |
123 | { |
124 | struct dentry *parent; |
125 | struct dentry *tmp; |
126 | int err; |
127 | |
128 | parent = ERR_PTR(error: -EACCES); |
129 | inode_lock(inode: dentry->d_inode); |
130 | if (mnt->mnt_sb->s_export_op->get_parent) |
131 | parent = mnt->mnt_sb->s_export_op->get_parent(dentry); |
132 | inode_unlock(inode: dentry->d_inode); |
133 | |
134 | if (IS_ERR(ptr: parent)) { |
135 | dprintk("get_parent of %lu failed, err %ld\n" , |
136 | dentry->d_inode->i_ino, PTR_ERR(parent)); |
137 | return parent; |
138 | } |
139 | |
140 | dprintk("%s: find name of %lu in %lu\n" , __func__, |
141 | dentry->d_inode->i_ino, parent->d_inode->i_ino); |
142 | err = exportfs_get_name(mnt, dir: parent, name: nbuf, child: dentry); |
143 | if (err == -ENOENT) |
144 | goto out_reconnected; |
145 | if (err) |
146 | goto out_err; |
147 | dprintk("%s: found name: %s\n" , __func__, nbuf); |
148 | tmp = lookup_one_unlocked(idmap: mnt_idmap(mnt), name: nbuf, base: parent, strlen(nbuf)); |
149 | if (IS_ERR(ptr: tmp)) { |
150 | dprintk("lookup failed: %ld\n" , PTR_ERR(tmp)); |
151 | err = PTR_ERR(ptr: tmp); |
152 | goto out_err; |
153 | } |
154 | if (tmp != dentry) { |
155 | /* |
156 | * Somebody has renamed it since exportfs_get_name(); |
157 | * great, since it could've only been renamed if it |
158 | * got looked up and thus connected, and it would |
159 | * remain connected afterwards. We are done. |
160 | */ |
161 | dput(tmp); |
162 | goto out_reconnected; |
163 | } |
164 | dput(tmp); |
165 | if (IS_ROOT(dentry)) { |
166 | err = -ESTALE; |
167 | goto out_err; |
168 | } |
169 | return parent; |
170 | |
171 | out_err: |
172 | dput(parent); |
173 | return ERR_PTR(error: err); |
174 | out_reconnected: |
175 | dput(parent); |
176 | /* |
177 | * Someone must have renamed our entry into another parent, in |
178 | * which case it has been reconnected by the rename. |
179 | * |
180 | * Or someone removed it entirely, in which case filehandle |
181 | * lookup will succeed but the directory is now IS_DEAD and |
182 | * subsequent operations on it will fail. |
183 | * |
184 | * Alternatively, maybe there was no race at all, and the |
185 | * filesystem is just corrupt and gave us a parent that doesn't |
186 | * actually contain any entry pointing to this inode. So, |
187 | * double check that this worked and return -ESTALE if not: |
188 | */ |
189 | if (!dentry_connected(dentry)) |
190 | return ERR_PTR(error: -ESTALE); |
191 | return NULL; |
192 | } |
193 | |
194 | /* |
195 | * Make sure target_dir is fully connected to the dentry tree. |
196 | * |
197 | * On successful return, DCACHE_DISCONNECTED will be cleared on |
198 | * target_dir, and target_dir->d_parent->...->d_parent will reach the |
199 | * root of the filesystem. |
200 | * |
201 | * Whenever DCACHE_DISCONNECTED is unset, target_dir is fully connected. |
202 | * But the converse is not true: target_dir may have DCACHE_DISCONNECTED |
203 | * set but already be connected. In that case we'll verify the |
204 | * connection to root and then clear the flag. |
205 | * |
206 | * Note that target_dir could be removed by a concurrent operation. In |
207 | * that case reconnect_path may still succeed with target_dir fully |
208 | * connected, but further operations using the filehandle will fail when |
209 | * necessary (due to S_DEAD being set on the directory). |
210 | */ |
211 | static int |
212 | reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf) |
213 | { |
214 | struct dentry *dentry, *parent; |
215 | |
216 | dentry = dget(dentry: target_dir); |
217 | |
218 | while (dentry->d_flags & DCACHE_DISCONNECTED) { |
219 | BUG_ON(dentry == mnt->mnt_sb->s_root); |
220 | |
221 | if (IS_ROOT(dentry)) |
222 | parent = reconnect_one(mnt, dentry, nbuf); |
223 | else |
224 | parent = dget_parent(dentry); |
225 | |
226 | if (!parent) |
227 | break; |
228 | dput(dentry); |
229 | if (IS_ERR(ptr: parent)) |
230 | return PTR_ERR(ptr: parent); |
231 | dentry = parent; |
232 | } |
233 | dput(dentry); |
234 | clear_disconnected(dentry: target_dir); |
235 | return 0; |
236 | } |
237 | |
238 | struct getdents_callback { |
239 | struct dir_context ctx; |
240 | char *name; /* name that was found. It already points to a |
241 | buffer NAME_MAX+1 is size */ |
242 | u64 ino; /* the inum we are looking for */ |
243 | int found; /* inode matched? */ |
244 | int sequence; /* sequence counter */ |
245 | }; |
246 | |
247 | /* |
248 | * A rather strange filldir function to capture |
249 | * the name matching the specified inode number. |
250 | */ |
251 | static bool filldir_one(struct dir_context *ctx, const char *name, int len, |
252 | loff_t pos, u64 ino, unsigned int d_type) |
253 | { |
254 | struct getdents_callback *buf = |
255 | container_of(ctx, struct getdents_callback, ctx); |
256 | |
257 | buf->sequence++; |
258 | if (buf->ino == ino && len <= NAME_MAX && !is_dot_dotdot(name, len)) { |
259 | memcpy(buf->name, name, len); |
260 | buf->name[len] = '\0'; |
261 | buf->found = 1; |
262 | return false; // no more |
263 | } |
264 | return true; |
265 | } |
266 | |
267 | /** |
268 | * get_name - default export_operations->get_name function |
269 | * @path: the directory in which to find a name |
270 | * @name: a pointer to a %NAME_MAX+1 char buffer to store the name |
271 | * @child: the dentry for the child directory. |
272 | * |
273 | * calls readdir on the parent until it finds an entry with |
274 | * the same inode number as the child, and returns that. |
275 | */ |
276 | static int get_name(const struct path *path, char *name, struct dentry *child) |
277 | { |
278 | const struct cred *cred = current_cred(); |
279 | struct inode *dir = path->dentry->d_inode; |
280 | int error; |
281 | struct file *file; |
282 | struct kstat stat; |
283 | struct path child_path = { |
284 | .mnt = path->mnt, |
285 | .dentry = child, |
286 | }; |
287 | struct getdents_callback buffer = { |
288 | .ctx.actor = filldir_one, |
289 | .name = name, |
290 | }; |
291 | |
292 | error = -ENOTDIR; |
293 | if (!dir || !S_ISDIR(dir->i_mode)) |
294 | goto out; |
295 | error = -EINVAL; |
296 | if (!dir->i_fop) |
297 | goto out; |
298 | /* |
299 | * inode->i_ino is unsigned long, kstat->ino is u64, so the |
300 | * former would be insufficient on 32-bit hosts when the |
301 | * filesystem supports 64-bit inode numbers. So we need to |
302 | * actually call ->getattr, not just read i_ino: |
303 | */ |
304 | error = vfs_getattr_nosec(&child_path, &stat, |
305 | STATX_INO, AT_STATX_SYNC_AS_STAT); |
306 | if (error) |
307 | return error; |
308 | buffer.ino = stat.ino; |
309 | /* |
310 | * Open the directory ... |
311 | */ |
312 | file = dentry_open(path, O_RDONLY, creds: cred); |
313 | error = PTR_ERR(ptr: file); |
314 | if (IS_ERR(ptr: file)) |
315 | goto out; |
316 | |
317 | error = -EINVAL; |
318 | if (!file->f_op->iterate_shared) |
319 | goto out_close; |
320 | |
321 | buffer.sequence = 0; |
322 | while (1) { |
323 | int old_seq = buffer.sequence; |
324 | |
325 | error = iterate_dir(file, &buffer.ctx); |
326 | if (buffer.found) { |
327 | error = 0; |
328 | break; |
329 | } |
330 | |
331 | if (error < 0) |
332 | break; |
333 | |
334 | error = -ENOENT; |
335 | if (old_seq == buffer.sequence) |
336 | break; |
337 | } |
338 | |
339 | out_close: |
340 | fput(file); |
341 | out: |
342 | return error; |
343 | } |
344 | |
345 | #define FILEID_INO64_GEN_LEN 3 |
346 | |
347 | /** |
348 | * exportfs_encode_ino64_fid - encode non-decodeable 64bit ino file id |
349 | * @inode: the object to encode |
350 | * @fid: where to store the file handle fragment |
351 | * @max_len: maximum length to store there (in 4 byte units) |
352 | * |
353 | * This generic function is used to encode a non-decodeable file id for |
354 | * fanotify for filesystems that do not support NFS export. |
355 | */ |
356 | static int exportfs_encode_ino64_fid(struct inode *inode, struct fid *fid, |
357 | int *max_len) |
358 | { |
359 | if (*max_len < FILEID_INO64_GEN_LEN) { |
360 | *max_len = FILEID_INO64_GEN_LEN; |
361 | return FILEID_INVALID; |
362 | } |
363 | |
364 | fid->i64.ino = inode->i_ino; |
365 | fid->i64.gen = inode->i_generation; |
366 | *max_len = FILEID_INO64_GEN_LEN; |
367 | |
368 | return FILEID_INO64_GEN; |
369 | } |
370 | |
371 | /** |
372 | * exportfs_encode_inode_fh - encode a file handle from inode |
373 | * @inode: the object to encode |
374 | * @fid: where to store the file handle fragment |
375 | * @max_len: maximum length to store there |
376 | * @parent: parent directory inode, if wanted |
377 | * @flags: properties of the requested file handle |
378 | * |
379 | * Returns an enum fid_type or a negative errno. |
380 | */ |
381 | int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, |
382 | int *max_len, struct inode *parent, int flags) |
383 | { |
384 | const struct export_operations *nop = inode->i_sb->s_export_op; |
385 | |
386 | if (!exportfs_can_encode_fh(nop, fh_flags: flags)) |
387 | return -EOPNOTSUPP; |
388 | |
389 | if (!nop && (flags & EXPORT_FH_FID)) |
390 | return exportfs_encode_ino64_fid(inode, fid, max_len); |
391 | |
392 | return nop->encode_fh(inode, fid->raw, max_len, parent); |
393 | } |
394 | EXPORT_SYMBOL_GPL(exportfs_encode_inode_fh); |
395 | |
396 | /** |
397 | * exportfs_encode_fh - encode a file handle from dentry |
398 | * @dentry: the object to encode |
399 | * @fid: where to store the file handle fragment |
400 | * @max_len: maximum length to store there |
401 | * @flags: properties of the requested file handle |
402 | * |
403 | * Returns an enum fid_type or a negative errno. |
404 | */ |
405 | int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len, |
406 | int flags) |
407 | { |
408 | int error; |
409 | struct dentry *p = NULL; |
410 | struct inode *inode = dentry->d_inode, *parent = NULL; |
411 | |
412 | if ((flags & EXPORT_FH_CONNECTABLE) && !S_ISDIR(inode->i_mode)) { |
413 | p = dget_parent(dentry); |
414 | /* |
415 | * note that while p might've ceased to be our parent already, |
416 | * it's still pinned by and still positive. |
417 | */ |
418 | parent = p->d_inode; |
419 | } |
420 | |
421 | error = exportfs_encode_inode_fh(inode, fid, max_len, parent, flags); |
422 | dput(p); |
423 | |
424 | return error; |
425 | } |
426 | EXPORT_SYMBOL_GPL(exportfs_encode_fh); |
427 | |
428 | struct dentry * |
429 | exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len, |
430 | int fileid_type, |
431 | int (*acceptable)(void *, struct dentry *), |
432 | void *context) |
433 | { |
434 | const struct export_operations *nop = mnt->mnt_sb->s_export_op; |
435 | struct dentry *result, *alias; |
436 | char nbuf[NAME_MAX+1]; |
437 | int err; |
438 | |
439 | /* |
440 | * Try to get any dentry for the given file handle from the filesystem. |
441 | */ |
442 | if (!exportfs_can_decode_fh(nop)) |
443 | return ERR_PTR(error: -ESTALE); |
444 | result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); |
445 | if (IS_ERR_OR_NULL(ptr: result)) |
446 | return result; |
447 | |
448 | /* |
449 | * If no acceptance criteria was specified by caller, a disconnected |
450 | * dentry is also accepatable. Callers may use this mode to query if |
451 | * file handle is stale or to get a reference to an inode without |
452 | * risking the high overhead caused by directory reconnect. |
453 | */ |
454 | if (!acceptable) |
455 | return result; |
456 | |
457 | if (d_is_dir(dentry: result)) { |
458 | /* |
459 | * This request is for a directory. |
460 | * |
461 | * On the positive side there is only one dentry for each |
462 | * directory inode. On the negative side this implies that we |
463 | * to ensure our dentry is connected all the way up to the |
464 | * filesystem root. |
465 | */ |
466 | if (result->d_flags & DCACHE_DISCONNECTED) { |
467 | err = reconnect_path(mnt, target_dir: result, nbuf); |
468 | if (err) |
469 | goto err_result; |
470 | } |
471 | |
472 | if (!acceptable(context, result)) { |
473 | err = -EACCES; |
474 | goto err_result; |
475 | } |
476 | |
477 | return result; |
478 | } else { |
479 | /* |
480 | * It's not a directory. Life is a little more complicated. |
481 | */ |
482 | struct dentry *target_dir, *nresult; |
483 | |
484 | /* |
485 | * See if either the dentry we just got from the filesystem |
486 | * or any alias for it is acceptable. This is always true |
487 | * if this filesystem is exported without the subtreecheck |
488 | * option. If the filesystem is exported with the subtree |
489 | * check option there's a fair chance we need to look at |
490 | * the parent directory in the file handle and make sure |
491 | * it's connected to the filesystem root. |
492 | */ |
493 | alias = find_acceptable_alias(result, acceptable, context); |
494 | if (alias) |
495 | return alias; |
496 | |
497 | /* |
498 | * Try to extract a dentry for the parent directory from the |
499 | * file handle. If this fails we'll have to give up. |
500 | */ |
501 | err = -ESTALE; |
502 | if (!nop->fh_to_parent) |
503 | goto err_result; |
504 | |
505 | target_dir = nop->fh_to_parent(mnt->mnt_sb, fid, |
506 | fh_len, fileid_type); |
507 | if (!target_dir) |
508 | goto err_result; |
509 | err = PTR_ERR(ptr: target_dir); |
510 | if (IS_ERR(ptr: target_dir)) |
511 | goto err_result; |
512 | |
513 | /* |
514 | * And as usual we need to make sure the parent directory is |
515 | * connected to the filesystem root. The VFS really doesn't |
516 | * like disconnected directories.. |
517 | */ |
518 | err = reconnect_path(mnt, target_dir, nbuf); |
519 | if (err) { |
520 | dput(target_dir); |
521 | goto err_result; |
522 | } |
523 | |
524 | /* |
525 | * Now that we've got both a well-connected parent and a |
526 | * dentry for the inode we're after, make sure that our |
527 | * inode is actually connected to the parent. |
528 | */ |
529 | err = exportfs_get_name(mnt, dir: target_dir, name: nbuf, child: result); |
530 | if (err) { |
531 | dput(target_dir); |
532 | goto err_result; |
533 | } |
534 | |
535 | inode_lock(inode: target_dir->d_inode); |
536 | nresult = lookup_one(mnt_idmap(mnt), nbuf, |
537 | target_dir, strlen(nbuf)); |
538 | if (!IS_ERR(ptr: nresult)) { |
539 | if (unlikely(nresult->d_inode != result->d_inode)) { |
540 | dput(nresult); |
541 | nresult = ERR_PTR(error: -ESTALE); |
542 | } |
543 | } |
544 | inode_unlock(inode: target_dir->d_inode); |
545 | /* |
546 | * At this point we are done with the parent, but it's pinned |
547 | * by the child dentry anyway. |
548 | */ |
549 | dput(target_dir); |
550 | |
551 | if (IS_ERR(ptr: nresult)) { |
552 | err = PTR_ERR(ptr: nresult); |
553 | goto err_result; |
554 | } |
555 | dput(result); |
556 | result = nresult; |
557 | |
558 | /* |
559 | * And finally make sure the dentry is actually acceptable |
560 | * to NFSD. |
561 | */ |
562 | alias = find_acceptable_alias(result, acceptable, context); |
563 | if (!alias) { |
564 | err = -EACCES; |
565 | goto err_result; |
566 | } |
567 | |
568 | return alias; |
569 | } |
570 | |
571 | err_result: |
572 | dput(result); |
573 | return ERR_PTR(error: err); |
574 | } |
575 | EXPORT_SYMBOL_GPL(exportfs_decode_fh_raw); |
576 | |
577 | struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, |
578 | int fh_len, int fileid_type, |
579 | int (*acceptable)(void *, struct dentry *), |
580 | void *context) |
581 | { |
582 | struct dentry *ret; |
583 | |
584 | ret = exportfs_decode_fh_raw(mnt, fid, fh_len, fileid_type, |
585 | acceptable, context); |
586 | if (IS_ERR_OR_NULL(ptr: ret)) { |
587 | if (ret == ERR_PTR(error: -ENOMEM)) |
588 | return ret; |
589 | return ERR_PTR(error: -ESTALE); |
590 | } |
591 | return ret; |
592 | } |
593 | EXPORT_SYMBOL_GPL(exportfs_decode_fh); |
594 | |
595 | MODULE_LICENSE("GPL" ); |
596 | |