1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2011 Novell Inc.
4 * Copyright (C) 2016 Red Hat, Inc.
5 */
6
7#include <linux/fs.h>
8#include <linux/cred.h>
9#include <linux/ctype.h>
10#include <linux/namei.h>
11#include <linux/xattr.h>
12#include <linux/ratelimit.h>
13#include <linux/mount.h>
14#include <linux/exportfs.h>
15#include "overlayfs.h"
16
17#include "../internal.h" /* for vfs_path_lookup */
18
19struct ovl_lookup_data {
20 struct super_block *sb;
21 struct vfsmount *mnt;
22 struct qstr name;
23 bool is_dir;
24 bool opaque;
25 bool stop;
26 bool last;
27 char *redirect;
28 int metacopy;
29 /* Referring to last redirect xattr */
30 bool absolute_redirect;
31};
32
33static int ovl_check_redirect(const struct path *path, struct ovl_lookup_data *d,
34 size_t prelen, const char *post)
35{
36 int res;
37 char *buf;
38 struct ovl_fs *ofs = OVL_FS(sb: d->sb);
39
40 d->absolute_redirect = false;
41 buf = ovl_get_redirect_xattr(ofs, path, padding: prelen + strlen(post));
42 if (IS_ERR_OR_NULL(ptr: buf))
43 return PTR_ERR(ptr: buf);
44
45 if (buf[0] == '/') {
46 d->absolute_redirect = true;
47 /*
48 * One of the ancestor path elements in an absolute path
49 * lookup in ovl_lookup_layer() could have been opaque and
50 * that will stop further lookup in lower layers (d->stop=true)
51 * But we have found an absolute redirect in descendant path
52 * element and that should force continue lookup in lower
53 * layers (reset d->stop).
54 */
55 d->stop = false;
56 } else {
57 res = strlen(buf) + 1;
58 memmove(buf + prelen, buf, res);
59 memcpy(buf, d->name.name, prelen);
60 }
61
62 strcat(p: buf, q: post);
63 kfree(objp: d->redirect);
64 d->redirect = buf;
65 d->name.name = d->redirect;
66 d->name.len = strlen(d->redirect);
67
68 return 0;
69}
70
71static int ovl_acceptable(void *ctx, struct dentry *dentry)
72{
73 /*
74 * A non-dir origin may be disconnected, which is fine, because
75 * we only need it for its unique inode number.
76 */
77 if (!d_is_dir(dentry))
78 return 1;
79
80 /* Don't decode a deleted empty directory */
81 if (d_unhashed(dentry))
82 return 0;
83
84 /* Check if directory belongs to the layer we are decoding from */
85 return is_subdir(dentry, ((struct vfsmount *)ctx)->mnt_root);
86}
87
88/*
89 * Check validity of an overlay file handle buffer.
90 *
91 * Return 0 for a valid file handle.
92 * Return -ENODATA for "origin unknown".
93 * Return <0 for an invalid file handle.
94 */
95int ovl_check_fb_len(struct ovl_fb *fb, int fb_len)
96{
97 if (fb_len < sizeof(struct ovl_fb) || fb_len < fb->len)
98 return -EINVAL;
99
100 if (fb->magic != OVL_FH_MAGIC)
101 return -EINVAL;
102
103 /* Treat larger version and unknown flags as "origin unknown" */
104 if (fb->version > OVL_FH_VERSION || fb->flags & ~OVL_FH_FLAG_ALL)
105 return -ENODATA;
106
107 /* Treat endianness mismatch as "origin unknown" */
108 if (!(fb->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
109 (fb->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
110 return -ENODATA;
111
112 return 0;
113}
114
115static struct ovl_fh *ovl_get_fh(struct ovl_fs *ofs, struct dentry *upperdentry,
116 enum ovl_xattr ox)
117{
118 int res, err;
119 struct ovl_fh *fh = NULL;
120
121 res = ovl_getxattr_upper(ofs, upperdentry, ox, NULL, size: 0);
122 if (res < 0) {
123 if (res == -ENODATA || res == -EOPNOTSUPP)
124 return NULL;
125 goto fail;
126 }
127 /* Zero size value means "copied up but origin unknown" */
128 if (res == 0)
129 return NULL;
130
131 fh = kzalloc(size: res + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
132 if (!fh)
133 return ERR_PTR(error: -ENOMEM);
134
135 res = ovl_getxattr_upper(ofs, upperdentry, ox, value: fh->buf, size: res);
136 if (res < 0)
137 goto fail;
138
139 err = ovl_check_fb_len(fb: &fh->fb, fb_len: res);
140 if (err < 0) {
141 if (err == -ENODATA)
142 goto out;
143 goto invalid;
144 }
145
146 return fh;
147
148out:
149 kfree(objp: fh);
150 return NULL;
151
152fail:
153 pr_warn_ratelimited("failed to get origin (%i)\n", res);
154 goto out;
155invalid:
156 pr_warn_ratelimited("invalid origin (%*phN)\n", res, fh);
157 goto out;
158}
159
160struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
161 struct vfsmount *mnt, bool connected)
162{
163 struct dentry *real;
164 int bytes;
165
166 if (!capable(CAP_DAC_READ_SEARCH))
167 return NULL;
168
169 /*
170 * Make sure that the stored uuid matches the uuid of the lower
171 * layer where file handle will be decoded.
172 * In case of uuid=off option just make sure that stored uuid is null.
173 */
174 if (ovl_origin_uuid(ofs) ?
175 !uuid_equal(u1: &fh->fb.uuid, u2: &mnt->mnt_sb->s_uuid) :
176 !uuid_is_null(uuid: &fh->fb.uuid))
177 return NULL;
178
179 bytes = (fh->fb.len - offsetof(struct ovl_fb, fid));
180 real = exportfs_decode_fh(mnt, fid: (struct fid *)fh->fb.fid,
181 fh_len: bytes >> 2, fileid_type: (int)fh->fb.type,
182 acceptable: connected ? ovl_acceptable : NULL, context: mnt);
183 if (IS_ERR(ptr: real)) {
184 /*
185 * Treat stale file handle to lower file as "origin unknown".
186 * upper file handle could become stale when upper file is
187 * unlinked and this information is needed to handle stale
188 * index entries correctly.
189 */
190 if (real == ERR_PTR(error: -ESTALE) &&
191 !(fh->fb.flags & OVL_FH_FLAG_PATH_UPPER))
192 real = NULL;
193 return real;
194 }
195
196 if (ovl_dentry_weird(dentry: real)) {
197 dput(real);
198 return NULL;
199 }
200
201 return real;
202}
203
204static bool ovl_is_opaquedir(struct ovl_fs *ofs, const struct path *path)
205{
206 return ovl_path_check_dir_xattr(ofs, path, ox: OVL_XATTR_OPAQUE);
207}
208
209static struct dentry *ovl_lookup_positive_unlocked(struct ovl_lookup_data *d,
210 const char *name,
211 struct dentry *base, int len,
212 bool drop_negative)
213{
214 struct dentry *ret = lookup_one_unlocked(idmap: mnt_idmap(mnt: d->mnt), name, base, len);
215
216 if (!IS_ERR(ptr: ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
217 if (drop_negative && ret->d_lockref.count == 1) {
218 spin_lock(lock: &ret->d_lock);
219 /* Recheck condition under lock */
220 if (d_is_negative(dentry: ret) && ret->d_lockref.count == 1)
221 __d_drop(dentry: ret);
222 spin_unlock(lock: &ret->d_lock);
223 }
224 dput(ret);
225 ret = ERR_PTR(error: -ENOENT);
226 }
227 return ret;
228}
229
230static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
231 const char *name, unsigned int namelen,
232 size_t prelen, const char *post,
233 struct dentry **ret, bool drop_negative)
234{
235 struct dentry *this;
236 struct path path;
237 int err;
238 bool last_element = !post[0];
239
240 this = ovl_lookup_positive_unlocked(d, name, base, len: namelen, drop_negative);
241 if (IS_ERR(ptr: this)) {
242 err = PTR_ERR(ptr: this);
243 this = NULL;
244 if (err == -ENOENT || err == -ENAMETOOLONG)
245 goto out;
246 goto out_err;
247 }
248
249 if (ovl_dentry_weird(dentry: this)) {
250 /* Don't support traversing automounts and other weirdness */
251 err = -EREMOTE;
252 goto out_err;
253 }
254
255 path.dentry = this;
256 path.mnt = d->mnt;
257 if (ovl_path_is_whiteout(ofs: OVL_FS(sb: d->sb), path: &path)) {
258 d->stop = d->opaque = true;
259 goto put_and_out;
260 }
261 /*
262 * This dentry should be a regular file if previous layer lookup
263 * found a metacopy dentry.
264 */
265 if (last_element && d->metacopy && !d_is_reg(dentry: this)) {
266 d->stop = true;
267 goto put_and_out;
268 }
269
270 if (!d_can_lookup(dentry: this)) {
271 if (d->is_dir || !last_element) {
272 d->stop = true;
273 goto put_and_out;
274 }
275 err = ovl_check_metacopy_xattr(ofs: OVL_FS(sb: d->sb), path: &path, NULL);
276 if (err < 0)
277 goto out_err;
278
279 d->metacopy = err;
280 d->stop = !d->metacopy;
281 if (!d->metacopy || d->last)
282 goto out;
283 } else {
284 if (ovl_lookup_trap_inode(sb: d->sb, dir: this)) {
285 /* Caught in a trap of overlapping layers */
286 err = -ELOOP;
287 goto out_err;
288 }
289
290 if (last_element)
291 d->is_dir = true;
292 if (d->last)
293 goto out;
294
295 if (ovl_is_opaquedir(ofs: OVL_FS(sb: d->sb), path: &path)) {
296 d->stop = true;
297 if (last_element)
298 d->opaque = true;
299 goto out;
300 }
301 }
302 err = ovl_check_redirect(path: &path, d, prelen, post);
303 if (err)
304 goto out_err;
305out:
306 *ret = this;
307 return 0;
308
309put_and_out:
310 dput(this);
311 this = NULL;
312 goto out;
313
314out_err:
315 dput(this);
316 return err;
317}
318
319static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
320 struct dentry **ret, bool drop_negative)
321{
322 /* Counting down from the end, since the prefix can change */
323 size_t rem = d->name.len - 1;
324 struct dentry *dentry = NULL;
325 int err;
326
327 if (d->name.name[0] != '/')
328 return ovl_lookup_single(base, d, name: d->name.name, namelen: d->name.len,
329 prelen: 0, post: "", ret, drop_negative);
330
331 while (!IS_ERR_OR_NULL(ptr: base) && d_can_lookup(dentry: base)) {
332 const char *s = d->name.name + d->name.len - rem;
333 const char *next = strchrnul(s, '/');
334 size_t thislen = next - s;
335 bool end = !next[0];
336
337 /* Verify we did not go off the rails */
338 if (WARN_ON(s[-1] != '/'))
339 return -EIO;
340
341 err = ovl_lookup_single(base, d, name: s, namelen: thislen,
342 prelen: d->name.len - rem, post: next, ret: &base,
343 drop_negative);
344 dput(dentry);
345 if (err)
346 return err;
347 dentry = base;
348 if (end)
349 break;
350
351 rem -= thislen + 1;
352
353 if (WARN_ON(rem >= d->name.len))
354 return -EIO;
355 }
356 *ret = dentry;
357 return 0;
358}
359
360static int ovl_lookup_data_layer(struct dentry *dentry, const char *redirect,
361 const struct ovl_layer *layer,
362 struct path *datapath)
363{
364 int err;
365
366 err = vfs_path_lookup(layer->mnt->mnt_root, layer->mnt, redirect,
367 LOOKUP_BENEATH | LOOKUP_NO_SYMLINKS | LOOKUP_NO_XDEV,
368 datapath);
369 pr_debug("lookup lowerdata (%pd2, redirect=\"%s\", layer=%d, err=%i)\n",
370 dentry, redirect, layer->idx, err);
371
372 if (err)
373 return err;
374
375 err = -EREMOTE;
376 if (ovl_dentry_weird(dentry: datapath->dentry))
377 goto out_path_put;
378
379 err = -ENOENT;
380 /* Only regular file is acceptable as lower data */
381 if (!d_is_reg(dentry: datapath->dentry))
382 goto out_path_put;
383
384 return 0;
385
386out_path_put:
387 path_put(datapath);
388
389 return err;
390}
391
392/* Lookup in data-only layers by absolute redirect to layer root */
393static int ovl_lookup_data_layers(struct dentry *dentry, const char *redirect,
394 struct ovl_path *lowerdata)
395{
396 struct ovl_fs *ofs = OVL_FS(sb: dentry->d_sb);
397 const struct ovl_layer *layer;
398 struct path datapath;
399 int err = -ENOENT;
400 int i;
401
402 layer = &ofs->layers[ofs->numlayer - ofs->numdatalayer];
403 for (i = 0; i < ofs->numdatalayer; i++, layer++) {
404 err = ovl_lookup_data_layer(dentry, redirect, layer, datapath: &datapath);
405 if (!err) {
406 mntput(mnt: datapath.mnt);
407 lowerdata->dentry = datapath.dentry;
408 lowerdata->layer = layer;
409 return 0;
410 }
411 }
412
413 return err;
414}
415
416int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
417 struct dentry *upperdentry, struct ovl_path **stackp)
418{
419 struct dentry *origin = NULL;
420 int i;
421
422 for (i = 1; i <= ovl_numlowerlayer(ofs); i++) {
423 /*
424 * If lower fs uuid is not unique among lower fs we cannot match
425 * fh->uuid to layer.
426 */
427 if (ofs->layers[i].fsid &&
428 ofs->layers[i].fs->bad_uuid)
429 continue;
430
431 origin = ovl_decode_real_fh(ofs, fh, mnt: ofs->layers[i].mnt,
432 connected);
433 if (origin)
434 break;
435 }
436
437 if (!origin)
438 return -ESTALE;
439 else if (IS_ERR(ptr: origin))
440 return PTR_ERR(ptr: origin);
441
442 if (upperdentry && !ovl_upper_is_whiteout(ofs, upperdentry) &&
443 inode_wrong_type(inode: d_inode(dentry: upperdentry), mode: d_inode(dentry: origin)->i_mode))
444 goto invalid;
445
446 if (!*stackp)
447 *stackp = kmalloc(size: sizeof(struct ovl_path), GFP_KERNEL);
448 if (!*stackp) {
449 dput(origin);
450 return -ENOMEM;
451 }
452 **stackp = (struct ovl_path){
453 .dentry = origin,
454 .layer = &ofs->layers[i]
455 };
456
457 return 0;
458
459invalid:
460 pr_warn_ratelimited("invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
461 upperdentry, d_inode(upperdentry)->i_mode & S_IFMT,
462 d_inode(origin)->i_mode & S_IFMT);
463 dput(origin);
464 return -ESTALE;
465}
466
467static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
468 struct ovl_path **stackp)
469{
470 struct ovl_fh *fh = ovl_get_fh(ofs, upperdentry, ox: OVL_XATTR_ORIGIN);
471 int err;
472
473 if (IS_ERR_OR_NULL(ptr: fh))
474 return PTR_ERR(ptr: fh);
475
476 err = ovl_check_origin_fh(ofs, fh, connected: false, upperdentry, stackp);
477 kfree(objp: fh);
478
479 if (err) {
480 if (err == -ESTALE)
481 return 0;
482 return err;
483 }
484
485 return 0;
486}
487
488/*
489 * Verify that @fh matches the file handle stored in xattr @name.
490 * Return 0 on match, -ESTALE on mismatch, < 0 on error.
491 */
492static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry,
493 enum ovl_xattr ox, const struct ovl_fh *fh)
494{
495 struct ovl_fh *ofh = ovl_get_fh(ofs, upperdentry: dentry, ox);
496 int err = 0;
497
498 if (!ofh)
499 return -ENODATA;
500
501 if (IS_ERR(ptr: ofh))
502 return PTR_ERR(ptr: ofh);
503
504 if (fh->fb.len != ofh->fb.len || memcmp(p: &fh->fb, q: &ofh->fb, size: fh->fb.len))
505 err = -ESTALE;
506
507 kfree(objp: ofh);
508 return err;
509}
510
511int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
512 enum ovl_xattr ox, const struct ovl_fh *fh,
513 bool is_upper, bool set)
514{
515 int err;
516
517 err = ovl_verify_fh(ofs, dentry, ox, fh);
518 if (set && err == -ENODATA)
519 err = ovl_setxattr(ofs, dentry, ox, value: fh->buf, size: fh->fb.len);
520
521 return err;
522}
523
524/*
525 * Verify that @real dentry matches the file handle stored in xattr @name.
526 *
527 * If @set is true and there is no stored file handle, encode @real and store
528 * file handle in xattr @name.
529 *
530 * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error.
531 */
532int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry,
533 enum ovl_xattr ox, struct dentry *real,
534 bool is_upper, bool set)
535{
536 struct inode *inode;
537 struct ovl_fh *fh;
538 int err;
539
540 fh = ovl_encode_real_fh(ofs, real, is_upper);
541 err = PTR_ERR(ptr: fh);
542 if (IS_ERR(ptr: fh)) {
543 fh = NULL;
544 goto fail;
545 }
546
547 err = ovl_verify_set_fh(ofs, dentry, ox, fh, is_upper, set);
548 if (err)
549 goto fail;
550
551out:
552 kfree(objp: fh);
553 return err;
554
555fail:
556 inode = d_inode(dentry: real);
557 pr_warn_ratelimited("failed to verify %s (%pd2, ino=%lu, err=%i)\n",
558 is_upper ? "upper" : "origin", real,
559 inode ? inode->i_ino : 0, err);
560 goto out;
561}
562
563
564/* Get upper dentry from index */
565struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index,
566 bool connected)
567{
568 struct ovl_fh *fh;
569 struct dentry *upper;
570
571 if (!d_is_dir(dentry: index))
572 return dget(dentry: index);
573
574 fh = ovl_get_fh(ofs, upperdentry: index, ox: OVL_XATTR_UPPER);
575 if (IS_ERR_OR_NULL(ptr: fh))
576 return ERR_CAST(ptr: fh);
577
578 upper = ovl_decode_real_fh(ofs, fh, mnt: ovl_upper_mnt(ofs), connected);
579 kfree(objp: fh);
580
581 if (IS_ERR_OR_NULL(ptr: upper))
582 return upper ?: ERR_PTR(error: -ESTALE);
583
584 if (!d_is_dir(dentry: upper)) {
585 pr_warn_ratelimited("invalid index upper (%pd2, upper=%pd2).\n",
586 index, upper);
587 dput(upper);
588 return ERR_PTR(error: -EIO);
589 }
590
591 return upper;
592}
593
594/*
595 * Verify that an index entry name matches the origin file handle stored in
596 * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
597 * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error.
598 */
599int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
600{
601 struct ovl_fh *fh = NULL;
602 size_t len;
603 struct ovl_path origin = { };
604 struct ovl_path *stack = &origin;
605 struct dentry *upper = NULL;
606 int err;
607
608 if (!d_inode(dentry: index))
609 return 0;
610
611 err = -EINVAL;
612 if (index->d_name.len < sizeof(struct ovl_fb)*2)
613 goto fail;
614
615 err = -ENOMEM;
616 len = index->d_name.len / 2;
617 fh = kzalloc(size: len + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
618 if (!fh)
619 goto fail;
620
621 err = -EINVAL;
622 if (hex2bin(dst: fh->buf, src: index->d_name.name, count: len))
623 goto fail;
624
625 err = ovl_check_fb_len(fb: &fh->fb, fb_len: len);
626 if (err)
627 goto fail;
628
629 /*
630 * Whiteout index entries are used as an indication that an exported
631 * overlay file handle should be treated as stale (i.e. after unlink
632 * of the overlay inode). These entries contain no origin xattr.
633 */
634 if (ovl_is_whiteout(dentry: index))
635 goto out;
636
637 /*
638 * Verifying directory index entries are not stale is expensive, so
639 * only verify stale dir index if NFS export is enabled.
640 */
641 if (d_is_dir(dentry: index) && !ofs->config.nfs_export)
642 goto out;
643
644 /*
645 * Directory index entries should have 'upper' xattr pointing to the
646 * real upper dir. Non-dir index entries are hardlinks to the upper
647 * real inode. For non-dir index, we can read the copy up origin xattr
648 * directly from the index dentry, but for dir index we first need to
649 * decode the upper directory.
650 */
651 upper = ovl_index_upper(ofs, index, connected: false);
652 if (IS_ERR_OR_NULL(ptr: upper)) {
653 err = PTR_ERR(ptr: upper);
654 /*
655 * Directory index entries with no 'upper' xattr need to be
656 * removed. When dir index entry has a stale 'upper' xattr,
657 * we assume that upper dir was removed and we treat the dir
658 * index as orphan entry that needs to be whited out.
659 */
660 if (err == -ESTALE)
661 goto orphan;
662 else if (!err)
663 err = -ESTALE;
664 goto fail;
665 }
666
667 err = ovl_verify_fh(ofs, dentry: upper, ox: OVL_XATTR_ORIGIN, fh);
668 dput(upper);
669 if (err)
670 goto fail;
671
672 /* Check if non-dir index is orphan and don't warn before cleaning it */
673 if (!d_is_dir(dentry: index) && d_inode(dentry: index)->i_nlink == 1) {
674 err = ovl_check_origin_fh(ofs, fh, connected: false, upperdentry: index, stackp: &stack);
675 if (err)
676 goto fail;
677
678 if (ovl_get_nlink(ofs, lowerdentry: origin.dentry, upperdentry: index, fallback: 0) == 0)
679 goto orphan;
680 }
681
682out:
683 dput(origin.dentry);
684 kfree(objp: fh);
685 return err;
686
687fail:
688 pr_warn_ratelimited("failed to verify index (%pd2, ftype=%x, err=%i)\n",
689 index, d_inode(index)->i_mode & S_IFMT, err);
690 goto out;
691
692orphan:
693 pr_warn_ratelimited("orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
694 index, d_inode(index)->i_mode & S_IFMT,
695 d_inode(index)->i_nlink);
696 err = -ENOENT;
697 goto out;
698}
699
700int ovl_get_index_name_fh(const struct ovl_fh *fh, struct qstr *name)
701{
702 char *n, *s;
703
704 n = kcalloc(n: fh->fb.len, size: 2, GFP_KERNEL);
705 if (!n)
706 return -ENOMEM;
707
708 s = bin2hex(dst: n, src: fh->buf, count: fh->fb.len);
709 *name = (struct qstr) QSTR_INIT(n, s - n);
710
711 return 0;
712
713}
714
715/*
716 * Lookup in indexdir for the index entry of a lower real inode or a copy up
717 * origin inode. The index entry name is the hex representation of the lower
718 * inode file handle.
719 *
720 * If the index dentry in negative, then either no lower aliases have been
721 * copied up yet, or aliases have been copied up in older kernels and are
722 * not indexed.
723 *
724 * If the index dentry for a copy up origin inode is positive, but points
725 * to an inode different than the upper inode, then either the upper inode
726 * has been copied up and not indexed or it was indexed, but since then
727 * index dir was cleared. Either way, that index cannot be used to identify
728 * the overlay inode.
729 */
730int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin,
731 struct qstr *name)
732{
733 struct ovl_fh *fh;
734 int err;
735
736 fh = ovl_encode_real_fh(ofs, real: origin, is_upper: false);
737 if (IS_ERR(ptr: fh))
738 return PTR_ERR(ptr: fh);
739
740 err = ovl_get_index_name_fh(fh, name);
741
742 kfree(objp: fh);
743 return err;
744}
745
746/* Lookup index by file handle for NFS export */
747struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
748{
749 struct dentry *index;
750 struct qstr name;
751 int err;
752
753 err = ovl_get_index_name_fh(fh, name: &name);
754 if (err)
755 return ERR_PTR(error: err);
756
757 index = lookup_positive_unlocked(name.name, ofs->indexdir, name.len);
758 kfree(objp: name.name);
759 if (IS_ERR(ptr: index)) {
760 if (PTR_ERR(ptr: index) == -ENOENT)
761 index = NULL;
762 return index;
763 }
764
765 if (ovl_is_whiteout(dentry: index))
766 err = -ESTALE;
767 else if (ovl_dentry_weird(dentry: index))
768 err = -EIO;
769 else
770 return index;
771
772 dput(index);
773 return ERR_PTR(error: err);
774}
775
776struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
777 struct dentry *origin, bool verify)
778{
779 struct dentry *index;
780 struct inode *inode;
781 struct qstr name;
782 bool is_dir = d_is_dir(dentry: origin);
783 int err;
784
785 err = ovl_get_index_name(ofs, origin, name: &name);
786 if (err)
787 return ERR_PTR(error: err);
788
789 index = lookup_one_positive_unlocked(idmap: ovl_upper_mnt_idmap(ofs), name: name.name,
790 base: ofs->indexdir, len: name.len);
791 if (IS_ERR(ptr: index)) {
792 err = PTR_ERR(ptr: index);
793 if (err == -ENOENT) {
794 index = NULL;
795 goto out;
796 }
797 pr_warn_ratelimited("failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
798 "overlayfs: mount with '-o index=off' to disable inodes index.\n",
799 d_inode(origin)->i_ino, name.len, name.name,
800 err);
801 goto out;
802 }
803
804 inode = d_inode(dentry: index);
805 if (ovl_is_whiteout(dentry: index) && !verify) {
806 /*
807 * When index lookup is called with !verify for decoding an
808 * overlay file handle, a whiteout index implies that decode
809 * should treat file handle as stale and no need to print a
810 * warning about it.
811 */
812 dput(index);
813 index = ERR_PTR(error: -ESTALE);
814 goto out;
815 } else if (ovl_dentry_weird(dentry: index) || ovl_is_whiteout(dentry: index) ||
816 inode_wrong_type(inode, mode: d_inode(dentry: origin)->i_mode)) {
817 /*
818 * Index should always be of the same file type as origin
819 * except for the case of a whiteout index. A whiteout
820 * index should only exist if all lower aliases have been
821 * unlinked, which means that finding a lower origin on lookup
822 * whose index is a whiteout should be treated as an error.
823 */
824 pr_warn_ratelimited("bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
825 index, d_inode(index)->i_mode & S_IFMT,
826 d_inode(origin)->i_mode & S_IFMT);
827 goto fail;
828 } else if (is_dir && verify) {
829 if (!upper) {
830 pr_warn_ratelimited("suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
831 origin, index);
832 goto fail;
833 }
834
835 /* Verify that dir index 'upper' xattr points to upper dir */
836 err = ovl_verify_upper(ofs, index, upper, set: false);
837 if (err) {
838 if (err == -ESTALE) {
839 pr_warn_ratelimited("suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
840 upper, origin, index);
841 }
842 goto fail;
843 }
844 } else if (upper && d_inode(dentry: upper) != inode) {
845 goto out_dput;
846 }
847out:
848 kfree(objp: name.name);
849 return index;
850
851out_dput:
852 dput(index);
853 index = NULL;
854 goto out;
855
856fail:
857 dput(index);
858 index = ERR_PTR(error: -EIO);
859 goto out;
860}
861
862/*
863 * Returns next layer in stack starting from top.
864 * Returns -1 if this is the last layer.
865 */
866int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
867{
868 struct ovl_entry *oe = OVL_E(dentry);
869 struct ovl_path *lowerstack = ovl_lowerstack(oe);
870
871 BUG_ON(idx < 0);
872 if (idx == 0) {
873 ovl_path_upper(dentry, path);
874 if (path->dentry)
875 return ovl_numlower(oe) ? 1 : -1;
876 idx++;
877 }
878 BUG_ON(idx > ovl_numlower(oe));
879 path->dentry = lowerstack[idx - 1].dentry;
880 path->mnt = lowerstack[idx - 1].layer->mnt;
881
882 return (idx < ovl_numlower(oe)) ? idx + 1 : -1;
883}
884
885/* Fix missing 'origin' xattr */
886static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry,
887 struct dentry *lower, struct dentry *upper)
888{
889 const struct ovl_fh *fh;
890 int err;
891
892 if (ovl_check_origin_xattr(ofs, upperdentry: upper))
893 return 0;
894
895 fh = ovl_get_origin_fh(ofs, origin: lower);
896 if (IS_ERR(ptr: fh))
897 return PTR_ERR(ptr: fh);
898
899 err = ovl_want_write(dentry);
900 if (err)
901 goto out;
902
903 err = ovl_set_origin_fh(ofs, fh, upper);
904 if (!err)
905 err = ovl_set_impure(dentry: dentry->d_parent, upperdentry: upper->d_parent);
906
907 ovl_drop_write(dentry);
908out:
909 kfree(objp: fh);
910 return err;
911}
912
913static int ovl_maybe_validate_verity(struct dentry *dentry)
914{
915 struct ovl_fs *ofs = OVL_FS(sb: dentry->d_sb);
916 struct inode *inode = d_inode(dentry);
917 struct path datapath, metapath;
918 int err;
919
920 if (!ofs->config.verity_mode ||
921 !ovl_is_metacopy_dentry(dentry) ||
922 ovl_test_flag(flag: OVL_VERIFIED_DIGEST, inode))
923 return 0;
924
925 if (!ovl_test_flag(flag: OVL_HAS_DIGEST, inode)) {
926 if (ofs->config.verity_mode == OVL_VERITY_REQUIRE) {
927 pr_warn_ratelimited("metacopy file '%pd' has no digest specified\n",
928 dentry);
929 return -EIO;
930 }
931 return 0;
932 }
933
934 ovl_path_lowerdata(dentry, path: &datapath);
935 if (!datapath.dentry)
936 return -EIO;
937
938 ovl_path_real(dentry, path: &metapath);
939 if (!metapath.dentry)
940 return -EIO;
941
942 err = ovl_inode_lock_interruptible(inode);
943 if (err)
944 return err;
945
946 if (!ovl_test_flag(flag: OVL_VERIFIED_DIGEST, inode)) {
947 const struct cred *old_cred;
948
949 old_cred = ovl_override_creds(sb: dentry->d_sb);
950
951 err = ovl_validate_verity(ofs, metapath: &metapath, datapath: &datapath);
952 if (err == 0)
953 ovl_set_flag(flag: OVL_VERIFIED_DIGEST, inode);
954
955 revert_creds(old_cred);
956 }
957
958 ovl_inode_unlock(inode);
959
960 return err;
961}
962
963/* Lazy lookup of lowerdata */
964static int ovl_maybe_lookup_lowerdata(struct dentry *dentry)
965{
966 struct inode *inode = d_inode(dentry);
967 const char *redirect = ovl_lowerdata_redirect(inode);
968 struct ovl_path datapath = {};
969 const struct cred *old_cred;
970 int err;
971
972 if (!redirect || ovl_dentry_lowerdata(dentry))
973 return 0;
974
975 if (redirect[0] != '/')
976 return -EIO;
977
978 err = ovl_inode_lock_interruptible(inode);
979 if (err)
980 return err;
981
982 err = 0;
983 /* Someone got here before us? */
984 if (ovl_dentry_lowerdata(dentry))
985 goto out;
986
987 old_cred = ovl_override_creds(sb: dentry->d_sb);
988 err = ovl_lookup_data_layers(dentry, redirect, lowerdata: &datapath);
989 revert_creds(old_cred);
990 if (err)
991 goto out_err;
992
993 err = ovl_dentry_set_lowerdata(dentry, datapath: &datapath);
994 if (err)
995 goto out_err;
996
997out:
998 ovl_inode_unlock(inode);
999 dput(datapath.dentry);
1000
1001 return err;
1002
1003out_err:
1004 pr_warn_ratelimited("lazy lowerdata lookup failed (%pd2, err=%i)\n",
1005 dentry, err);
1006 goto out;
1007}
1008
/*
 * Make sure the lowerdata of @dentry is looked up and, where applicable,
 * that its verity digest is validated.  Returns 0 or a negative error from
 * either step.
 */
int ovl_verify_lowerdata(struct dentry *dentry)
{
	int err = ovl_maybe_lookup_lowerdata(dentry);

	return err ? err : ovl_maybe_validate_verity(dentry);
}
1019
1020struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
1021 unsigned int flags)
1022{
1023 struct ovl_entry *oe = NULL;
1024 const struct cred *old_cred;
1025 struct ovl_fs *ofs = OVL_FS(sb: dentry->d_sb);
1026 struct ovl_entry *poe = OVL_E(dentry: dentry->d_parent);
1027 struct ovl_entry *roe = OVL_E(dentry: dentry->d_sb->s_root);
1028 struct ovl_path *stack = NULL, *origin_path = NULL;
1029 struct dentry *upperdir, *upperdentry = NULL;
1030 struct dentry *origin = NULL;
1031 struct dentry *index = NULL;
1032 unsigned int ctr = 0;
1033 struct inode *inode = NULL;
1034 bool upperopaque = false;
1035 char *upperredirect = NULL;
1036 struct dentry *this;
1037 unsigned int i;
1038 int err;
1039 bool uppermetacopy = false;
1040 int metacopy_size = 0;
1041 struct ovl_lookup_data d = {
1042 .sb = dentry->d_sb,
1043 .name = dentry->d_name,
1044 .is_dir = false,
1045 .opaque = false,
1046 .stop = false,
1047 .last = ovl_redirect_follow(ofs) ? false : !ovl_numlower(oe: poe),
1048 .redirect = NULL,
1049 .metacopy = 0,
1050 };
1051
1052 if (dentry->d_name.len > ofs->namelen)
1053 return ERR_PTR(error: -ENAMETOOLONG);
1054
1055 old_cred = ovl_override_creds(sb: dentry->d_sb);
1056 upperdir = ovl_dentry_upper(dentry: dentry->d_parent);
1057 if (upperdir) {
1058 d.mnt = ovl_upper_mnt(ofs);
1059 err = ovl_lookup_layer(base: upperdir, d: &d, ret: &upperdentry, drop_negative: true);
1060 if (err)
1061 goto out;
1062
1063 if (upperdentry && upperdentry->d_flags & DCACHE_OP_REAL) {
1064 dput(upperdentry);
1065 err = -EREMOTE;
1066 goto out;
1067 }
1068 if (upperdentry && !d.is_dir) {
1069 /*
1070 * Lookup copy up origin by decoding origin file handle.
1071 * We may get a disconnected dentry, which is fine,
1072 * because we only need to hold the origin inode in
1073 * cache and use its inode number. We may even get a
1074 * connected dentry, that is not under any of the lower
1075 * layers root. That is also fine for using it's inode
1076 * number - it's the same as if we held a reference
1077 * to a dentry in lower layer that was moved under us.
1078 */
1079 err = ovl_check_origin(ofs, upperdentry, stackp: &origin_path);
1080 if (err)
1081 goto out_put_upper;
1082
1083 if (d.metacopy)
1084 uppermetacopy = true;
1085 metacopy_size = d.metacopy;
1086 }
1087
1088 if (d.redirect) {
1089 err = -ENOMEM;
1090 upperredirect = kstrdup(s: d.redirect, GFP_KERNEL);
1091 if (!upperredirect)
1092 goto out_put_upper;
1093 if (d.redirect[0] == '/')
1094 poe = roe;
1095 }
1096 upperopaque = d.opaque;
1097 }
1098
1099 if (!d.stop && ovl_numlower(oe: poe)) {
1100 err = -ENOMEM;
1101 stack = ovl_stack_alloc(n: ofs->numlayer - 1);
1102 if (!stack)
1103 goto out_put_upper;
1104 }
1105
1106 for (i = 0; !d.stop && i < ovl_numlower(oe: poe); i++) {
1107 struct ovl_path lower = ovl_lowerstack(oe: poe)[i];
1108
1109 if (!ovl_redirect_follow(ofs))
1110 d.last = i == ovl_numlower(oe: poe) - 1;
1111 else if (d.is_dir || !ofs->numdatalayer)
1112 d.last = lower.layer->idx == ovl_numlower(oe: roe);
1113
1114 d.mnt = lower.layer->mnt;
1115 err = ovl_lookup_layer(base: lower.dentry, d: &d, ret: &this, drop_negative: false);
1116 if (err)
1117 goto out_put;
1118
1119 if (!this)
1120 continue;
1121
1122 if ((uppermetacopy || d.metacopy) && !ofs->config.metacopy) {
1123 dput(this);
1124 err = -EPERM;
1125 pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n", dentry);
1126 goto out_put;
1127 }
1128
1129 /*
1130 * If no origin fh is stored in upper of a merge dir, store fh
1131 * of lower dir and set upper parent "impure".
1132 */
1133 if (upperdentry && !ctr && !ofs->noxattr && d.is_dir) {
1134 err = ovl_fix_origin(ofs, dentry, lower: this, upper: upperdentry);
1135 if (err) {
1136 dput(this);
1137 goto out_put;
1138 }
1139 }
1140
1141 /*
1142 * When "verify_lower" feature is enabled, do not merge with a
1143 * lower dir that does not match a stored origin xattr. In any
1144 * case, only verified origin is used for index lookup.
1145 *
1146 * For non-dir dentry, if index=on, then ensure origin
1147 * matches the dentry found using path based lookup,
1148 * otherwise error out.
1149 */
1150 if (upperdentry && !ctr &&
1151 ((d.is_dir && ovl_verify_lower(sb: dentry->d_sb)) ||
1152 (!d.is_dir && ofs->config.index && origin_path))) {
1153 err = ovl_verify_origin(ofs, upper: upperdentry, origin: this, set: false);
1154 if (err) {
1155 dput(this);
1156 if (d.is_dir)
1157 break;
1158 goto out_put;
1159 }
1160 origin = this;
1161 }
1162
1163 if (!upperdentry && !d.is_dir && !ctr && d.metacopy)
1164 metacopy_size = d.metacopy;
1165
1166 if (d.metacopy && ctr) {
1167 /*
1168 * Do not store intermediate metacopy dentries in
1169 * lower chain, except top most lower metacopy dentry.
1170 * Continue the loop so that if there is an absolute
1171 * redirect on this dentry, poe can be reset to roe.
1172 */
1173 dput(this);
1174 this = NULL;
1175 } else {
1176 stack[ctr].dentry = this;
1177 stack[ctr].layer = lower.layer;
1178 ctr++;
1179 }
1180
1181 /*
1182 * Following redirects can have security consequences: it's like
1183 * a symlink into the lower layer without the permission checks.
1184 * This is only a problem if the upper layer is untrusted (e.g
1185 * comes from an USB drive). This can allow a non-readable file
1186 * or directory to become readable.
1187 *
1188 * Only following redirects when redirects are enabled disables
1189 * this attack vector when not necessary.
1190 */
1191 err = -EPERM;
1192 if (d.redirect && !ovl_redirect_follow(ofs)) {
1193 pr_warn_ratelimited("refusing to follow redirect for (%pd2)\n",
1194 dentry);
1195 goto out_put;
1196 }
1197
1198 if (d.stop)
1199 break;
1200
1201 if (d.redirect && d.redirect[0] == '/' && poe != roe) {
1202 poe = roe;
1203 /* Find the current layer on the root dentry */
1204 i = lower.layer->idx - 1;
1205 }
1206 }
1207
1208 /* Defer lookup of lowerdata in data-only layers to first access */
1209 if (d.metacopy && ctr && ofs->numdatalayer && d.absolute_redirect) {
1210 d.metacopy = 0;
1211 ctr++;
1212 }
1213
1214 /*
1215 * For regular non-metacopy upper dentries, there is no lower
1216 * path based lookup, hence ctr will be zero. If a dentry is found
1217 * using ORIGIN xattr on upper, install it in stack.
1218 *
1219 * For metacopy dentry, path based lookup will find lower dentries.
1220 * Just make sure a corresponding data dentry has been found.
1221 */
1222 if (d.metacopy || (uppermetacopy && !ctr)) {
1223 pr_warn_ratelimited("metacopy with no lower data found - abort lookup (%pd2)\n",
1224 dentry);
1225 err = -EIO;
1226 goto out_put;
1227 } else if (!d.is_dir && upperdentry && !ctr && origin_path) {
1228 if (WARN_ON(stack != NULL)) {
1229 err = -EIO;
1230 goto out_put;
1231 }
1232 stack = origin_path;
1233 ctr = 1;
1234 origin = origin_path->dentry;
1235 origin_path = NULL;
1236 }
1237
1238 /*
1239 * Always lookup index if there is no-upperdentry.
1240 *
1241 * For the case of upperdentry, we have set origin by now if it
1242 * needed to be set. There are basically three cases.
1243 *
1244 * For directories, lookup index by lower inode and verify it matches
1245 * upper inode. We only trust dir index if we verified that lower dir
1246 * matches origin, otherwise dir index entries may be inconsistent
1247 * and we ignore them.
1248 *
1249 * For regular upper, we already set origin if upper had ORIGIN
1250 * xattr. There is no verification though as there is no path
1251 * based dentry lookup in lower in this case.
1252 *
1253 * For metacopy upper, we set a verified origin already if index
1254 * is enabled and if upper had an ORIGIN xattr.
1255 *
1256 */
1257 if (!upperdentry && ctr)
1258 origin = stack[0].dentry;
1259
1260 if (origin && ovl_indexdir(sb: dentry->d_sb) &&
1261 (!d.is_dir || ovl_index_all(sb: dentry->d_sb))) {
1262 index = ovl_lookup_index(ofs, upper: upperdentry, origin, verify: true);
1263 if (IS_ERR(ptr: index)) {
1264 err = PTR_ERR(ptr: index);
1265 index = NULL;
1266 goto out_put;
1267 }
1268 }
1269
1270 if (ctr) {
1271 oe = ovl_alloc_entry(numlower: ctr);
1272 err = -ENOMEM;
1273 if (!oe)
1274 goto out_put;
1275
1276 ovl_stack_cpy(dst: ovl_lowerstack(oe), src: stack, n: ctr);
1277 }
1278
1279 if (upperopaque)
1280 ovl_dentry_set_opaque(dentry);
1281
1282 if (upperdentry)
1283 ovl_dentry_set_upper_alias(dentry);
1284 else if (index) {
1285 struct path upperpath = {
1286 .dentry = upperdentry = dget(dentry: index),
1287 .mnt = ovl_upper_mnt(ofs),
1288 };
1289
1290 /*
1291 * It's safe to assign upperredirect here: the previous
1292 * assignment of happens only if upperdentry is non-NULL, and
1293 * this one only if upperdentry is NULL.
1294 */
1295 upperredirect = ovl_get_redirect_xattr(ofs, path: &upperpath, padding: 0);
1296 if (IS_ERR(ptr: upperredirect)) {
1297 err = PTR_ERR(ptr: upperredirect);
1298 upperredirect = NULL;
1299 goto out_free_oe;
1300 }
1301 err = ovl_check_metacopy_xattr(ofs, path: &upperpath, NULL);
1302 if (err < 0)
1303 goto out_free_oe;
1304 uppermetacopy = err;
1305 metacopy_size = err;
1306 }
1307
1308 if (upperdentry || ctr) {
1309 struct ovl_inode_params oip = {
1310 .upperdentry = upperdentry,
1311 .oe = oe,
1312 .index = index,
1313 .redirect = upperredirect,
1314 };
1315
1316 /* Store lowerdata redirect for lazy lookup */
1317 if (ctr > 1 && !d.is_dir && !stack[ctr - 1].dentry) {
1318 oip.lowerdata_redirect = d.redirect;
1319 d.redirect = NULL;
1320 }
1321 inode = ovl_get_inode(sb: dentry->d_sb, oip: &oip);
1322 err = PTR_ERR(ptr: inode);
1323 if (IS_ERR(ptr: inode))
1324 goto out_free_oe;
1325 if (upperdentry && !uppermetacopy)
1326 ovl_set_flag(flag: OVL_UPPERDATA, inode);
1327
1328 if (metacopy_size > OVL_METACOPY_MIN_SIZE)
1329 ovl_set_flag(flag: OVL_HAS_DIGEST, inode);
1330 }
1331
1332 ovl_dentry_init_reval(dentry, upperdentry, oe: OVL_I_E(inode));
1333
1334 revert_creds(old_cred);
1335 if (origin_path) {
1336 dput(origin_path->dentry);
1337 kfree(objp: origin_path);
1338 }
1339 dput(index);
1340 ovl_stack_free(stack, n: ctr);
1341 kfree(objp: d.redirect);
1342 return d_splice_alias(inode, dentry);
1343
1344out_free_oe:
1345 ovl_free_entry(oe);
1346out_put:
1347 dput(index);
1348 ovl_stack_free(stack, n: ctr);
1349out_put_upper:
1350 if (origin_path) {
1351 dput(origin_path->dentry);
1352 kfree(objp: origin_path);
1353 }
1354 dput(upperdentry);
1355 kfree(objp: upperredirect);
1356out:
1357 kfree(objp: d.redirect);
1358 revert_creds(old_cred);
1359 return ERR_PTR(error: err);
1360}
1361
1362bool ovl_lower_positive(struct dentry *dentry)
1363{
1364 struct ovl_entry *poe = OVL_E(dentry: dentry->d_parent);
1365 const struct qstr *name = &dentry->d_name;
1366 const struct cred *old_cred;
1367 unsigned int i;
1368 bool positive = false;
1369 bool done = false;
1370
1371 /*
1372 * If dentry is negative, then lower is positive iff this is a
1373 * whiteout.
1374 */
1375 if (!dentry->d_inode)
1376 return ovl_dentry_is_opaque(dentry);
1377
1378 /* Negative upper -> positive lower */
1379 if (!ovl_dentry_upper(dentry))
1380 return true;
1381
1382 old_cred = ovl_override_creds(sb: dentry->d_sb);
1383 /* Positive upper -> have to look up lower to see whether it exists */
1384 for (i = 0; !done && !positive && i < ovl_numlower(oe: poe); i++) {
1385 struct dentry *this;
1386 struct ovl_path *parentpath = &ovl_lowerstack(oe: poe)[i];
1387
1388 this = lookup_one_positive_unlocked(
1389 idmap: mnt_idmap(mnt: parentpath->layer->mnt),
1390 name: name->name, base: parentpath->dentry, len: name->len);
1391 if (IS_ERR(ptr: this)) {
1392 switch (PTR_ERR(ptr: this)) {
1393 case -ENOENT:
1394 case -ENAMETOOLONG:
1395 break;
1396
1397 default:
1398 /*
1399 * Assume something is there, we just couldn't
1400 * access it.
1401 */
1402 positive = true;
1403 break;
1404 }
1405 } else {
1406 struct path path = {
1407 .dentry = this,
1408 .mnt = parentpath->layer->mnt,
1409 };
1410 positive = !ovl_path_is_whiteout(ofs: OVL_FS(sb: dentry->d_sb), path: &path);
1411 done = true;
1412 dput(this);
1413 }
1414 }
1415 revert_creds(old_cred);
1416
1417 return positive;
1418}
1419

source code of linux/fs/overlayfs/namei.c