1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * |
4 | * Copyright (C) 2011 Novell Inc. |
5 | */ |
6 | |
7 | #include <linux/fs.h> |
8 | #include <linux/slab.h> |
9 | #include <linux/namei.h> |
10 | #include <linux/file.h> |
11 | #include <linux/xattr.h> |
12 | #include <linux/rbtree.h> |
13 | #include <linux/security.h> |
14 | #include <linux/cred.h> |
15 | #include <linux/ratelimit.h> |
16 | #include "overlayfs.h" |
17 | |
18 | struct ovl_cache_entry { |
19 | unsigned int len; |
20 | unsigned int type; |
21 | u64 real_ino; |
22 | u64 ino; |
23 | struct list_head l_node; |
24 | struct rb_node node; |
25 | struct ovl_cache_entry *next_maybe_whiteout; |
26 | bool is_upper; |
27 | bool is_whiteout; |
28 | bool check_xwhiteout; |
29 | char name[]; |
30 | }; |
31 | |
32 | struct ovl_dir_cache { |
33 | long refcount; |
34 | u64 version; |
35 | struct list_head entries; |
36 | struct rb_root root; |
37 | }; |
38 | |
39 | struct ovl_readdir_data { |
40 | struct dir_context ctx; |
41 | struct dentry *dentry; |
42 | bool is_lowest; |
43 | struct rb_root *root; |
44 | struct list_head *list; |
45 | struct list_head middle; |
46 | struct ovl_cache_entry *first_maybe_whiteout; |
47 | int count; |
48 | int err; |
49 | bool is_upper; |
50 | bool d_type_supported; |
51 | bool in_xwhiteouts_dir; |
52 | }; |
53 | |
/* Per-open state of an overlay directory, stored in file->private_data. */
struct ovl_dir_file {
	bool is_real;			/* iterate/seek go straight to @realfile */
	bool is_upper;			/* dir was of upper type at open time */
	struct ovl_dir_cache *cache;	/* merged listing in use, or NULL */
	struct list_head *cursor;	/* current position inside @cache */
	struct file *realfile;		/* real directory opened at open time */
	/* upper dir opened lazily by ovl_dir_real_file() after copy up */
	struct file *upperfile;
};
62 | |
63 | static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n) |
64 | { |
65 | return rb_entry(n, struct ovl_cache_entry, node); |
66 | } |
67 | |
68 | static bool ovl_cache_entry_find_link(const char *name, int len, |
69 | struct rb_node ***link, |
70 | struct rb_node **parent) |
71 | { |
72 | bool found = false; |
73 | struct rb_node **newp = *link; |
74 | |
75 | while (!found && *newp) { |
76 | int cmp; |
77 | struct ovl_cache_entry *tmp; |
78 | |
79 | *parent = *newp; |
80 | tmp = ovl_cache_entry_from_node(n: *newp); |
81 | cmp = strncmp(name, tmp->name, len); |
82 | if (cmp > 0) |
83 | newp = &tmp->node.rb_right; |
84 | else if (cmp < 0 || len < tmp->len) |
85 | newp = &tmp->node.rb_left; |
86 | else |
87 | found = true; |
88 | } |
89 | *link = newp; |
90 | |
91 | return found; |
92 | } |
93 | |
94 | static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root, |
95 | const char *name, int len) |
96 | { |
97 | struct rb_node *node = root->rb_node; |
98 | int cmp; |
99 | |
100 | while (node) { |
101 | struct ovl_cache_entry *p = ovl_cache_entry_from_node(n: node); |
102 | |
103 | cmp = strncmp(name, p->name, len); |
104 | if (cmp > 0) |
105 | node = p->node.rb_right; |
106 | else if (cmp < 0 || len < p->len) |
107 | node = p->node.rb_left; |
108 | else |
109 | return p; |
110 | } |
111 | |
112 | return NULL; |
113 | } |
114 | |
115 | static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd, |
116 | struct ovl_cache_entry *p) |
117 | { |
118 | /* Don't care if not doing ovl_iter() */ |
119 | if (!rdd->dentry) |
120 | return false; |
121 | |
122 | /* Always recalc d_ino when remapping lower inode numbers */ |
123 | if (ovl_xino_bits(ofs: OVL_FS(sb: rdd->dentry->d_sb))) |
124 | return true; |
125 | |
126 | /* Always recalc d_ino for parent */ |
127 | if (strcmp(p->name, ".." ) == 0) |
128 | return true; |
129 | |
130 | /* If this is lower, then native d_ino will do */ |
131 | if (!rdd->is_upper) |
132 | return false; |
133 | |
134 | /* |
135 | * Recalc d_ino for '.' and for all entries if dir is impure (contains |
136 | * copied up entries) |
137 | */ |
138 | if ((p->name[0] == '.' && p->len == 1) || |
139 | ovl_test_flag(flag: OVL_IMPURE, inode: d_inode(dentry: rdd->dentry))) |
140 | return true; |
141 | |
142 | return false; |
143 | } |
144 | |
145 | static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd, |
146 | const char *name, int len, |
147 | u64 ino, unsigned int d_type) |
148 | { |
149 | struct ovl_cache_entry *p; |
150 | size_t size = offsetof(struct ovl_cache_entry, name[len + 1]); |
151 | |
152 | p = kmalloc(size, GFP_KERNEL); |
153 | if (!p) |
154 | return NULL; |
155 | |
156 | memcpy(p->name, name, len); |
157 | p->name[len] = '\0'; |
158 | p->len = len; |
159 | p->type = d_type; |
160 | p->real_ino = ino; |
161 | p->ino = ino; |
162 | /* Defer setting d_ino for upper entry to ovl_iterate() */ |
163 | if (ovl_calc_d_ino(rdd, p)) |
164 | p->ino = 0; |
165 | p->is_upper = rdd->is_upper; |
166 | p->is_whiteout = false; |
167 | /* Defer check for overlay.whiteout to ovl_iterate() */ |
168 | p->check_xwhiteout = rdd->in_xwhiteouts_dir && d_type == DT_REG; |
169 | |
170 | if (d_type == DT_CHR) { |
171 | p->next_maybe_whiteout = rdd->first_maybe_whiteout; |
172 | rdd->first_maybe_whiteout = p; |
173 | } |
174 | return p; |
175 | } |
176 | |
177 | static bool ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, |
178 | const char *name, int len, u64 ino, |
179 | unsigned int d_type) |
180 | { |
181 | struct rb_node **newp = &rdd->root->rb_node; |
182 | struct rb_node *parent = NULL; |
183 | struct ovl_cache_entry *p; |
184 | |
185 | if (ovl_cache_entry_find_link(name, len, link: &newp, parent: &parent)) |
186 | return true; |
187 | |
188 | p = ovl_cache_entry_new(rdd, name, len, ino, d_type); |
189 | if (p == NULL) { |
190 | rdd->err = -ENOMEM; |
191 | return false; |
192 | } |
193 | |
194 | list_add_tail(new: &p->l_node, head: rdd->list); |
195 | rb_link_node(node: &p->node, parent, rb_link: newp); |
196 | rb_insert_color(&p->node, rdd->root); |
197 | |
198 | return true; |
199 | } |
200 | |
201 | static bool ovl_fill_lowest(struct ovl_readdir_data *rdd, |
202 | const char *name, int namelen, |
203 | loff_t offset, u64 ino, unsigned int d_type) |
204 | { |
205 | struct ovl_cache_entry *p; |
206 | |
207 | p = ovl_cache_entry_find(root: rdd->root, name, len: namelen); |
208 | if (p) { |
209 | list_move_tail(list: &p->l_node, head: &rdd->middle); |
210 | } else { |
211 | p = ovl_cache_entry_new(rdd, name, len: namelen, ino, d_type); |
212 | if (p == NULL) |
213 | rdd->err = -ENOMEM; |
214 | else |
215 | list_add_tail(new: &p->l_node, head: &rdd->middle); |
216 | } |
217 | |
218 | return rdd->err == 0; |
219 | } |
220 | |
221 | void ovl_cache_free(struct list_head *list) |
222 | { |
223 | struct ovl_cache_entry *p; |
224 | struct ovl_cache_entry *n; |
225 | |
226 | list_for_each_entry_safe(p, n, list, l_node) |
227 | kfree(objp: p); |
228 | |
229 | INIT_LIST_HEAD(list); |
230 | } |
231 | |
232 | void ovl_dir_cache_free(struct inode *inode) |
233 | { |
234 | struct ovl_dir_cache *cache = ovl_dir_cache(inode); |
235 | |
236 | if (cache) { |
237 | ovl_cache_free(list: &cache->entries); |
238 | kfree(objp: cache); |
239 | } |
240 | } |
241 | |
242 | static void ovl_cache_put(struct ovl_dir_file *od, struct inode *inode) |
243 | { |
244 | struct ovl_dir_cache *cache = od->cache; |
245 | |
246 | WARN_ON(cache->refcount <= 0); |
247 | cache->refcount--; |
248 | if (!cache->refcount) { |
249 | if (ovl_dir_cache(inode) == cache) |
250 | ovl_set_dir_cache(inode, NULL); |
251 | |
252 | ovl_cache_free(list: &cache->entries); |
253 | kfree(objp: cache); |
254 | } |
255 | } |
256 | |
257 | static bool ovl_fill_merge(struct dir_context *ctx, const char *name, |
258 | int namelen, loff_t offset, u64 ino, |
259 | unsigned int d_type) |
260 | { |
261 | struct ovl_readdir_data *rdd = |
262 | container_of(ctx, struct ovl_readdir_data, ctx); |
263 | |
264 | rdd->count++; |
265 | if (!rdd->is_lowest) |
266 | return ovl_cache_entry_add_rb(rdd, name, len: namelen, ino, d_type); |
267 | else |
268 | return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type); |
269 | } |
270 | |
271 | static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd) |
272 | { |
273 | int err; |
274 | struct ovl_cache_entry *p; |
275 | struct dentry *dentry, *dir = path->dentry; |
276 | const struct cred *old_cred; |
277 | |
278 | old_cred = ovl_override_creds(sb: rdd->dentry->d_sb); |
279 | |
280 | err = down_write_killable(sem: &dir->d_inode->i_rwsem); |
281 | if (!err) { |
282 | while (rdd->first_maybe_whiteout) { |
283 | p = rdd->first_maybe_whiteout; |
284 | rdd->first_maybe_whiteout = p->next_maybe_whiteout; |
285 | dentry = lookup_one(mnt_idmap(mnt: path->mnt), p->name, dir, p->len); |
286 | if (!IS_ERR(ptr: dentry)) { |
287 | p->is_whiteout = ovl_is_whiteout(dentry); |
288 | dput(dentry); |
289 | } |
290 | } |
291 | inode_unlock(inode: dir->d_inode); |
292 | } |
293 | revert_creds(old_cred); |
294 | |
295 | return err; |
296 | } |
297 | |
298 | static inline int ovl_dir_read(const struct path *realpath, |
299 | struct ovl_readdir_data *rdd) |
300 | { |
301 | struct file *realfile; |
302 | int err; |
303 | |
304 | realfile = ovl_path_open(path: realpath, O_RDONLY | O_LARGEFILE); |
305 | if (IS_ERR(ptr: realfile)) |
306 | return PTR_ERR(ptr: realfile); |
307 | |
308 | rdd->in_xwhiteouts_dir = rdd->dentry && |
309 | ovl_path_check_xwhiteouts_xattr(ofs: OVL_FS(sb: rdd->dentry->d_sb), path: realpath); |
310 | rdd->first_maybe_whiteout = NULL; |
311 | rdd->ctx.pos = 0; |
312 | do { |
313 | rdd->count = 0; |
314 | rdd->err = 0; |
315 | err = iterate_dir(realfile, &rdd->ctx); |
316 | if (err >= 0) |
317 | err = rdd->err; |
318 | } while (!err && rdd->count); |
319 | |
320 | if (!err && rdd->first_maybe_whiteout && rdd->dentry) |
321 | err = ovl_check_whiteouts(path: realpath, rdd); |
322 | |
323 | fput(realfile); |
324 | |
325 | return err; |
326 | } |
327 | |
328 | static void ovl_dir_reset(struct file *file) |
329 | { |
330 | struct ovl_dir_file *od = file->private_data; |
331 | struct ovl_dir_cache *cache = od->cache; |
332 | struct inode *inode = file_inode(f: file); |
333 | bool is_real; |
334 | |
335 | if (cache && ovl_inode_version_get(inode) != cache->version) { |
336 | ovl_cache_put(od, inode); |
337 | od->cache = NULL; |
338 | od->cursor = NULL; |
339 | } |
340 | is_real = ovl_dir_is_real(dir: inode); |
341 | if (od->is_real != is_real) { |
342 | /* is_real can only become false when dir is copied up */ |
343 | if (WARN_ON(is_real)) |
344 | return; |
345 | od->is_real = false; |
346 | } |
347 | } |
348 | |
349 | static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list, |
350 | struct rb_root *root) |
351 | { |
352 | int err; |
353 | struct path realpath; |
354 | struct ovl_readdir_data rdd = { |
355 | .ctx.actor = ovl_fill_merge, |
356 | .dentry = dentry, |
357 | .list = list, |
358 | .root = root, |
359 | .is_lowest = false, |
360 | }; |
361 | int idx, next; |
362 | |
363 | for (idx = 0; idx != -1; idx = next) { |
364 | next = ovl_path_next(idx, dentry, path: &realpath); |
365 | rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry; |
366 | |
367 | if (next != -1) { |
368 | err = ovl_dir_read(realpath: &realpath, rdd: &rdd); |
369 | if (err) |
370 | break; |
371 | } else { |
372 | /* |
373 | * Insert lowest layer entries before upper ones, this |
374 | * allows offsets to be reasonably constant |
375 | */ |
376 | list_add(new: &rdd.middle, head: rdd.list); |
377 | rdd.is_lowest = true; |
378 | err = ovl_dir_read(realpath: &realpath, rdd: &rdd); |
379 | list_del(entry: &rdd.middle); |
380 | } |
381 | } |
382 | return err; |
383 | } |
384 | |
385 | static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) |
386 | { |
387 | struct list_head *p; |
388 | loff_t off = 0; |
389 | |
390 | list_for_each(p, &od->cache->entries) { |
391 | if (off >= pos) |
392 | break; |
393 | off++; |
394 | } |
395 | /* Cursor is safe since the cache is stable */ |
396 | od->cursor = p; |
397 | } |
398 | |
399 | static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) |
400 | { |
401 | int res; |
402 | struct ovl_dir_cache *cache; |
403 | struct inode *inode = d_inode(dentry); |
404 | |
405 | cache = ovl_dir_cache(inode); |
406 | if (cache && ovl_inode_version_get(inode) == cache->version) { |
407 | WARN_ON(!cache->refcount); |
408 | cache->refcount++; |
409 | return cache; |
410 | } |
411 | ovl_set_dir_cache(inode: d_inode(dentry), NULL); |
412 | |
413 | cache = kzalloc(size: sizeof(struct ovl_dir_cache), GFP_KERNEL); |
414 | if (!cache) |
415 | return ERR_PTR(error: -ENOMEM); |
416 | |
417 | cache->refcount = 1; |
418 | INIT_LIST_HEAD(list: &cache->entries); |
419 | cache->root = RB_ROOT; |
420 | |
421 | res = ovl_dir_read_merged(dentry, list: &cache->entries, root: &cache->root); |
422 | if (res) { |
423 | ovl_cache_free(list: &cache->entries); |
424 | kfree(objp: cache); |
425 | return ERR_PTR(error: res); |
426 | } |
427 | |
428 | cache->version = ovl_inode_version_get(inode); |
429 | ovl_set_dir_cache(inode, cache); |
430 | |
431 | return cache; |
432 | } |
433 | |
434 | /* Map inode number to lower fs unique range */ |
435 | static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid, |
436 | const char *name, int namelen, bool warn) |
437 | { |
438 | unsigned int xinoshift = 64 - xinobits; |
439 | |
440 | if (unlikely(ino >> xinoshift)) { |
441 | if (warn) { |
442 | pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n" , |
443 | namelen, name, ino, xinobits); |
444 | } |
445 | return ino; |
446 | } |
447 | |
448 | /* |
449 | * The lowest xinobit is reserved for mapping the non-peresistent inode |
450 | * numbers range, but this range is only exposed via st_ino, not here. |
451 | */ |
452 | return ino | ((u64)fsid) << (xinoshift + 1); |
453 | } |
454 | |
455 | /* |
456 | * Set d_ino for upper entries if needed. Non-upper entries should always report |
457 | * the uppermost real inode ino and should not call this function. |
458 | * |
459 | * When not all layer are on same fs, report real ino also for upper. |
460 | * |
461 | * When all layers are on the same fs, and upper has a reference to |
462 | * copy up origin, call vfs_getattr() on the overlay entry to make |
463 | * sure that d_ino will be consistent with st_ino from stat(2). |
464 | * |
465 | * Also checks the overlay.whiteout xattr by doing a full lookup which will return |
466 | * negative in this case. |
467 | */ |
468 | static int ovl_cache_update(const struct path *path, struct ovl_cache_entry *p, bool update_ino) |
469 | |
470 | { |
471 | struct dentry *dir = path->dentry; |
472 | struct ovl_fs *ofs = OVL_FS(sb: dir->d_sb); |
473 | struct dentry *this = NULL; |
474 | enum ovl_path_type type; |
475 | u64 ino = p->real_ino; |
476 | int xinobits = ovl_xino_bits(ofs); |
477 | int err = 0; |
478 | |
479 | if (!ovl_same_dev(ofs) && !p->check_xwhiteout) |
480 | goto out; |
481 | |
482 | if (p->name[0] == '.') { |
483 | if (p->len == 1) { |
484 | this = dget(dentry: dir); |
485 | goto get; |
486 | } |
487 | if (p->len == 2 && p->name[1] == '.') { |
488 | /* we shall not be moved */ |
489 | this = dget(dentry: dir->d_parent); |
490 | goto get; |
491 | } |
492 | } |
493 | /* This checks also for xwhiteouts */ |
494 | this = lookup_one(mnt_idmap(mnt: path->mnt), p->name, dir, p->len); |
495 | if (IS_ERR_OR_NULL(ptr: this) || !this->d_inode) { |
496 | /* Mark a stale entry */ |
497 | p->is_whiteout = true; |
498 | if (IS_ERR(ptr: this)) { |
499 | err = PTR_ERR(ptr: this); |
500 | this = NULL; |
501 | goto fail; |
502 | } |
503 | goto out; |
504 | } |
505 | |
506 | get: |
507 | if (!ovl_same_dev(ofs) || !update_ino) |
508 | goto out; |
509 | |
510 | type = ovl_path_type(dentry: this); |
511 | if (OVL_TYPE_ORIGIN(type)) { |
512 | struct kstat stat; |
513 | struct path statpath = *path; |
514 | |
515 | statpath.dentry = this; |
516 | err = vfs_getattr(&statpath, &stat, STATX_INO, 0); |
517 | if (err) |
518 | goto fail; |
519 | |
520 | /* |
521 | * Directory inode is always on overlay st_dev. |
522 | * Non-dir with ovl_same_dev() could be on pseudo st_dev in case |
523 | * of xino bits overflow. |
524 | */ |
525 | WARN_ON_ONCE(S_ISDIR(stat.mode) && |
526 | dir->d_sb->s_dev != stat.dev); |
527 | ino = stat.ino; |
528 | } else if (xinobits && !OVL_TYPE_UPPER(type)) { |
529 | ino = ovl_remap_lower_ino(ino, xinobits, |
530 | fsid: ovl_layer_lower(dentry: this)->fsid, |
531 | name: p->name, namelen: p->len, |
532 | warn: ovl_xino_warn(ofs)); |
533 | } |
534 | |
535 | out: |
536 | p->ino = ino; |
537 | dput(this); |
538 | return err; |
539 | |
540 | fail: |
541 | pr_warn_ratelimited("failed to look up (%s) for ino (%i)\n" , |
542 | p->name, err); |
543 | goto out; |
544 | } |
545 | |
546 | static bool ovl_fill_plain(struct dir_context *ctx, const char *name, |
547 | int namelen, loff_t offset, u64 ino, |
548 | unsigned int d_type) |
549 | { |
550 | struct ovl_cache_entry *p; |
551 | struct ovl_readdir_data *rdd = |
552 | container_of(ctx, struct ovl_readdir_data, ctx); |
553 | |
554 | rdd->count++; |
555 | p = ovl_cache_entry_new(rdd, name, len: namelen, ino, d_type); |
556 | if (p == NULL) { |
557 | rdd->err = -ENOMEM; |
558 | return false; |
559 | } |
560 | list_add_tail(new: &p->l_node, head: rdd->list); |
561 | |
562 | return true; |
563 | } |
564 | |
565 | static int ovl_dir_read_impure(const struct path *path, struct list_head *list, |
566 | struct rb_root *root) |
567 | { |
568 | int err; |
569 | struct path realpath; |
570 | struct ovl_cache_entry *p, *n; |
571 | struct ovl_readdir_data rdd = { |
572 | .ctx.actor = ovl_fill_plain, |
573 | .list = list, |
574 | .root = root, |
575 | }; |
576 | |
577 | INIT_LIST_HEAD(list); |
578 | *root = RB_ROOT; |
579 | ovl_path_upper(dentry: path->dentry, path: &realpath); |
580 | |
581 | err = ovl_dir_read(realpath: &realpath, rdd: &rdd); |
582 | if (err) |
583 | return err; |
584 | |
585 | list_for_each_entry_safe(p, n, list, l_node) { |
586 | if (strcmp(p->name, "." ) != 0 && |
587 | strcmp(p->name, ".." ) != 0) { |
588 | err = ovl_cache_update(path, p, update_ino: true); |
589 | if (err) |
590 | return err; |
591 | } |
592 | if (p->ino == p->real_ino) { |
593 | list_del(entry: &p->l_node); |
594 | kfree(objp: p); |
595 | } else { |
596 | struct rb_node **newp = &root->rb_node; |
597 | struct rb_node *parent = NULL; |
598 | |
599 | if (WARN_ON(ovl_cache_entry_find_link(p->name, p->len, |
600 | &newp, &parent))) |
601 | return -EIO; |
602 | |
603 | rb_link_node(node: &p->node, parent, rb_link: newp); |
604 | rb_insert_color(&p->node, root); |
605 | } |
606 | } |
607 | return 0; |
608 | } |
609 | |
610 | static struct ovl_dir_cache *ovl_cache_get_impure(const struct path *path) |
611 | { |
612 | int res; |
613 | struct dentry *dentry = path->dentry; |
614 | struct inode *inode = d_inode(dentry); |
615 | struct ovl_fs *ofs = OVL_FS(sb: dentry->d_sb); |
616 | struct ovl_dir_cache *cache; |
617 | |
618 | cache = ovl_dir_cache(inode); |
619 | if (cache && ovl_inode_version_get(inode) == cache->version) |
620 | return cache; |
621 | |
622 | /* Impure cache is not refcounted, free it here */ |
623 | ovl_dir_cache_free(inode); |
624 | ovl_set_dir_cache(inode, NULL); |
625 | |
626 | cache = kzalloc(size: sizeof(struct ovl_dir_cache), GFP_KERNEL); |
627 | if (!cache) |
628 | return ERR_PTR(error: -ENOMEM); |
629 | |
630 | res = ovl_dir_read_impure(path, list: &cache->entries, root: &cache->root); |
631 | if (res) { |
632 | ovl_cache_free(list: &cache->entries); |
633 | kfree(objp: cache); |
634 | return ERR_PTR(error: res); |
635 | } |
636 | if (list_empty(head: &cache->entries)) { |
637 | /* |
638 | * A good opportunity to get rid of an unneeded "impure" flag. |
639 | * Removing the "impure" xattr is best effort. |
640 | */ |
641 | if (!ovl_want_write(dentry)) { |
642 | ovl_removexattr(ofs, dentry: ovl_dentry_upper(dentry), |
643 | ox: OVL_XATTR_IMPURE); |
644 | ovl_drop_write(dentry); |
645 | } |
646 | ovl_clear_flag(flag: OVL_IMPURE, inode); |
647 | kfree(objp: cache); |
648 | return NULL; |
649 | } |
650 | |
651 | cache->version = ovl_inode_version_get(inode); |
652 | ovl_set_dir_cache(inode, cache); |
653 | |
654 | return cache; |
655 | } |
656 | |
657 | struct ovl_readdir_translate { |
658 | struct dir_context *orig_ctx; |
659 | struct ovl_dir_cache *cache; |
660 | struct dir_context ctx; |
661 | u64 parent_ino; |
662 | int fsid; |
663 | int xinobits; |
664 | bool xinowarn; |
665 | }; |
666 | |
667 | static bool ovl_fill_real(struct dir_context *ctx, const char *name, |
668 | int namelen, loff_t offset, u64 ino, |
669 | unsigned int d_type) |
670 | { |
671 | struct ovl_readdir_translate *rdt = |
672 | container_of(ctx, struct ovl_readdir_translate, ctx); |
673 | struct dir_context *orig_ctx = rdt->orig_ctx; |
674 | |
675 | if (rdt->parent_ino && strcmp(name, ".." ) == 0) { |
676 | ino = rdt->parent_ino; |
677 | } else if (rdt->cache) { |
678 | struct ovl_cache_entry *p; |
679 | |
680 | p = ovl_cache_entry_find(root: &rdt->cache->root, name, len: namelen); |
681 | if (p) |
682 | ino = p->ino; |
683 | } else if (rdt->xinobits) { |
684 | ino = ovl_remap_lower_ino(ino, xinobits: rdt->xinobits, fsid: rdt->fsid, |
685 | name, namelen, warn: rdt->xinowarn); |
686 | } |
687 | |
688 | return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type); |
689 | } |
690 | |
691 | static bool ovl_is_impure_dir(struct file *file) |
692 | { |
693 | struct ovl_dir_file *od = file->private_data; |
694 | struct inode *dir = file_inode(f: file); |
695 | |
696 | /* |
697 | * Only upper dir can be impure, but if we are in the middle of |
698 | * iterating a lower real dir, dir could be copied up and marked |
699 | * impure. We only want the impure cache if we started iterating |
700 | * a real upper dir to begin with. |
701 | */ |
702 | return od->is_upper && ovl_test_flag(flag: OVL_IMPURE, inode: dir); |
703 | |
704 | } |
705 | |
706 | static int ovl_iterate_real(struct file *file, struct dir_context *ctx) |
707 | { |
708 | int err; |
709 | struct ovl_dir_file *od = file->private_data; |
710 | struct dentry *dir = file->f_path.dentry; |
711 | struct ovl_fs *ofs = OVL_FS(sb: dir->d_sb); |
712 | const struct ovl_layer *lower_layer = ovl_layer_lower(dentry: dir); |
713 | struct ovl_readdir_translate rdt = { |
714 | .ctx.actor = ovl_fill_real, |
715 | .orig_ctx = ctx, |
716 | .xinobits = ovl_xino_bits(ofs), |
717 | .xinowarn = ovl_xino_warn(ofs), |
718 | }; |
719 | |
720 | if (rdt.xinobits && lower_layer) |
721 | rdt.fsid = lower_layer->fsid; |
722 | |
723 | if (OVL_TYPE_MERGE(ovl_path_type(dir->d_parent))) { |
724 | struct kstat stat; |
725 | struct path statpath = file->f_path; |
726 | |
727 | statpath.dentry = dir->d_parent; |
728 | err = vfs_getattr(&statpath, &stat, STATX_INO, 0); |
729 | if (err) |
730 | return err; |
731 | |
732 | WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev); |
733 | rdt.parent_ino = stat.ino; |
734 | } |
735 | |
736 | if (ovl_is_impure_dir(file)) { |
737 | rdt.cache = ovl_cache_get_impure(path: &file->f_path); |
738 | if (IS_ERR(ptr: rdt.cache)) |
739 | return PTR_ERR(ptr: rdt.cache); |
740 | } |
741 | |
742 | err = iterate_dir(od->realfile, &rdt.ctx); |
743 | ctx->pos = rdt.ctx.pos; |
744 | |
745 | return err; |
746 | } |
747 | |
748 | |
749 | static int ovl_iterate(struct file *file, struct dir_context *ctx) |
750 | { |
751 | struct ovl_dir_file *od = file->private_data; |
752 | struct dentry *dentry = file->f_path.dentry; |
753 | struct ovl_fs *ofs = OVL_FS(sb: dentry->d_sb); |
754 | struct ovl_cache_entry *p; |
755 | const struct cred *old_cred; |
756 | int err; |
757 | |
758 | old_cred = ovl_override_creds(sb: dentry->d_sb); |
759 | if (!ctx->pos) |
760 | ovl_dir_reset(file); |
761 | |
762 | if (od->is_real) { |
763 | /* |
764 | * If parent is merge, then need to adjust d_ino for '..', if |
765 | * dir is impure then need to adjust d_ino for copied up |
766 | * entries. |
767 | */ |
768 | if (ovl_xino_bits(ofs) || |
769 | (ovl_same_fs(ofs) && |
770 | (ovl_is_impure_dir(file) || |
771 | OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) { |
772 | err = ovl_iterate_real(file, ctx); |
773 | } else { |
774 | err = iterate_dir(od->realfile, ctx); |
775 | } |
776 | goto out; |
777 | } |
778 | |
779 | if (!od->cache) { |
780 | struct ovl_dir_cache *cache; |
781 | |
782 | cache = ovl_cache_get(dentry); |
783 | err = PTR_ERR(ptr: cache); |
784 | if (IS_ERR(ptr: cache)) |
785 | goto out; |
786 | |
787 | od->cache = cache; |
788 | ovl_seek_cursor(od, pos: ctx->pos); |
789 | } |
790 | |
791 | while (od->cursor != &od->cache->entries) { |
792 | p = list_entry(od->cursor, struct ovl_cache_entry, l_node); |
793 | if (!p->is_whiteout) { |
794 | if (!p->ino || p->check_xwhiteout) { |
795 | err = ovl_cache_update(path: &file->f_path, p, update_ino: !p->ino); |
796 | if (err) |
797 | goto out; |
798 | } |
799 | } |
800 | /* ovl_cache_update() sets is_whiteout on stale entry */ |
801 | if (!p->is_whiteout) { |
802 | if (!dir_emit(ctx, name: p->name, namelen: p->len, ino: p->ino, type: p->type)) |
803 | break; |
804 | } |
805 | od->cursor = p->l_node.next; |
806 | ctx->pos++; |
807 | } |
808 | err = 0; |
809 | out: |
810 | revert_creds(old_cred); |
811 | return err; |
812 | } |
813 | |
814 | static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin) |
815 | { |
816 | loff_t res; |
817 | struct ovl_dir_file *od = file->private_data; |
818 | |
819 | inode_lock(inode: file_inode(f: file)); |
820 | if (!file->f_pos) |
821 | ovl_dir_reset(file); |
822 | |
823 | if (od->is_real) { |
824 | res = vfs_llseek(file: od->realfile, offset, whence: origin); |
825 | file->f_pos = od->realfile->f_pos; |
826 | } else { |
827 | res = -EINVAL; |
828 | |
829 | switch (origin) { |
830 | case SEEK_CUR: |
831 | offset += file->f_pos; |
832 | break; |
833 | case SEEK_SET: |
834 | break; |
835 | default: |
836 | goto out_unlock; |
837 | } |
838 | if (offset < 0) |
839 | goto out_unlock; |
840 | |
841 | if (offset != file->f_pos) { |
842 | file->f_pos = offset; |
843 | if (od->cache) |
844 | ovl_seek_cursor(od, pos: offset); |
845 | } |
846 | res = offset; |
847 | } |
848 | out_unlock: |
849 | inode_unlock(inode: file_inode(f: file)); |
850 | |
851 | return res; |
852 | } |
853 | |
854 | static struct file *ovl_dir_open_realfile(const struct file *file, |
855 | const struct path *realpath) |
856 | { |
857 | struct file *res; |
858 | const struct cred *old_cred; |
859 | |
860 | old_cred = ovl_override_creds(sb: file_inode(f: file)->i_sb); |
861 | res = ovl_path_open(path: realpath, O_RDONLY | (file->f_flags & O_LARGEFILE)); |
862 | revert_creds(old_cred); |
863 | |
864 | return res; |
865 | } |
866 | |
867 | /* |
868 | * Like ovl_real_fdget(), returns upperfile if dir was copied up since open. |
869 | * Unlike ovl_real_fdget(), this caches upperfile in file->private_data. |
870 | * |
871 | * TODO: use same abstract type for file->private_data of dir and file so |
872 | * upperfile could also be cached for files as well. |
873 | */ |
874 | struct file *ovl_dir_real_file(const struct file *file, bool want_upper) |
875 | { |
876 | |
877 | struct ovl_dir_file *od = file->private_data; |
878 | struct dentry *dentry = file->f_path.dentry; |
879 | struct file *old, *realfile = od->realfile; |
880 | |
881 | if (!OVL_TYPE_UPPER(ovl_path_type(dentry))) |
882 | return want_upper ? NULL : realfile; |
883 | |
884 | /* |
885 | * Need to check if we started out being a lower dir, but got copied up |
886 | */ |
887 | if (!od->is_upper) { |
888 | realfile = READ_ONCE(od->upperfile); |
889 | if (!realfile) { |
890 | struct path upperpath; |
891 | |
892 | ovl_path_upper(dentry, path: &upperpath); |
893 | realfile = ovl_dir_open_realfile(file, realpath: &upperpath); |
894 | if (IS_ERR(ptr: realfile)) |
895 | return realfile; |
896 | |
897 | old = cmpxchg_release(&od->upperfile, NULL, realfile); |
898 | if (old) { |
899 | fput(realfile); |
900 | realfile = old; |
901 | } |
902 | } |
903 | } |
904 | |
905 | return realfile; |
906 | } |
907 | |
908 | static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, |
909 | int datasync) |
910 | { |
911 | struct file *realfile; |
912 | int err; |
913 | |
914 | err = ovl_sync_status(ofs: OVL_FS(sb: file_inode(f: file)->i_sb)); |
915 | if (err <= 0) |
916 | return err; |
917 | |
918 | realfile = ovl_dir_real_file(file, want_upper: true); |
919 | err = PTR_ERR_OR_ZERO(ptr: realfile); |
920 | |
921 | /* Nothing to sync for lower */ |
922 | if (!realfile || err) |
923 | return err; |
924 | |
925 | return vfs_fsync_range(file: realfile, start, end, datasync); |
926 | } |
927 | |
928 | static int ovl_dir_release(struct inode *inode, struct file *file) |
929 | { |
930 | struct ovl_dir_file *od = file->private_data; |
931 | |
932 | if (od->cache) { |
933 | inode_lock(inode); |
934 | ovl_cache_put(od, inode); |
935 | inode_unlock(inode); |
936 | } |
937 | fput(od->realfile); |
938 | if (od->upperfile) |
939 | fput(od->upperfile); |
940 | kfree(objp: od); |
941 | |
942 | return 0; |
943 | } |
944 | |
945 | static int ovl_dir_open(struct inode *inode, struct file *file) |
946 | { |
947 | struct path realpath; |
948 | struct file *realfile; |
949 | struct ovl_dir_file *od; |
950 | enum ovl_path_type type; |
951 | |
952 | od = kzalloc(size: sizeof(struct ovl_dir_file), GFP_KERNEL); |
953 | if (!od) |
954 | return -ENOMEM; |
955 | |
956 | type = ovl_path_real(dentry: file->f_path.dentry, path: &realpath); |
957 | realfile = ovl_dir_open_realfile(file, realpath: &realpath); |
958 | if (IS_ERR(ptr: realfile)) { |
959 | kfree(objp: od); |
960 | return PTR_ERR(ptr: realfile); |
961 | } |
962 | od->realfile = realfile; |
963 | od->is_real = ovl_dir_is_real(dir: inode); |
964 | od->is_upper = OVL_TYPE_UPPER(type); |
965 | file->private_data = od; |
966 | |
967 | return 0; |
968 | } |
969 | |
970 | WRAP_DIR_ITER(ovl_iterate) // FIXME! |
971 | const struct file_operations ovl_dir_operations = { |
972 | .read = generic_read_dir, |
973 | .open = ovl_dir_open, |
974 | .iterate_shared = shared_ovl_iterate, |
975 | .llseek = ovl_dir_llseek, |
976 | .fsync = ovl_dir_fsync, |
977 | .release = ovl_dir_release, |
978 | }; |
979 | |
980 | int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) |
981 | { |
982 | int err; |
983 | struct ovl_cache_entry *p, *n; |
984 | struct rb_root root = RB_ROOT; |
985 | const struct cred *old_cred; |
986 | |
987 | old_cred = ovl_override_creds(sb: dentry->d_sb); |
988 | err = ovl_dir_read_merged(dentry, list, root: &root); |
989 | revert_creds(old_cred); |
990 | if (err) |
991 | return err; |
992 | |
993 | err = 0; |
994 | |
995 | list_for_each_entry_safe(p, n, list, l_node) { |
996 | /* |
997 | * Select whiteouts in upperdir, they should |
998 | * be cleared when deleting this directory. |
999 | */ |
1000 | if (p->is_whiteout) { |
1001 | if (p->is_upper) |
1002 | continue; |
1003 | goto del_entry; |
1004 | } |
1005 | |
1006 | if (p->name[0] == '.') { |
1007 | if (p->len == 1) |
1008 | goto del_entry; |
1009 | if (p->len == 2 && p->name[1] == '.') |
1010 | goto del_entry; |
1011 | } |
1012 | err = -ENOTEMPTY; |
1013 | break; |
1014 | |
1015 | del_entry: |
1016 | list_del(entry: &p->l_node); |
1017 | kfree(objp: p); |
1018 | } |
1019 | |
1020 | return err; |
1021 | } |
1022 | |
1023 | void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper, |
1024 | struct list_head *list) |
1025 | { |
1026 | struct ovl_cache_entry *p; |
1027 | |
1028 | inode_lock_nested(inode: upper->d_inode, subclass: I_MUTEX_CHILD); |
1029 | list_for_each_entry(p, list, l_node) { |
1030 | struct dentry *dentry; |
1031 | |
1032 | if (WARN_ON(!p->is_whiteout || !p->is_upper)) |
1033 | continue; |
1034 | |
1035 | dentry = ovl_lookup_upper(ofs, name: p->name, base: upper, len: p->len); |
1036 | if (IS_ERR(ptr: dentry)) { |
1037 | pr_err("lookup '%s/%.*s' failed (%i)\n" , |
1038 | upper->d_name.name, p->len, p->name, |
1039 | (int) PTR_ERR(dentry)); |
1040 | continue; |
1041 | } |
1042 | if (dentry->d_inode) |
1043 | ovl_cleanup(ofs, dir: upper->d_inode, dentry); |
1044 | dput(dentry); |
1045 | } |
1046 | inode_unlock(inode: upper->d_inode); |
1047 | } |
1048 | |
1049 | static bool ovl_check_d_type(struct dir_context *ctx, const char *name, |
1050 | int namelen, loff_t offset, u64 ino, |
1051 | unsigned int d_type) |
1052 | { |
1053 | struct ovl_readdir_data *rdd = |
1054 | container_of(ctx, struct ovl_readdir_data, ctx); |
1055 | |
1056 | /* Even if d_type is not supported, DT_DIR is returned for . and .. */ |
1057 | if (!strncmp(name, "." , namelen) || !strncmp(name, ".." , namelen)) |
1058 | return true; |
1059 | |
1060 | if (d_type != DT_UNKNOWN) |
1061 | rdd->d_type_supported = true; |
1062 | |
1063 | return true; |
1064 | } |
1065 | |
1066 | /* |
1067 | * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values |
1068 | * if error is encountered. |
1069 | */ |
1070 | int ovl_check_d_type_supported(const struct path *realpath) |
1071 | { |
1072 | int err; |
1073 | struct ovl_readdir_data rdd = { |
1074 | .ctx.actor = ovl_check_d_type, |
1075 | .d_type_supported = false, |
1076 | }; |
1077 | |
1078 | err = ovl_dir_read(realpath, rdd: &rdd); |
1079 | if (err) |
1080 | return err; |
1081 | |
1082 | return rdd.d_type_supported; |
1083 | } |
1084 | |
1085 | #define OVL_INCOMPATDIR_NAME "incompat" |
1086 | |
1087 | static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *path, |
1088 | int level) |
1089 | { |
1090 | int err; |
1091 | struct inode *dir = path->dentry->d_inode; |
1092 | LIST_HEAD(list); |
1093 | struct ovl_cache_entry *p; |
1094 | struct ovl_readdir_data rdd = { |
1095 | .ctx.actor = ovl_fill_plain, |
1096 | .list = &list, |
1097 | }; |
1098 | bool incompat = false; |
1099 | |
1100 | /* |
1101 | * The "work/incompat" directory is treated specially - if it is not |
1102 | * empty, instead of printing a generic error and mounting read-only, |
1103 | * we will error about incompat features and fail the mount. |
1104 | * |
1105 | * When called from ovl_indexdir_cleanup(), path->dentry->d_name.name |
1106 | * starts with '#'. |
1107 | */ |
1108 | if (level == 2 && |
1109 | !strcmp(path->dentry->d_name.name, OVL_INCOMPATDIR_NAME)) |
1110 | incompat = true; |
1111 | |
1112 | err = ovl_dir_read(realpath: path, rdd: &rdd); |
1113 | if (err) |
1114 | goto out; |
1115 | |
1116 | inode_lock_nested(inode: dir, subclass: I_MUTEX_PARENT); |
1117 | list_for_each_entry(p, &list, l_node) { |
1118 | struct dentry *dentry; |
1119 | |
1120 | if (p->name[0] == '.') { |
1121 | if (p->len == 1) |
1122 | continue; |
1123 | if (p->len == 2 && p->name[1] == '.') |
1124 | continue; |
1125 | } else if (incompat) { |
1126 | pr_err("overlay with incompat feature '%s' cannot be mounted\n" , |
1127 | p->name); |
1128 | err = -EINVAL; |
1129 | break; |
1130 | } |
1131 | dentry = ovl_lookup_upper(ofs, name: p->name, base: path->dentry, len: p->len); |
1132 | if (IS_ERR(ptr: dentry)) |
1133 | continue; |
1134 | if (dentry->d_inode) |
1135 | err = ovl_workdir_cleanup(ofs, dir, mnt: path->mnt, dentry, level); |
1136 | dput(dentry); |
1137 | if (err) |
1138 | break; |
1139 | } |
1140 | inode_unlock(inode: dir); |
1141 | out: |
1142 | ovl_cache_free(list: &list); |
1143 | return err; |
1144 | } |
1145 | |
1146 | int ovl_workdir_cleanup(struct ovl_fs *ofs, struct inode *dir, |
1147 | struct vfsmount *mnt, struct dentry *dentry, int level) |
1148 | { |
1149 | int err; |
1150 | |
1151 | if (!d_is_dir(dentry) || level > 1) { |
1152 | return ovl_cleanup(ofs, dir, dentry); |
1153 | } |
1154 | |
1155 | err = ovl_do_rmdir(ofs, dir, dentry); |
1156 | if (err) { |
1157 | struct path path = { .mnt = mnt, .dentry = dentry }; |
1158 | |
1159 | inode_unlock(inode: dir); |
1160 | err = ovl_workdir_cleanup_recurse(ofs, path: &path, level: level + 1); |
1161 | inode_lock_nested(inode: dir, subclass: I_MUTEX_PARENT); |
1162 | if (!err) |
1163 | err = ovl_cleanup(ofs, dir, dentry); |
1164 | } |
1165 | |
1166 | return err; |
1167 | } |
1168 | |
1169 | int ovl_indexdir_cleanup(struct ovl_fs *ofs) |
1170 | { |
1171 | int err; |
1172 | struct dentry *indexdir = ofs->indexdir; |
1173 | struct dentry *index = NULL; |
1174 | struct inode *dir = indexdir->d_inode; |
1175 | struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = indexdir }; |
1176 | LIST_HEAD(list); |
1177 | struct ovl_cache_entry *p; |
1178 | struct ovl_readdir_data rdd = { |
1179 | .ctx.actor = ovl_fill_plain, |
1180 | .list = &list, |
1181 | }; |
1182 | |
1183 | err = ovl_dir_read(realpath: &path, rdd: &rdd); |
1184 | if (err) |
1185 | goto out; |
1186 | |
1187 | inode_lock_nested(inode: dir, subclass: I_MUTEX_PARENT); |
1188 | list_for_each_entry(p, &list, l_node) { |
1189 | if (p->name[0] == '.') { |
1190 | if (p->len == 1) |
1191 | continue; |
1192 | if (p->len == 2 && p->name[1] == '.') |
1193 | continue; |
1194 | } |
1195 | index = ovl_lookup_upper(ofs, name: p->name, base: indexdir, len: p->len); |
1196 | if (IS_ERR(ptr: index)) { |
1197 | err = PTR_ERR(ptr: index); |
1198 | index = NULL; |
1199 | break; |
1200 | } |
1201 | /* Cleanup leftover from index create/cleanup attempt */ |
1202 | if (index->d_name.name[0] == '#') { |
1203 | err = ovl_workdir_cleanup(ofs, dir, mnt: path.mnt, dentry: index, level: 1); |
1204 | if (err) |
1205 | break; |
1206 | goto next; |
1207 | } |
1208 | err = ovl_verify_index(ofs, index); |
1209 | if (!err) { |
1210 | goto next; |
1211 | } else if (err == -ESTALE) { |
1212 | /* Cleanup stale index entries */ |
1213 | err = ovl_cleanup(ofs, dir, dentry: index); |
1214 | } else if (err != -ENOENT) { |
1215 | /* |
1216 | * Abort mount to avoid corrupting the index if |
1217 | * an incompatible index entry was found or on out |
1218 | * of memory. |
1219 | */ |
1220 | break; |
1221 | } else if (ofs->config.nfs_export) { |
1222 | /* |
1223 | * Whiteout orphan index to block future open by |
1224 | * handle after overlay nlink dropped to zero. |
1225 | */ |
1226 | err = ovl_cleanup_and_whiteout(ofs, dir, dentry: index); |
1227 | } else { |
1228 | /* Cleanup orphan index entries */ |
1229 | err = ovl_cleanup(ofs, dir, dentry: index); |
1230 | } |
1231 | |
1232 | if (err) |
1233 | break; |
1234 | |
1235 | next: |
1236 | dput(index); |
1237 | index = NULL; |
1238 | } |
1239 | dput(index); |
1240 | inode_unlock(inode: dir); |
1241 | out: |
1242 | ovl_cache_free(list: &list); |
1243 | if (err) |
1244 | pr_err("failed index dir cleanup (%i)\n" , err); |
1245 | return err; |
1246 | } |
1247 | |