1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright (C) 2017 Red Hat, Inc. |
4 | */ |
5 | |
6 | #include <linux/cred.h> |
7 | #include <linux/file.h> |
8 | #include <linux/mount.h> |
9 | #include <linux/xattr.h> |
10 | #include <linux/uio.h> |
11 | #include <linux/uaccess.h> |
12 | #include <linux/splice.h> |
13 | #include <linux/security.h> |
14 | #include <linux/mm.h> |
15 | #include <linux/fs.h> |
16 | #include "overlayfs.h" |
17 | |
18 | #include "../internal.h" /* for sb_init_dio_done_wq */ |
19 | |
20 | struct ovl_aio_req { |
21 | struct kiocb iocb; |
22 | refcount_t ref; |
23 | struct kiocb *orig_iocb; |
24 | /* used for aio completion */ |
25 | struct work_struct work; |
26 | long res; |
27 | }; |
28 | |
/* Slab cache from which struct ovl_aio_req objects are allocated */
static struct kmem_cache *ovl_aio_request_cachep;
30 | |
/*
 * Single-character tag describing @realinode relative to overlay @inode,
 * used in debug output:
 *   'l' - @realinode is not the upper inode
 *   'u' - @realinode is the upper inode and ovl_has_upperdata() is true
 *   'm' - @realinode is the upper inode and ovl_has_upperdata() is false
 */
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
	if (realinode != ovl_inode_upper(inode))
		return 'l';

	return ovl_has_upperdata(inode) ? 'u' : 'm';
}
40 | |
41 | /* No atime modification on underlying */ |
42 | #define OVL_OPEN_FLAGS (O_NOATIME) |
43 | |
44 | static struct file *ovl_open_realfile(const struct file *file, |
45 | const struct path *realpath) |
46 | { |
47 | struct inode *realinode = d_inode(dentry: realpath->dentry); |
48 | struct inode *inode = file_inode(f: file); |
49 | struct mnt_idmap *real_idmap; |
50 | struct file *realfile; |
51 | const struct cred *old_cred; |
52 | int flags = file->f_flags | OVL_OPEN_FLAGS; |
53 | int acc_mode = ACC_MODE(flags); |
54 | int err; |
55 | |
56 | if (flags & O_APPEND) |
57 | acc_mode |= MAY_APPEND; |
58 | |
59 | old_cred = ovl_override_creds(sb: inode->i_sb); |
60 | real_idmap = mnt_idmap(mnt: realpath->mnt); |
61 | err = inode_permission(real_idmap, realinode, MAY_OPEN | acc_mode); |
62 | if (err) { |
63 | realfile = ERR_PTR(error: err); |
64 | } else { |
65 | if (!inode_owner_or_capable(idmap: real_idmap, inode: realinode)) |
66 | flags &= ~O_NOATIME; |
67 | |
68 | realfile = backing_file_open(user_path: &file->f_path, flags, real_path: realpath, |
69 | current_cred()); |
70 | } |
71 | revert_creds(old_cred); |
72 | |
73 | pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n" , |
74 | file, file, ovl_whatisit(inode, realinode), file->f_flags, |
75 | realfile, IS_ERR(realfile) ? 0 : realfile->f_flags); |
76 | |
77 | return realfile; |
78 | } |
79 | |
80 | #define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT) |
81 | |
82 | static int ovl_change_flags(struct file *file, unsigned int flags) |
83 | { |
84 | struct inode *inode = file_inode(f: file); |
85 | int err; |
86 | |
87 | flags &= OVL_SETFL_MASK; |
88 | |
89 | if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode)) |
90 | return -EPERM; |
91 | |
92 | if ((flags & O_DIRECT) && !(file->f_mode & FMODE_CAN_ODIRECT)) |
93 | return -EINVAL; |
94 | |
95 | if (file->f_op->check_flags) { |
96 | err = file->f_op->check_flags(flags); |
97 | if (err) |
98 | return err; |
99 | } |
100 | |
101 | spin_lock(lock: &file->f_lock); |
102 | file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags; |
103 | file->f_iocb_flags = iocb_flags(file); |
104 | spin_unlock(lock: &file->f_lock); |
105 | |
106 | return 0; |
107 | } |
108 | |
109 | static int ovl_real_fdget_meta(const struct file *file, struct fd *real, |
110 | bool allow_meta) |
111 | { |
112 | struct dentry *dentry = file_dentry(file); |
113 | struct path realpath; |
114 | int err; |
115 | |
116 | real->flags = 0; |
117 | real->file = file->private_data; |
118 | |
119 | if (allow_meta) { |
120 | ovl_path_real(dentry, path: &realpath); |
121 | } else { |
122 | /* lazy lookup and verify of lowerdata */ |
123 | err = ovl_verify_lowerdata(dentry); |
124 | if (err) |
125 | return err; |
126 | |
127 | ovl_path_realdata(dentry, path: &realpath); |
128 | } |
129 | if (!realpath.dentry) |
130 | return -EIO; |
131 | |
132 | /* Has it been copied up since we'd opened it? */ |
133 | if (unlikely(file_inode(real->file) != d_inode(realpath.dentry))) { |
134 | real->flags = FDPUT_FPUT; |
135 | real->file = ovl_open_realfile(file, realpath: &realpath); |
136 | |
137 | return PTR_ERR_OR_ZERO(ptr: real->file); |
138 | } |
139 | |
140 | /* Did the flags change since open? */ |
141 | if (unlikely((file->f_flags ^ real->file->f_flags) & ~OVL_OPEN_FLAGS)) |
142 | return ovl_change_flags(file: real->file, flags: file->f_flags); |
143 | |
144 | return 0; |
145 | } |
146 | |
147 | static int ovl_real_fdget(const struct file *file, struct fd *real) |
148 | { |
149 | if (d_is_dir(dentry: file_dentry(file))) { |
150 | real->flags = 0; |
151 | real->file = ovl_dir_real_file(file, want_upper: false); |
152 | |
153 | return PTR_ERR_OR_ZERO(ptr: real->file); |
154 | } |
155 | |
156 | return ovl_real_fdget_meta(file, real, allow_meta: false); |
157 | } |
158 | |
159 | static int ovl_open(struct inode *inode, struct file *file) |
160 | { |
161 | struct dentry *dentry = file_dentry(file); |
162 | struct file *realfile; |
163 | struct path realpath; |
164 | int err; |
165 | |
166 | /* lazy lookup and verify lowerdata */ |
167 | err = ovl_verify_lowerdata(dentry); |
168 | if (err) |
169 | return err; |
170 | |
171 | err = ovl_maybe_copy_up(dentry, flags: file->f_flags); |
172 | if (err) |
173 | return err; |
174 | |
175 | /* No longer need these flags, so don't pass them on to underlying fs */ |
176 | file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); |
177 | |
178 | ovl_path_realdata(dentry, path: &realpath); |
179 | if (!realpath.dentry) |
180 | return -EIO; |
181 | |
182 | realfile = ovl_open_realfile(file, realpath: &realpath); |
183 | if (IS_ERR(ptr: realfile)) |
184 | return PTR_ERR(ptr: realfile); |
185 | |
186 | file->private_data = realfile; |
187 | |
188 | return 0; |
189 | } |
190 | |
191 | static int ovl_release(struct inode *inode, struct file *file) |
192 | { |
193 | fput(file->private_data); |
194 | |
195 | return 0; |
196 | } |
197 | |
198 | static loff_t ovl_llseek(struct file *file, loff_t offset, int whence) |
199 | { |
200 | struct inode *inode = file_inode(f: file); |
201 | struct fd real; |
202 | const struct cred *old_cred; |
203 | loff_t ret; |
204 | |
205 | /* |
206 | * The two special cases below do not need to involve real fs, |
207 | * so we can optimizing concurrent callers. |
208 | */ |
209 | if (offset == 0) { |
210 | if (whence == SEEK_CUR) |
211 | return file->f_pos; |
212 | |
213 | if (whence == SEEK_SET) |
214 | return vfs_setpos(file, offset: 0, maxsize: 0); |
215 | } |
216 | |
217 | ret = ovl_real_fdget(file, real: &real); |
218 | if (ret) |
219 | return ret; |
220 | |
221 | /* |
222 | * Overlay file f_pos is the master copy that is preserved |
223 | * through copy up and modified on read/write, but only real |
224 | * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose |
225 | * limitations that are more strict than ->s_maxbytes for specific |
226 | * files, so we use the real file to perform seeks. |
227 | */ |
228 | ovl_inode_lock(inode); |
229 | real.file->f_pos = file->f_pos; |
230 | |
231 | old_cred = ovl_override_creds(sb: inode->i_sb); |
232 | ret = vfs_llseek(file: real.file, offset, whence); |
233 | revert_creds(old_cred); |
234 | |
235 | file->f_pos = real.file->f_pos; |
236 | ovl_inode_unlock(inode); |
237 | |
238 | fdput(fd: real); |
239 | |
240 | return ret; |
241 | } |
242 | |
/* Refresh the overlay inode's attributes (size/mtime) after a modification. */
static void ovl_file_modified(struct file *file)
{
	struct inode *inode = file_inode(file);

	/* Update size/mtime */
	ovl_copyattr(inode);
}
248 | |
249 | static void ovl_file_accessed(struct file *file) |
250 | { |
251 | struct inode *inode, *upperinode; |
252 | struct timespec64 ctime, uctime; |
253 | struct timespec64 mtime, umtime; |
254 | |
255 | if (file->f_flags & O_NOATIME) |
256 | return; |
257 | |
258 | inode = file_inode(f: file); |
259 | upperinode = ovl_inode_upper(inode); |
260 | |
261 | if (!upperinode) |
262 | return; |
263 | |
264 | ctime = inode_get_ctime(inode); |
265 | uctime = inode_get_ctime(inode: upperinode); |
266 | mtime = inode_get_mtime(inode); |
267 | umtime = inode_get_mtime(inode: upperinode); |
268 | if ((!timespec64_equal(a: &mtime, b: &umtime)) || |
269 | !timespec64_equal(a: &ctime, b: &uctime)) { |
270 | inode_set_mtime_to_ts(inode, ts: inode_get_mtime(inode: upperinode)); |
271 | inode_set_ctime_to_ts(inode, ts: uctime); |
272 | } |
273 | |
274 | touch_atime(&file->f_path); |
275 | } |
276 | |
277 | #define OVL_IOCB_MASK \ |
278 | (IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND) |
279 | |
280 | static rwf_t iocb_to_rw_flags(int flags) |
281 | { |
282 | return (__force rwf_t)(flags & OVL_IOCB_MASK); |
283 | } |
284 | |
285 | static inline void ovl_aio_put(struct ovl_aio_req *aio_req) |
286 | { |
287 | if (refcount_dec_and_test(r: &aio_req->ref)) { |
288 | fput(aio_req->iocb.ki_filp); |
289 | kmem_cache_free(s: ovl_aio_request_cachep, objp: aio_req); |
290 | } |
291 | } |
292 | |
293 | static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req) |
294 | { |
295 | struct kiocb *iocb = &aio_req->iocb; |
296 | struct kiocb *orig_iocb = aio_req->orig_iocb; |
297 | |
298 | if (iocb->ki_flags & IOCB_WRITE) { |
299 | kiocb_end_write(iocb); |
300 | ovl_file_modified(file: orig_iocb->ki_filp); |
301 | } |
302 | |
303 | orig_iocb->ki_pos = iocb->ki_pos; |
304 | ovl_aio_put(aio_req); |
305 | } |
306 | |
307 | static void ovl_aio_rw_complete(struct kiocb *iocb, long res) |
308 | { |
309 | struct ovl_aio_req *aio_req = container_of(iocb, |
310 | struct ovl_aio_req, iocb); |
311 | struct kiocb *orig_iocb = aio_req->orig_iocb; |
312 | |
313 | ovl_aio_cleanup_handler(aio_req); |
314 | orig_iocb->ki_complete(orig_iocb, res); |
315 | } |
316 | |
317 | static void ovl_aio_complete_work(struct work_struct *work) |
318 | { |
319 | struct ovl_aio_req *aio_req = container_of(work, |
320 | struct ovl_aio_req, work); |
321 | |
322 | ovl_aio_rw_complete(iocb: &aio_req->iocb, res: aio_req->res); |
323 | } |
324 | |
325 | static void ovl_aio_queue_completion(struct kiocb *iocb, long res) |
326 | { |
327 | struct ovl_aio_req *aio_req = container_of(iocb, |
328 | struct ovl_aio_req, iocb); |
329 | struct kiocb *orig_iocb = aio_req->orig_iocb; |
330 | |
331 | /* |
332 | * Punt to a work queue to serialize updates of mtime/size. |
333 | */ |
334 | aio_req->res = res; |
335 | INIT_WORK(&aio_req->work, ovl_aio_complete_work); |
336 | queue_work(wq: file_inode(f: orig_iocb->ki_filp)->i_sb->s_dio_done_wq, |
337 | work: &aio_req->work); |
338 | } |
339 | |
340 | static int ovl_init_aio_done_wq(struct super_block *sb) |
341 | { |
342 | if (sb->s_dio_done_wq) |
343 | return 0; |
344 | |
345 | return sb_init_dio_done_wq(sb); |
346 | } |
347 | |
348 | static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) |
349 | { |
350 | struct file *file = iocb->ki_filp; |
351 | struct fd real; |
352 | const struct cred *old_cred; |
353 | ssize_t ret; |
354 | |
355 | if (!iov_iter_count(i: iter)) |
356 | return 0; |
357 | |
358 | ret = ovl_real_fdget(file, real: &real); |
359 | if (ret) |
360 | return ret; |
361 | |
362 | ret = -EINVAL; |
363 | if (iocb->ki_flags & IOCB_DIRECT && |
364 | !(real.file->f_mode & FMODE_CAN_ODIRECT)) |
365 | goto out_fdput; |
366 | |
367 | old_cred = ovl_override_creds(sb: file_inode(f: file)->i_sb); |
368 | if (is_sync_kiocb(kiocb: iocb)) { |
369 | rwf_t rwf = iocb_to_rw_flags(flags: iocb->ki_flags); |
370 | |
371 | ret = vfs_iter_read(file: real.file, iter, ppos: &iocb->ki_pos, flags: rwf); |
372 | } else { |
373 | struct ovl_aio_req *aio_req; |
374 | |
375 | ret = -ENOMEM; |
376 | aio_req = kmem_cache_zalloc(k: ovl_aio_request_cachep, GFP_KERNEL); |
377 | if (!aio_req) |
378 | goto out; |
379 | |
380 | aio_req->orig_iocb = iocb; |
381 | kiocb_clone(kiocb: &aio_req->iocb, kiocb_src: iocb, filp: get_file(f: real.file)); |
382 | aio_req->iocb.ki_complete = ovl_aio_rw_complete; |
383 | refcount_set(r: &aio_req->ref, n: 2); |
384 | ret = vfs_iocb_iter_read(file: real.file, iocb: &aio_req->iocb, iter); |
385 | ovl_aio_put(aio_req); |
386 | if (ret != -EIOCBQUEUED) |
387 | ovl_aio_cleanup_handler(aio_req); |
388 | } |
389 | out: |
390 | revert_creds(old_cred); |
391 | ovl_file_accessed(file); |
392 | out_fdput: |
393 | fdput(fd: real); |
394 | |
395 | return ret; |
396 | } |
397 | |
398 | static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) |
399 | { |
400 | struct file *file = iocb->ki_filp; |
401 | struct inode *inode = file_inode(f: file); |
402 | struct fd real; |
403 | const struct cred *old_cred; |
404 | ssize_t ret; |
405 | int ifl = iocb->ki_flags; |
406 | |
407 | if (!iov_iter_count(i: iter)) |
408 | return 0; |
409 | |
410 | inode_lock(inode); |
411 | /* Update mode */ |
412 | ovl_copyattr(to: inode); |
413 | ret = file_remove_privs(file); |
414 | if (ret) |
415 | goto out_unlock; |
416 | |
417 | ret = ovl_real_fdget(file, real: &real); |
418 | if (ret) |
419 | goto out_unlock; |
420 | |
421 | ret = -EINVAL; |
422 | if (iocb->ki_flags & IOCB_DIRECT && |
423 | !(real.file->f_mode & FMODE_CAN_ODIRECT)) |
424 | goto out_fdput; |
425 | |
426 | if (!ovl_should_sync(ofs: OVL_FS(sb: inode->i_sb))) |
427 | ifl &= ~(IOCB_DSYNC | IOCB_SYNC); |
428 | |
429 | /* |
430 | * Overlayfs doesn't support deferred completions, don't copy |
431 | * this property in case it is set by the issuer. |
432 | */ |
433 | ifl &= ~IOCB_DIO_CALLER_COMP; |
434 | |
435 | old_cred = ovl_override_creds(sb: file_inode(f: file)->i_sb); |
436 | if (is_sync_kiocb(kiocb: iocb)) { |
437 | rwf_t rwf = iocb_to_rw_flags(flags: ifl); |
438 | |
439 | file_start_write(file: real.file); |
440 | ret = vfs_iter_write(file: real.file, iter, ppos: &iocb->ki_pos, flags: rwf); |
441 | file_end_write(file: real.file); |
442 | /* Update size */ |
443 | ovl_file_modified(file); |
444 | } else { |
445 | struct ovl_aio_req *aio_req; |
446 | |
447 | ret = ovl_init_aio_done_wq(sb: inode->i_sb); |
448 | if (ret) |
449 | goto out; |
450 | |
451 | ret = -ENOMEM; |
452 | aio_req = kmem_cache_zalloc(k: ovl_aio_request_cachep, GFP_KERNEL); |
453 | if (!aio_req) |
454 | goto out; |
455 | |
456 | aio_req->orig_iocb = iocb; |
457 | kiocb_clone(kiocb: &aio_req->iocb, kiocb_src: iocb, filp: get_file(f: real.file)); |
458 | aio_req->iocb.ki_flags = ifl; |
459 | aio_req->iocb.ki_complete = ovl_aio_queue_completion; |
460 | refcount_set(r: &aio_req->ref, n: 2); |
461 | kiocb_start_write(iocb: &aio_req->iocb); |
462 | ret = vfs_iocb_iter_write(file: real.file, iocb: &aio_req->iocb, iter); |
463 | ovl_aio_put(aio_req); |
464 | if (ret != -EIOCBQUEUED) |
465 | ovl_aio_cleanup_handler(aio_req); |
466 | } |
467 | out: |
468 | revert_creds(old_cred); |
469 | out_fdput: |
470 | fdput(fd: real); |
471 | |
472 | out_unlock: |
473 | inode_unlock(inode); |
474 | |
475 | return ret; |
476 | } |
477 | |
478 | static ssize_t ovl_splice_read(struct file *in, loff_t *ppos, |
479 | struct pipe_inode_info *pipe, size_t len, |
480 | unsigned int flags) |
481 | { |
482 | const struct cred *old_cred; |
483 | struct fd real; |
484 | ssize_t ret; |
485 | |
486 | ret = ovl_real_fdget(file: in, real: &real); |
487 | if (ret) |
488 | return ret; |
489 | |
490 | old_cred = ovl_override_creds(sb: file_inode(f: in)->i_sb); |
491 | ret = vfs_splice_read(in: real.file, ppos, pipe, len, flags); |
492 | revert_creds(old_cred); |
493 | ovl_file_accessed(file: in); |
494 | |
495 | fdput(fd: real); |
496 | return ret; |
497 | } |
498 | |
499 | /* |
500 | * Calling iter_file_splice_write() directly from overlay's f_op may deadlock |
501 | * due to lock order inversion between pipe->mutex in iter_file_splice_write() |
502 | * and file_start_write(real.file) in ovl_write_iter(). |
503 | * |
504 | * So do everything ovl_write_iter() does and call iter_file_splice_write() on |
505 | * the real file. |
506 | */ |
507 | static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out, |
508 | loff_t *ppos, size_t len, unsigned int flags) |
509 | { |
510 | struct fd real; |
511 | const struct cred *old_cred; |
512 | struct inode *inode = file_inode(f: out); |
513 | ssize_t ret; |
514 | |
515 | inode_lock(inode); |
516 | /* Update mode */ |
517 | ovl_copyattr(to: inode); |
518 | ret = file_remove_privs(out); |
519 | if (ret) |
520 | goto out_unlock; |
521 | |
522 | ret = ovl_real_fdget(file: out, real: &real); |
523 | if (ret) |
524 | goto out_unlock; |
525 | |
526 | old_cred = ovl_override_creds(sb: inode->i_sb); |
527 | file_start_write(file: real.file); |
528 | |
529 | ret = iter_file_splice_write(pipe, real.file, ppos, len, flags); |
530 | |
531 | file_end_write(file: real.file); |
532 | /* Update size */ |
533 | ovl_file_modified(file: out); |
534 | revert_creds(old_cred); |
535 | fdput(fd: real); |
536 | |
537 | out_unlock: |
538 | inode_unlock(inode); |
539 | |
540 | return ret; |
541 | } |
542 | |
543 | static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync) |
544 | { |
545 | struct fd real; |
546 | const struct cred *old_cred; |
547 | int ret; |
548 | |
549 | ret = ovl_sync_status(ofs: OVL_FS(sb: file_inode(f: file)->i_sb)); |
550 | if (ret <= 0) |
551 | return ret; |
552 | |
553 | ret = ovl_real_fdget_meta(file, real: &real, allow_meta: !datasync); |
554 | if (ret) |
555 | return ret; |
556 | |
557 | /* Don't sync lower file for fear of receiving EROFS error */ |
558 | if (file_inode(f: real.file) == ovl_inode_upper(inode: file_inode(f: file))) { |
559 | old_cred = ovl_override_creds(sb: file_inode(f: file)->i_sb); |
560 | ret = vfs_fsync_range(file: real.file, start, end, datasync); |
561 | revert_creds(old_cred); |
562 | } |
563 | |
564 | fdput(fd: real); |
565 | |
566 | return ret; |
567 | } |
568 | |
569 | static int ovl_mmap(struct file *file, struct vm_area_struct *vma) |
570 | { |
571 | struct file *realfile = file->private_data; |
572 | const struct cred *old_cred; |
573 | int ret; |
574 | |
575 | if (!realfile->f_op->mmap) |
576 | return -ENODEV; |
577 | |
578 | if (WARN_ON(file != vma->vm_file)) |
579 | return -EIO; |
580 | |
581 | vma_set_file(vma, file: realfile); |
582 | |
583 | old_cred = ovl_override_creds(sb: file_inode(f: file)->i_sb); |
584 | ret = call_mmap(file: vma->vm_file, vma); |
585 | revert_creds(old_cred); |
586 | ovl_file_accessed(file); |
587 | |
588 | return ret; |
589 | } |
590 | |
591 | static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len) |
592 | { |
593 | struct inode *inode = file_inode(f: file); |
594 | struct fd real; |
595 | const struct cred *old_cred; |
596 | int ret; |
597 | |
598 | inode_lock(inode); |
599 | /* Update mode */ |
600 | ovl_copyattr(to: inode); |
601 | ret = file_remove_privs(file); |
602 | if (ret) |
603 | goto out_unlock; |
604 | |
605 | ret = ovl_real_fdget(file, real: &real); |
606 | if (ret) |
607 | goto out_unlock; |
608 | |
609 | old_cred = ovl_override_creds(sb: file_inode(f: file)->i_sb); |
610 | ret = vfs_fallocate(file: real.file, mode, offset, len); |
611 | revert_creds(old_cred); |
612 | |
613 | /* Update size */ |
614 | ovl_file_modified(file); |
615 | |
616 | fdput(fd: real); |
617 | |
618 | out_unlock: |
619 | inode_unlock(inode); |
620 | |
621 | return ret; |
622 | } |
623 | |
624 | static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice) |
625 | { |
626 | struct fd real; |
627 | const struct cred *old_cred; |
628 | int ret; |
629 | |
630 | ret = ovl_real_fdget(file, real: &real); |
631 | if (ret) |
632 | return ret; |
633 | |
634 | old_cred = ovl_override_creds(sb: file_inode(f: file)->i_sb); |
635 | ret = vfs_fadvise(file: real.file, offset, len, advice); |
636 | revert_creds(old_cred); |
637 | |
638 | fdput(fd: real); |
639 | |
640 | return ret; |
641 | } |
642 | |
/* Operation selector for ovl_copyfile() */
enum ovl_copyop {
	OVL_COPY,	/* vfs_copy_file_range() */
	OVL_CLONE,	/* vfs_clone_file_range() */
	OVL_DEDUPE,	/* vfs_dedupe_file_range_one() */
};
648 | |
649 | static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in, |
650 | struct file *file_out, loff_t pos_out, |
651 | loff_t len, unsigned int flags, enum ovl_copyop op) |
652 | { |
653 | struct inode *inode_out = file_inode(f: file_out); |
654 | struct fd real_in, real_out; |
655 | const struct cred *old_cred; |
656 | loff_t ret; |
657 | |
658 | inode_lock(inode: inode_out); |
659 | if (op != OVL_DEDUPE) { |
660 | /* Update mode */ |
661 | ovl_copyattr(to: inode_out); |
662 | ret = file_remove_privs(file_out); |
663 | if (ret) |
664 | goto out_unlock; |
665 | } |
666 | |
667 | ret = ovl_real_fdget(file: file_out, real: &real_out); |
668 | if (ret) |
669 | goto out_unlock; |
670 | |
671 | ret = ovl_real_fdget(file: file_in, real: &real_in); |
672 | if (ret) { |
673 | fdput(fd: real_out); |
674 | goto out_unlock; |
675 | } |
676 | |
677 | old_cred = ovl_override_creds(sb: file_inode(f: file_out)->i_sb); |
678 | switch (op) { |
679 | case OVL_COPY: |
680 | ret = vfs_copy_file_range(real_in.file, pos_in, |
681 | real_out.file, pos_out, len, flags); |
682 | break; |
683 | |
684 | case OVL_CLONE: |
685 | ret = vfs_clone_file_range(file_in: real_in.file, pos_in, |
686 | file_out: real_out.file, pos_out, len, remap_flags: flags); |
687 | break; |
688 | |
689 | case OVL_DEDUPE: |
690 | ret = vfs_dedupe_file_range_one(src_file: real_in.file, src_pos: pos_in, |
691 | dst_file: real_out.file, dst_pos: pos_out, len, |
692 | remap_flags: flags); |
693 | break; |
694 | } |
695 | revert_creds(old_cred); |
696 | |
697 | /* Update size */ |
698 | ovl_file_modified(file: file_out); |
699 | |
700 | fdput(fd: real_in); |
701 | fdput(fd: real_out); |
702 | |
703 | out_unlock: |
704 | inode_unlock(inode: inode_out); |
705 | |
706 | return ret; |
707 | } |
708 | |
709 | static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in, |
710 | struct file *file_out, loff_t pos_out, |
711 | size_t len, unsigned int flags) |
712 | { |
713 | return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags, |
714 | op: OVL_COPY); |
715 | } |
716 | |
717 | static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in, |
718 | struct file *file_out, loff_t pos_out, |
719 | loff_t len, unsigned int remap_flags) |
720 | { |
721 | enum ovl_copyop op; |
722 | |
723 | if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) |
724 | return -EINVAL; |
725 | |
726 | if (remap_flags & REMAP_FILE_DEDUP) |
727 | op = OVL_DEDUPE; |
728 | else |
729 | op = OVL_CLONE; |
730 | |
731 | /* |
732 | * Don't copy up because of a dedupe request, this wouldn't make sense |
733 | * most of the time (data would be duplicated instead of deduplicated). |
734 | */ |
735 | if (op == OVL_DEDUPE && |
736 | (!ovl_inode_upper(inode: file_inode(f: file_in)) || |
737 | !ovl_inode_upper(inode: file_inode(f: file_out)))) |
738 | return -EPERM; |
739 | |
740 | return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, |
741 | flags: remap_flags, op); |
742 | } |
743 | |
744 | static int ovl_flush(struct file *file, fl_owner_t id) |
745 | { |
746 | struct fd real; |
747 | const struct cred *old_cred; |
748 | int err; |
749 | |
750 | err = ovl_real_fdget(file, real: &real); |
751 | if (err) |
752 | return err; |
753 | |
754 | if (real.file->f_op->flush) { |
755 | old_cred = ovl_override_creds(sb: file_inode(f: file)->i_sb); |
756 | err = real.file->f_op->flush(real.file, id); |
757 | revert_creds(old_cred); |
758 | } |
759 | fdput(fd: real); |
760 | |
761 | return err; |
762 | } |
763 | |
764 | const struct file_operations ovl_file_operations = { |
765 | .open = ovl_open, |
766 | .release = ovl_release, |
767 | .llseek = ovl_llseek, |
768 | .read_iter = ovl_read_iter, |
769 | .write_iter = ovl_write_iter, |
770 | .fsync = ovl_fsync, |
771 | .mmap = ovl_mmap, |
772 | .fallocate = ovl_fallocate, |
773 | .fadvise = ovl_fadvise, |
774 | .flush = ovl_flush, |
775 | .splice_read = ovl_splice_read, |
776 | .splice_write = ovl_splice_write, |
777 | |
778 | .copy_file_range = ovl_copy_file_range, |
779 | .remap_file_range = ovl_remap_file_range, |
780 | }; |
781 | |
782 | int __init ovl_aio_request_cache_init(void) |
783 | { |
784 | ovl_aio_request_cachep = kmem_cache_create(name: "ovl_aio_req" , |
785 | size: sizeof(struct ovl_aio_req), |
786 | align: 0, SLAB_HWCACHE_ALIGN, NULL); |
787 | if (!ovl_aio_request_cachep) |
788 | return -ENOMEM; |
789 | |
790 | return 0; |
791 | } |
792 | |
793 | void ovl_aio_request_cache_destroy(void) |
794 | { |
795 | kmem_cache_destroy(s: ovl_aio_request_cachep); |
796 | } |
797 | |