1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * linux/fs/ext2/file.c |
4 | * |
5 | * Copyright (C) 1992, 1993, 1994, 1995 |
6 | * Remy Card (card@masi.ibp.fr) |
7 | * Laboratoire MASI - Institut Blaise Pascal |
8 | * Universite Pierre et Marie Curie (Paris VI) |
9 | * |
10 | * from |
11 | * |
12 | * linux/fs/minix/file.c |
13 | * |
14 | * Copyright (C) 1991, 1992 Linus Torvalds |
15 | * |
16 | * ext2 fs regular file handling primitives |
17 | * |
18 | * 64-bit file support on 64-bit platforms by Jakub Jelinek |
19 | * (jj@sunsite.ms.mff.cuni.cz) |
20 | */ |
21 | |
22 | #include <linux/time.h> |
23 | #include <linux/pagemap.h> |
24 | #include <linux/dax.h> |
25 | #include <linux/quotaops.h> |
26 | #include <linux/iomap.h> |
27 | #include <linux/uio.h> |
28 | #include <linux/buffer_head.h> |
29 | #include "ext2.h" |
30 | #include "xattr.h" |
31 | #include "acl.h" |
32 | #include "trace.h" |
33 | |
34 | #ifdef CONFIG_FS_DAX |
35 | static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to) |
36 | { |
37 | struct inode *inode = iocb->ki_filp->f_mapping->host; |
38 | ssize_t ret; |
39 | |
40 | if (!iov_iter_count(i: to)) |
41 | return 0; /* skip atime */ |
42 | |
43 | inode_lock_shared(inode); |
44 | ret = dax_iomap_rw(iocb, iter: to, ops: &ext2_iomap_ops); |
45 | inode_unlock_shared(inode); |
46 | |
47 | file_accessed(file: iocb->ki_filp); |
48 | return ret; |
49 | } |
50 | |
51 | static ssize_t ext2_dax_write_iter(struct kiocb *iocb, struct iov_iter *from) |
52 | { |
53 | struct file *file = iocb->ki_filp; |
54 | struct inode *inode = file->f_mapping->host; |
55 | ssize_t ret; |
56 | |
57 | inode_lock(inode); |
58 | ret = generic_write_checks(iocb, from); |
59 | if (ret <= 0) |
60 | goto out_unlock; |
61 | ret = file_remove_privs(file); |
62 | if (ret) |
63 | goto out_unlock; |
64 | ret = file_update_time(file); |
65 | if (ret) |
66 | goto out_unlock; |
67 | |
68 | ret = dax_iomap_rw(iocb, iter: from, ops: &ext2_iomap_ops); |
69 | if (ret > 0 && iocb->ki_pos > i_size_read(inode)) { |
70 | i_size_write(inode, i_size: iocb->ki_pos); |
71 | mark_inode_dirty(inode); |
72 | } |
73 | |
74 | out_unlock: |
75 | inode_unlock(inode); |
76 | if (ret > 0) |
77 | ret = generic_write_sync(iocb, count: ret); |
78 | return ret; |
79 | } |
80 | |
81 | /* |
82 | * The lock ordering for ext2 DAX fault paths is: |
83 | * |
84 | * mmap_lock (MM) |
85 | * sb_start_pagefault (vfs, freeze) |
86 | * address_space->invalidate_lock |
87 | * address_space->i_mmap_rwsem or page_lock (mutually exclusive in DAX) |
88 | * ext2_inode_info->truncate_mutex |
89 | * |
90 | * The default page_lock and i_size verification done by non-DAX fault paths |
91 | * is sufficient because ext2 doesn't support hole punching. |
92 | */ |
93 | static vm_fault_t ext2_dax_fault(struct vm_fault *vmf) |
94 | { |
95 | struct inode *inode = file_inode(f: vmf->vma->vm_file); |
96 | vm_fault_t ret; |
97 | bool write = (vmf->flags & FAULT_FLAG_WRITE) && |
98 | (vmf->vma->vm_flags & VM_SHARED); |
99 | |
100 | if (write) { |
101 | sb_start_pagefault(sb: inode->i_sb); |
102 | file_update_time(file: vmf->vma->vm_file); |
103 | } |
104 | filemap_invalidate_lock_shared(mapping: inode->i_mapping); |
105 | |
106 | ret = dax_iomap_fault(vmf, order: 0, NULL, NULL, ops: &ext2_iomap_ops); |
107 | |
108 | filemap_invalidate_unlock_shared(mapping: inode->i_mapping); |
109 | if (write) |
110 | sb_end_pagefault(sb: inode->i_sb); |
111 | return ret; |
112 | } |
113 | |
114 | static const struct vm_operations_struct ext2_dax_vm_ops = { |
115 | .fault = ext2_dax_fault, |
116 | /* |
117 | * .huge_fault is not supported for DAX because allocation in ext2 |
118 | * cannot be reliably aligned to huge page sizes and so pmd faults |
119 | * will always fail and fail back to regular faults. |
120 | */ |
121 | .page_mkwrite = ext2_dax_fault, |
122 | .pfn_mkwrite = ext2_dax_fault, |
123 | }; |
124 | |
125 | static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma) |
126 | { |
127 | if (!IS_DAX(file_inode(file))) |
128 | return generic_file_mmap(file, vma); |
129 | |
130 | file_accessed(file); |
131 | vma->vm_ops = &ext2_dax_vm_ops; |
132 | return 0; |
133 | } |
134 | #else |
135 | #define ext2_file_mmap generic_file_mmap |
136 | #endif |
137 | |
138 | /* |
139 | * Called when filp is released. This happens when all file descriptors |
140 | * for a single struct file are closed. Note that different open() calls |
141 | * for the same file yield different struct file structures. |
142 | */ |
143 | static int ext2_release_file (struct inode * inode, struct file * filp) |
144 | { |
145 | if (filp->f_mode & FMODE_WRITE) { |
146 | mutex_lock(&EXT2_I(inode)->truncate_mutex); |
147 | ext2_discard_reservation(inode); |
148 | mutex_unlock(lock: &EXT2_I(inode)->truncate_mutex); |
149 | } |
150 | return 0; |
151 | } |
152 | |
153 | int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync) |
154 | { |
155 | int ret; |
156 | struct super_block *sb = file->f_mapping->host->i_sb; |
157 | |
158 | ret = generic_buffers_fsync(file, start, end, datasync); |
159 | if (ret == -EIO) |
160 | /* We don't really know where the IO error happened... */ |
161 | ext2_error(sb, __func__, |
162 | "detected IO error when writing metadata buffers" ); |
163 | return ret; |
164 | } |
165 | |
166 | static ssize_t ext2_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) |
167 | { |
168 | struct file *file = iocb->ki_filp; |
169 | struct inode *inode = file->f_mapping->host; |
170 | ssize_t ret; |
171 | |
172 | trace_ext2_dio_read_begin(iocb, iter: to, ret: 0); |
173 | inode_lock_shared(inode); |
174 | ret = iomap_dio_rw(iocb, iter: to, ops: &ext2_iomap_ops, NULL, dio_flags: 0, NULL, done_before: 0); |
175 | inode_unlock_shared(inode); |
176 | trace_ext2_dio_read_end(iocb, iter: to, ret); |
177 | |
178 | return ret; |
179 | } |
180 | |
181 | static int ext2_dio_write_end_io(struct kiocb *iocb, ssize_t size, |
182 | int error, unsigned int flags) |
183 | { |
184 | loff_t pos = iocb->ki_pos; |
185 | struct inode *inode = file_inode(f: iocb->ki_filp); |
186 | |
187 | if (error) |
188 | goto out; |
189 | |
190 | /* |
191 | * If we are extending the file, we have to update i_size here before |
192 | * page cache gets invalidated in iomap_dio_rw(). This prevents racing |
193 | * buffered reads from zeroing out too much from page cache pages. |
194 | * Note that all extending writes always happens synchronously with |
195 | * inode lock held by ext2_dio_write_iter(). So it is safe to update |
196 | * inode size here for extending file writes. |
197 | */ |
198 | pos += size; |
199 | if (pos > i_size_read(inode)) { |
200 | i_size_write(inode, i_size: pos); |
201 | mark_inode_dirty(inode); |
202 | } |
203 | out: |
204 | trace_ext2_dio_write_endio(iocb, size, ret: error); |
205 | return error; |
206 | } |
207 | |
208 | static const struct iomap_dio_ops ext2_dio_write_ops = { |
209 | .end_io = ext2_dio_write_end_io, |
210 | }; |
211 | |
212 | static ssize_t ext2_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) |
213 | { |
214 | struct file *file = iocb->ki_filp; |
215 | struct inode *inode = file->f_mapping->host; |
216 | ssize_t ret; |
217 | unsigned int flags = 0; |
218 | unsigned long blocksize = inode->i_sb->s_blocksize; |
219 | loff_t offset = iocb->ki_pos; |
220 | loff_t count = iov_iter_count(i: from); |
221 | ssize_t status = 0; |
222 | |
223 | trace_ext2_dio_write_begin(iocb, iter: from, ret: 0); |
224 | inode_lock(inode); |
225 | ret = generic_write_checks(iocb, from); |
226 | if (ret <= 0) |
227 | goto out_unlock; |
228 | |
229 | ret = kiocb_modified(iocb); |
230 | if (ret) |
231 | goto out_unlock; |
232 | |
233 | /* use IOMAP_DIO_FORCE_WAIT for unaligned or extending writes */ |
234 | if (iocb->ki_pos + iov_iter_count(i: from) > i_size_read(inode) || |
235 | (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(from), blocksize))) |
236 | flags |= IOMAP_DIO_FORCE_WAIT; |
237 | |
238 | ret = iomap_dio_rw(iocb, iter: from, ops: &ext2_iomap_ops, dops: &ext2_dio_write_ops, |
239 | dio_flags: flags, NULL, done_before: 0); |
240 | |
241 | /* ENOTBLK is magic return value for fallback to buffered-io */ |
242 | if (ret == -ENOTBLK) |
243 | ret = 0; |
244 | |
245 | if (ret < 0 && ret != -EIOCBQUEUED) |
246 | ext2_write_failed(mapping: inode->i_mapping, to: offset + count); |
247 | |
248 | /* handle case for partial write and for fallback to buffered write */ |
249 | if (ret >= 0 && iov_iter_count(i: from)) { |
250 | loff_t pos, endbyte; |
251 | int ret2; |
252 | |
253 | iocb->ki_flags &= ~IOCB_DIRECT; |
254 | pos = iocb->ki_pos; |
255 | status = generic_perform_write(iocb, from); |
256 | if (unlikely(status < 0)) { |
257 | ret = status; |
258 | goto out_unlock; |
259 | } |
260 | |
261 | iocb->ki_pos += status; |
262 | ret += status; |
263 | endbyte = pos + status - 1; |
264 | ret2 = filemap_write_and_wait_range(mapping: inode->i_mapping, lstart: pos, |
265 | lend: endbyte); |
266 | if (!ret2) |
267 | invalidate_mapping_pages(mapping: inode->i_mapping, |
268 | start: pos >> PAGE_SHIFT, |
269 | end: endbyte >> PAGE_SHIFT); |
270 | if (ret > 0) |
271 | generic_write_sync(iocb, count: ret); |
272 | } |
273 | |
274 | out_unlock: |
275 | inode_unlock(inode); |
276 | if (status) |
277 | trace_ext2_dio_write_buff_end(iocb, iter: from, ret: status); |
278 | trace_ext2_dio_write_end(iocb, iter: from, ret); |
279 | return ret; |
280 | } |
281 | |
282 | static ssize_t ext2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) |
283 | { |
284 | #ifdef CONFIG_FS_DAX |
285 | if (IS_DAX(iocb->ki_filp->f_mapping->host)) |
286 | return ext2_dax_read_iter(iocb, to); |
287 | #endif |
288 | if (iocb->ki_flags & IOCB_DIRECT) |
289 | return ext2_dio_read_iter(iocb, to); |
290 | |
291 | return generic_file_read_iter(iocb, to); |
292 | } |
293 | |
294 | static ssize_t ext2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) |
295 | { |
296 | #ifdef CONFIG_FS_DAX |
297 | if (IS_DAX(iocb->ki_filp->f_mapping->host)) |
298 | return ext2_dax_write_iter(iocb, from); |
299 | #endif |
300 | if (iocb->ki_flags & IOCB_DIRECT) |
301 | return ext2_dio_write_iter(iocb, from); |
302 | |
303 | return generic_file_write_iter(iocb, from); |
304 | } |
305 | |
306 | const struct file_operations ext2_file_operations = { |
307 | .llseek = generic_file_llseek, |
308 | .read_iter = ext2_file_read_iter, |
309 | .write_iter = ext2_file_write_iter, |
310 | .unlocked_ioctl = ext2_ioctl, |
311 | #ifdef CONFIG_COMPAT |
312 | .compat_ioctl = ext2_compat_ioctl, |
313 | #endif |
314 | .mmap = ext2_file_mmap, |
315 | .open = dquot_file_open, |
316 | .release = ext2_release_file, |
317 | .fsync = ext2_fsync, |
318 | .get_unmapped_area = thp_get_unmapped_area, |
319 | .splice_read = filemap_splice_read, |
320 | .splice_write = iter_file_splice_write, |
321 | }; |
322 | |
323 | const struct inode_operations ext2_file_inode_operations = { |
324 | .listxattr = ext2_listxattr, |
325 | .getattr = ext2_getattr, |
326 | .setattr = ext2_setattr, |
327 | .get_inode_acl = ext2_get_acl, |
328 | .set_acl = ext2_set_acl, |
329 | .fiemap = ext2_fiemap, |
330 | .fileattr_get = ext2_fileattr_get, |
331 | .fileattr_set = ext2_fileattr_set, |
332 | }; |
333 | |