// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2023 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/scrub.h"
#include "scrub/trace.h"
#include <linux/shmem_fs.h>

/*
 * Swappable Temporary Memory
 * ==========================
 *
 * Online checking sometimes needs to be able to stage a large amount of data
 * in memory. This information might not fit in the available memory and it
 * doesn't all need to be accessible at all times. In other words, we want an
 * indexed data buffer to store data that can be paged out.
 *
 * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
 * requirements. Therefore, the xfile mechanism uses an unlinked shmem file to
 * store our staging data. This file is not installed in the file descriptor
 * table so that user programs cannot access the data, which means that the
 * xfile must be freed with xfile_destroy.
 *
 * xfiles assume that the caller will handle all required concurrency
 * management; standard vfs locks (freezer and inode) are not taken. Reads
 * and writes are satisfied directly from the page cache.
 */

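/*
 * Example (illustrative sketch, not compiled): a typical xfile lifecycle
 * stages records in the unlinked shmem file and tears everything down with
 * xfile_destroy.  "struct xexample" and the record offset used here are
 * hypothetical and exist only to demonstrate the API below.
 */
#if 0
struct xexample {
	u64			key;
	u64			value;
};

static int
xexample_roundtrip(void)
{
	struct xexample		rec = { .key = 1, .value = 42 };
	struct xfile		*xf;
	int			error;

	error = xfile_create("example records", 0, &xf);
	if (error)
		return error;

	/* Stage the record at an arbitrary byte offset... */
	error = xfile_store(xf, &rec, sizeof(rec), 3 * sizeof(rec));
	if (error)
		goto out;

	/* ...and read it back from the same position. */
	error = xfile_load(xf, &rec, sizeof(rec), 3 * sizeof(rec));
out:
	xfile_destroy(xf);
	return error;
}
#endif
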
/*
 * xfiles must not be exposed to userspace and require upper layers to
 * coordinate access to the one handle returned by the constructor, so
 * establish a separate lock class for xfiles to avoid confusing lockdep.
 */
static struct lock_class_key xfile_i_mutex_key;

/*
 * Create an xfile of the given size. The description will be used in the
 * trace output.
 */
int
xfile_create(
	const char		*description,
	loff_t			isize,
	struct xfile		**xfilep)
{
	struct inode		*inode;
	struct xfile		*xf;
	int			error;

	xf = kmalloc(sizeof(struct xfile), XCHK_GFP_FLAGS);
	if (!xf)
		return -ENOMEM;

	xf->file = shmem_kernel_file_setup(description, isize, VM_NORESERVE);
	if (IS_ERR(xf->file)) {
		error = PTR_ERR(xf->file);
		goto out_xfile;
	}

	inode = file_inode(xf->file);
	lockdep_set_class(&inode->i_rwsem, &xfile_i_mutex_key);

	/*
	 * We don't want to bother with kmapping data during repair, so don't
	 * allow highmem pages to back this mapping.
	 */
	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);

	trace_xfile_create(xf);

	*xfilep = xf;
	return 0;
out_xfile:
	kfree(xf);
	return error;
}

/* Close the file and release all resources. */
void
xfile_destroy(
	struct xfile		*xf)
{
	struct inode		*inode = file_inode(xf->file);

	trace_xfile_destroy(xf);

	lockdep_set_class(&inode->i_rwsem, &inode->i_sb->s_type->i_mutex_key);
	fput(xf->file);
	kfree(xf);
}

/*
 * Load an object. Since we're treating this file as "memory", any error or
 * short IO is treated as a failure to allocate memory.
 */
int
xfile_load(
	struct xfile		*xf,
	void			*buf,
	size_t			count,
	loff_t			pos)
{
	struct inode		*inode = file_inode(xf->file);
	unsigned int		pflags;

	if (count > MAX_RW_COUNT)
		return -ENOMEM;
	if (inode->i_sb->s_maxbytes - pos < count)
		return -ENOMEM;

	trace_xfile_load(xf, pos, count);

	pflags = memalloc_nofs_save();
	while (count > 0) {
		struct folio	*folio;
		unsigned int	len;
		unsigned int	offset;

		if (shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio,
				SGP_READ) < 0)
			break;
		if (!folio) {
			/*
			 * No data stored at this offset, just zero the output
			 * buffer until the next page boundary.
			 */
			len = min_t(ssize_t, count,
					PAGE_SIZE - offset_in_page(pos));
			memset(buf, 0, len);
		} else {
			if (filemap_check_wb_err(inode->i_mapping, 0)) {
				folio_unlock(folio);
				folio_put(folio);
				break;
			}

			offset = offset_in_folio(folio, pos);
			len = min_t(ssize_t, count, folio_size(folio) - offset);
			memcpy(buf, folio_address(folio) + offset, len);

			folio_unlock(folio);
			folio_put(folio);
		}
		count -= len;
		pos += len;
		buf += len;
	}
	memalloc_nofs_restore(pflags);

	if (count)
		return -ENOMEM;
	return 0;
}

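/*
 * Example (sketch): because loads use SGP_READ, reading a range that was
 * never stored does not allocate folios; the caller's buffer is simply
 * zero-filled.  The 64k offset here is hypothetical.
 */
#if 0
static int
xexample_read_hole(struct xfile *xf)
{
	char			buf[16];

	/* Nothing was ever stored at 64k, so buf comes back all zeroes. */
	return xfile_load(xf, buf, sizeof(buf), 65536);
}
#endif
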
/*
 * Store an object. Since we're treating this file as "memory", any error or
 * short IO is treated as a failure to allocate memory.
 */
int
xfile_store(
	struct xfile		*xf,
	const void		*buf,
	size_t			count,
	loff_t			pos)
{
	struct inode		*inode = file_inode(xf->file);
	unsigned int		pflags;

	if (count > MAX_RW_COUNT)
		return -ENOMEM;
	if (inode->i_sb->s_maxbytes - pos < count)
		return -ENOMEM;

	trace_xfile_store(xf, pos, count);

	/*
	 * Increase the file size first so that shmem_get_folio(..., SGP_CACHE)
	 * actually allocates a folio instead of erroring out.
	 */
	if (pos + count > i_size_read(inode))
		i_size_write(inode, pos + count);

	pflags = memalloc_nofs_save();
	while (count > 0) {
		struct folio	*folio;
		unsigned int	len;
		unsigned int	offset;

		if (shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio,
				SGP_CACHE) < 0)
			break;
		if (filemap_check_wb_err(inode->i_mapping, 0)) {
			folio_unlock(folio);
			folio_put(folio);
			break;
		}

		offset = offset_in_folio(folio, pos);
		len = min_t(ssize_t, count, folio_size(folio) - offset);
		memcpy(folio_address(folio) + offset, buf, len);

		folio_mark_dirty(folio);
		folio_unlock(folio);
		folio_put(folio);

		count -= len;
		pos += len;
		buf += len;
	}
	memalloc_nofs_restore(pflags);

	if (count)
		return -ENOMEM;
	return 0;
}

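/*
 * Example (sketch): storing past EOF grows i_size first, and only the
 * folios backing the written range are allocated, so the xfile stays
 * sparse.  The 1G offset is hypothetical.
 */
#if 0
static int
xexample_sparse_store(struct xfile *xf)
{
	u64			v = 7;

	/* Allocates folio(s) around offset 1G; everything else is a hole. */
	return xfile_store(xf, &v, sizeof(v), 1ULL << 30);
}
#endif
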
/* Find the next written area in the xfile data for a given offset. */
loff_t
xfile_seek_data(
	struct xfile		*xf,
	loff_t			pos)
{
	loff_t			ret;

	ret = vfs_llseek(xf->file, pos, SEEK_DATA);
	trace_xfile_seek_data(xf, pos, ret);
	return ret;
}

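/*
 * Example (sketch): walking the written regions of a sparse xfile.  A
 * negative return value (e.g. -ENXIO past the last written byte) ends the
 * walk; the page-sized stride is a simplification.
 */
#if 0
static void
xexample_walk_data(struct xfile *xf)
{
	loff_t			pos = 0;

	while ((pos = xfile_seek_data(xf, pos)) >= 0) {
		/* ...process the data at pos... */
		pos += PAGE_SIZE;
	}
}
#endif
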
/*
 * Grab the (locked) folio for a memory object. The object cannot span a folio
 * boundary. Returns the locked folio if successful, NULL if there was no
 * folio or it didn't cover the range requested, or an ERR_PTR on failure.
 */
struct folio *
xfile_get_folio(
	struct xfile		*xf,
	loff_t			pos,
	size_t			len,
	unsigned int		flags)
{
	struct inode		*inode = file_inode(xf->file);
	struct folio		*folio = NULL;
	unsigned int		pflags;
	int			error;

	if (inode->i_sb->s_maxbytes - pos < len)
		return ERR_PTR(-ENOMEM);

	trace_xfile_get_folio(xf, pos, len);

	/*
	 * Increase the file size first so that shmem_get_folio(..., SGP_CACHE)
	 * actually allocates a folio instead of erroring out.
	 */
	if ((flags & XFILE_ALLOC) && pos + len > i_size_read(inode))
		i_size_write(inode, pos + len);

	pflags = memalloc_nofs_save();
	error = shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio,
			(flags & XFILE_ALLOC) ? SGP_CACHE : SGP_READ);
	memalloc_nofs_restore(pflags);
	if (error)
		return ERR_PTR(error);

	if (!folio)
		return NULL;

	if (len > folio_size(folio) - offset_in_folio(folio, pos)) {
		folio_unlock(folio);
		folio_put(folio);
		return NULL;
	}

	if (filemap_check_wb_err(inode->i_mapping, 0)) {
		folio_unlock(folio);
		folio_put(folio);
		return ERR_PTR(-EIO);
	}

	/*
	 * Mark the folio dirty so that it won't be reclaimed once we drop the
	 * (potentially last) reference in xfile_put_folio.
	 */
	if (flags & XFILE_ALLOC)
		folio_set_dirty(folio);
	return folio;
}

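/*
 * Example (sketch): updating an object in place instead of copying it
 * through xfile_load/xfile_store.  The counter object and its (naturally
 * aligned) offset are hypothetical; the object must not span a folio
 * boundary, and the folio stays locked until xfile_put_folio.
 */
#if 0
static int
xexample_bump_counter(struct xfile *xf, loff_t pos)
{
	struct folio		*folio;
	u64			*p;

	folio = xfile_get_folio(xf, pos, sizeof(*p), XFILE_ALLOC);
	if (IS_ERR_OR_NULL(folio))
		return folio ? PTR_ERR(folio) : -ENOMEM;

	p = folio_address(folio) + offset_in_folio(folio, pos);
	(*p)++;		/* modify the object directly in the page cache */

	xfile_put_folio(xf, folio);
	return 0;
}
#endif
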
/*
 * Release the (locked) folio for a memory object.
 */
void
xfile_put_folio(
	struct xfile		*xf,
	struct folio		*folio)
{
	trace_xfile_put_folio(xf, folio_pos(folio), folio_size(folio));

	folio_unlock(folio);
	folio_put(folio);
}