// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2023-2024 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_buf.h"
#include "xfs_buf_mem.h"
#include "xfs_trace.h"
#include <linux/shmem_fs.h>
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_error.h"

/*
 * Buffer Cache for In-Memory Files
 * ================================
 *
 * Online fsck wants to create ephemeral ordered recordsets. The existing
 * btree infrastructure can do this, but we need the buffer cache to target
 * memory instead of block devices.
 *
 * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
 * requirements. Therefore, the xmbuf mechanism uses an unlinked shmem file
 * to store our staging data. This file is not installed in the file
 * descriptor table so that user programs cannot access the data, which means
 * that the xmbuf must be freed with xmbuf_free.
 *
 * xmbufs assume that the caller will handle all required concurrency
 * management; standard vfs locks (freezer and inode) are not taken. Reads
 * and writes are satisfied directly from the page cache.
 *
 * The only supported block size is PAGE_SIZE, and we cannot use highmem.
 */
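
/*
 * A minimal lifecycle sketch (illustrative only; the caller and the
 * "scrub staging" description string are hypothetical):
 *
 *	struct xfs_buftarg	*btp;
 *	int			error;
 *
 *	error = xmbuf_alloc(mp, "scrub staging", &btp);
 *	if (error)
 *		return error;
 *
 *	// stage records through the regular buffer cache interfaces
 *	// (e.g. xfs_buf_get_map) against this in-memory target
 *
 *	xmbuf_free(btp);
 */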

/*
 * shmem files used to back an in-memory buffer cache must not be exposed to
 * userspace. Upper layers must coordinate access to the one handle returned
 * by the constructor, so establish a separate lock class for xmbufs to avoid
 * confusing lockdep.
 */
static struct lock_class_key xmbuf_i_mutex_key;

/*
 * Allocate a buffer cache target for a memory-backed file and set up the
 * buffer target.
 */
int
xmbuf_alloc(
	struct xfs_mount	*mp,
	const char		*descr,
	struct xfs_buftarg	**btpp)
{
	struct file		*file;
	struct inode		*inode;
	struct xfs_buftarg	*btp;
	int			error;

	btp = kzalloc(struct_size(btp, bt_cache, 1), GFP_KERNEL);
	if (!btp)
		return -ENOMEM;

	file = shmem_kernel_file_setup(descr, 0, 0);
	if (IS_ERR(file)) {
		error = PTR_ERR(file);
		goto out_free_btp;
	}
	inode = file_inode(file);

	/* private file, private locking */
	lockdep_set_class(&inode->i_rwsem, &xmbuf_i_mutex_key);

	/*
	 * We don't want to bother with kmapping data during repair, so don't
	 * allow highmem pages to back this mapping.
	 */
	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);

	/* ensure all writes are below EOF to avoid pagecache zeroing */
	i_size_write(inode, inode->i_sb->s_maxbytes);

	error = xfs_buf_cache_init(btp->bt_cache);
	if (error)
		goto out_file;

	/* Initialize buffer target */
	btp->bt_mount = mp;
	btp->bt_dev = (dev_t)-1U;
	btp->bt_bdev = NULL; /* in-memory buftargs have no bdev */
	btp->bt_file = file;
	btp->bt_meta_sectorsize = XMBUF_BLOCKSIZE;
	btp->bt_meta_sectormask = XMBUF_BLOCKSIZE - 1;

	error = xfs_init_buftarg(btp, XMBUF_BLOCKSIZE, descr);
	if (error)
		goto out_bcache;

	trace_xmbuf_create(btp);

	*btpp = btp;
	return 0;

out_bcache:
	xfs_buf_cache_destroy(btp->bt_cache);
out_file:
	fput(file);
out_free_btp:
	kfree(btp);
	return error;
}

/* Free a buffer cache target for a memory-backed buffer cache. */
void
xmbuf_free(
	struct xfs_buftarg	*btp)
{
	ASSERT(xfs_buftarg_is_mem(btp));
	ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);

	trace_xmbuf_free(btp);

	xfs_destroy_buftarg(btp);
	xfs_buf_cache_destroy(btp->bt_cache);
	fput(btp->bt_file);
	kfree(btp);
}

/* Directly map a shmem page into the buffer cache. */
int
xmbuf_map_page(
	struct xfs_buf		*bp)
{
	struct inode		*inode = file_inode(bp->b_target->bt_file);
	struct folio		*folio = NULL;
	struct page		*page;
	loff_t			pos = BBTOB(xfs_buf_daddr(bp));
	int			error;

	ASSERT(xfs_buftarg_is_mem(bp->b_target));

	if (bp->b_map_count != 1)
		return -ENOMEM;
	if (BBTOB(bp->b_length) != XMBUF_BLOCKSIZE)
		return -ENOMEM;
	if (offset_in_page(pos) != 0) {
		ASSERT(offset_in_page(pos) == 0);
		return -ENOMEM;
	}

	error = shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio, SGP_CACHE);
	if (error)
		return error;

	if (filemap_check_wb_err(inode->i_mapping, 0)) {
		folio_unlock(folio);
		folio_put(folio);
		return -EIO;
	}

	page = folio_file_page(folio, pos >> PAGE_SHIFT);

	/*
	 * Mark the page dirty so that it won't be reclaimed once we drop the
	 * (potentially last) reference in xmbuf_unmap_page.
	 */
	set_page_dirty(page);
	unlock_page(page);

	bp->b_addr = page_address(page);
	bp->b_pages = bp->b_page_array;
	bp->b_pages[0] = page;
	bp->b_page_count = 1;
	return 0;
}
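
/*
 * Worked example of the mapping arithmetic above (assuming 4k pages, so
 * XMBUF_BLOCKSIZE == PAGE_SIZE == 4096): a buffer at daddr 16 covers
 * 512-byte basic blocks 16-23, i.e. byte offset pos = BBTOB(16) = 8192.
 * That is page-aligned, so shmem_get_folio() is asked for page index
 * pos >> PAGE_SHIFT = 2 of the backing file. A buffer at daddr 17
 * (pos = 8704) would trip the offset_in_page() check and return -ENOMEM.
 */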

/* Unmap a shmem page that was mapped into the buffer cache. */
void
xmbuf_unmap_page(
	struct xfs_buf		*bp)
{
	struct page		*page = bp->b_pages[0];

	ASSERT(xfs_buftarg_is_mem(bp->b_target));

	put_page(page);

	bp->b_addr = NULL;
	bp->b_pages[0] = NULL;
	bp->b_pages = NULL;
	bp->b_page_count = 0;
}

/* Is this a valid daddr within the buftarg? */
bool
xmbuf_verify_daddr(
	struct xfs_buftarg	*btp,
	xfs_daddr_t		daddr)
{
	struct inode		*inode = file_inode(btp->bt_file);

	ASSERT(xfs_buftarg_is_mem(btp));

	return daddr < (inode->i_sb->s_maxbytes >> BBSHIFT);
}
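
/*
 * Example of the bound above: daddrs are 512-byte basic block numbers
 * (BBSHIFT == 9), so a hypothetical 1GiB s_maxbytes would permit daddrs
 * 0 through 2097151. Since shmem sets s_maxbytes to MAX_LFS_FILESIZE,
 * the check mostly rejects garbage daddrs rather than imposing a tight
 * size limit on the staging file.
 */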

/* Discard the page backing this buffer. */
static void
xmbuf_stale(
	struct xfs_buf		*bp)
{
	struct inode		*inode = file_inode(bp->b_target->bt_file);
	loff_t			pos;

	ASSERT(xfs_buftarg_is_mem(bp->b_target));

	pos = BBTOB(xfs_buf_daddr(bp));
	shmem_truncate_range(inode, pos, pos + BBTOB(bp->b_length) - 1);
}

/*
 * Finalize a buffer -- discard the backing page if it's stale, or run the
 * write verifier to detect problems.
 */
int
xmbuf_finalize(
	struct xfs_buf		*bp)
{
	xfs_failaddr_t		fa;
	int			error = 0;

	if (bp->b_flags & XBF_STALE) {
		xmbuf_stale(bp);
		return 0;
	}

	/*
	 * Although this btree is ephemeral, validate the buffer structure so
	 * that we can detect memory corruption errors and software bugs.
	 */
	fa = bp->b_ops->verify_struct(bp);
	if (fa) {
		error = -EFSCORRUPTED;
		xfs_verifier_error(bp, error, fa);
	}

	return error;
}

/*
 * Detach this xmbuf buffer from the transaction by any means necessary.
 * All buffers are direct-mapped, so they do not need bwrite.
 */
void
xmbuf_trans_bdetach(
	struct xfs_trans	*tp,
	struct xfs_buf		*bp)
{
	struct xfs_buf_log_item	*bli = bp->b_log_item;

	ASSERT(bli != NULL);

	bli->bli_flags &= ~(XFS_BLI_DIRTY | XFS_BLI_ORDERED |
			    XFS_BLI_LOGGED | XFS_BLI_STALE);
	clear_bit(XFS_LI_DIRTY, &bli->bli_item.li_flags);

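	/*
	 * Detaching may need to happen more than once before the log item
	 * lets go of the buffer, so loop until b_log_item is cleared.
	 */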
	while (bp->b_log_item != NULL)
		xfs_trans_bdetach(tp, bp);
}
