brd.c source code [linux/drivers/block/brd.c]

1	// SPDX-License-Identifier: GPL-2.0-only
2	/*
3	* Ram backed block device driver.
4	*
5	* Copyright (C) 2007 Nick Piggin
6	* Copyright (C) 2007 Novell Inc.
7	*
8	* Parts derived from drivers/block/rd.c, and drivers/block/loop.c, copyright
9	* of their respective owners.
10	*/
11
12	#include <linux/init.h>
13	#include <linux/initrd.h>
14	#include <linux/module.h>
15	#include <linux/moduleparam.h>
16	#include <linux/major.h>
17	#include <linux/blkdev.h>
18	#include <linux/bio.h>
19	#include <linux/highmem.h>
20	#include <linux/mutex.h>
21	#include <linux/pagemap.h>
22	#include <linux/xarray.h>
23	#include <linux/fs.h>
24	#include <linux/slab.h>
25	#include <linux/backing-dev.h>
26	#include <linux/debugfs.h>
27
28	#include <linux/uaccess.h>
29
30	/*
31	* Each block ramdisk device has a xarray brd_pages of pages that stores
32	* the pages containing the block device's contents. A brd page's ->index is
33	* its offset in PAGE_SIZE units. This is similar to, but in no way connected
34	* with, the kernel's pagecache or buffer cache (which sit above our block
35	* device).
36	*/
37	struct brd_device {
38	int brd_number;
39	struct gendisk *brd_disk;
40	struct list_head brd_list;
41
42	/*
43	* Backing store of pages. This is the contents of the block device.
44	*/
45	struct xarray brd_pages;
46	u64 brd_nr_pages;
47	};
48
49	/*
50	* Look up and return a brd's page for a given sector.
51	*/
52	static struct page brd_lookup_page(struct* brd_device *brd, sector_t sector)
53	{
54	pgoff_t idx;
55	struct page *page;
56
57	idx = sector >> PAGE_SECTORS_SHIFT; / sector to page index /
58	page = xa_load(&brd->brd_pages, index: idx);
59
60	BUG_ON(page && page->index != idx);
61
62	return page;
63	}
64
65	/*
66	* Insert a new page for a given sector, if one does not already exist.
67	*/
68	static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp)
69	{
70	pgoff_t idx;
71	struct page page, cur;
72	int ret = `0`;
73
74	page = brd_lookup_page(brd, sector);
75	if (page)
76	return `0`;
77
78	page = alloc_page(gfp \| __GFP_ZERO \| __GFP_HIGHMEM);
79	if (!page)
80	return -ENOMEM;
81
82	xa_lock(&brd->brd_pages);
83
84	idx = sector >> PAGE_SECTORS_SHIFT;
85	page->index = idx;
86
87	cur = __xa_cmpxchg(&brd->brd_pages, index: idx, NULL, entry: page, gfp);
88
89	if (unlikely(cur)) {
90	__free_page(page);
91	ret = xa_err(entry: cur);
92	if (!ret && (cur->index != idx))
93	ret = -EIO;
94	} else {
95	brd->brd_nr_pages++;
96	}
97
98	xa_unlock(&brd->brd_pages);
99
100	return ret;
101	}
102
103	/*
104	* Free all backing store pages and xarray. This must only be called when
105	* there are no other users of the device.
106	*/
107	static void brd_free_pages(struct brd_device *brd)
108	{
109	struct page *page;
110	pgoff_t idx;
111
112	xa_for_each(&brd->brd_pages, idx, page) {
113	__free_page(page);
114	cond_resched();
115	}
116
117	xa_destroy(&brd->brd_pages);
118	}
119
120	/*
121	* copy_to_brd_setup must be called before copy_to_brd. It may sleep.
122	*/
123	static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n,
124	gfp_t gfp)
125	{
126	unsigned int offset = (sector & (PAGE_SECTORS-`1`)) << SECTOR_SHIFT;
127	size_t copy;
128	int ret;
129
130	copy = min_t(size_t, n, PAGE_SIZE - offset);
131	ret = brd_insert_page(brd, sector, gfp);
132	if (ret)
133	return ret;
134	if (copy < n) {
135	sector += copy >> SECTOR_SHIFT;
136	ret = brd_insert_page(brd, sector, gfp);
137	}
138	return ret;
139	}
140
141	/*
142	* Copy n bytes from src to the brd starting at sector. Does not sleep.
143	*/
144	static void copy_to_brd(struct brd_device brd, const* void *src,
145	sector_t sector, size_t n)
146	{
147	struct page *page;
148	void *dst;
149	unsigned int offset = (sector & (PAGE_SECTORS-`1`)) << SECTOR_SHIFT;
150	size_t copy;
151
152	copy = min_t(size_t, n, PAGE_SIZE - offset);
153	page = brd_lookup_page(brd, sector);
154	BUG_ON(!page);
155
156	dst = kmap_atomic(page);
157	memcpy(dst + offset, src, copy);
158	kunmap_atomic(dst);
159
160	if (copy < n) {
161	src += copy;
162	sector += copy >> SECTOR_SHIFT;
163	copy = n - copy;
164	page = brd_lookup_page(brd, sector);
165	BUG_ON(!page);
166
167	dst = kmap_atomic(page);
168	memcpy(dst, src, copy);
169	kunmap_atomic(dst);
170	}
171	}
172
173	/*
174	* Copy n bytes to dst from the brd starting at sector. Does not sleep.
175	*/
176	static void copy_from_brd(void dst, struct* brd_device *brd,
177	sector_t sector, size_t n)
178	{
179	struct page *page;
180	void *src;
181	unsigned int offset = (sector & (PAGE_SECTORS-`1`)) << SECTOR_SHIFT;
182	size_t copy;
183
184	copy = min_t(size_t, n, PAGE_SIZE - offset);
185	page = brd_lookup_page(brd, sector);
186	if (page) {
187	src = kmap_atomic(page);
188	memcpy(dst, src + offset, copy);
189	kunmap_atomic(src);
190	} else
191	memset(dst, `0`, copy);
192
193	if (copy < n) {
194	dst += copy;
195	sector += copy >> SECTOR_SHIFT;
196	copy = n - copy;
197	page = brd_lookup_page(brd, sector);
198	if (page) {
199	src = kmap_atomic(page);
200	memcpy(dst, src, copy);
201	kunmap_atomic(src);
202	} else
203	memset(dst, `0`, copy);
204	}
205	}
206
207	/*
208	* Process a single bvec of a bio.
209	*/
210	static int brd_do_bvec(struct brd_device brd, struct* page *page,
211	unsigned int len, unsigned int off, blk_opf_t opf,
212	sector_t sector)
213	{
214	void *mem;
215	int err = `0`;
216
217	if (op_is_write(op: opf)) {
218	/*
219	* Must use NOIO because we don't want to recurse back into the
220	* block or filesystem layers from page reclaim.
221	*/
222	gfp_t gfp = opf & REQ_NOWAIT ? GFP_NOWAIT : GFP_NOIO;
223
224	err = copy_to_brd_setup(brd, sector, n: len, gfp);
225	if (err)
226	goto out;
227	}
228
229	mem = kmap_atomic(page);
230	if (!op_is_write(op: opf)) {
231	copy_from_brd(dst: mem + off, brd, sector, n: len);
232	flush_dcache_page(page);
233	} else {
234	flush_dcache_page(page);
235	copy_to_brd(brd, src: mem + off, sector, n: len);
236	}
237	kunmap_atomic(mem);
238
239	out:
240	return err;
241	}
242
243	static void brd_submit_bio(struct bio *bio)
244	{
245	struct brd_device *brd = bio->bi_bdev->bd_disk->private_data;
246	sector_t sector = bio->bi_iter.bi_sector;
247	struct bio_vec bvec;
248	struct bvec_iter iter;
249
250	bio_for_each_segment(bvec, bio, iter) {
251	unsigned int len = bvec.bv_len;
252	int err;
253
254	/ Don't support un-aligned buffer /
255	WARN_ON_ONCE((bvec.bv_offset & (SECTOR_SIZE - `1`)) \|\|
256	(len & (SECTOR_SIZE - `1`)));
257
258	err = brd_do_bvec(brd, page: bvec.bv_page, len, off: bvec.bv_offset,
259	opf: bio->bi_opf, sector);
260	if (err) {
261	if (err == -ENOMEM && bio->bi_opf & REQ_NOWAIT) {
262	bio_wouldblock_error(bio);
263	return;
264	}
265	bio_io_error(bio);
266	return;
267	}
268	sector += len >> SECTOR_SHIFT;
269	}
270
271	bio_endio(bio);
272	}
273
274	static const struct block_device_operations brd_fops = {
275	.owner = THIS_MODULE,
276	.submit_bio = brd_submit_bio,
277	};
278
279	/*
280	* And now the modules code and kernel interface.
281	*/
282	static int rd_nr = CONFIG_BLK_DEV_RAM_COUNT;
283	module_param(rd_nr, int, `0444`);
284	MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");
285
286	unsigned long rd_size = CONFIG_BLK_DEV_RAM_SIZE;
287	module_param(rd_size, ulong, `0444`);
288	MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
289
290	static int max_part = `1`;
291	module_param(max_part, int, `0444`);
292	MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
293
294	MODULE_LICENSE("GPL");
295	MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);
296	MODULE_ALIAS("rd");
297
298	#ifndef MODULE
299	/ Legacy boot options - nonmodular /
300	static int __init ramdisk_size(char *str)
301	{
302	rd_size = simple_strtol(str, NULL, `0`);
303	return `1`;
304	}
305	__setup("ramdisk_size=", ramdisk_size);
306	#endif
307
/*
 * The device scheme is derived from loop.c. Keep them in sync where possible
 * (should share code eventually).
 */
/* All allocated brd devices, linked through brd_device.brd_list. */
static LIST_HEAD(brd_devices);
/* debugfs directory created in brd_init(); holds one page counter per device. */
static struct dentry *brd_debugfs_dir;
314
315	static int brd_alloc(int i)
316	{
317	struct brd_device *brd;
318	struct gendisk *disk;
319	char buf[DISK_NAME_LEN];
320	int err = -ENOMEM;
321	struct queue_limits lim = {
322	/*
323	* This is so fdisk will align partitions on 4k, because of
324	* direct_access API needing 4k alignment, returning a PFN
325	* (This is only a problem on very small devices <= 4M,
326	* otherwise fdisk will align on 1M. Regardless this call
327	* is harmless)
328	*/
329	.physical_block_size = PAGE_SIZE,
330	};
331
332	list_for_each_entry(brd, &brd_devices, brd_list)
333	if (brd->brd_number == i)
334	return -EEXIST;
335	brd = kzalloc(size: sizeof(*brd), GFP_KERNEL);
336	if (!brd)
337	return -ENOMEM;
338	brd->brd_number = i;
339	list_add_tail(new: &brd->brd_list, head: &brd_devices);
340
341	xa_init(xa: &brd->brd_pages);
342
343	snprintf(buf, DISK_NAME_LEN, fmt: "ram%d", i);
344	if (!IS_ERR_OR_NULL(ptr: brd_debugfs_dir))
345	debugfs_create_u64(name: buf, mode: `0444`, parent: brd_debugfs_dir,
346	value: &brd->brd_nr_pages);
347
348	disk = brd->brd_disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
349	if (IS_ERR(ptr: disk)) {
350	err = PTR_ERR(ptr: disk);
351	goto out_free_dev;
352	}
353	disk->major = RAMDISK_MAJOR;
354	disk->first_minor = i * max_part;
355	disk->minors = max_part;
356	disk->fops = &brd_fops;
357	disk->private_data = brd;
358	strscpy(disk->disk_name, buf, DISK_NAME_LEN);
359	set_capacity(disk, size: rd_size * `2`);
360
361	/ Tell the block layer that this is not a rotational device /
362	blk_queue_flag_set(QUEUE_FLAG_NONROT, q: disk->queue);
363	blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, q: disk->queue);
364	blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q: disk->queue);
365	err = add_disk(disk);
366	if (err)
367	goto out_cleanup_disk;
368
369	return `0`;
370
371	out_cleanup_disk:
372	put_disk(disk);
373	out_free_dev:
374	list_del(entry: &brd->brd_list);
375	kfree(objp: brd);
376	return err;
377	}
378
379	static void brd_probe(dev_t dev)
380	{
381	brd_alloc(MINOR(dev) / max_part);
382	}
383
384	static void brd_cleanup(void)
385	{
386	struct brd_device brd, next;
387
388	debugfs_remove_recursive(dentry: brd_debugfs_dir);
389
390	list_for_each_entry_safe(brd, next, &brd_devices, brd_list) {
391	del_gendisk(gp: brd->brd_disk);
392	put_disk(disk: brd->brd_disk);
393	brd_free_pages(brd);
394	list_del(entry: &brd->brd_list);
395	kfree(objp: brd);
396	}
397	}
398
399	static inline void brd_check_and_reset_par(void)
400	{
401	if (unlikely(!max_part))
402	max_part = `1`;
403
404	/*
405	* make sure 'max_part' can be divided exactly by (1U << MINORBITS),
406	* otherwise, it is possiable to get same dev_t when adding partitions.
407	*/
408	if ((`1U` << MINORBITS) % max_part != `0`)
409	max_part = `1UL` << fls(x: max_part);
410
411	if (max_part > DISK_MAX_PARTS) {
412	pr_info("brd: max_part can't be larger than %d, reset max_part = %d.\n",
413	DISK_MAX_PARTS, DISK_MAX_PARTS);
414	max_part = DISK_MAX_PARTS;
415	}
416	}
417
418	static int __init brd_init(void)
419	{
420	int err, i;
421
422	brd_check_and_reset_par();
423
424	brd_debugfs_dir = debugfs_create_dir(name: "ramdisk_pages", NULL);
425
426	for (i = `0`; i < rd_nr; i++) {
427	err = brd_alloc(i);
428	if (err)
429	goto out_free;
430	}
431
432	/*
433	* brd module now has a feature to instantiate underlying device
434	* structure on-demand, provided that there is an access dev node.
435	*
436	* (1) if rd_nr is specified, create that many upfront. else
437	* it defaults to CONFIG_BLK_DEV_RAM_COUNT
438	* (2) User can further extend brd devices by create dev node themselves
439	* and have kernel automatically instantiate actual device
440	* on-demand. Example:
441	* mknod /path/devnod_name b 1 X # 1 is the rd major
442	* fdisk -l /path/devnod_name
443	* If (X / max_part) was not already created it will be created
444	* dynamically.
445	*/
446
447	if (__register_blkdev(RAMDISK_MAJOR, name: "ramdisk", probe: brd_probe)) {
448	err = -EIO;
449	goto out_free;
450	}
451
452	pr_info("brd: module loaded\n");
453	return `0`;
454
455	out_free:
456	brd_cleanup();
457
458	pr_info("brd: module NOT loaded !!!\n");
459	return err;
460	}
461
462	static void __exit brd_exit(void)
463	{
464
465	unregister_blkdev(RAMDISK_MAJOR, name: "ramdisk");
466	brd_cleanup();
467
468	pr_info("brd: module unloaded\n");
469	}
470
/* Register brd_init() and brd_exit() as the module's load and unload hooks. */
module_init(brd_init);
module_exit(brd_exit);
473
474

source code of linux/drivers/block/brd.c