genhd.c source code [linux/block/genhd.c]

1	// SPDX-License-Identifier: GPL-2.0
2	/*
3	* gendisk handling
4	*
5	* Portions Copyright (C) 2020 Christoph Hellwig
6	*/
7
8	#include <linux/module.h>
9	#include <linux/ctype.h>
10	#include <linux/fs.h>
11	#include <linux/kdev_t.h>
12	#include <linux/kernel.h>
13	#include <linux/blkdev.h>
14	#include <linux/backing-dev.h>
15	#include <linux/init.h>
16	#include <linux/spinlock.h>
17	#include <linux/proc_fs.h>
18	#include <linux/seq_file.h>
19	#include <linux/slab.h>
20	#include <linux/kmod.h>
21	#include <linux/major.h>
22	#include <linux/mutex.h>
23	#include <linux/idr.h>
24	#include <linux/log2.h>
25	#include <linux/pm_runtime.h>
26	#include <linux/badblocks.h>
27	#include <linux/part_stat.h>
28	#include <linux/blktrace_api.h>
29
30	#include "blk-throttle.h"
31	#include "blk.h"
32	#include "blk-mq-sched.h"
33	#include "blk-rq-qos.h"
34	#include "blk-cgroup.h"
35
36	static struct kobject *block_depr;
37
38	/*
39	* Unique, monotonically increasing sequential number associated with block
40	* devices instances (i.e. incremented each time a device is attached).
41	* Associating uevents with block devices in userspace is difficult and racy:
42	* the uevent netlink socket is lossy, and on slow and overloaded systems has
43	* a very high latency.
44	* Block devices do not have exclusive owners in userspace, any process can set
45	* one up (e.g. loop devices). Moreover, device names can be reused (e.g. loop0
46	* can be reused again and again).
47	* A userspace process setting up a block device and watching for its events
48	* cannot thus reliably tell whether an event relates to the device it just set
49	* up or another earlier instance with the same name.
50	* This sequential number allows userspace processes to solve this problem, and
51	* uniquely associate an uevent to the lifetime to a device.
52	*/
53	static atomic64_t diskseq;
54
55	/ for extended dynamic devt allocation, currently only one major is used /
56	#define NR_EXT_DEVT (1 << MINORBITS)
57	static DEFINE_IDA(ext_devt_ida);
58
59	void set_capacity(struct gendisk *disk, sector_t sectors)
60	{
61	bdev_set_nr_sectors(bdev: disk->part0, sectors);
62	}
63	EXPORT_SYMBOL(set_capacity);
64
65	/*
66	* Set disk capacity and notify if the size is not currently zero and will not
67	* be set to zero. Returns true if a uevent was sent, otherwise false.
68	*/
69	bool set_capacity_and_notify(struct gendisk *disk, sector_t size)
70	{
71	sector_t capacity = get_capacity(disk);
72	char *envp[] = { "RESIZE=1", NULL };
73
74	set_capacity(disk, size);
75
76	/*
77	* Only print a message and send a uevent if the gendisk is user visible
78	* and alive. This avoids spamming the log and udev when setting the
79	* initial capacity during probing.
80	*/
81	if (size == capacity \|\|
82	!disk_live(disk) \|\|
83	(disk->flags & GENHD_FL_HIDDEN))
84	return false;
85
86	pr_info("%s: detected capacity change from %lld to %lld\n",
87	disk->disk_name, capacity, size);
88
89	/*
90	* Historically we did not send a uevent for changes to/from an empty
91	* device.
92	*/
93	if (!capacity \|\| !size)
94	return false;
95	kobject_uevent_env(kobj: &disk_to_dev(disk)->kobj, action: KOBJ_CHANGE, envp);
96	return true;
97	}
98	EXPORT_SYMBOL_GPL(set_capacity_and_notify);
99
100	static void part_stat_read_all(struct block_device *part,
101	struct disk_stats *stat)
102	{
103	int cpu;
104
105	memset(stat, `0`, sizeof(struct disk_stats));
106	for_each_possible_cpu(cpu) {
107	struct disk_stats *ptr = per_cpu_ptr(part->bd_stats, cpu);
108	int group;
109
110	for (group = `0`; group < NR_STAT_GROUPS; group++) {
111	stat->nsecs[group] += ptr->nsecs[group];
112	stat->sectors[group] += ptr->sectors[group];
113	stat->ios[group] += ptr->ios[group];
114	stat->merges[group] += ptr->merges[group];
115	}
116
117	stat->io_ticks += ptr->io_ticks;
118	}
119	}
120
121	static unsigned int part_in_flight(struct block_device *part)
122	{
123	unsigned int inflight = `0`;
124	int cpu;
125
126	for_each_possible_cpu(cpu) {
127	inflight += part_stat_local_read_cpu(part, in_flight[`0`], cpu) +
128	part_stat_local_read_cpu(part, in_flight[`1`], cpu);
129	}
130	if ((int)inflight < `0`)
131	inflight = `0`;
132
133	return inflight;
134	}
135
136	static void part_in_flight_rw(struct block_device *part,
137	unsigned int inflight[`2`])
138	{
139	int cpu;
140
141	inflight[`0`] = `0`;
142	inflight[`1`] = `0`;
143	for_each_possible_cpu(cpu) {
144	inflight[`0`] += part_stat_local_read_cpu(part, in_flight[`0`], cpu);
145	inflight[`1`] += part_stat_local_read_cpu(part, in_flight[`1`], cpu);
146	}
147	if ((int)inflight[`0`] < `0`)
148	inflight[`0`] = `0`;
149	if ((int)inflight[`1`] < `0`)
150	inflight[`1`] = `0`;
151	}
152
/*
 * Can be deleted altogether. Later.
 *
 * Registered block majors, kept in a hash table of singly linked chains
 * indexed by major_to_index().  In this file, writers take both
 * major_names_lock and major_names_spinlock; blkdev_show() walks a chain
 * under the spinlock only and blk_request_module() under the mutex only.
 */
#define BLKDEV_MAJOR_HASH_SIZE 255
static struct blk_major_name {
	struct blk_major_name *next;	/* next entry in the same bucket */
	int major;
	char name[16];
#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
	void (*probe)(dev_t devt);
#endif
} *major_names[BLKDEV_MAJOR_HASH_SIZE];
static DEFINE_MUTEX(major_names_lock);
static DEFINE_SPINLOCK(major_names_spinlock);
168
169	/ index in the above - for now: assume no multimajor ranges /
170	static inline int major_to_index(unsigned major)
171	{
172	return major % BLKDEV_MAJOR_HASH_SIZE;
173	}
174
175	#ifdef CONFIG_PROC_FS
176	void blkdev_show(struct seq_file *seqf, off_t offset)
177	{
178	struct blk_major_name *dp;
179
180	spin_lock(lock: &major_names_spinlock);
181	for (dp = major_names[major_to_index(major: offset)]; dp; dp = dp->next)
182	if (dp->major == offset)
183	seq_printf(m: seqf, fmt: "%3d %s\n", dp->major, dp->name);
184	spin_unlock(lock: &major_names_spinlock);
185	}
186	#endif /* CONFIG_PROC_FS */
187
188	/**
189	* __register_blkdev - register a new block device
190	*
191	* @major: the requested major device number [1..BLKDEV_MAJOR_MAX-1]. If
192	* @major = 0, try to allocate any unused major number.
193	* @name: the name of the new block device as a zero terminated string
194	* @probe: pre-devtmpfs / pre-udev callback used to create disks when their
195	* pre-created device node is accessed. When a probe call uses
196	* add_disk() and it fails the driver must cleanup resources. This
197	* interface may soon be removed.
198	*
199	* The @name must be unique within the system.
200	*
201	* The return value depends on the @major input parameter:
202	*
203	* - if a major device number was requested in range [1..BLKDEV_MAJOR_MAX-1]
204	* then the function returns zero on success, or a negative error code
205	* - if any unused major number was requested with @major = 0 parameter
206	* then the return value is the allocated major number in range
207	* [1..BLKDEV_MAJOR_MAX-1] or a negative error code otherwise
208	*
209	* See Documentation/admin-guide/devices.txt for the list of allocated
210	* major numbers.
211	*
212	* Use register_blkdev instead for any new code.
213	*/
214	int __register_blkdev(unsigned int major, const char *name,
215	void (*probe)(dev_t devt))
216	{
217	struct blk_major_name *n, p;
218	int index, ret = `0`;
219
220	mutex_lock(&major_names_lock);
221
222	/ temporary /
223	if (major == `0`) {
224	for (index = ARRAY_SIZE(major_names)-`1`; index > `0`; index--) {
225	if (major_names[index] == NULL)
226	break;
227	}
228
229	if (index == `0`) {
230	printk("%s: failed to get major for %s\n",
231	__func__, name);
232	ret = -EBUSY;
233	goto out;
234	}
235	major = index;
236	ret = major;
237	}
238
239	if (major >= BLKDEV_MAJOR_MAX) {
240	pr_err("%s: major requested (%u) is greater than the maximum (%u) for %s\n",
241	__func__, major, BLKDEV_MAJOR_MAX-`1`, name);
242
243	ret = -EINVAL;
244	goto out;
245	}
246
247	p = kmalloc(size: sizeof(struct blk_major_name), GFP_KERNEL);
248	if (p == NULL) {
249	ret = -ENOMEM;
250	goto out;
251	}
252
253	p->major = major;
254	#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
255	p->probe = probe;
256	#endif
257	strscpy(p->name, name, sizeof(p->name));
258	p->next = NULL;
259	index = major_to_index(major);
260
261	spin_lock(lock: &major_names_spinlock);
262	for (n = &major_names[index]; n; n = &(n)->next) {
263	if ((*n)->major == major)
264	break;
265	}
266	if (!*n)
267	*n = p;
268	else
269	ret = -EBUSY;
270	spin_unlock(lock: &major_names_spinlock);
271
272	if (ret < `0`) {
273	printk("register_blkdev: cannot get major %u for %s\n",
274	major, name);
275	kfree(objp: p);
276	}
277	out:
278	mutex_unlock(lock: &major_names_lock);
279	return ret;
280	}
281	EXPORT_SYMBOL(__register_blkdev);
282
283	void unregister_blkdev(unsigned int major, const char *name)
284	{
285	struct blk_major_name **n;
286	struct blk_major_name *p = NULL;
287	int index = major_to_index(major);
288
289	mutex_lock(&major_names_lock);
290	spin_lock(lock: &major_names_spinlock);
291	for (n = &major_names[index]; n; n = &(n)->next)
292	if ((*n)->major == major)
293	break;
294	if (!n \|\| strcmp((n)->name, name)) {
295	WARN_ON(`1`);
296	} else {
297	p = *n;
298	*n = p->next;
299	}
300	spin_unlock(lock: &major_names_spinlock);
301	mutex_unlock(lock: &major_names_lock);
302	kfree(objp: p);
303	}
304
305	EXPORT_SYMBOL(unregister_blkdev);
306
307	int blk_alloc_ext_minor(void)
308	{
309	int idx;
310
311	idx = ida_alloc_range(&ext_devt_ida, min: `0`, NR_EXT_DEVT - `1`, GFP_KERNEL);
312	if (idx == -ENOSPC)
313	return -EBUSY;
314	return idx;
315	}
316
317	void blk_free_ext_minor(unsigned int minor)
318	{
319	ida_free(&ext_devt_ida, id: minor);
320	}
321
322	void disk_uevent(struct gendisk disk, enum* kobject_action action)
323	{
324	struct block_device *part;
325	unsigned long idx;
326
327	rcu_read_lock();
328	xa_for_each(&disk->part_tbl, idx, part) {
329	if (bdev_is_partition(bdev: part) && !bdev_nr_sectors(bdev: part))
330	continue;
331	if (!kobject_get_unless_zero(kobj: &part->bd_device.kobj))
332	continue;
333
334	rcu_read_unlock();
335	kobject_uevent(bdev_kobj(part), action);
336	put_device(dev: &part->bd_device);
337	rcu_read_lock();
338	}
339	rcu_read_unlock();
340	}
341	EXPORT_SYMBOL_GPL(disk_uevent);
342
343	int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode)
344	{
345	struct file *file;
346	int ret = `0`;
347
348	if (disk->flags & (GENHD_FL_NO_PART \| GENHD_FL_HIDDEN))
349	return -EINVAL;
350	if (test_bit(GD_SUPPRESS_PART_SCAN, &disk->state))
351	return -EINVAL;
352	if (disk->open_partitions)
353	return -EBUSY;
354
355	/*
356	* If the device is opened exclusively by current thread already, it's
357	* safe to scan partitons, otherwise, use bd_prepare_to_claim() to
358	* synchronize with other exclusive openers and other partition
359	* scanners.
360	*/
361	if (!(mode & BLK_OPEN_EXCL)) {
362	ret = bd_prepare_to_claim(bdev: disk->part0, holder: disk_scan_partitions,
363	NULL);
364	if (ret)
365	return ret;
366	}
367
368	set_bit(GD_NEED_PART_SCAN, addr: &disk->state);
369	file = bdev_file_open_by_dev(dev: disk_devt(disk), mode: mode & ~BLK_OPEN_EXCL,
370	NULL, NULL);
371	if (IS_ERR(ptr: file))
372	ret = PTR_ERR(ptr: file);
373	else
374	fput(file);
375
376	/*
377	* If blkdev_get_by_dev() failed early, GD_NEED_PART_SCAN is still set,
378	* and this will cause that re-assemble partitioned raid device will
379	* creat partition for underlying disk.
380	*/
381	clear_bit(GD_NEED_PART_SCAN, addr: &disk->state);
382	if (!(mode & BLK_OPEN_EXCL))
383	bd_abort_claiming(bdev: disk->part0, holder: disk_scan_partitions);
384	return ret;
385	}
386
387	/**
388	* device_add_disk - add disk information to kernel list
389	* @parent: parent device for the disk
390	* @disk: per-device partitioning information
391	* @groups: Additional per-device sysfs groups
392	*
393	* This function registers the partitioning information in @disk
394	* with the kernel.
395	*/
396	int __must_check device_add_disk(struct device parent, struct* gendisk *disk,
397	const struct attribute_group **groups)
398
399	{
400	struct device *ddev = disk_to_dev(disk);
401	int ret;
402
403	/ Only makes sense for bio-based to set ->poll_bio /
404	if (queue_is_mq(q: disk->queue) && disk->fops->poll_bio)
405	return -EINVAL;
406
407	/*
408	* The disk queue should now be all set with enough information about
409	* the device for the elevator code to pick an adequate default
410	* elevator if one is needed, that is, for devices requesting queue
411	* registration.
412	*/
413	elevator_init_mq(q: disk->queue);
414
415	/ Mark bdev as having a submit_bio, if needed /
416	disk->part0->bd_has_submit_bio = disk->fops->submit_bio != NULL;
417
418	/*
419	* If the driver provides an explicit major number it also must provide
420	* the number of minors numbers supported, and those will be used to
421	* setup the gendisk.
422	* Otherwise just allocate the device numbers for both the whole device
423	* and all partitions from the extended dev_t space.
424	*/
425	ret = -EINVAL;
426	if (disk->major) {
427	if (WARN_ON(!disk->minors))
428	goto out_exit_elevator;
429
430	if (disk->minors > DISK_MAX_PARTS) {
431	pr_err("block: can't allocate more than %d partitions\n",
432	DISK_MAX_PARTS);
433	disk->minors = DISK_MAX_PARTS;
434	}
435	if (disk->first_minor > MINORMASK \|\|
436	disk->minors > MINORMASK + `1` \|\|
437	disk->first_minor + disk->minors > MINORMASK + `1`)
438	goto out_exit_elevator;
439	} else {
440	if (WARN_ON(disk->minors))
441	goto out_exit_elevator;
442
443	ret = blk_alloc_ext_minor();
444	if (ret < `0`)
445	goto out_exit_elevator;
446	disk->major = BLOCK_EXT_MAJOR;
447	disk->first_minor = ret;
448	}
449
450	/ delay uevents, until we scanned partition table /
451	dev_set_uevent_suppress(dev: ddev, val: `1`);
452
453	ddev->parent = parent;
454	ddev->groups = groups;
455	dev_set_name(dev: ddev, name: "%s", disk->disk_name);
456	if (!(disk->flags & GENHD_FL_HIDDEN))
457	ddev->devt = MKDEV(disk->major, disk->first_minor);
458	ret = device_add(dev: ddev);
459	if (ret)
460	goto out_free_ext_minor;
461
462	ret = disk_alloc_events(disk);
463	if (ret)
464	goto out_device_del;
465
466	ret = sysfs_create_link(kobj: block_depr, target: &ddev->kobj,
467	name: kobject_name(kobj: &ddev->kobj));
468	if (ret)
469	goto out_device_del;
470
471	/*
472	* avoid probable deadlock caused by allocating memory with
473	* GFP_KERNEL in runtime_resume callback of its all ancestor
474	* devices
475	*/
476	pm_runtime_set_memalloc_noio(dev: ddev, enable: true);
477
478	disk->part0->bd_holder_dir =
479	kobject_create_and_add(name: "holders", parent: &ddev->kobj);
480	if (!disk->part0->bd_holder_dir) {
481	ret = -ENOMEM;
482	goto out_del_block_link;
483	}
484	disk->slave_dir = kobject_create_and_add(name: "slaves", parent: &ddev->kobj);
485	if (!disk->slave_dir) {
486	ret = -ENOMEM;
487	goto out_put_holder_dir;
488	}
489
490	ret = blk_register_queue(disk);
491	if (ret)
492	goto out_put_slave_dir;
493
494	if (!(disk->flags & GENHD_FL_HIDDEN)) {
495	ret = bdi_register(bdi: disk->bdi, fmt: "%u:%u",
496	disk->major, disk->first_minor);
497	if (ret)
498	goto out_unregister_queue;
499	bdi_set_owner(bdi: disk->bdi, owner: ddev);
500	ret = sysfs_create_link(kobj: &ddev->kobj,
501	target: &disk->bdi->dev->kobj, name: "bdi");
502	if (ret)
503	goto out_unregister_bdi;
504
505	/ Make sure the first partition scan will be proceed /
506	if (get_capacity(disk) && !(disk->flags & GENHD_FL_NO_PART) &&
507	!test_bit(GD_SUPPRESS_PART_SCAN, &disk->state))
508	set_bit(GD_NEED_PART_SCAN, addr: &disk->state);
509
510	bdev_add(bdev: disk->part0, dev: ddev->devt);
511	if (get_capacity(disk))
512	disk_scan_partitions(disk, BLK_OPEN_READ);
513
514	/*
515	* Announce the disk and partitions after all partitions are
516	* created. (for hidden disks uevents remain suppressed forever)
517	*/
518	dev_set_uevent_suppress(dev: ddev, val: `0`);
519	disk_uevent(disk, KOBJ_ADD);
520	} else {
521	/*
522	* Even if the block_device for a hidden gendisk is not
523	* registered, it needs to have a valid bd_dev so that the
524	* freeing of the dynamic major works.
525	*/
526	disk->part0->bd_dev = MKDEV(disk->major, disk->first_minor);
527	}
528
529	disk_update_readahead(disk);
530	disk_add_events(disk);
531	set_bit(GD_ADDED, addr: &disk->state);
532	return `0`;
533
534	out_unregister_bdi:
535	if (!(disk->flags & GENHD_FL_HIDDEN))
536	bdi_unregister(bdi: disk->bdi);
537	out_unregister_queue:
538	blk_unregister_queue(disk);
539	rq_qos_exit(disk->queue);
540	out_put_slave_dir:
541	kobject_put(kobj: disk->slave_dir);
542	disk->slave_dir = NULL;
543	out_put_holder_dir:
544	kobject_put(kobj: disk->part0->bd_holder_dir);
545	out_del_block_link:
546	sysfs_remove_link(kobj: block_depr, name: dev_name(dev: ddev));
547	pm_runtime_set_memalloc_noio(dev: ddev, enable: false);
548	out_device_del:
549	device_del(dev: ddev);
550	out_free_ext_minor:
551	if (disk->major == BLOCK_EXT_MAJOR)
552	blk_free_ext_minor(minor: disk->first_minor);
553	out_exit_elevator:
554	if (disk->queue->elevator)
555	elevator_exit(q: disk->queue);
556	return ret;
557	}
558	EXPORT_SYMBOL(device_add_disk);
559
560	static void blk_report_disk_dead(struct gendisk *disk, bool surprise)
561	{
562	struct block_device *bdev;
563	unsigned long idx;
564
565	/*
566	* On surprise disk removal, bdev_mark_dead() may call into file
567	* systems below. Make it clear that we're expecting to not hold
568	* disk->open_mutex.
569	*/
570	lockdep_assert_not_held(&disk->open_mutex);
571
572	rcu_read_lock();
573	xa_for_each(&disk->part_tbl, idx, bdev) {
574	if (!kobject_get_unless_zero(kobj: &bdev->bd_device.kobj))
575	continue;
576	rcu_read_unlock();
577
578	bdev_mark_dead(bdev, surprise);
579
580	put_device(dev: &bdev->bd_device);
581	rcu_read_lock();
582	}
583	rcu_read_unlock();
584	}
585
586	static void __blk_mark_disk_dead(struct gendisk *disk)
587	{
588	/*
589	* Fail any new I/O.
590	*/
591	if (test_and_set_bit(GD_DEAD, addr: &disk->state))
592	return;
593
594	if (test_bit(GD_OWNS_QUEUE, &disk->state))
595	blk_queue_flag_set(QUEUE_FLAG_DYING, q: disk->queue);
596
597	/*
598	* Stop buffered writers from dirtying pages that can't be written out.
599	*/
600	set_capacity(disk, `0`);
601
602	/*
603	* Prevent new I/O from crossing bio_queue_enter().
604	*/
605	blk_queue_start_drain(q: disk->queue);
606	}
607
608	/**
609	* blk_mark_disk_dead - mark a disk as dead
610	* @disk: disk to mark as dead
611	*
612	* Mark as disk as dead (e.g. surprise removed) and don't accept any new I/O
613	* to this disk.
614	*/
615	void blk_mark_disk_dead(struct gendisk *disk)
616	{
617	__blk_mark_disk_dead(disk);
618	blk_report_disk_dead(disk, surprise: true);
619	}
620	EXPORT_SYMBOL_GPL(blk_mark_disk_dead);
621
622	/**
623	* del_gendisk - remove the gendisk
624	* @disk: the struct gendisk to remove
625	*
626	* Removes the gendisk and all its associated resources. This deletes the
627	* partitions associated with the gendisk, and unregisters the associated
628	* request_queue.
629	*
630	* This is the counter to the respective __device_add_disk() call.
631	*
632	* The final removal of the struct gendisk happens when its refcount reaches 0
633	* with put_disk(), which should be called after del_gendisk(), if
634	* __device_add_disk() was used.
635	*
636	* Drivers exist which depend on the release of the gendisk to be synchronous,
637	* it should not be deferred.
638	*
639	* Context: can sleep
640	*/
641	void del_gendisk(struct gendisk *disk)
642	{
643	struct request_queue *q = disk->queue;
644	struct block_device *part;
645	unsigned long idx;
646
647	might_sleep();
648
649	if (WARN_ON_ONCE(!disk_live(disk) && !(disk->flags & GENHD_FL_HIDDEN)))
650	return;
651
652	disk_del_events(disk);
653
654	/*
655	* Prevent new openers by unlinked the bdev inode.
656	*/
657	mutex_lock(&disk->open_mutex);
658	xa_for_each(&disk->part_tbl, idx, part)
659	remove_inode_hash(inode: part->bd_inode);
660	mutex_unlock(lock: &disk->open_mutex);
661
662	/*
663	* Tell the file system to write back all dirty data and shut down if
664	* it hasn't been notified earlier.
665	*/
666	if (!test_bit(GD_DEAD, &disk->state))
667	blk_report_disk_dead(disk, surprise: false);
668	__blk_mark_disk_dead(disk);
669
670	/*
671	* Drop all partitions now that the disk is marked dead.
672	*/
673	mutex_lock(&disk->open_mutex);
674	xa_for_each_start(&disk->part_tbl, idx, part, `1`)
675	drop_partition(part);
676	mutex_unlock(lock: &disk->open_mutex);
677
678	if (!(disk->flags & GENHD_FL_HIDDEN)) {
679	sysfs_remove_link(kobj: &disk_to_dev(disk)->kobj, name: "bdi");
680
681	/*
682	* Unregister bdi before releasing device numbers (as they can
683	* get reused and we'd get clashes in sysfs).
684	*/
685	bdi_unregister(bdi: disk->bdi);
686	}
687
688	blk_unregister_queue(disk);
689
690	kobject_put(kobj: disk->part0->bd_holder_dir);
691	kobject_put(kobj: disk->slave_dir);
692	disk->slave_dir = NULL;
693
694	part_stat_set_all(part: disk->part0, value: `0`);
695	disk->part0->bd_stamp = `0`;
696	sysfs_remove_link(kobj: block_depr, name: dev_name(disk_to_dev(disk)));
697	pm_runtime_set_memalloc_noio(disk_to_dev(disk), enable: false);
698	device_del(disk_to_dev(disk));
699
700	blk_mq_freeze_queue_wait(q);
701
702	blk_throtl_cancel_bios(disk);
703
704	blk_sync_queue(q);
705	blk_flush_integrity();
706
707	if (queue_is_mq(q))
708	blk_mq_cancel_work_sync(q);
709
710	blk_mq_quiesce_queue(q);
711	if (q->elevator) {
712	mutex_lock(&q->sysfs_lock);
713	elevator_exit(q);
714	mutex_unlock(lock: &q->sysfs_lock);
715	}
716	rq_qos_exit(q);
717	blk_mq_unquiesce_queue(q);
718
719	/*
720	* If the disk does not own the queue, allow using passthrough requests
721	* again. Else leave the queue frozen to fail all I/O.
722	*/
723	if (!test_bit(GD_OWNS_QUEUE, &disk->state)) {
724	blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q);
725	__blk_mq_unfreeze_queue(q, force_atomic: true);
726	} else {
727	if (queue_is_mq(q))
728	blk_mq_exit_queue(q);
729	}
730	}
731	EXPORT_SYMBOL(del_gendisk);
732
733	/**
734	* invalidate_disk - invalidate the disk
735	* @disk: the struct gendisk to invalidate
736	*
737	* A helper to invalidates the disk. It will clean the disk's associated
738	* buffer/page caches and reset its internal states so that the disk
739	* can be reused by the drivers.
740	*
741	* Context: can sleep
742	*/
743	void invalidate_disk(struct gendisk *disk)
744	{
745	struct block_device *bdev = disk->part0;
746
747	invalidate_bdev(bdev);
748	bdev->bd_inode->i_mapping->wb_err = `0`;
749	set_capacity(disk, `0`);
750	}
751	EXPORT_SYMBOL(invalidate_disk);
752
753	/ sysfs access to bad-blocks list. /
754	static ssize_t disk_badblocks_show(struct device *dev,
755	struct device_attribute *attr,
756	char *page)
757	{
758	struct gendisk *disk = dev_to_disk(dev);
759
760	if (!disk->bb)
761	return sprintf(buf: page, fmt: "\n");
762
763	return badblocks_show(bb: disk->bb, page, unack: `0`);
764	}
765
766	static ssize_t disk_badblocks_store(struct device *dev,
767	struct device_attribute *attr,
768	const char *page, size_t len)
769	{
770	struct gendisk *disk = dev_to_disk(dev);
771
772	if (!disk->bb)
773	return -ENXIO;
774
775	return badblocks_store(bb: disk->bb, page, len, unack: `0`);
776	}
777
778	#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
779	void blk_request_module(dev_t devt)
780	{
781	unsigned int major = MAJOR(devt);
782	struct blk_major_name **n;
783
784	mutex_lock(&major_names_lock);
785	for (n = &major_names[major_to_index(major)]; n; n = &(n)->next) {
786	if ((n)->major == major && (n)->probe) {
787	(*n)->probe(devt);
788	mutex_unlock(lock: &major_names_lock);
789	return;
790	}
791	}
792	mutex_unlock(lock: &major_names_lock);
793
794	if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > `0`)
795	/ Make old-style 2.4 aliases work /
796	request_module("block-major-%d", MAJOR(devt));
797	}
798	#endif /* CONFIG_BLOCK_LEGACY_AUTOLOAD */
799
800	#ifdef CONFIG_PROC_FS
801	/ iterator /
802	static void disk_seqf_start(struct* seq_file seqf, loff_t pos)
803	{
804	loff_t skip = *pos;
805	struct class_dev_iter *iter;
806	struct device *dev;
807
808	iter = kmalloc(size: sizeof(*iter), GFP_KERNEL);
809	if (!iter)
810	return ERR_PTR(error: -ENOMEM);
811
812	seqf->private = iter;
813	class_dev_iter_init(iter, class: &block_class, NULL, type: &disk_type);
814	do {
815	dev = class_dev_iter_next(iter);
816	if (!dev)
817	return NULL;
818	} while (skip--);
819
820	return dev_to_disk(dev);
821	}
822
823	static void disk_seqf_next(struct* seq_file seqf, void* v, loff_t pos)
824	{
825	struct device *dev;
826
827	(*pos)++;
828	dev = class_dev_iter_next(iter: seqf->private);
829	if (dev)
830	return dev_to_disk(dev);
831
832	return NULL;
833	}
834
835	static void disk_seqf_stop(struct seq_file seqf, void* *v)
836	{
837	struct class_dev_iter *iter = seqf->private;
838
839	/ stop is called even after start failed :-( /
840	if (iter) {
841	class_dev_iter_exit(iter);
842	kfree(objp: iter);
843	seqf->private = NULL;
844	}
845	}
846
847	static void show_partition_start(struct* seq_file seqf, loff_t pos)
848	{
849	void *p;
850
851	p = disk_seqf_start(seqf, pos);
852	if (!IS_ERR_OR_NULL(ptr: p) && !*pos)
853	seq_puts(m: seqf, s: "major minor #blocks name\n\n");
854	return p;
855	}
856
857	static int show_partition(struct seq_file seqf, void* *v)
858	{
859	struct gendisk *sgp = v;
860	struct block_device *part;
861	unsigned long idx;
862
863	if (!get_capacity(disk: sgp) \|\| (sgp->flags & GENHD_FL_HIDDEN))
864	return `0`;
865
866	rcu_read_lock();
867	xa_for_each(&sgp->part_tbl, idx, part) {
868	if (!bdev_nr_sectors(bdev: part))
869	continue;
870	seq_printf(m: seqf, fmt: "%4d %7d %10llu %pg\n",
871	MAJOR(part->bd_dev), MINOR(part->bd_dev),
872	bdev_nr_sectors(bdev: part) >> `1`, part);
873	}
874	rcu_read_unlock();
875	return `0`;
876	}
877
878	static const struct seq_operations partitions_op = {
879	.start = show_partition_start,
880	.next = disk_seqf_next,
881	.stop = disk_seqf_stop,
882	.show = show_partition
883	};
884	#endif
885
886	static int __init genhd_device_init(void)
887	{
888	int error;
889
890	error = class_register(class: &block_class);
891	if (unlikely(error))
892	return error;
893	blk_dev_init();
894
895	register_blkdev(BLOCK_EXT_MAJOR, "blkext");
896
897	/ create top-level block dir /
898	block_depr = kobject_create_and_add(name: "block", NULL);
899	return `0`;
900	}
901
902	subsys_initcall(genhd_device_init);
903
904	static ssize_t disk_range_show(struct device *dev,
905	struct device_attribute attr, char* *buf)
906	{
907	struct gendisk *disk = dev_to_disk(dev);
908
909	return sprintf(buf, fmt: "%d\n", disk->minors);
910	}
911
912	static ssize_t disk_ext_range_show(struct device *dev,
913	struct device_attribute attr, char* *buf)
914	{
915	struct gendisk *disk = dev_to_disk(dev);
916
917	return sprintf(buf, fmt: "%d\n",
918	(disk->flags & GENHD_FL_NO_PART) ? `1` : DISK_MAX_PARTS);
919	}
920
921	static ssize_t disk_removable_show(struct device *dev,
922	struct device_attribute attr, char* *buf)
923	{
924	struct gendisk *disk = dev_to_disk(dev);
925
926	return sprintf(buf, fmt: "%d\n",
927	(disk->flags & GENHD_FL_REMOVABLE ? `1` : `0`));
928	}
929
930	static ssize_t disk_hidden_show(struct device *dev,
931	struct device_attribute attr, char* *buf)
932	{
933	struct gendisk *disk = dev_to_disk(dev);
934
935	return sprintf(buf, fmt: "%d\n",
936	(disk->flags & GENHD_FL_HIDDEN ? `1` : `0`));
937	}
938
939	static ssize_t disk_ro_show(struct device *dev,
940	struct device_attribute attr, char* *buf)
941	{
942	struct gendisk *disk = dev_to_disk(dev);
943
944	return sprintf(buf, fmt: "%d\n", get_disk_ro(disk) ? `1` : `0`);
945	}
946
947	ssize_t part_size_show(struct device *dev,
948	struct device_attribute attr, char* *buf)
949	{
950	return sprintf(buf, fmt: "%llu\n", bdev_nr_sectors(dev_to_bdev(dev)));
951	}
952
953	ssize_t part_stat_show(struct device *dev,
954	struct device_attribute attr, char* *buf)
955	{
956	struct block_device *bdev = dev_to_bdev(dev);
957	struct request_queue *q = bdev_get_queue(bdev);
958	struct disk_stats stat;
959	unsigned int inflight;
960
961	if (queue_is_mq(q))
962	inflight = blk_mq_in_flight(q, part: bdev);
963	else
964	inflight = part_in_flight(part: bdev);
965
966	if (inflight) {
967	part_stat_lock();
968	update_io_ticks(part: bdev, now: jiffies, end: true);
969	part_stat_unlock();
970	}
971	part_stat_read_all(part: bdev, stat: &stat);
972	return sprintf(buf,
973	fmt: "%8lu %8lu %8llu %8u "
974	"%8lu %8lu %8llu %8u "
975	"%8u %8u %8u "
976	"%8lu %8lu %8llu %8u "
977	"%8lu %8u"
978	"\n",
979	stat.ios[STAT_READ],
980	stat.merges[STAT_READ],
981	(unsigned long long)stat.sectors[STAT_READ],
982	(unsigned int)div_u64(dividend: stat.nsecs[STAT_READ], NSEC_PER_MSEC),
983	stat.ios[STAT_WRITE],
984	stat.merges[STAT_WRITE],
985	(unsigned long long)stat.sectors[STAT_WRITE],
986	(unsigned int)div_u64(dividend: stat.nsecs[STAT_WRITE], NSEC_PER_MSEC),
987	inflight,
988	jiffies_to_msecs(j: stat.io_ticks),
989	(unsigned int)div_u64(dividend: stat.nsecs[STAT_READ] +
990	stat.nsecs[STAT_WRITE] +
991	stat.nsecs[STAT_DISCARD] +
992	stat.nsecs[STAT_FLUSH],
993	NSEC_PER_MSEC),
994	stat.ios[STAT_DISCARD],
995	stat.merges[STAT_DISCARD],
996	(unsigned long long)stat.sectors[STAT_DISCARD],
997	(unsigned int)div_u64(dividend: stat.nsecs[STAT_DISCARD], NSEC_PER_MSEC),
998	stat.ios[STAT_FLUSH],
999	(unsigned int)div_u64(dividend: stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC));
1000	}
1001
1002	ssize_t part_inflight_show(struct device dev, struct* device_attribute *attr,
1003	char *buf)
1004	{
1005	struct block_device *bdev = dev_to_bdev(dev);
1006	struct request_queue *q = bdev_get_queue(bdev);
1007	unsigned int inflight[`2`];
1008
1009	if (queue_is_mq(q))
1010	blk_mq_in_flight_rw(q, part: bdev, inflight);
1011	else
1012	part_in_flight_rw(part: bdev, inflight);
1013
1014	return sprintf(buf, fmt: "%8u %8u\n", inflight[`0`], inflight[`1`]);
1015	}
1016
1017	static ssize_t disk_capability_show(struct device *dev,
1018	struct device_attribute attr, char* *buf)
1019	{
1020	dev_warn_once(dev, "the capability attribute has been deprecated.\n");
1021	return sprintf(buf, fmt: "0\n");
1022	}
1023
1024	static ssize_t disk_alignment_offset_show(struct device *dev,
1025	struct device_attribute *attr,
1026	char *buf)
1027	{
1028	struct gendisk *disk = dev_to_disk(dev);
1029
1030	return sprintf(buf, fmt: "%d\n", bdev_alignment_offset(bdev: disk->part0));
1031	}
1032
1033	static ssize_t disk_discard_alignment_show(struct device *dev,
1034	struct device_attribute *attr,
1035	char *buf)
1036	{
1037	struct gendisk *disk = dev_to_disk(dev);
1038
1039	return sprintf(buf, fmt: "%d\n", bdev_alignment_offset(bdev: disk->part0));
1040	}
1041
1042	static ssize_t diskseq_show(struct device *dev,
1043	struct device_attribute attr, char* *buf)
1044	{
1045	struct gendisk *disk = dev_to_disk(dev);
1046
1047	return sprintf(buf, fmt: "%llu\n", disk->diskseq);
1048	}
1049
1050	static DEVICE_ATTR(range, `0444`, disk_range_show, NULL);
1051	static DEVICE_ATTR(ext_range, `0444`, disk_ext_range_show, NULL);
1052	static DEVICE_ATTR(removable, `0444`, disk_removable_show, NULL);
1053	static DEVICE_ATTR(hidden, `0444`, disk_hidden_show, NULL);
1054	static DEVICE_ATTR(ro, `0444`, disk_ro_show, NULL);
1055	static DEVICE_ATTR(size, `0444`, part_size_show, NULL);
1056	static DEVICE_ATTR(alignment_offset, `0444`, disk_alignment_offset_show, NULL);
1057	static DEVICE_ATTR(discard_alignment, `0444`, disk_discard_alignment_show, NULL);
1058	static DEVICE_ATTR(capability, `0444`, disk_capability_show, NULL);
1059	static DEVICE_ATTR(stat, `0444`, part_stat_show, NULL);
1060	static DEVICE_ATTR(inflight, `0444`, part_inflight_show, NULL);
1061	static DEVICE_ATTR(badblocks, `0644`, disk_badblocks_show, disk_badblocks_store);
1062	static DEVICE_ATTR(diskseq, `0444`, diskseq_show, NULL);
1063
1064	#ifdef CONFIG_FAIL_MAKE_REQUEST
1065	ssize_t part_fail_show(struct device *dev,
1066	struct device_attribute attr, char* *buf)
1067	{
1068	return sprintf(buf, fmt: "%d\n", dev_to_bdev(dev)->bd_make_it_fail);
1069	}
1070
1071	ssize_t part_fail_store(struct device *dev,
1072	struct device_attribute *attr,
1073	const char *buf, size_t count)
1074	{
1075	int i;
1076
1077	if (count > `0` && sscanf(buf, "%d", &i) > `0`)
1078	dev_to_bdev(dev)->bd_make_it_fail = i;
1079
1080	return count;
1081	}
1082
1083	static struct device_attribute dev_attr_fail =
1084	__ATTR(make-it-fail, `0644`, part_fail_show, part_fail_store);
1085	#endif /* CONFIG_FAIL_MAKE_REQUEST */
1086
1087	#ifdef CONFIG_FAIL_IO_TIMEOUT
1088	static struct device_attribute dev_attr_fail_timeout =
1089	__ATTR(io-timeout-fail, `0644`, part_timeout_show, part_timeout_store);
1090	#endif
1091
1092	static struct attribute *disk_attrs[] = {
1093	&dev_attr_range.attr,
1094	&dev_attr_ext_range.attr,
1095	&dev_attr_removable.attr,
1096	&dev_attr_hidden.attr,
1097	&dev_attr_ro.attr,
1098	&dev_attr_size.attr,
1099	&dev_attr_alignment_offset.attr,
1100	&dev_attr_discard_alignment.attr,
1101	&dev_attr_capability.attr,
1102	&dev_attr_stat.attr,
1103	&dev_attr_inflight.attr,
1104	&dev_attr_badblocks.attr,
1105	&dev_attr_events.attr,
1106	&dev_attr_events_async.attr,
1107	&dev_attr_events_poll_msecs.attr,
1108	&dev_attr_diskseq.attr,
1109	#ifdef CONFIG_FAIL_MAKE_REQUEST
1110	&dev_attr_fail.attr,
1111	#endif
1112	#ifdef CONFIG_FAIL_IO_TIMEOUT
1113	&dev_attr_fail_timeout.attr,
1114	#endif
1115	NULL
1116	};
1117
1118	static umode_t disk_visible(struct kobject kobj, struct* attribute a, int* n)
1119	{
1120	struct device dev = container_of(kobj, typeof(dev), kobj);
1121	struct gendisk *disk = dev_to_disk(dev);
1122
1123	if (a == &dev_attr_badblocks.attr && !disk->bb)
1124	return `0`;
1125	return a->mode;
1126	}
1127
1128	static struct attribute_group disk_attr_group = {
1129	.attrs = disk_attrs,
1130	.is_visible = disk_visible,
1131	};
1132
1133	static const struct attribute_group *disk_attr_groups[] = {
1134	&disk_attr_group,
1135	#ifdef CONFIG_BLK_DEV_IO_TRACE
1136	&blk_trace_attr_group,
1137	#endif
1138	#ifdef CONFIG_BLK_DEV_INTEGRITY
1139	&blk_integrity_attr_group,
1140	#endif
1141	NULL
1142	};
1143
1144	/**
1145	* disk_release - releases all allocated resources of the gendisk
1146	* @dev: the device representing this disk
1147	*
1148	* This function releases all allocated resources of the gendisk.
1149	*
1150	* Drivers which used __device_add_disk() have a gendisk with a request_queue
1151	* assigned. Since the request_queue sits on top of the gendisk for these
1152	* drivers we also call blk_put_queue() for them, and we expect the
1153	* request_queue refcount to reach 0 at this point, and so the request_queue
1154	* will also be freed prior to the disk.
1155	*
1156	* Context: can sleep
1157	*/
1158	static void disk_release(struct device *dev)
1159	{
1160	struct gendisk *disk = dev_to_disk(dev);
1161
1162	might_sleep();
1163	WARN_ON_ONCE(disk_live(disk));
1164
1165	blk_trace_remove(q: disk->queue);
1166
1167	/*
1168	* To undo the all initialization from blk_mq_init_allocated_queue in
1169	* case of a probe failure where add_disk is never called we have to
1170	* call blk_mq_exit_queue here. We can't do this for the more common
1171	* teardown case (yet) as the tagset can be gone by the time the disk
1172	* is released once it was added.
1173	*/
1174	if (queue_is_mq(q: disk->queue) &&
1175	test_bit(GD_OWNS_QUEUE, &disk->state) &&
1176	!test_bit(GD_ADDED, &disk->state))
1177	blk_mq_exit_queue(q: disk->queue);
1178
1179	blkcg_exit_disk(disk);
1180
1181	bioset_exit(&disk->bio_split);
1182
1183	disk_release_events(disk);
1184	kfree(objp: disk->random);
1185	disk_free_zone_bitmaps(disk);
1186	xa_destroy(&disk->part_tbl);
1187
1188	disk->queue->disk = NULL;
1189	blk_put_queue(disk->queue);
1190
1191	if (test_bit(GD_ADDED, &disk->state) && disk->fops->free_disk)
1192	disk->fops->free_disk(disk);
1193
1194	iput(disk->part0->bd_inode); / frees the disk /
1195	}
1196
1197	static int block_uevent(const struct device dev, struct* kobj_uevent_env *env)
1198	{
1199	const struct gendisk *disk = dev_to_disk(dev);
1200
1201	return add_uevent_var(env, format: "DISKSEQ=%llu", disk->diskseq);
1202	}
1203
1204	const struct class block_class = {
1205	.name = "block",
1206	.dev_uevent = block_uevent,
1207	};
1208
1209	static char block_devnode(const* struct device dev, umode_t mode,
1210	kuid_t uid, kgid_t gid)
1211	{
1212	struct gendisk *disk = dev_to_disk(dev);
1213
1214	if (disk->fops->devnode)
1215	return disk->fops->devnode(disk, mode);
1216	return NULL;
1217	}
1218
1219	const struct device_type disk_type = {
1220	.name = "disk",
1221	.groups = disk_attr_groups,
1222	.release = disk_release,
1223	.devnode = block_devnode,
1224	};
1225
1226	#ifdef CONFIG_PROC_FS
1227	/*
1228	* aggregate disk stat collector. Uses the same stats that the sysfs
1229	* entries do, above, but makes them available through one seq_file.
1230	*
1231	* The output looks suspiciously like /proc/partitions with a bunch of
1232	* extra fields.
1233	*/
1234	static int diskstats_show(struct seq_file seqf, void* *v)
1235	{
1236	struct gendisk *gp = v;
1237	struct block_device *hd;
1238	unsigned int inflight;
1239	struct disk_stats stat;
1240	unsigned long idx;
1241
1242	/*
1243	if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
1244	seq_puts(seqf, "major minor name"
1245	" rio rmerge rsect ruse wio wmerge "
1246	"wsect wuse running use aveq"
1247	"\n\n");
1248	*/
1249
1250	rcu_read_lock();
1251	xa_for_each(&gp->part_tbl, idx, hd) {
1252	if (bdev_is_partition(bdev: hd) && !bdev_nr_sectors(bdev: hd))
1253	continue;
1254	if (queue_is_mq(q: gp->queue))
1255	inflight = blk_mq_in_flight(q: gp->queue, part: hd);
1256	else
1257	inflight = part_in_flight(part: hd);
1258
1259	if (inflight) {
1260	part_stat_lock();
1261	update_io_ticks(part: hd, now: jiffies, end: true);
1262	part_stat_unlock();
1263	}
1264	part_stat_read_all(part: hd, stat: &stat);
1265	seq_printf(m: seqf, fmt: "%4d %7d %pg "
1266	"%lu %lu %lu %u "
1267	"%lu %lu %lu %u "
1268	"%u %u %u "
1269	"%lu %lu %lu %u "
1270	"%lu %u"
1271	"\n",
1272	MAJOR(hd->bd_dev), MINOR(hd->bd_dev), hd,
1273	stat.ios[STAT_READ],
1274	stat.merges[STAT_READ],
1275	stat.sectors[STAT_READ],
1276	(unsigned int)div_u64(dividend: stat.nsecs[STAT_READ],
1277	NSEC_PER_MSEC),
1278	stat.ios[STAT_WRITE],
1279	stat.merges[STAT_WRITE],
1280	stat.sectors[STAT_WRITE],
1281	(unsigned int)div_u64(dividend: stat.nsecs[STAT_WRITE],
1282	NSEC_PER_MSEC),
1283	inflight,
1284	jiffies_to_msecs(j: stat.io_ticks),
1285	(unsigned int)div_u64(dividend: stat.nsecs[STAT_READ] +
1286	stat.nsecs[STAT_WRITE] +
1287	stat.nsecs[STAT_DISCARD] +
1288	stat.nsecs[STAT_FLUSH],
1289	NSEC_PER_MSEC),
1290	stat.ios[STAT_DISCARD],
1291	stat.merges[STAT_DISCARD],
1292	stat.sectors[STAT_DISCARD],
1293	(unsigned int)div_u64(dividend: stat.nsecs[STAT_DISCARD],
1294	NSEC_PER_MSEC),
1295	stat.ios[STAT_FLUSH],
1296	(unsigned int)div_u64(dividend: stat.nsecs[STAT_FLUSH],
1297	NSEC_PER_MSEC)
1298	);
1299	}
1300	rcu_read_unlock();
1301
1302	return `0`;
1303	}
1304
1305	static const struct seq_operations diskstats_op = {
1306	.start = disk_seqf_start,
1307	.next = disk_seqf_next,
1308	.stop = disk_seqf_stop,
1309	.show = diskstats_show
1310	};
1311
1312	static int __init proc_genhd_init(void)
1313	{
1314	proc_create_seq("diskstats", `0`, NULL, &diskstats_op);
1315	proc_create_seq("partitions", `0`, NULL, &partitions_op);
1316	return `0`;
1317	}
1318	module_init(proc_genhd_init);
1319	#endif /* CONFIG_PROC_FS */
1320
1321	dev_t part_devt(struct gendisk *disk, u8 partno)
1322	{
1323	struct block_device *part;
1324	dev_t devt = `0`;
1325
1326	rcu_read_lock();
1327	part = xa_load(&disk->part_tbl, index: partno);
1328	if (part)
1329	devt = part->bd_dev;
1330	rcu_read_unlock();
1331
1332	return devt;
1333	}
1334
1335	struct gendisk __alloc_disk_node(struct* request_queue q, int* node_id,
1336	struct lock_class_key *lkclass)
1337	{
1338	struct gendisk *disk;
1339
1340	disk = kzalloc_node(size: sizeof(struct gendisk), GFP_KERNEL, node: node_id);
1341	if (!disk)
1342	return NULL;
1343
1344	if (bioset_init(&disk->bio_split, BIO_POOL_SIZE, `0`, flags: `0`))
1345	goto out_free_disk;
1346
1347	disk->bdi = bdi_alloc(node_id);
1348	if (!disk->bdi)
1349	goto out_free_bioset;
1350
1351	/ bdev_alloc() might need the queue, set before the first call /
1352	disk->queue = q;
1353
1354	disk->part0 = bdev_alloc(disk, partno: `0`);
1355	if (!disk->part0)
1356	goto out_free_bdi;
1357
1358	disk->node_id = node_id;
1359	mutex_init(&disk->open_mutex);
1360	xa_init(xa: &disk->part_tbl);
1361	if (xa_insert(xa: &disk->part_tbl, index: `0`, entry: disk->part0, GFP_KERNEL))
1362	goto out_destroy_part_tbl;
1363
1364	if (blkcg_init_disk(disk))
1365	goto out_erase_part0;
1366
1367	rand_initialize_disk(disk);
1368	disk_to_dev(disk)->class = &block_class;
1369	disk_to_dev(disk)->type = &disk_type;
1370	device_initialize(disk_to_dev(disk));
1371	inc_diskseq(disk);
1372	q->disk = disk;
1373	lockdep_init_map(lock: &disk->lockdep_map, name: "(bio completion)", key: lkclass, subclass: `0`);
1374	#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
1375	INIT_LIST_HEAD(list: &disk->slave_bdevs);
1376	#endif
1377	return disk;
1378
1379	out_erase_part0:
1380	xa_erase(&disk->part_tbl, index: `0`);
1381	out_destroy_part_tbl:
1382	xa_destroy(&disk->part_tbl);
1383	disk->part0->bd_disk = NULL;
1384	iput(disk->part0->bd_inode);
1385	out_free_bdi:
1386	bdi_put(bdi: disk->bdi);
1387	out_free_bioset:
1388	bioset_exit(&disk->bio_split);
1389	out_free_disk:
1390	kfree(objp: disk);
1391	return NULL;
1392	}
1393
1394	struct gendisk __blk_alloc_disk(struct* queue_limits lim, int* node,
1395	struct lock_class_key *lkclass)
1396	{
1397	struct queue_limits default_lim = { };
1398	struct request_queue *q;
1399	struct gendisk *disk;
1400
1401	q = blk_alloc_queue(lim: lim ? lim : &default_lim, node_id: node);
1402	if (IS_ERR(ptr: q))
1403	return ERR_CAST(ptr: q);
1404
1405	disk = __alloc_disk_node(q, node_id: node, lkclass);
1406	if (!disk) {
1407	blk_put_queue(q);
1408	return ERR_PTR(error: -ENOMEM);
1409	}
1410	set_bit(GD_OWNS_QUEUE, addr: &disk->state);
1411	return disk;
1412	}
1413	EXPORT_SYMBOL(__blk_alloc_disk);
1414
/**
 * put_disk - decrements the gendisk refcount
 * @disk: the struct gendisk to decrement the refcount for; may be NULL, in
 *        which case nothing is done
 *
 * This decrements the refcount for the struct gendisk. When this reaches 0
 * we'll have disk_release() called.
 *
 * Note: for blk-mq disk put_disk must be called before freeing the tag_set
 * when handling probe errors (that is before add_disk() is called).
 *
 * Context: Any context, but the last reference must not be dropped from
 *          atomic context.
 */
void put_disk(struct gendisk *disk)
{
	if (disk)
		put_device(disk_to_dev(disk));
}
EXPORT_SYMBOL(put_disk);
1434
1435	static void set_disk_ro_uevent(struct gendisk gd, int* ro)
1436	{
1437	char event[] = "DISK_RO=1";
1438	char *envp[] = { event, NULL };
1439
1440	if (!ro)
1441	event[`8`] = `'0'`;
1442	kobject_uevent_env(kobj: &disk_to_dev(gd)->kobj, action: KOBJ_CHANGE, envp);
1443	}
1444
1445	/**
1446	* set_disk_ro - set a gendisk read-only
1447	* @disk: gendisk to operate on
1448	* @read_only: %true to set the disk read-only, %false set the disk read/write
1449	*
1450	* This function is used to indicate whether a given disk device should have its
1451	* read-only flag set. set_disk_ro() is typically used by device drivers to
1452	* indicate whether the underlying physical device is write-protected.
1453	*/
1454	void set_disk_ro(struct gendisk *disk, bool read_only)
1455	{
1456	if (read_only) {
1457	if (test_and_set_bit(GD_READ_ONLY, addr: &disk->state))
1458	return;
1459	} else {
1460	if (!test_and_clear_bit(GD_READ_ONLY, addr: &disk->state))
1461	return;
1462	}
1463	set_disk_ro_uevent(gd: disk, ro: read_only);
1464	}
1465	EXPORT_SYMBOL(set_disk_ro);
1466
1467	void inc_diskseq(struct gendisk *disk)
1468	{
1469	disk->diskseq = atomic64_inc_return(v: &diskseq);
1470	}
1471

source code of linux/block/genhd.c