genhd.c source code [linux/block/genhd.c]

1	// SPDX-License-Identifier: GPL-2.0
2	/*
3	* gendisk handling
4	*
5	* Portions Copyright (C) 2020 Christoph Hellwig
6	*/
7
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/fs.h>
#include <linux/kdev_t.h>
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/kmod.h>
#include <linux/major.h>
#include <linux/mutex.h>
#include <linux/idr.h>
#include <linux/log2.h>
#include <linux/pm_runtime.h>
#include <linux/badblocks.h>
#include <linux/part_stat.h>
#include <linux/blktrace_api.h>
29
30	#include "blk-throttle.h"
31	#include "blk.h"
32	#include "blk-mq-sched.h"
33	#include "blk-rq-qos.h"
34	#include "blk-cgroup.h"
35
36	static struct kobject *block_depr;
37
38	/*
39	* Unique, monotonically increasing sequential number associated with block
40	* devices instances (i.e. incremented each time a device is attached).
41	* Associating uevents with block devices in userspace is difficult and racy:
42	* the uevent netlink socket is lossy, and on slow and overloaded systems has
43	* a very high latency.
44	* Block devices do not have exclusive owners in userspace, any process can set
45	* one up (e.g. loop devices). Moreover, device names can be reused (e.g. loop0
46	* can be reused again and again).
47	* A userspace process setting up a block device and watching for its events
48	* cannot thus reliably tell whether an event relates to the device it just set
49	* up or another earlier instance with the same name.
50	* This sequential number allows userspace processes to solve this problem, and
51	* uniquely associate an uevent to the lifetime to a device.
52	*/
53	static atomic64_t diskseq;
54
55	/ for extended dynamic devt allocation, currently only one major is used /
56	#define NR_EXT_DEVT (1 << MINORBITS)
57	static DEFINE_IDA(ext_devt_ida);
58
59	void set_capacity(struct gendisk *disk, sector_t sectors)
60	{
61	bdev_set_nr_sectors(bdev: disk->part0, sectors);
62	}
63	EXPORT_SYMBOL(set_capacity);
64
65	/*
66	* Set disk capacity and notify if the size is not currently zero and will not
67	* be set to zero. Returns true if a uevent was sent, otherwise false.
68	*/
69	bool set_capacity_and_notify(struct gendisk *disk, sector_t size)
70	{
71	sector_t capacity = get_capacity(disk);
72	char *envp[] = { "RESIZE=1", NULL };
73
74	set_capacity(disk, size);
75
76	/*
77	* Only print a message and send a uevent if the gendisk is user visible
78	* and alive. This avoids spamming the log and udev when setting the
79	* initial capacity during probing.
80	*/
81	if (size == capacity \|\|
82	!disk_live(disk) \|\|
83	(disk->flags & GENHD_FL_HIDDEN))
84	return false;
85
86	pr_info("%s: detected capacity change from %lld to %lld\n",
87	disk->disk_name, capacity, size);
88
89	/*
90	* Historically we did not send a uevent for changes to/from an empty
91	* device.
92	*/
93	if (!capacity \|\| !size)
94	return false;
95	kobject_uevent_env(kobj: &disk_to_dev(disk)->kobj, action: KOBJ_CHANGE, envp);
96	return true;
97	}
98	EXPORT_SYMBOL_GPL(set_capacity_and_notify);
99
100	static void part_stat_read_all(struct block_device *part,
101	struct disk_stats *stat)
102	{
103	int cpu;
104
105	memset(stat, `0`, sizeof(struct disk_stats));
106	for_each_possible_cpu(cpu) {
107	struct disk_stats *ptr = per_cpu_ptr(part->bd_stats, cpu);
108	int group;
109
110	for (group = `0`; group < NR_STAT_GROUPS; group++) {
111	stat->nsecs[group] += ptr->nsecs[group];
112	stat->sectors[group] += ptr->sectors[group];
113	stat->ios[group] += ptr->ios[group];
114	stat->merges[group] += ptr->merges[group];
115	}
116
117	stat->io_ticks += ptr->io_ticks;
118	}
119	}
120
121	static unsigned int part_in_flight(struct block_device *part)
122	{
123	unsigned int inflight = `0`;
124	int cpu;
125
126	for_each_possible_cpu(cpu) {
127	inflight += part_stat_local_read_cpu(part, in_flight[`0`], cpu) +
128	part_stat_local_read_cpu(part, in_flight[`1`], cpu);
129	}
130	if ((int)inflight < `0`)
131	inflight = `0`;
132
133	return inflight;
134	}
135
136	static void part_in_flight_rw(struct block_device *part,
137	unsigned int inflight[`2`])
138	{
139	int cpu;
140
141	inflight[`0`] = `0`;
142	inflight[`1`] = `0`;
143	for_each_possible_cpu(cpu) {
144	inflight[`0`] += part_stat_local_read_cpu(part, in_flight[`0`], cpu);
145	inflight[`1`] += part_stat_local_read_cpu(part, in_flight[`1`], cpu);
146	}
147	if ((int)inflight[`0`] < `0`)
148	inflight[`0`] = `0`;
149	if ((int)inflight[`1`] < `0`)
150	inflight[`1`] = `0`;
151	}
152
/*
 * Can be deleted altogether. Later.
 *
 * Hash table of registered block majors.  Each bucket is a singly linked
 * list of entries, indexed by major_to_index().
 */
#define BLKDEV_MAJOR_HASH_SIZE 255
static struct blk_major_name {
	struct blk_major_name *next;	/* next entry in the same bucket */
	int major;
	char name[16];
#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
	void (*probe)(dev_t devt);	/* optional legacy autoload callback */
#endif
} *major_names[BLKDEV_MAJOR_HASH_SIZE];
/* Held across register/unregister and while calling ->probe(). */
static DEFINE_MUTEX(major_names_lock);
/* Held while walking or modifying a bucket list. */
static DEFINE_SPINLOCK(major_names_spinlock);
168
169	/ index in the above - for now: assume no multimajor ranges /
170	static inline int major_to_index(unsigned major)
171	{
172	return major % BLKDEV_MAJOR_HASH_SIZE;
173	}
174
#ifdef CONFIG_PROC_FS
/*
 * Print every registered block major equal to @offset to @seqf, one
 * "<major> <name>" line per entry found in the matching hash bucket.
 */
void blkdev_show(struct seq_file *seqf, off_t offset)
{
	struct blk_major_name *dp;

	spin_lock(&major_names_spinlock);
	for (dp = major_names[major_to_index(offset)]; dp; dp = dp->next)
		if (dp->major == offset)
			seq_printf(seqf, "%3d %s\n", dp->major, dp->name);
	spin_unlock(&major_names_spinlock);
}
#endif /* CONFIG_PROC_FS */
187
188	/**
189	* __register_blkdev - register a new block device
190	*
191	* @major: the requested major device number [1..BLKDEV_MAJOR_MAX-1]. If
192	* @major = 0, try to allocate any unused major number.
193	* @name: the name of the new block device as a zero terminated string
194	* @probe: pre-devtmpfs / pre-udev callback used to create disks when their
195	* pre-created device node is accessed. When a probe call uses
196	* add_disk() and it fails the driver must cleanup resources. This
197	* interface may soon be removed.
198	*
199	* The @name must be unique within the system.
200	*
201	* The return value depends on the @major input parameter:
202	*
203	* - if a major device number was requested in range [1..BLKDEV_MAJOR_MAX-1]
204	* then the function returns zero on success, or a negative error code
205	* - if any unused major number was requested with @major = 0 parameter
206	* then the return value is the allocated major number in range
207	* [1..BLKDEV_MAJOR_MAX-1] or a negative error code otherwise
208	*
209	* See Documentation/admin-guide/devices.txt for the list of allocated
210	* major numbers.
211	*
212	* Use register_blkdev instead for any new code.
213	*/
214	int __register_blkdev(unsigned int major, const char *name,
215	void (*probe)(dev_t devt))
216	{
217	struct blk_major_name *n, p;
218	int index, ret = `0`;
219
220	mutex_lock(&major_names_lock);
221
222	/ temporary /
223	if (major == `0`) {
224	for (index = ARRAY_SIZE(major_names)-`1`; index > `0`; index--) {
225	if (major_names[index] == NULL)
226	break;
227	}
228
229	if (index == `0`) {
230	printk("%s: failed to get major for %s\n",
231	__func__, name);
232	ret = -EBUSY;
233	goto out;
234	}
235	major = index;
236	ret = major;
237	}
238
239	if (major >= BLKDEV_MAJOR_MAX) {
240	pr_err("%s: major requested (%u) is greater than the maximum (%u) for %s\n",
241	__func__, major, BLKDEV_MAJOR_MAX-`1`, name);
242
243	ret = -EINVAL;
244	goto out;
245	}
246
247	p = kmalloc(size: sizeof(struct blk_major_name), GFP_KERNEL);
248	if (p == NULL) {
249	ret = -ENOMEM;
250	goto out;
251	}
252
253	p->major = major;
254	#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
255	p->probe = probe;
256	#endif
257	strscpy(p: p->name, q: name, size: sizeof(p->name));
258	p->next = NULL;
259	index = major_to_index(major);
260
261	spin_lock(lock: &major_names_spinlock);
262	for (n = &major_names[index]; n; n = &(n)->next) {
263	if ((*n)->major == major)
264	break;
265	}
266	if (!*n)
267	*n = p;
268	else
269	ret = -EBUSY;
270	spin_unlock(lock: &major_names_spinlock);
271
272	if (ret < `0`) {
273	printk("register_blkdev: cannot get major %u for %s\n",
274	major, name);
275	kfree(objp: p);
276	}
277	out:
278	mutex_unlock(lock: &major_names_lock);
279	return ret;
280	}
281	EXPORT_SYMBOL(__register_blkdev);
282
283	void unregister_blkdev(unsigned int major, const char *name)
284	{
285	struct blk_major_name **n;
286	struct blk_major_name *p = NULL;
287	int index = major_to_index(major);
288
289	mutex_lock(&major_names_lock);
290	spin_lock(lock: &major_names_spinlock);
291	for (n = &major_names[index]; n; n = &(n)->next)
292	if ((*n)->major == major)
293	break;
294	if (!n \|\| strcmp((n)->name, name)) {
295	WARN_ON(`1`);
296	} else {
297	p = *n;
298	*n = p->next;
299	}
300	spin_unlock(lock: &major_names_spinlock);
301	mutex_unlock(lock: &major_names_lock);
302	kfree(objp: p);
303	}
304
305	EXPORT_SYMBOL(unregister_blkdev);
306
307	int blk_alloc_ext_minor(void)
308	{
309	int idx;
310
311	idx = ida_alloc_range(&ext_devt_ida, min: `0`, NR_EXT_DEVT - `1`, GFP_KERNEL);
312	if (idx == -ENOSPC)
313	return -EBUSY;
314	return idx;
315	}
316
317	void blk_free_ext_minor(unsigned int minor)
318	{
319	ida_free(&ext_devt_ida, id: minor);
320	}
321
322	void disk_uevent(struct gendisk disk, enum* kobject_action action)
323	{
324	struct block_device *part;
325	unsigned long idx;
326
327	rcu_read_lock();
328	xa_for_each(&disk->part_tbl, idx, part) {
329	if (bdev_is_partition(bdev: part) && !bdev_nr_sectors(bdev: part))
330	continue;
331	if (!kobject_get_unless_zero(kobj: &part->bd_device.kobj))
332	continue;
333
334	rcu_read_unlock();
335	kobject_uevent(bdev_kobj(part), action);
336	put_device(dev: &part->bd_device);
337	rcu_read_lock();
338	}
339	rcu_read_unlock();
340	}
341	EXPORT_SYMBOL_GPL(disk_uevent);
342
343	int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode)
344	{
345	struct bdev_handle *handle;
346	int ret = `0`;
347
348	if (disk->flags & (GENHD_FL_NO_PART \| GENHD_FL_HIDDEN))
349	return -EINVAL;
350	if (test_bit(GD_SUPPRESS_PART_SCAN, &disk->state))
351	return -EINVAL;
352	if (disk->open_partitions)
353	return -EBUSY;
354
355	/*
356	* If the device is opened exclusively by current thread already, it's
357	* safe to scan partitons, otherwise, use bd_prepare_to_claim() to
358	* synchronize with other exclusive openers and other partition
359	* scanners.
360	*/
361	if (!(mode & BLK_OPEN_EXCL)) {
362	ret = bd_prepare_to_claim(bdev: disk->part0, holder: disk_scan_partitions,
363	NULL);
364	if (ret)
365	return ret;
366	}
367
368	set_bit(GD_NEED_PART_SCAN, addr: &disk->state);
369	handle = bdev_open_by_dev(dev: disk_devt(disk), mode: mode & ~BLK_OPEN_EXCL, NULL,
370	NULL);
371	if (IS_ERR(ptr: handle))
372	ret = PTR_ERR(ptr: handle);
373	else
374	bdev_release(handle);
375
376	/*
377	* If blkdev_get_by_dev() failed early, GD_NEED_PART_SCAN is still set,
378	* and this will cause that re-assemble partitioned raid device will
379	* creat partition for underlying disk.
380	*/
381	clear_bit(GD_NEED_PART_SCAN, addr: &disk->state);
382	if (!(mode & BLK_OPEN_EXCL))
383	bd_abort_claiming(bdev: disk->part0, holder: disk_scan_partitions);
384	return ret;
385	}
386
387	/**
388	* device_add_disk - add disk information to kernel list
389	* @parent: parent device for the disk
390	* @disk: per-device partitioning information
391	* @groups: Additional per-device sysfs groups
392	*
393	* This function registers the partitioning information in @disk
394	* with the kernel.
395	*/
396	int __must_check device_add_disk(struct device parent, struct* gendisk *disk,
397	const struct attribute_group **groups)
398
399	{
400	struct device *ddev = disk_to_dev(disk);
401	int ret;
402
403	/ Only makes sense for bio-based to set ->poll_bio /
404	if (queue_is_mq(q: disk->queue) && disk->fops->poll_bio)
405	return -EINVAL;
406
407	/*
408	* The disk queue should now be all set with enough information about
409	* the device for the elevator code to pick an adequate default
410	* elevator if one is needed, that is, for devices requesting queue
411	* registration.
412	*/
413	elevator_init_mq(q: disk->queue);
414
415	/ Mark bdev as having a submit_bio, if needed /
416	disk->part0->bd_has_submit_bio = disk->fops->submit_bio != NULL;
417
418	/*
419	* If the driver provides an explicit major number it also must provide
420	* the number of minors numbers supported, and those will be used to
421	* setup the gendisk.
422	* Otherwise just allocate the device numbers for both the whole device
423	* and all partitions from the extended dev_t space.
424	*/
425	ret = -EINVAL;
426	if (disk->major) {
427	if (WARN_ON(!disk->minors))
428	goto out_exit_elevator;
429
430	if (disk->minors > DISK_MAX_PARTS) {
431	pr_err("block: can't allocate more than %d partitions\n",
432	DISK_MAX_PARTS);
433	disk->minors = DISK_MAX_PARTS;
434	}
435	if (disk->first_minor + disk->minors > MINORMASK + `1`)
436	goto out_exit_elevator;
437	} else {
438	if (WARN_ON(disk->minors))
439	goto out_exit_elevator;
440
441	ret = blk_alloc_ext_minor();
442	if (ret < `0`)
443	goto out_exit_elevator;
444	disk->major = BLOCK_EXT_MAJOR;
445	disk->first_minor = ret;
446	}
447
448	/ delay uevents, until we scanned partition table /
449	dev_set_uevent_suppress(dev: ddev, val: `1`);
450
451	ddev->parent = parent;
452	ddev->groups = groups;
453	dev_set_name(dev: ddev, name: "%s", disk->disk_name);
454	if (!(disk->flags & GENHD_FL_HIDDEN))
455	ddev->devt = MKDEV(disk->major, disk->first_minor);
456	ret = device_add(dev: ddev);
457	if (ret)
458	goto out_free_ext_minor;
459
460	ret = disk_alloc_events(disk);
461	if (ret)
462	goto out_device_del;
463
464	ret = sysfs_create_link(kobj: block_depr, target: &ddev->kobj,
465	name: kobject_name(kobj: &ddev->kobj));
466	if (ret)
467	goto out_device_del;
468
469	/*
470	* avoid probable deadlock caused by allocating memory with
471	* GFP_KERNEL in runtime_resume callback of its all ancestor
472	* devices
473	*/
474	pm_runtime_set_memalloc_noio(dev: ddev, enable: true);
475
476	disk->part0->bd_holder_dir =
477	kobject_create_and_add(name: "holders", parent: &ddev->kobj);
478	if (!disk->part0->bd_holder_dir) {
479	ret = -ENOMEM;
480	goto out_del_block_link;
481	}
482	disk->slave_dir = kobject_create_and_add(name: "slaves", parent: &ddev->kobj);
483	if (!disk->slave_dir) {
484	ret = -ENOMEM;
485	goto out_put_holder_dir;
486	}
487
488	ret = blk_register_queue(disk);
489	if (ret)
490	goto out_put_slave_dir;
491
492	if (!(disk->flags & GENHD_FL_HIDDEN)) {
493	ret = bdi_register(bdi: disk->bdi, fmt: "%u:%u",
494	disk->major, disk->first_minor);
495	if (ret)
496	goto out_unregister_queue;
497	bdi_set_owner(bdi: disk->bdi, owner: ddev);
498	ret = sysfs_create_link(kobj: &ddev->kobj,
499	target: &disk->bdi->dev->kobj, name: "bdi");
500	if (ret)
501	goto out_unregister_bdi;
502
503	/ Make sure the first partition scan will be proceed /
504	if (get_capacity(disk) && !(disk->flags & GENHD_FL_NO_PART) &&
505	!test_bit(GD_SUPPRESS_PART_SCAN, &disk->state))
506	set_bit(GD_NEED_PART_SCAN, addr: &disk->state);
507
508	bdev_add(bdev: disk->part0, dev: ddev->devt);
509	if (get_capacity(disk))
510	disk_scan_partitions(disk, BLK_OPEN_READ);
511
512	/*
513	* Announce the disk and partitions after all partitions are
514	* created. (for hidden disks uevents remain suppressed forever)
515	*/
516	dev_set_uevent_suppress(dev: ddev, val: `0`);
517	disk_uevent(disk, KOBJ_ADD);
518	} else {
519	/*
520	* Even if the block_device for a hidden gendisk is not
521	* registered, it needs to have a valid bd_dev so that the
522	* freeing of the dynamic major works.
523	*/
524	disk->part0->bd_dev = MKDEV(disk->major, disk->first_minor);
525	}
526
527	disk_update_readahead(disk);
528	disk_add_events(disk);
529	set_bit(GD_ADDED, addr: &disk->state);
530	return `0`;
531
532	out_unregister_bdi:
533	if (!(disk->flags & GENHD_FL_HIDDEN))
534	bdi_unregister(bdi: disk->bdi);
535	out_unregister_queue:
536	blk_unregister_queue(disk);
537	rq_qos_exit(disk->queue);
538	out_put_slave_dir:
539	kobject_put(kobj: disk->slave_dir);
540	disk->slave_dir = NULL;
541	out_put_holder_dir:
542	kobject_put(kobj: disk->part0->bd_holder_dir);
543	out_del_block_link:
544	sysfs_remove_link(kobj: block_depr, name: dev_name(dev: ddev));
545	out_device_del:
546	device_del(dev: ddev);
547	out_free_ext_minor:
548	if (disk->major == BLOCK_EXT_MAJOR)
549	blk_free_ext_minor(minor: disk->first_minor);
550	out_exit_elevator:
551	if (disk->queue->elevator)
552	elevator_exit(q: disk->queue);
553	return ret;
554	}
555	EXPORT_SYMBOL(device_add_disk);
556
557	static void blk_report_disk_dead(struct gendisk *disk, bool surprise)
558	{
559	struct block_device *bdev;
560	unsigned long idx;
561
562	/*
563	* On surprise disk removal, bdev_mark_dead() may call into file
564	* systems below. Make it clear that we're expecting to not hold
565	* disk->open_mutex.
566	*/
567	lockdep_assert_not_held(&disk->open_mutex);
568
569	rcu_read_lock();
570	xa_for_each(&disk->part_tbl, idx, bdev) {
571	if (!kobject_get_unless_zero(kobj: &bdev->bd_device.kobj))
572	continue;
573	rcu_read_unlock();
574
575	bdev_mark_dead(bdev, surprise);
576
577	put_device(dev: &bdev->bd_device);
578	rcu_read_lock();
579	}
580	rcu_read_unlock();
581	}
582
583	static void __blk_mark_disk_dead(struct gendisk *disk)
584	{
585	/*
586	* Fail any new I/O.
587	*/
588	if (test_and_set_bit(GD_DEAD, addr: &disk->state))
589	return;
590
591	if (test_bit(GD_OWNS_QUEUE, &disk->state))
592	blk_queue_flag_set(QUEUE_FLAG_DYING, q: disk->queue);
593
594	/*
595	* Stop buffered writers from dirtying pages that can't be written out.
596	*/
597	set_capacity(disk, `0`);
598
599	/*
600	* Prevent new I/O from crossing bio_queue_enter().
601	*/
602	blk_queue_start_drain(q: disk->queue);
603	}
604
605	/**
606	* blk_mark_disk_dead - mark a disk as dead
607	* @disk: disk to mark as dead
608	*
609	* Mark as disk as dead (e.g. surprise removed) and don't accept any new I/O
610	* to this disk.
611	*/
612	void blk_mark_disk_dead(struct gendisk *disk)
613	{
614	__blk_mark_disk_dead(disk);
615	blk_report_disk_dead(disk, surprise: true);
616	}
617	EXPORT_SYMBOL_GPL(blk_mark_disk_dead);
618
619	/**
620	* del_gendisk - remove the gendisk
621	* @disk: the struct gendisk to remove
622	*
623	* Removes the gendisk and all its associated resources. This deletes the
624	* partitions associated with the gendisk, and unregisters the associated
625	* request_queue.
626	*
627	* This is the counter to the respective __device_add_disk() call.
628	*
629	* The final removal of the struct gendisk happens when its refcount reaches 0
630	* with put_disk(), which should be called after del_gendisk(), if
631	* __device_add_disk() was used.
632	*
633	* Drivers exist which depend on the release of the gendisk to be synchronous,
634	* it should not be deferred.
635	*
636	* Context: can sleep
637	*/
638	void del_gendisk(struct gendisk *disk)
639	{
640	struct request_queue *q = disk->queue;
641	struct block_device *part;
642	unsigned long idx;
643
644	might_sleep();
645
646	if (WARN_ON_ONCE(!disk_live(disk) && !(disk->flags & GENHD_FL_HIDDEN)))
647	return;
648
649	disk_del_events(disk);
650
651	/*
652	* Prevent new openers by unlinked the bdev inode.
653	*/
654	mutex_lock(&disk->open_mutex);
655	xa_for_each(&disk->part_tbl, idx, part)
656	remove_inode_hash(inode: part->bd_inode);
657	mutex_unlock(lock: &disk->open_mutex);
658
659	/*
660	* Tell the file system to write back all dirty data and shut down if
661	* it hasn't been notified earlier.
662	*/
663	if (!test_bit(GD_DEAD, &disk->state))
664	blk_report_disk_dead(disk, surprise: false);
665	__blk_mark_disk_dead(disk);
666
667	/*
668	* Drop all partitions now that the disk is marked dead.
669	*/
670	mutex_lock(&disk->open_mutex);
671	xa_for_each_start(&disk->part_tbl, idx, part, `1`)
672	drop_partition(part);
673	mutex_unlock(lock: &disk->open_mutex);
674
675	if (!(disk->flags & GENHD_FL_HIDDEN)) {
676	sysfs_remove_link(kobj: &disk_to_dev(disk)->kobj, name: "bdi");
677
678	/*
679	* Unregister bdi before releasing device numbers (as they can
680	* get reused and we'd get clashes in sysfs).
681	*/
682	bdi_unregister(bdi: disk->bdi);
683	}
684
685	blk_unregister_queue(disk);
686
687	kobject_put(kobj: disk->part0->bd_holder_dir);
688	kobject_put(kobj: disk->slave_dir);
689	disk->slave_dir = NULL;
690
691	part_stat_set_all(part: disk->part0, value: `0`);
692	disk->part0->bd_stamp = `0`;
693	sysfs_remove_link(kobj: block_depr, name: dev_name(disk_to_dev(disk)));
694	pm_runtime_set_memalloc_noio(disk_to_dev(disk), enable: false);
695	device_del(disk_to_dev(disk));
696
697	blk_mq_freeze_queue_wait(q);
698
699	blk_throtl_cancel_bios(disk);
700
701	blk_sync_queue(q);
702	blk_flush_integrity();
703
704	if (queue_is_mq(q))
705	blk_mq_cancel_work_sync(q);
706
707	blk_mq_quiesce_queue(q);
708	if (q->elevator) {
709	mutex_lock(&q->sysfs_lock);
710	elevator_exit(q);
711	mutex_unlock(lock: &q->sysfs_lock);
712	}
713	rq_qos_exit(q);
714	blk_mq_unquiesce_queue(q);
715
716	/*
717	* If the disk does not own the queue, allow using passthrough requests
718	* again. Else leave the queue frozen to fail all I/O.
719	*/
720	if (!test_bit(GD_OWNS_QUEUE, &disk->state)) {
721	blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q);
722	__blk_mq_unfreeze_queue(q, force_atomic: true);
723	} else {
724	if (queue_is_mq(q))
725	blk_mq_exit_queue(q);
726	}
727	}
728	EXPORT_SYMBOL(del_gendisk);
729
730	/**
731	* invalidate_disk - invalidate the disk
732	* @disk: the struct gendisk to invalidate
733	*
734	* A helper to invalidates the disk. It will clean the disk's associated
735	* buffer/page caches and reset its internal states so that the disk
736	* can be reused by the drivers.
737	*
738	* Context: can sleep
739	*/
740	void invalidate_disk(struct gendisk *disk)
741	{
742	struct block_device *bdev = disk->part0;
743
744	invalidate_bdev(bdev);
745	bdev->bd_inode->i_mapping->wb_err = `0`;
746	set_capacity(disk, `0`);
747	}
748	EXPORT_SYMBOL(invalidate_disk);
749
750	/ sysfs access to bad-blocks list. /
751	static ssize_t disk_badblocks_show(struct device *dev,
752	struct device_attribute *attr,
753	char *page)
754	{
755	struct gendisk *disk = dev_to_disk(dev);
756
757	if (!disk->bb)
758	return sprintf(buf: page, fmt: "\n");
759
760	return badblocks_show(bb: disk->bb, page, unack: `0`);
761	}
762
763	static ssize_t disk_badblocks_store(struct device *dev,
764	struct device_attribute *attr,
765	const char *page, size_t len)
766	{
767	struct gendisk *disk = dev_to_disk(dev);
768
769	if (!disk->bb)
770	return -ENXIO;
771
772	return badblocks_store(bb: disk->bb, page, len, unack: `0`);
773	}
774
#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
/*
 * Legacy autoload for @devt: if a registered major matching MAJOR(@devt) has
 * a ->probe callback, call it (under major_names_lock) and return.  Otherwise
 * fall back to requesting a module by its block-major alias.
 */
void blk_request_module(dev_t devt)
{
	unsigned int major = MAJOR(devt);
	struct blk_major_name **n;

	mutex_lock(&major_names_lock);
	for (n = &major_names[major_to_index(major)]; *n; n = &(*n)->next) {
		if ((*n)->major == major && (*n)->probe) {
			(*n)->probe(devt);
			mutex_unlock(&major_names_lock);
			return;
		}
	}
	mutex_unlock(&major_names_lock);

	if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0)
		/* Make old-style 2.4 aliases work */
		request_module("block-major-%d", MAJOR(devt));
}
#endif /* CONFIG_BLOCK_LEGACY_AUTOLOAD */
796
797	#ifdef CONFIG_PROC_FS
798	/ iterator /
799	static void disk_seqf_start(struct* seq_file seqf, loff_t pos)
800	{
801	loff_t skip = *pos;
802	struct class_dev_iter *iter;
803	struct device *dev;
804
805	iter = kmalloc(size: sizeof(*iter), GFP_KERNEL);
806	if (!iter)
807	return ERR_PTR(error: -ENOMEM);
808
809	seqf->private = iter;
810	class_dev_iter_init(iter, class: &block_class, NULL, type: &disk_type);
811	do {
812	dev = class_dev_iter_next(iter);
813	if (!dev)
814	return NULL;
815	} while (skip--);
816
817	return dev_to_disk(dev);
818	}
819
820	static void disk_seqf_next(struct* seq_file seqf, void* v, loff_t pos)
821	{
822	struct device *dev;
823
824	(*pos)++;
825	dev = class_dev_iter_next(iter: seqf->private);
826	if (dev)
827	return dev_to_disk(dev);
828
829	return NULL;
830	}
831
832	static void disk_seqf_stop(struct seq_file seqf, void* *v)
833	{
834	struct class_dev_iter *iter = seqf->private;
835
836	/ stop is called even after start failed :-( /
837	if (iter) {
838	class_dev_iter_exit(iter);
839	kfree(objp: iter);
840	seqf->private = NULL;
841	}
842	}
843
844	static void show_partition_start(struct* seq_file seqf, loff_t pos)
845	{
846	void *p;
847
848	p = disk_seqf_start(seqf, pos);
849	if (!IS_ERR_OR_NULL(ptr: p) && !*pos)
850	seq_puts(m: seqf, s: "major minor #blocks name\n\n");
851	return p;
852	}
853
854	static int show_partition(struct seq_file seqf, void* *v)
855	{
856	struct gendisk *sgp = v;
857	struct block_device *part;
858	unsigned long idx;
859
860	if (!get_capacity(disk: sgp) \|\| (sgp->flags & GENHD_FL_HIDDEN))
861	return `0`;
862
863	rcu_read_lock();
864	xa_for_each(&sgp->part_tbl, idx, part) {
865	if (!bdev_nr_sectors(bdev: part))
866	continue;
867	seq_printf(m: seqf, fmt: "%4d %7d %10llu %pg\n",
868	MAJOR(part->bd_dev), MINOR(part->bd_dev),
869	bdev_nr_sectors(bdev: part) >> `1`, part);
870	}
871	rcu_read_unlock();
872	return `0`;
873	}
874
875	static const struct seq_operations partitions_op = {
876	.start = show_partition_start,
877	.next = disk_seqf_next,
878	.stop = disk_seqf_stop,
879	.show = show_partition
880	};
881	#endif
882
883	static int __init genhd_device_init(void)
884	{
885	int error;
886
887	error = class_register(class: &block_class);
888	if (unlikely(error))
889	return error;
890	blk_dev_init();
891
892	register_blkdev(BLOCK_EXT_MAJOR, "blkext");
893
894	/ create top-level block dir /
895	block_depr = kobject_create_and_add(name: "block", NULL);
896	return `0`;
897	}
898
899	subsys_initcall(genhd_device_init);
900
901	static ssize_t disk_range_show(struct device *dev,
902	struct device_attribute attr, char* *buf)
903	{
904	struct gendisk *disk = dev_to_disk(dev);
905
906	return sprintf(buf, fmt: "%d\n", disk->minors);
907	}
908
909	static ssize_t disk_ext_range_show(struct device *dev,
910	struct device_attribute attr, char* *buf)
911	{
912	struct gendisk *disk = dev_to_disk(dev);
913
914	return sprintf(buf, fmt: "%d\n",
915	(disk->flags & GENHD_FL_NO_PART) ? `1` : DISK_MAX_PARTS);
916	}
917
918	static ssize_t disk_removable_show(struct device *dev,
919	struct device_attribute attr, char* *buf)
920	{
921	struct gendisk *disk = dev_to_disk(dev);
922
923	return sprintf(buf, fmt: "%d\n",
924	(disk->flags & GENHD_FL_REMOVABLE ? `1` : `0`));
925	}
926
927	static ssize_t disk_hidden_show(struct device *dev,
928	struct device_attribute attr, char* *buf)
929	{
930	struct gendisk *disk = dev_to_disk(dev);
931
932	return sprintf(buf, fmt: "%d\n",
933	(disk->flags & GENHD_FL_HIDDEN ? `1` : `0`));
934	}
935
936	static ssize_t disk_ro_show(struct device *dev,
937	struct device_attribute attr, char* *buf)
938	{
939	struct gendisk *disk = dev_to_disk(dev);
940
941	return sprintf(buf, fmt: "%d\n", get_disk_ro(disk) ? `1` : `0`);
942	}
943
944	ssize_t part_size_show(struct device *dev,
945	struct device_attribute attr, char* *buf)
946	{
947	return sprintf(buf, fmt: "%llu\n", bdev_nr_sectors(dev_to_bdev(dev)));
948	}
949
950	ssize_t part_stat_show(struct device *dev,
951	struct device_attribute attr, char* *buf)
952	{
953	struct block_device *bdev = dev_to_bdev(dev);
954	struct request_queue *q = bdev_get_queue(bdev);
955	struct disk_stats stat;
956	unsigned int inflight;
957
958	if (queue_is_mq(q))
959	inflight = blk_mq_in_flight(q, part: bdev);
960	else
961	inflight = part_in_flight(part: bdev);
962
963	if (inflight) {
964	part_stat_lock();
965	update_io_ticks(part: bdev, now: jiffies, end: true);
966	part_stat_unlock();
967	}
968	part_stat_read_all(part: bdev, stat: &stat);
969	return sprintf(buf,
970	fmt: "%8lu %8lu %8llu %8u "
971	"%8lu %8lu %8llu %8u "
972	"%8u %8u %8u "
973	"%8lu %8lu %8llu %8u "
974	"%8lu %8u"
975	"\n",
976	stat.ios[STAT_READ],
977	stat.merges[STAT_READ],
978	(unsigned long long)stat.sectors[STAT_READ],
979	(unsigned int)div_u64(dividend: stat.nsecs[STAT_READ], NSEC_PER_MSEC),
980	stat.ios[STAT_WRITE],
981	stat.merges[STAT_WRITE],
982	(unsigned long long)stat.sectors[STAT_WRITE],
983	(unsigned int)div_u64(dividend: stat.nsecs[STAT_WRITE], NSEC_PER_MSEC),
984	inflight,
985	jiffies_to_msecs(j: stat.io_ticks),
986	(unsigned int)div_u64(dividend: stat.nsecs[STAT_READ] +
987	stat.nsecs[STAT_WRITE] +
988	stat.nsecs[STAT_DISCARD] +
989	stat.nsecs[STAT_FLUSH],
990	NSEC_PER_MSEC),
991	stat.ios[STAT_DISCARD],
992	stat.merges[STAT_DISCARD],
993	(unsigned long long)stat.sectors[STAT_DISCARD],
994	(unsigned int)div_u64(dividend: stat.nsecs[STAT_DISCARD], NSEC_PER_MSEC),
995	stat.ios[STAT_FLUSH],
996	(unsigned int)div_u64(dividend: stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC));
997	}
998
999	ssize_t part_inflight_show(struct device dev, struct* device_attribute *attr,
1000	char *buf)
1001	{
1002	struct block_device *bdev = dev_to_bdev(dev);
1003	struct request_queue *q = bdev_get_queue(bdev);
1004	unsigned int inflight[`2`];
1005
1006	if (queue_is_mq(q))
1007	blk_mq_in_flight_rw(q, part: bdev, inflight);
1008	else
1009	part_in_flight_rw(part: bdev, inflight);
1010
1011	return sprintf(buf, fmt: "%8u %8u\n", inflight[`0`], inflight[`1`]);
1012	}
1013
1014	static ssize_t disk_capability_show(struct device *dev,
1015	struct device_attribute attr, char* *buf)
1016	{
1017	dev_warn_once(dev, "the capability attribute has been deprecated.\n");
1018	return sprintf(buf, fmt: "0\n");
1019	}
1020
1021	static ssize_t disk_alignment_offset_show(struct device *dev,
1022	struct device_attribute *attr,
1023	char *buf)
1024	{
1025	struct gendisk *disk = dev_to_disk(dev);
1026
1027	return sprintf(buf, fmt: "%d\n", bdev_alignment_offset(bdev: disk->part0));
1028	}
1029
1030	static ssize_t disk_discard_alignment_show(struct device *dev,
1031	struct device_attribute *attr,
1032	char *buf)
1033	{
1034	struct gendisk *disk = dev_to_disk(dev);
1035
1036	return sprintf(buf, fmt: "%d\n", bdev_alignment_offset(bdev: disk->part0));
1037	}
1038
1039	static ssize_t diskseq_show(struct device *dev,
1040	struct device_attribute attr, char* *buf)
1041	{
1042	struct gendisk *disk = dev_to_disk(dev);
1043
1044	return sprintf(buf, fmt: "%llu\n", disk->diskseq);
1045	}
1046
1047	static DEVICE_ATTR(range, `0444`, disk_range_show, NULL);
1048	static DEVICE_ATTR(ext_range, `0444`, disk_ext_range_show, NULL);
1049	static DEVICE_ATTR(removable, `0444`, disk_removable_show, NULL);
1050	static DEVICE_ATTR(hidden, `0444`, disk_hidden_show, NULL);
1051	static DEVICE_ATTR(ro, `0444`, disk_ro_show, NULL);
1052	static DEVICE_ATTR(size, `0444`, part_size_show, NULL);
1053	static DEVICE_ATTR(alignment_offset, `0444`, disk_alignment_offset_show, NULL);
1054	static DEVICE_ATTR(discard_alignment, `0444`, disk_discard_alignment_show, NULL);
1055	static DEVICE_ATTR(capability, `0444`, disk_capability_show, NULL);
1056	static DEVICE_ATTR(stat, `0444`, part_stat_show, NULL);
1057	static DEVICE_ATTR(inflight, `0444`, part_inflight_show, NULL);
1058	static DEVICE_ATTR(badblocks, `0644`, disk_badblocks_show, disk_badblocks_store);
1059	static DEVICE_ATTR(diskseq, `0444`, diskseq_show, NULL);
1060
1061	#ifdef CONFIG_FAIL_MAKE_REQUEST
1062	ssize_t part_fail_show(struct device *dev,
1063	struct device_attribute attr, char* *buf)
1064	{
1065	return sprintf(buf, fmt: "%d\n", dev_to_bdev(dev)->bd_make_it_fail);
1066	}
1067
1068	ssize_t part_fail_store(struct device *dev,
1069	struct device_attribute *attr,
1070	const char *buf, size_t count)
1071	{
1072	int i;
1073
1074	if (count > `0` && sscanf(buf, "%d", &i) > `0`)
1075	dev_to_bdev(dev)->bd_make_it_fail = i;
1076
1077	return count;
1078	}
1079
1080	static struct device_attribute dev_attr_fail =
1081	__ATTR(make-it-fail, `0644`, part_fail_show, part_fail_store);
1082	#endif /* CONFIG_FAIL_MAKE_REQUEST */
1083
#ifdef CONFIG_FAIL_IO_TIMEOUT
/*
 * The "io-timeout-fail" attribute (mode 0644), served by part_timeout_show()
 * and part_timeout_store().
 */
static struct device_attribute dev_attr_fail_timeout =
	__ATTR(io-timeout-fail, 0644, part_timeout_show, part_timeout_store);
#endif
1088
1089	static struct attribute *disk_attrs[] = {
1090	&dev_attr_range.attr,
1091	&dev_attr_ext_range.attr,
1092	&dev_attr_removable.attr,
1093	&dev_attr_hidden.attr,
1094	&dev_attr_ro.attr,
1095	&dev_attr_size.attr,
1096	&dev_attr_alignment_offset.attr,
1097	&dev_attr_discard_alignment.attr,
1098	&dev_attr_capability.attr,
1099	&dev_attr_stat.attr,
1100	&dev_attr_inflight.attr,
1101	&dev_attr_badblocks.attr,
1102	&dev_attr_events.attr,
1103	&dev_attr_events_async.attr,
1104	&dev_attr_events_poll_msecs.attr,
1105	&dev_attr_diskseq.attr,
1106	#ifdef CONFIG_FAIL_MAKE_REQUEST
1107	&dev_attr_fail.attr,
1108	#endif
1109	#ifdef CONFIG_FAIL_IO_TIMEOUT
1110	&dev_attr_fail_timeout.attr,
1111	#endif
1112	NULL
1113	};
1114
1115	static umode_t disk_visible(struct kobject kobj, struct* attribute a, int* n)
1116	{
1117	struct device dev = container_of(kobj, typeof(dev), kobj);
1118	struct gendisk *disk = dev_to_disk(dev);
1119
1120	if (a == &dev_attr_badblocks.attr && !disk->bb)
1121	return `0`;
1122	return a->mode;
1123	}
1124
1125	static struct attribute_group disk_attr_group = {
1126	.attrs = disk_attrs,
1127	.is_visible = disk_visible,
1128	};
1129
1130	static const struct attribute_group *disk_attr_groups[] = {
1131	&disk_attr_group,
1132	#ifdef CONFIG_BLK_DEV_IO_TRACE
1133	&blk_trace_attr_group,
1134	#endif
1135	#ifdef CONFIG_BLK_DEV_INTEGRITY
1136	&blk_integrity_attr_group,
1137	#endif
1138	NULL
1139	};
1140
1141	/**
1142	* disk_release - releases all allocated resources of the gendisk
1143	* @dev: the device representing this disk
1144	*
1145	* This function releases all allocated resources of the gendisk.
1146	*
1147	* Drivers which used __device_add_disk() have a gendisk with a request_queue
1148	* assigned. Since the request_queue sits on top of the gendisk for these
1149	* drivers we also call blk_put_queue() for them, and we expect the
1150	* request_queue refcount to reach 0 at this point, and so the request_queue
1151	* will also be freed prior to the disk.
1152	*
1153	* Context: can sleep
1154	*/
1155	static void disk_release(struct device *dev)
1156	{
1157	struct gendisk *disk = dev_to_disk(dev);
1158
1159	might_sleep();
1160	WARN_ON_ONCE(disk_live(disk));
1161
1162	blk_trace_remove(q: disk->queue);
1163
1164	/*
1165	* To undo the all initialization from blk_mq_init_allocated_queue in
1166	* case of a probe failure where add_disk is never called we have to
1167	* call blk_mq_exit_queue here. We can't do this for the more common
1168	* teardown case (yet) as the tagset can be gone by the time the disk
1169	* is released once it was added.
1170	*/
1171	if (queue_is_mq(q: disk->queue) &&
1172	test_bit(GD_OWNS_QUEUE, &disk->state) &&
1173	!test_bit(GD_ADDED, &disk->state))
1174	blk_mq_exit_queue(q: disk->queue);
1175
1176	blkcg_exit_disk(disk);
1177
1178	bioset_exit(&disk->bio_split);
1179
1180	disk_release_events(disk);
1181	kfree(objp: disk->random);
1182	disk_free_zone_bitmaps(disk);
1183	xa_destroy(&disk->part_tbl);
1184
1185	disk->queue->disk = NULL;
1186	blk_put_queue(disk->queue);
1187
1188	if (test_bit(GD_ADDED, &disk->state) && disk->fops->free_disk)
1189	disk->fops->free_disk(disk);
1190
1191	iput(disk->part0->bd_inode); / frees the disk /
1192	}
1193
1194	static int block_uevent(const struct device dev, struct* kobj_uevent_env *env)
1195	{
1196	const struct gendisk *disk = dev_to_disk(dev);
1197
1198	return add_uevent_var(env, format: "DISKSEQ=%llu", disk->diskseq);
1199	}
1200
1201	struct class block_class = {
1202	.name = "block",
1203	.dev_uevent = block_uevent,
1204	};
1205
1206	static char block_devnode(const* struct device dev, umode_t mode,
1207	kuid_t uid, kgid_t gid)
1208	{
1209	struct gendisk *disk = dev_to_disk(dev);
1210
1211	if (disk->fops->devnode)
1212	return disk->fops->devnode(disk, mode);
1213	return NULL;
1214	}
1215
1216	const struct device_type disk_type = {
1217	.name = "disk",
1218	.groups = disk_attr_groups,
1219	.release = disk_release,
1220	.devnode = block_devnode,
1221	};
1222
1223	#ifdef CONFIG_PROC_FS
1224	/*
1225	* aggregate disk stat collector. Uses the same stats that the sysfs
1226	* entries do, above, but makes them available through one seq_file.
1227	*
1228	* The output looks suspiciously like /proc/partitions with a bunch of
1229	* extra fields.
1230	*/
1231	static int diskstats_show(struct seq_file seqf, void* *v)
1232	{
1233	struct gendisk *gp = v;
1234	struct block_device *hd;
1235	unsigned int inflight;
1236	struct disk_stats stat;
1237	unsigned long idx;
1238
1239	/*
1240	if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
1241	seq_puts(seqf, "major minor name"
1242	" rio rmerge rsect ruse wio wmerge "
1243	"wsect wuse running use aveq"
1244	"\n\n");
1245	*/
1246
1247	rcu_read_lock();
1248	xa_for_each(&gp->part_tbl, idx, hd) {
1249	if (bdev_is_partition(bdev: hd) && !bdev_nr_sectors(bdev: hd))
1250	continue;
1251	if (queue_is_mq(q: gp->queue))
1252	inflight = blk_mq_in_flight(q: gp->queue, part: hd);
1253	else
1254	inflight = part_in_flight(part: hd);
1255
1256	if (inflight) {
1257	part_stat_lock();
1258	update_io_ticks(part: hd, now: jiffies, end: true);
1259	part_stat_unlock();
1260	}
1261	part_stat_read_all(part: hd, stat: &stat);
1262	seq_printf(m: seqf, fmt: "%4d %7d %pg "
1263	"%lu %lu %lu %u "
1264	"%lu %lu %lu %u "
1265	"%u %u %u "
1266	"%lu %lu %lu %u "
1267	"%lu %u"
1268	"\n",
1269	MAJOR(hd->bd_dev), MINOR(hd->bd_dev), hd,
1270	stat.ios[STAT_READ],
1271	stat.merges[STAT_READ],
1272	stat.sectors[STAT_READ],
1273	(unsigned int)div_u64(dividend: stat.nsecs[STAT_READ],
1274	NSEC_PER_MSEC),
1275	stat.ios[STAT_WRITE],
1276	stat.merges[STAT_WRITE],
1277	stat.sectors[STAT_WRITE],
1278	(unsigned int)div_u64(dividend: stat.nsecs[STAT_WRITE],
1279	NSEC_PER_MSEC),
1280	inflight,
1281	jiffies_to_msecs(j: stat.io_ticks),
1282	(unsigned int)div_u64(dividend: stat.nsecs[STAT_READ] +
1283	stat.nsecs[STAT_WRITE] +
1284	stat.nsecs[STAT_DISCARD] +
1285	stat.nsecs[STAT_FLUSH],
1286	NSEC_PER_MSEC),
1287	stat.ios[STAT_DISCARD],
1288	stat.merges[STAT_DISCARD],
1289	stat.sectors[STAT_DISCARD],
1290	(unsigned int)div_u64(dividend: stat.nsecs[STAT_DISCARD],
1291	NSEC_PER_MSEC),
1292	stat.ios[STAT_FLUSH],
1293	(unsigned int)div_u64(dividend: stat.nsecs[STAT_FLUSH],
1294	NSEC_PER_MSEC)
1295	);
1296	}
1297	rcu_read_unlock();
1298
1299	return `0`;
1300	}
1301
1302	static const struct seq_operations diskstats_op = {
1303	.start = disk_seqf_start,
1304	.next = disk_seqf_next,
1305	.stop = disk_seqf_stop,
1306	.show = diskstats_show
1307	};
1308
1309	static int __init proc_genhd_init(void)
1310	{
1311	proc_create_seq("diskstats", `0`, NULL, &diskstats_op);
1312	proc_create_seq("partitions", `0`, NULL, &partitions_op);
1313	return `0`;
1314	}
1315	module_init(proc_genhd_init);
1316	#endif /* CONFIG_PROC_FS */
1317
1318	dev_t part_devt(struct gendisk *disk, u8 partno)
1319	{
1320	struct block_device *part;
1321	dev_t devt = `0`;
1322
1323	rcu_read_lock();
1324	part = xa_load(&disk->part_tbl, index: partno);
1325	if (part)
1326	devt = part->bd_dev;
1327	rcu_read_unlock();
1328
1329	return devt;
1330	}
1331
1332	struct gendisk __alloc_disk_node(struct* request_queue q, int* node_id,
1333	struct lock_class_key *lkclass)
1334	{
1335	struct gendisk *disk;
1336
1337	disk = kzalloc_node(size: sizeof(struct gendisk), GFP_KERNEL, node: node_id);
1338	if (!disk)
1339	return NULL;
1340
1341	if (bioset_init(&disk->bio_split, BIO_POOL_SIZE, `0`, flags: `0`))
1342	goto out_free_disk;
1343
1344	disk->bdi = bdi_alloc(node_id);
1345	if (!disk->bdi)
1346	goto out_free_bioset;
1347
1348	/ bdev_alloc() might need the queue, set before the first call /
1349	disk->queue = q;
1350
1351	disk->part0 = bdev_alloc(disk, partno: `0`);
1352	if (!disk->part0)
1353	goto out_free_bdi;
1354
1355	disk->node_id = node_id;
1356	mutex_init(&disk->open_mutex);
1357	xa_init(xa: &disk->part_tbl);
1358	if (xa_insert(xa: &disk->part_tbl, index: `0`, entry: disk->part0, GFP_KERNEL))
1359	goto out_destroy_part_tbl;
1360
1361	if (blkcg_init_disk(disk))
1362	goto out_erase_part0;
1363
1364	rand_initialize_disk(disk);
1365	disk_to_dev(disk)->class = &block_class;
1366	disk_to_dev(disk)->type = &disk_type;
1367	device_initialize(disk_to_dev(disk));
1368	inc_diskseq(disk);
1369	q->disk = disk;
1370	lockdep_init_map(lock: &disk->lockdep_map, name: "(bio completion)", key: lkclass, subclass: `0`);
1371	#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
1372	INIT_LIST_HEAD(list: &disk->slave_bdevs);
1373	#endif
1374	return disk;
1375
1376	out_erase_part0:
1377	xa_erase(&disk->part_tbl, index: `0`);
1378	out_destroy_part_tbl:
1379	xa_destroy(&disk->part_tbl);
1380	disk->part0->bd_disk = NULL;
1381	iput(disk->part0->bd_inode);
1382	out_free_bdi:
1383	bdi_put(bdi: disk->bdi);
1384	out_free_bioset:
1385	bioset_exit(&disk->bio_split);
1386	out_free_disk:
1387	kfree(objp: disk);
1388	return NULL;
1389	}
1390
1391	struct gendisk __blk_alloc_disk(int* node, struct lock_class_key *lkclass)
1392	{
1393	struct request_queue *q;
1394	struct gendisk *disk;
1395
1396	q = blk_alloc_queue(node_id: node);
1397	if (!q)
1398	return NULL;
1399
1400	disk = __alloc_disk_node(q, node_id: node, lkclass);
1401	if (!disk) {
1402	blk_put_queue(q);
1403	return NULL;
1404	}
1405	set_bit(GD_OWNS_QUEUE, addr: &disk->state);
1406	return disk;
1407	}
1408	EXPORT_SYMBOL(__blk_alloc_disk);
1409
/**
 * put_disk - decrements the gendisk refcount
 * @disk: the struct gendisk to decrement the refcount for
 *
 * This decrements the refcount for the struct gendisk. When this reaches 0
 * we'll have disk_release() called.
 *
 * Note: for blk-mq disk put_disk must be called before freeing the tag_set
 * when handling probe errors (that is before add_disk() is called).
 *
 * Passing a NULL @disk is allowed and does nothing.
 *
 * Context: Any context, but the last reference must not be dropped from
 *          atomic context.
 */
void put_disk(struct gendisk *disk)
{
	if (disk)
		put_device(disk_to_dev(disk));
}
EXPORT_SYMBOL(put_disk);
1429
1430	static void set_disk_ro_uevent(struct gendisk gd, int* ro)
1431	{
1432	char event[] = "DISK_RO=1";
1433	char *envp[] = { event, NULL };
1434
1435	if (!ro)
1436	event[`8`] = `'0'`;
1437	kobject_uevent_env(kobj: &disk_to_dev(gd)->kobj, action: KOBJ_CHANGE, envp);
1438	}
1439
1440	/**
1441	* set_disk_ro - set a gendisk read-only
1442	* @disk: gendisk to operate on
1443	* @read_only: %true to set the disk read-only, %false set the disk read/write
1444	*
1445	* This function is used to indicate whether a given disk device should have its
1446	* read-only flag set. set_disk_ro() is typically used by device drivers to
1447	* indicate whether the underlying physical device is write-protected.
1448	*/
1449	void set_disk_ro(struct gendisk *disk, bool read_only)
1450	{
1451	if (read_only) {
1452	if (test_and_set_bit(GD_READ_ONLY, addr: &disk->state))
1453	return;
1454	} else {
1455	if (!test_and_clear_bit(GD_READ_ONLY, addr: &disk->state))
1456	return;
1457	}
1458	set_disk_ro_uevent(gd: disk, ro: read_only);
1459	}
1460	EXPORT_SYMBOL(set_disk_ro);
1461
1462	void inc_diskseq(struct gendisk *disk)
1463	{
1464	disk->diskseq = atomic64_inc_return(v: &diskseq);
1465	}
1466

source code of linux/block/genhd.c