md.h source code [linux/drivers/md/md.h]

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
   md.h : kernel internal structure of the Linux MD driver
          Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman

*/
7
8	#ifndef _MD_MD_H
9	#define _MD_MD_H
10
11	#include <linux/blkdev.h>
12	#include <linux/backing-dev.h>
13	#include <linux/badblocks.h>
14	#include <linux/kobject.h>
15	#include <linux/list.h>
16	#include <linux/mm.h>
17	#include <linux/mutex.h>
18	#include <linux/timer.h>
19	#include <linux/wait.h>
20	#include <linux/workqueue.h>
21	#include "md-cluster.h"
22
23	#define MaxSector (~(sector_t)0)
24
/*
 * These flags should really be called "NO_RETRY" rather than
 * "FAILFAST" because they don't make any promise about time lapse,
 * only about the number of retries, which will be zero.
 * REQ_FAILFAST_DRIVER is not included because
 * Commit: 4a27446f3e39 ("[SCSI] modify scsi to handle new fail fast flags.")
 * seems to suggest that the errors it avoids retrying should usually
 * be retried.
 */
#define	MD_FAILFAST	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT)
35
36	/*
37	* The struct embedded in rdev is used to serialize IO.
38	*/
39	struct serial_in_rdev {
40	struct rb_root_cached serial_rb;
41	spinlock_t serial_lock;
42	wait_queue_head_t serial_io_wait;
43	};
44
45	/*
46	* MD's 'extended' device
47	*/
48	struct md_rdev {
49	struct list_head same_set; / RAID devices within the same set /
50
51	sector_t sectors; / Device size (in 512bytes sectors) /
52	struct mddev mddev; /* RAID array if running /
53	int last_events; / IO event timestamp /
54
55	/*
56	* If meta_bdev is non-NULL, it means that a separate device is
57	* being used to store the metadata (superblock/bitmap) which
58	* would otherwise be contained on the same device as the data (bdev).
59	*/
60	struct block_device *meta_bdev;
61	struct block_device bdev; /* block device handle /
62	struct bdev_handle bdev_handle; /* Handle from open for bdev /
63
64	struct page sb_page, bb_page;
65	int sb_loaded;
66	__u64 sb_events;
67	sector_t data_offset; / start of data in array /
68	sector_t new_data_offset;/ only relevant while reshaping /
69	sector_t sb_start; / offset of the super block (in 512byte sectors) /
70	int sb_size; / bytes in the superblock /
71	int preferred_minor; / autorun support /
72
73	struct kobject kobj;
74
75	/ A device can be in one of three states based on two flags:*
76	* Not working: faulty==1 in_sync==0
77	* Fully working: faulty==0 in_sync==1
78	* Working, but not
79	* in sync with array
80	* faulty==0 in_sync==0
81	*
82	* It can never have faulty==1, in_sync==1
83	* This reduces the burden of testing multiple flags in many cases
84	*/
85
86	unsigned long flags; / bit set of 'enum flag_bits' bits. /
87	wait_queue_head_t blocked_wait;
88
89	int desc_nr; / descriptor index in the superblock /
90	int raid_disk; / role of device in array /
91	int new_raid_disk; / role that the device will have in*
92	* the array after a level-change completes.
93	*/
94	int saved_raid_disk; / role that device used to have in the*
95	* array and could again if we did a partial
96	* resync from the bitmap
97	*/
98	union {
99	sector_t recovery_offset;/ If this device has been partially*
100	* recovered, this is where we were
101	* up to.
102	*/
103	sector_t journal_tail; / If this device is a journal device,*
104	* this is the journal tail (journal
105	* recovery start point)
106	*/
107	};
108
109	atomic_t nr_pending; / number of pending requests.*
110	* only maintained for arrays that
111	* support hot removal
112	*/
113	atomic_t read_errors; / number of consecutive read errors that*
114	* we have tried to ignore.
115	*/
116	time64_t last_read_error; / monotonic time since our*
117	* last read error
118	*/
119	atomic_t corrected_errors; / number of corrected read errors,*
120	* for reporting to userspace and storing
121	* in superblock.
122	*/
123
124	struct serial_in_rdev serial; /* used for raid1 io serialization /
125
126	struct kernfs_node sysfs_state; /* handle for 'state'*
127	* sysfs entry */
128	/ handle for 'unacknowledged_bad_blocks' sysfs dentry /
129	struct kernfs_node *sysfs_unack_badblocks;
130	/ handle for 'bad_blocks' sysfs dentry /
131	struct kernfs_node *sysfs_badblocks;
132	struct badblocks badblocks;
133
134	struct {
135	short offset; / Offset from superblock to start of PPL.*
136	* Not used by external metadata. */
137	unsigned int size; / Size in sectors of the PPL space /
138	sector_t sector; / First sector of the PPL space /
139	} ppl;
140	};
/* Bit numbers for md_rdev.flags. */
enum flag_bits {
	Faulty,			/* device is known to have a fault */
	In_sync,		/* device is in_sync with rest of array */
	Bitmap_sync,		/* ..actually, not quite In_sync.  Need a
				 * bitmap-based recovery to get fully in sync.
				 * The bit is only meaningful before device
				 * has been passed to pers->hot_add_disk.
				 */
	WriteMostly,		/* Avoid reading if at all possible */
	AutoDetected,		/* added by auto-detect */
	Blocked,		/* An error occurred but has not yet
				 * been acknowledged by the metadata
				 * handler, so don't allow writes
				 * until it is cleared */
	WriteErrorSeen,		/* A write error has been seen on this
				 * device
				 */
	FaultRecorded,		/* Intermediate state for clearing
				 * Blocked.  The Fault is/will-be
				 * recorded in the metadata, but that
				 * metadata hasn't been stored safely
				 * on disk yet.
				 */
	BlockedBadBlocks,	/* A writer is blocked because they
				 * found an unacknowledged bad-block.
				 * This can safely be cleared at any
				 * time, and the writer will re-check.
				 * It may be set at any time, and at
				 * worst the writer will timeout and
				 * re-check.  So setting it as
				 * accurately as possible is good, but
				 * not absolutely critical.
				 */
	WantReplacement,	/* This device is a candidate to be
				 * hot-replaced, either because it has
				 * reported some faults, or because
				 * of explicit request.
				 */
	Replacement,		/* This device is a replacement for
				 * a want_replacement device with same
				 * raid_disk number.
				 */
	Candidate,		/* For clustered environments only:
				 * This device is seen locally but not
				 * by the whole cluster
				 */
	Journal,		/* This device is used as journal for
				 * raid-5/6.
				 * Usually, this device should be faster
				 * than other devices in the array
				 */
	ClusterRemove,
	RemoveSynchronized,	/* synchronize_rcu() was called after
				 * this device was known to be faulty,
				 * so it is safe to remove without
				 * another synchronize_rcu() call.
				 */
	ExternalBbl,		/* External metadata provides bad
				 * block management for a disk
				 */
	FailFast,		/* Minimal retries should be attempted on
				 * this device, so use REQ_FAILFAST_DEV.
				 * Also don't try to repair failed reads.
				 * It is expected that no bad block log
				 * is present.
				 */
	LastDev,		/* Seems to be the last working dev as
				 * it didn't fail, so don't use FailFast
				 * any more for metadata
				 */
	CollisionCheck,		/*
				 * check if there is collision between raid1
				 * serial bios.
				 */
};
216
217	static inline int is_badblock(struct md_rdev rdev, sector_t s, int* sectors,
218	sector_t first_bad, int* *bad_sectors)
219	{
220	if (unlikely(rdev->badblocks.count)) {
221	int rv = badblocks_check(bb: &rdev->badblocks, s: rdev->data_offset + s,
222	sectors,
223	first_bad, bad_sectors);
224	if (rv)
225	*first_bad -= rdev->data_offset;
226	return rv;
227	}
228	return `0`;
229	}
230	extern int rdev_set_badblocks(struct md_rdev rdev, sector_t s, int* sectors,
231	int is_new);
232	extern int rdev_clear_badblocks(struct md_rdev rdev, sector_t s, int* sectors,
233	int is_new);
234	struct md_cluster_info;
235
/**
 * enum mddev_flags - md device flags.
 * @MD_ARRAY_FIRST_USE: First use of array, needs initialization.
 * @MD_CLOSING: If set, we are closing the array, do not open it then.
 * @MD_JOURNAL_CLEAN: A raid with journal is already clean.
 * @MD_HAS_JOURNAL: The raid array has journal feature set.
 * @MD_CLUSTER_RESYNC_LOCKED: cluster raid only, which means node, already took
 *			       resync lock, need to release the lock.
 * @MD_FAILFAST_SUPPORTED: Using MD_FAILFAST on metadata writes is supported as
 *			    calls to md_error() will never cause the array to
 *			    become failed.
 * @MD_HAS_PPL:  The raid array has PPL feature set.
 * @MD_HAS_MULTIPLE_PPLS: The raid array has multiple PPLs feature set.
 * @MD_NOT_READY: do_md_run() is active, so 'array_state', must not report that
 *		   array is ready yet.
 * @MD_BROKEN: This is used to stop writes and mark array as failed.
 * @MD_DELETED: This device is being deleted
 *
 * change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added
 */
enum mddev_flags {
	MD_ARRAY_FIRST_USE,
	MD_CLOSING,
	MD_JOURNAL_CLEAN,
	MD_HAS_JOURNAL,
	MD_CLUSTER_RESYNC_LOCKED,
	MD_FAILFAST_SUPPORTED,
	MD_HAS_PPL,
	MD_HAS_MULTIPLE_PPLS,
	MD_NOT_READY,
	MD_BROKEN,
	MD_DELETED,
};
269
/* Superblock-update flag bits (struct mddev has a matching 'sb_flags' word). */
enum mddev_sb_flags {
	MD_SB_CHANGE_DEVS,		/* Some device status has changed */
	MD_SB_CHANGE_CLEAN,	/* transition to or from 'clean' */
	MD_SB_CHANGE_PENDING,	/* switch from 'clean' to 'active' in progress */
	MD_SB_NEED_REWRITE,	/* metadata write needs to be repeated */
};
276
277	#define NR_SERIAL_INFOS 8
278	/ record current range of serialize IOs /
279	struct serial_info {
280	struct rb_node node;
281	sector_t start; / start sector of rb node /
282	sector_t last; / end sector of rb node /
283	sector_t _subtree_last; / highest sector in subtree of rb node /
284	};
285
/*
 * mddev->curr_resync stores the current sector of the resync but
 * also has some overloaded values.
 */
enum {
	/* No resync in progress */
	MD_RESYNC_NONE = 0,
	/* Yielded to allow another conflicting resync to commence */
	MD_RESYNC_YIELDED = 1,
	/* Delayed to check that there is no conflict with another sync */
	MD_RESYNC_DELAYED = 2,
	/* Any value greater than or equal to this is in an active resync */
	MD_RESYNC_ACTIVE = 3,
};
300
301	struct mddev {
302	void *private;
303	struct md_personality *pers;
304	dev_t unit;
305	int md_minor;
306	struct list_head disks;
307	unsigned long flags;
308	unsigned long sb_flags;
309
310	int suspended;
311	struct mutex suspend_mutex;
312	struct percpu_ref active_io;
313	int ro;
314	int sysfs_active; / set when sysfs deletes*
315	* are happening, so run/
316	* takeover/stop are not safe
317	*/
318	struct gendisk *gendisk;
319
320	struct kobject kobj;
321	int hold_active;
322	#define UNTIL_IOCTL 1
323	#define UNTIL_STOP 2
324
325	/ Superblock information /
326	int major_version,
327	minor_version,
328	patch_version;
329	int persistent;
330	int external; / metadata is*
331	* managed externally */
332	char metadata_type[`17`]; / externally set/
333	int chunk_sectors;
334	time64_t ctime, utime;
335	int level, layout;
336	char clevel[`16`];
337	int raid_disks;
338	int max_disks;
339	sector_t dev_sectors; / used size of*
340	* component devices */
341	sector_t array_sectors; / exported array size /
342	int external_size; / size managed*
343	* externally */
344	__u64 events;
345	/ If the last 'event' was simply a clean->dirty transition, and*
346	* we didn't write it to the spares, then it is safe and simple
347	* to just decrement the event count on a dirty->clean transition.
348	* So we record that possibility here.
349	*/
350	int can_decrease_events;
351
352	char uuid[`16`];
353
354	/ If the array is being reshaped, we need to record the*
355	* new shape and an indication of where we are up to.
356	* This is written to the superblock.
357	* If reshape_position is MaxSector, then no reshape is happening (yet).
358	*/
359	sector_t reshape_position;
360	int delta_disks, new_level, new_layout;
361	int new_chunk_sectors;
362	int reshape_backwards;
363
364	struct md_thread __rcu thread; /* management thread /
365	struct md_thread __rcu sync_thread; /* doing resync or reconstruct /
366
367	/ 'last_sync_action' is initialized to "none". It is set when a*
368	* sync operation (i.e "data-check", "requested-resync", "resync",
369	* "recovery", or "reshape") is started. It holds this value even
370	* when the sync thread is "frozen" (interrupted) or "idle" (stopped
371	* or finished). It is overwritten when a new sync operation is begun.
372	*/
373	char *last_sync_action;
374	sector_t curr_resync; / last block scheduled /
375	/ As resync requests can complete out of order, we cannot easily track*
376	* how much resync has been completed. So we occasionally pause until
377	* everything completes, then set curr_resync_completed to curr_resync.
378	* As such it may be well behind the real resync mark, but it is a value
379	* we are certain of.
380	*/
381	sector_t curr_resync_completed;
382	unsigned long resync_mark; / a recent timestamp /
383	sector_t resync_mark_cnt;/ blocks written at resync_mark /
384	sector_t curr_mark_cnt; / blocks scheduled now /
385
386	sector_t resync_max_sectors; / may be set by personality /
387
388	atomic64_t resync_mismatches; / count of sectors where*
389	* parity/replica mismatch found
390	*/
391
392	/ allow user-space to request suspension of IO to regions of the array /
393	sector_t suspend_lo;
394	sector_t suspend_hi;
395	/ if zero, use the system-wide default /
396	int sync_speed_min;
397	int sync_speed_max;
398
399	/ resync even though the same disks are shared among md-devices /
400	int parallel_resync;
401
402	int ok_start_degraded;
403
404	unsigned long recovery;
405	/ If a RAID personality determines that recovery (of a particular*
406	* device) will fail due to a read error on the source device, it
407	* takes a copy of this number and does not attempt recovery again
408	* until this number changes.
409	*/
410	int recovery_disabled;
411
412	int in_sync; / know to not need resync /
413	/ 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so*
414	* that we are never stopping an array while it is open.
415	* 'reconfig_mutex' protects all other reconfiguration.
416	* These locks are separate due to conflicting interactions
417	* with disk->open_mutex.
418	* Lock ordering is:
419	* reconfig_mutex -> disk->open_mutex
420	* disk->open_mutex -> open_mutex: e.g. __blkdev_get -> md_open
421	*/
422	struct mutex open_mutex;
423	struct mutex reconfig_mutex;
424	atomic_t active; / general refcount /
425	atomic_t openers; / number of active opens /
426
427	int changed; / True if we might need to*
428	* reread partition info */
429	int degraded; / whether md should consider*
430	* adding a spare
431	*/
432
433	atomic_t recovery_active; / blocks scheduled, but not written /
434	wait_queue_head_t recovery_wait;
435	sector_t recovery_cp;
436	sector_t resync_min; / user requested sync*
437	* starts here */
438	sector_t resync_max; / resync should pause*
439	* when it gets here */
440
441	struct kernfs_node sysfs_state; /* handle for 'array_state'*
442	* file in sysfs.
443	*/
444	struct kernfs_node sysfs_action; /* handle for 'sync_action' /
445	struct kernfs_node sysfs_completed; /handle for 'sync_completed' /*
446	struct kernfs_node sysfs_degraded; /handle for 'degraded' /*
447	struct kernfs_node sysfs_level; /handle for 'level' /*
448
449	/ used for delayed sysfs removal /
450	struct work_struct del_work;
451	/ used for register new sync thread /
452	struct work_struct sync_work;
453
454	/ "lock" protects:*
455	* flush_bio transition from NULL to !NULL
456	* rdev superblocks, events
457	* clearing MD_CHANGE_*
458	* in_sync - and related safemode and MD_CHANGE changes
459	* pers (also protected by reconfig_mutex and pending IO).
460	* clearing ->bitmap
461	* clearing ->bitmap_info.file
462	* changing ->resync_{min,max}
463	* setting MD_RECOVERY_RUNNING (which interacts with resync_{min,max})
464	*/
465	spinlock_t lock;
466	wait_queue_head_t sb_wait; / for waiting on superblock updates /
467	atomic_t pending_writes; / number of active superblock writes /
468
469	unsigned int safemode; / if set, update "clean" superblock*
470	* when no writes pending.
471	*/
472	unsigned int safemode_delay;
473	struct timer_list safemode_timer;
474	struct percpu_ref writes_pending;
475	int sync_checkers; / # of threads checking writes_pending /
476	struct request_queue queue; /* for plugging ... /
477
478	struct bitmap bitmap; /* the bitmap for the device /
479	struct {
480	struct file file; /* the bitmap file /
481	loff_t offset; / offset from superblock of*
482	* start of bitmap. May be
483	* negative, but not '0'
484	* For external metadata, offset
485	* from start of device.
486	*/
487	unsigned long space; / space available at this offset /
488	loff_t default_offset; / this is the offset to use when*
489	* hot-adding a bitmap. It should
490	* eventually be settable by sysfs.
491	*/
492	unsigned long default_space; / space available at*
493	* default offset */
494	struct mutex mutex;
495	unsigned long chunksize;
496	unsigned long daemon_sleep; / how many jiffies between updates? /
497	unsigned long max_write_behind; / write-behind mode /
498	int external;
499	int nodes; / Maximum number of nodes in the cluster /
500	char cluster_name[`64`]; / Name of the cluster /
501	} bitmap_info;
502
503	atomic_t max_corr_read_errors; / max read retries /
504	struct list_head all_mddevs;
505
506	const struct attribute_group *to_remove;
507
508	struct bio_set bio_set;
509	struct bio_set sync_set; / for sync operations like*
510	* metadata and bitmap writes
511	*/
512	struct bio_set io_clone_set;
513
514	/ Generic flush handling.*
515	* The last to finish preflush schedules a worker to submit
516	* the rest of the request (without the REQ_PREFLUSH flag).
517	*/
518	struct bio *flush_bio;
519	atomic_t flush_pending;
520	ktime_t start_flush, prev_flush_start; / prev_flush_start is when the previous completed*
521	* flush was started.
522	*/
523	struct work_struct flush_work;
524	struct work_struct event_work; / used by dm to report failure event /
525	mempool_t *serial_info_pool;
526	void (sync_super)(struct* mddev mddev, struct* md_rdev *rdev);
527	struct md_cluster_info *cluster_info;
528	unsigned int good_device_nr; / good device num within cluster raid /
529	unsigned int noio_flag; / for memalloc scope API /
530
531	/*
532	* Temporarily store rdev that will be finally removed when
533	* reconfig_mutex is unlocked, protected by reconfig_mutex.
534	*/
535	struct list_head deleting;
536
537	/ Used to synchronize idle and frozen for action_store() /
538	struct mutex sync_mutex;
539	/ The sequence number for sync thread /
540	atomic_t sync_seq;
541
542	bool has_superblocks:`1`;
543	bool fail_last_dev:`1`;
544	bool serialize_policy:`1`;
545	};
546
/* Bit numbers for mddev->recovery. */
enum recovery_flags {
	/*
	 * If neither SYNC or RESHAPE are set, then it is a recovery.
	 */
	MD_RECOVERY_RUNNING,	/* a thread is running, or about to be started */
	MD_RECOVERY_SYNC,	/* actually doing a resync, not a recovery */
	MD_RECOVERY_RECOVER,	/* doing recovery, or need to try it. */
	MD_RECOVERY_INTR,	/* resync needs to be aborted for some reason */
	MD_RECOVERY_DONE,	/* thread is done and is waiting to be reaped */
	MD_RECOVERY_NEEDED,	/* we might need to start a resync/recover */
	MD_RECOVERY_REQUESTED,	/* user-space has requested a sync (used with SYNC) */
	MD_RECOVERY_CHECK,	/* user-space request for check-only, no repair */
	MD_RECOVERY_RESHAPE,	/* A reshape is happening */
	MD_RECOVERY_FROZEN,	/* User request to abort, and not restart, any action */
	MD_RECOVERY_ERROR,	/* sync-action interrupted because io-error */
	MD_RECOVERY_WAIT,	/* waiting for pers->start() to finish */
	MD_RESYNCING_REMOTE,	/* remote node is running resync thread */
};
565
566	static inline int __must_check mddev_lock(struct mddev *mddev)
567	{
568	return mutex_lock_interruptible(&mddev->reconfig_mutex);
569	}
570
571	/ Sometimes we need to take the lock in a situation where*
572	* failure due to interrupts is not acceptable.
573	*/
574	static inline void mddev_lock_nointr(struct mddev *mddev)
575	{
576	mutex_lock(&mddev->reconfig_mutex);
577	}
578
579	static inline int mddev_trylock(struct mddev *mddev)
580	{
581	return mutex_trylock(lock: &mddev->reconfig_mutex);
582	}
583	extern void mddev_unlock(struct mddev *mddev);
584
585	static inline void md_sync_acct(struct block_device bdev, unsigned* long nr_sectors)
586	{
587	atomic_add(i: nr_sectors, v: &bdev->bd_disk->sync_io);
588	}
589
590	static inline void md_sync_acct_bio(struct bio bio, unsigned* long nr_sectors)
591	{
592	md_sync_acct(bdev: bio->bi_bdev, nr_sectors);
593	}
594
595	struct md_personality
596	{
597	char *name;
598	int level;
599	struct list_head list;
600	struct module *owner;
601	bool __must_check (make_request)(struct* mddev mddev, struct* bio *bio);
602	/*
603	* start up works that do NOT require md_thread. tasks that
604	* requires md_thread should go into start()
605	*/
606	int (run)(struct* mddev *mddev);
607	/ start up works that require md threads /
608	int (start)(struct* mddev *mddev);
609	void (free)(struct* mddev mddev, void* *priv);
610	void (status)(struct* seq_file seq, struct* mddev *mddev);
611	/ error_handler must set ->faulty and clear ->in_sync*
612	* if appropriate, and should abort recovery if needed
613	*/
614	void (error_handler)(struct* mddev mddev, struct* md_rdev *rdev);
615	int (hot_add_disk) (struct* mddev mddev, struct* md_rdev *rdev);
616	int (hot_remove_disk) (struct* mddev mddev, struct* md_rdev *rdev);
617	int (spare_active) (struct* mddev *mddev);
618	sector_t (sync_request)(struct* mddev mddev, sector_t sector_nr, int* *skipped);
619	int (resize) (struct* mddev *mddev, sector_t sectors);
620	sector_t (size) (struct* mddev mddev, sector_t sectors, int* raid_disks);
621	int (check_reshape) (struct* mddev *mddev);
622	int (start_reshape) (struct* mddev *mddev);
623	void (finish_reshape) (struct* mddev *mddev);
624	void (update_reshape_pos) (struct* mddev *mddev);
625	/ quiesce suspends or resumes internal processing.*
626	* 1 - stop new actions and wait for action io to complete
627	* 0 - return to normal behaviour
628	*/
629	void (quiesce) (struct* mddev mddev, int* quiesce);
630	/ takeover is used to transition an array from one*
631	* personality to another. The new personality must be able
632	* to handle the data in the current layout.
633	* e.g. 2drive raid1 -> 2drive raid5
634	* ndrive raid5 -> degraded n+1drive raid6 with special layout
635	* If the takeover succeeds, a new 'private' structure is returned.
636	* This needs to be installed and then ->run used to activate the
637	* array.
638	*/
639	void (takeover) (struct mddev *mddev);
640	/ Changes the consistency policy of an active array. /
641	int (change_consistency_policy)(struct* mddev mddev, const* char *buf);
642	};
643
644	struct md_sysfs_entry {
645	struct attribute attr;
646	ssize_t (show)(struct* mddev , char* *);
647	ssize_t (store)(struct* mddev , const* char *, size_t);
648	};
649	extern const struct attribute_group md_bitmap_group;
650
/*
 * NULL-tolerant sysfs_get_dirent(): looks up @name under @sd, or returns
 * @sd itself (i.e. NULL) when there is no parent node.
 */
static inline struct kernfs_node *sysfs_get_dirent_safe(struct kernfs_node *sd, char *name)
{
	if (sd)
		return sysfs_get_dirent(sd, name);
	return sd;
}
/* NULL-tolerant sysfs_notify_dirent(): does nothing when @sd is NULL. */
static inline void sysfs_notify_dirent_safe(struct kernfs_node *sd)
{
	if (sd)
		sysfs_notify_dirent(sd);
}
662
663	static inline char * mdname (struct mddev * mddev)
664	{
665	return mddev->gendisk ? mddev->gendisk->disk_name : "mdX";
666	}
667
668	static inline int sysfs_link_rdev(struct mddev mddev, struct* md_rdev *rdev)
669	{
670	char nm[`20`];
671	if (!test_bit(Replacement, &rdev->flags) &&
672	!test_bit(Journal, &rdev->flags) &&
673	mddev->kobj.sd) {
674	sprintf(buf: nm, fmt: "rd%d", rdev->raid_disk);
675	return sysfs_create_link(kobj: &mddev->kobj, target: &rdev->kobj, name: nm);
676	} else
677	return `0`;
678	}
679
680	static inline void sysfs_unlink_rdev(struct mddev mddev, struct* md_rdev *rdev)
681	{
682	char nm[`20`];
683	if (!test_bit(Replacement, &rdev->flags) &&
684	!test_bit(Journal, &rdev->flags) &&
685	mddev->kobj.sd) {
686	sprintf(buf: nm, fmt: "rd%d", rdev->raid_disk);
687	sysfs_remove_link(kobj: &mddev->kobj, name: nm);
688	}
689	}
690
/*
 * iterates through some rdev ringlist. It's safe to remove the
 * current 'rdev'. Don't touch 'tmp' though.
 */
#define rdev_for_each_list(rdev, tmp, head)				\
	list_for_each_entry_safe(rdev, tmp, head, same_set)

/*
 * iterates through the 'same array disks' ringlist
 */
#define rdev_for_each(rdev, mddev)				\
	list_for_each_entry(rdev, &((mddev)->disks), same_set)

/* As rdev_for_each(), but the current 'rdev' may be removed while iterating. */
#define rdev_for_each_safe(rdev, tmp, mddev)				\
	list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set)

/* As rdev_for_each(), built on list_for_each_entry_rcu(). */
#define rdev_for_each_rcu(rdev, mddev)				\
	list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set)
709
710	struct md_thread {
711	void (run) (struct* md_thread *thread);
712	struct mddev *mddev;
713	wait_queue_head_t wqueue;
714	unsigned long flags;
715	struct task_struct *tsk;
716	unsigned long timeout;
717	void *private;
718	};
719
720	struct md_io_clone {
721	struct mddev *mddev;
722	struct bio *orig_bio;
723	unsigned long start_time;
724	struct bio bio_clone;
725	};
726
#define THREAD_WAKEUP  0
728
/* NULL-tolerant put_page(): does nothing when @p is NULL. */
static inline void safe_put_page(struct page *p)
{
	if (p)
		put_page(p);
}
733
734	extern int register_md_personality(struct md_personality *p);
735	extern int unregister_md_personality(struct md_personality *p);
736	extern int register_md_cluster_operations(struct md_cluster_operations *ops,
737	struct module *module);
738	extern int unregister_md_cluster_operations(void);
739	extern int md_setup_cluster(struct mddev mddev, int* nodes);
740	extern void md_cluster_stop(struct mddev *mddev);
741	extern struct md_thread *md_register_thread(
742	void (run)(struct* md_thread *thread),
743	struct mddev *mddev,
744	const char *name);
745	extern void md_unregister_thread(struct mddev mddev, struct* md_thread __rcu **threadp);
746	extern void md_wakeup_thread(struct md_thread __rcu *thread);
747	extern void md_check_recovery(struct mddev *mddev);
748	extern void md_reap_sync_thread(struct mddev *mddev);
749	extern bool md_write_start(struct mddev mddev, struct* bio *bi);
750	extern void md_write_inc(struct mddev mddev, struct* bio *bi);
751	extern void md_write_end(struct mddev *mddev);
752	extern void md_done_sync(struct mddev mddev, int* blocks, int ok);
753	extern void md_error(struct mddev mddev, struct* md_rdev *rdev);
754	extern void md_finish_reshape(struct mddev *mddev);
755	void md_submit_discard_bio(struct mddev mddev, struct* md_rdev *rdev,
756	struct bio *bio, sector_t start, sector_t size);
757	void md_account_bio(struct mddev mddev, struct* bio **bio);
758
759	extern bool __must_check md_flush_request(struct mddev mddev, struct* bio *bio);
760	extern void md_super_write(struct mddev mddev, struct* md_rdev *rdev,
761	sector_t sector, int size, struct page *page);
762	extern int md_super_wait(struct mddev *mddev);
763	extern int sync_page_io(struct md_rdev rdev, sector_t sector, int* size,
764	struct page *page, blk_opf_t opf, bool metadata_op);
765	extern void md_do_sync(struct md_thread *thread);
766	extern void md_new_event(void);
767	extern void md_allow_write(struct mddev *mddev);
768	extern void md_wait_for_blocked_rdev(struct md_rdev rdev, struct* mddev *mddev);
769	extern void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors);
770	extern int md_check_no_bitmap(struct mddev *mddev);
771	extern int md_integrity_register(struct mddev *mddev);
772	extern int md_integrity_add_rdev(struct md_rdev rdev, struct* mddev *mddev);
773	extern int strict_strtoul_scaled(const char cp, unsigned* long res, int* scale);
774
775	extern int mddev_init(struct mddev *mddev);
776	extern void mddev_destroy(struct mddev *mddev);
777	struct mddev md_alloc(dev_t dev, char* *name);
778	void mddev_put(struct mddev *mddev);
779	extern int md_run(struct mddev *mddev);
780	extern int md_start(struct mddev *mddev);
781	extern void md_stop(struct mddev *mddev);
782	extern void md_stop_writes(struct mddev *mddev);
783	extern int md_rdev_init(struct md_rdev *rdev);
784	extern void md_rdev_clear(struct md_rdev *rdev);
785
786	extern void md_handle_request(struct mddev mddev, struct* bio *bio);
787	extern int mddev_suspend(struct mddev *mddev, bool interruptible);
788	extern void mddev_resume(struct mddev *mddev);
789
790	extern void md_reload_sb(struct mddev mddev, int* raid_disk);
791	extern void md_update_sb(struct mddev mddev, int* force);
792	extern void mddev_create_serial_pool(struct mddev mddev, struct* md_rdev *rdev);
793	extern void mddev_destroy_serial_pool(struct mddev *mddev,
794	struct md_rdev *rdev);
795	struct md_rdev md_find_rdev_nr_rcu(struct* mddev mddev, int* nr);
796	struct md_rdev md_find_rdev_rcu(struct* mddev *mddev, dev_t dev);
797
798	static inline bool is_rdev_broken(struct md_rdev *rdev)
799	{
800	return !disk_live(disk: rdev->bdev->bd_disk);
801	}
802
803	static inline void rdev_dec_pending(struct md_rdev rdev, struct* mddev *mddev)
804	{
805	int faulty = test_bit(Faulty, &rdev->flags);
806	if (atomic_dec_and_test(v: &rdev->nr_pending) && faulty) {
807	set_bit(nr: MD_RECOVERY_NEEDED, addr: &mddev->recovery);
808	md_wakeup_thread(thread: mddev->thread);
809	}
810	}
811
812	extern struct md_cluster_operations *md_cluster_ops;
813	static inline int mddev_is_clustered(struct mddev *mddev)
814	{
815	return mddev->cluster_info && mddev->bitmap_info.nodes > `1`;
816	}
817
818	/ clear unsupported mddev_flags /
819	static inline void mddev_clear_unsupported_flags(struct mddev *mddev,
820	unsigned long unsupported_flags)
821	{
822	mddev->flags &= ~unsupported_flags;
823	}
824
825	static inline void mddev_check_write_zeroes(struct mddev mddev, struct* bio *bio)
826	{
827	if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
828	!bio->bi_bdev->bd_disk->queue->limits.max_write_zeroes_sectors)
829	mddev->queue->limits.max_write_zeroes_sectors = `0`;
830	}
831
832	static inline int mddev_suspend_and_lock(struct mddev *mddev)
833	{
834	int ret;
835
836	ret = mddev_suspend(mddev, interruptible: true);
837	if (ret)
838	return ret;
839
840	ret = mddev_lock(mddev);
841	if (ret)
842	mddev_resume(mddev);
843
844	return ret;
845	}
846
847	static inline void mddev_suspend_and_lock_nointr(struct mddev *mddev)
848	{
849	mddev_suspend(mddev, interruptible: false);
850	mutex_lock(&mddev->reconfig_mutex);
851	}
852
/* Unlock via mddev_unlock(), then resume the array. */
static inline void mddev_unlock_and_resume(struct mddev *mddev)
{
	mddev_unlock(mddev);
	mddev_resume(mddev);
}
858
859	struct mdu_array_info_s;
860	struct mdu_disk_info_s;
861
862	extern int mdp_major;
863	extern struct workqueue_struct *md_bitmap_wq;
864	void md_autostart_arrays(int part);
865	int md_set_array_info(struct mddev mddev, struct* mdu_array_info_s *info);
866	int md_add_new_disk(struct mddev mddev, struct* mdu_disk_info_s *info);
867	int do_md_run(struct mddev *mddev);
868
869	extern const struct block_device_operations md_fops;
870
871	#endif /* _MD_MD_H */
872

source code of linux/drivers/md/md.h