drbd_state.c source code [linux/drivers/block/drbd/drbd_state.c]

1	// SPDX-License-Identifier: GPL-2.0-only
2	/*
3	drbd_state.c
4
5	This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
6
7	Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
8	Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
9	Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
10
11	Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev
12	from Logicworks, Inc. for making SDP replication support possible.
13
14	*/
15
16	#include <linux/drbd_limits.h>
17	#include "drbd_int.h"
18	#include "drbd_protocol.h"
19	#include "drbd_req.h"
20	#include "drbd_state_change.h"
21
22	struct after_state_chg_work {
23	struct drbd_work w;
24	struct drbd_device *device;
25	union drbd_state os;
26	union drbd_state ns;
27	enum chg_state_flags flags;
28	struct completion *done;
29	struct drbd_state_change *state_change;
30	};
31
/*
 * Warning codes that sanitize_state() can report through its @warn
 * out-parameter. NO_WARNING is the first enumerator and therefore 0.
 */
enum sanitize_state_warnings {
	NO_WARNING,
	ABORTED_ONLINE_VERIFY,
	ABORTED_RESYNC,
	CONNECTION_LOST_NEGOTIATING,
	IMPLICITLY_UPGRADED_DISK,
	IMPLICITLY_UPGRADED_PDSK,
};
40
41	static void count_objects(struct drbd_resource *resource,
42	unsigned int *n_devices,
43	unsigned int *n_connections)
44	{
45	struct drbd_device *device;
46	struct drbd_connection *connection;
47	int vnr;
48
49	*n_devices = `0`;
50	*n_connections = `0`;
51
52	idr_for_each_entry(&resource->devices, device, vnr)
53	(*n_devices)++;
54	for_each_connection(connection, resource)
55	(*n_connections)++;
56	}
57
58	static struct drbd_state_change alloc_state_change(unsigned* int n_devices, unsigned int n_connections, gfp_t gfp)
59	{
60	struct drbd_state_change *state_change;
61	unsigned int size, n;
62
63	size = sizeof(struct drbd_state_change) +
64	n_devices * sizeof(struct drbd_device_state_change) +
65	n_connections * sizeof(struct drbd_connection_state_change) +
66	n_devices * n_connections * sizeof(struct drbd_peer_device_state_change);
67	state_change = kmalloc(size, flags: gfp);
68	if (!state_change)
69	return NULL;
70	state_change->n_devices = n_devices;
71	state_change->n_connections = n_connections;
72	state_change->devices = (void *)(state_change + `1`);
73	state_change->connections = (void *)&state_change->devices[n_devices];
74	state_change->peer_devices = (void *)&state_change->connections[n_connections];
75	state_change->resource->resource = NULL;
76	for (n = `0`; n < n_devices; n++)
77	state_change->devices[n].device = NULL;
78	for (n = `0`; n < n_connections; n++)
79	state_change->connections[n].connection = NULL;
80	return state_change;
81	}
82
83	struct drbd_state_change remember_old_state(struct* drbd_resource *resource, gfp_t gfp)
84	{
85	struct drbd_state_change *state_change;
86	struct drbd_device *device;
87	unsigned int n_devices;
88	struct drbd_connection *connection;
89	unsigned int n_connections;
90	int vnr;
91
92	struct drbd_device_state_change *device_state_change;
93	struct drbd_peer_device_state_change *peer_device_state_change;
94	struct drbd_connection_state_change *connection_state_change;
95
96	/ Caller holds req_lock spinlock.*
97	* No state, no device IDR, no connections lists can change. */
98	count_objects(resource, n_devices: &n_devices, n_connections: &n_connections);
99	state_change = alloc_state_change(n_devices, n_connections, gfp);
100	if (!state_change)
101	return NULL;
102
103	kref_get(kref: &resource->kref);
104	state_change->resource->resource = resource;
105	state_change->resource->role[OLD] =
106	conn_highest_role(connection: first_connection(resource));
107	state_change->resource->susp[OLD] = resource->susp;
108	state_change->resource->susp_nod[OLD] = resource->susp_nod;
109	state_change->resource->susp_fen[OLD] = resource->susp_fen;
110
111	connection_state_change = state_change->connections;
112	for_each_connection(connection, resource) {
113	kref_get(kref: &connection->kref);
114	connection_state_change->connection = connection;
115	connection_state_change->cstate[OLD] =
116	connection->cstate;
117	connection_state_change->peer_role[OLD] =
118	conn_highest_peer(connection);
119	connection_state_change++;
120	}
121
122	device_state_change = state_change->devices;
123	peer_device_state_change = state_change->peer_devices;
124	idr_for_each_entry(&resource->devices, device, vnr) {
125	kref_get(kref: &device->kref);
126	device_state_change->device = device;
127	device_state_change->disk_state[OLD] = device->state.disk;
128
129	/ The peer_devices for each device have to be enumerated in*
130	the order of the connections. We may not use for_each_peer_device() here. /*
131	for_each_connection(connection, resource) {
132	struct drbd_peer_device *peer_device;
133
134	peer_device = conn_peer_device(connection, volume_number: device->vnr);
135	peer_device_state_change->peer_device = peer_device;
136	peer_device_state_change->disk_state[OLD] =
137	device->state.pdsk;
138	peer_device_state_change->repl_state[OLD] =
139	max_t(enum drbd_conns,
140	C_WF_REPORT_PARAMS, device->state.conn);
141	peer_device_state_change->resync_susp_user[OLD] =
142	device->state.user_isp;
143	peer_device_state_change->resync_susp_peer[OLD] =
144	device->state.peer_isp;
145	peer_device_state_change->resync_susp_dependency[OLD] =
146	device->state.aftr_isp;
147	peer_device_state_change++;
148	}
149	device_state_change++;
150	}
151
152	return state_change;
153	}
154
155	static void remember_new_state(struct drbd_state_change *state_change)
156	{
157	struct drbd_resource_state_change *resource_state_change;
158	struct drbd_resource *resource;
159	unsigned int n;
160
161	if (!state_change)
162	return;
163
164	resource_state_change = &state_change->resource[`0`];
165	resource = resource_state_change->resource;
166
167	resource_state_change->role[NEW] =
168	conn_highest_role(connection: first_connection(resource));
169	resource_state_change->susp[NEW] = resource->susp;
170	resource_state_change->susp_nod[NEW] = resource->susp_nod;
171	resource_state_change->susp_fen[NEW] = resource->susp_fen;
172
173	for (n = `0`; n < state_change->n_devices; n++) {
174	struct drbd_device_state_change *device_state_change =
175	&state_change->devices[n];
176	struct drbd_device *device = device_state_change->device;
177
178	device_state_change->disk_state[NEW] = device->state.disk;
179	}
180
181	for (n = `0`; n < state_change->n_connections; n++) {
182	struct drbd_connection_state_change *connection_state_change =
183	&state_change->connections[n];
184	struct drbd_connection *connection =
185	connection_state_change->connection;
186
187	connection_state_change->cstate[NEW] = connection->cstate;
188	connection_state_change->peer_role[NEW] =
189	conn_highest_peer(connection);
190	}
191
192	for (n = `0`; n < state_change->n_devices * state_change->n_connections; n++) {
193	struct drbd_peer_device_state_change *peer_device_state_change =
194	&state_change->peer_devices[n];
195	struct drbd_device *device =
196	peer_device_state_change->peer_device->device;
197	union drbd_dev_state state = device->state;
198
199	peer_device_state_change->disk_state[NEW] = state.pdsk;
200	peer_device_state_change->repl_state[NEW] =
201	max_t(enum drbd_conns, C_WF_REPORT_PARAMS, state.conn);
202	peer_device_state_change->resync_susp_user[NEW] =
203	state.user_isp;
204	peer_device_state_change->resync_susp_peer[NEW] =
205	state.peer_isp;
206	peer_device_state_change->resync_susp_dependency[NEW] =
207	state.aftr_isp;
208	}
209	}
210
211	void copy_old_to_new_state_change(struct drbd_state_change *state_change)
212	{
213	struct drbd_resource_state_change *resource_state_change = &state_change->resource[`0`];
214	unsigned int n_device, n_connection, n_peer_device, n_peer_devices;
215
216	#define OLD_TO_NEW(x) \
217	(x[NEW] = x[OLD])
218
219	OLD_TO_NEW(resource_state_change->role);
220	OLD_TO_NEW(resource_state_change->susp);
221	OLD_TO_NEW(resource_state_change->susp_nod);
222	OLD_TO_NEW(resource_state_change->susp_fen);
223
224	for (n_connection = `0`; n_connection < state_change->n_connections; n_connection++) {
225	struct drbd_connection_state_change *connection_state_change =
226	&state_change->connections[n_connection];
227
228	OLD_TO_NEW(connection_state_change->peer_role);
229	OLD_TO_NEW(connection_state_change->cstate);
230	}
231
232	for (n_device = `0`; n_device < state_change->n_devices; n_device++) {
233	struct drbd_device_state_change *device_state_change =
234	&state_change->devices[n_device];
235
236	OLD_TO_NEW(device_state_change->disk_state);
237	}
238
239	n_peer_devices = state_change->n_devices * state_change->n_connections;
240	for (n_peer_device = `0`; n_peer_device < n_peer_devices; n_peer_device++) {
241	struct drbd_peer_device_state_change *p =
242	&state_change->peer_devices[n_peer_device];
243
244	OLD_TO_NEW(p->disk_state);
245	OLD_TO_NEW(p->repl_state);
246	OLD_TO_NEW(p->resync_susp_user);
247	OLD_TO_NEW(p->resync_susp_peer);
248	OLD_TO_NEW(p->resync_susp_dependency);
249	}
250
251	#undef OLD_TO_NEW
252	}
253
254	void forget_state_change(struct drbd_state_change *state_change)
255	{
256	unsigned int n;
257
258	if (!state_change)
259	return;
260
261	if (state_change->resource->resource)
262	kref_put(kref: &state_change->resource->resource->kref, release: drbd_destroy_resource);
263	for (n = `0`; n < state_change->n_devices; n++) {
264	struct drbd_device *device = state_change->devices[n].device;
265
266	if (device)
267	kref_put(kref: &device->kref, release: drbd_destroy_device);
268	}
269	for (n = `0`; n < state_change->n_connections; n++) {
270	struct drbd_connection *connection =
271	state_change->connections[n].connection;
272
273	if (connection)
274	kref_put(kref: &connection->kref, release: drbd_destroy_connection);
275	}
276	kfree(objp: state_change);
277	}
278
/* Forward declarations of static functions used before their definition. */
static int w_after_state_ch(struct drbd_work *w, int unused);
static void after_state_ch(struct drbd_device *device, union drbd_state os,
			   union drbd_state ns, enum chg_state_flags flags,
			   struct drbd_state_change *);
static enum drbd_state_rv is_valid_state(struct drbd_device *, union drbd_state);
static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state, struct drbd_connection *);
static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns);
static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state os,
				       union drbd_state ns, enum sanitize_state_warnings *warn);
288
289	static inline bool is_susp(union drbd_state s)
290	{
291	return s.susp \|\| s.susp_nod \|\| s.susp_fen;
292	}
293
294	bool conn_all_vols_unconf(struct drbd_connection *connection)
295	{
296	struct drbd_peer_device *peer_device;
297	bool rv = true;
298	int vnr;
299
300	rcu_read_lock();
301	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
302	struct drbd_device *device = peer_device->device;
303	if (device->state.disk != D_DISKLESS \|\|
304	device->state.conn != C_STANDALONE \|\|
305	device->state.role != R_SECONDARY) {
306	rv = false;
307	break;
308	}
309	}
310	rcu_read_unlock();
311
312	return rv;
313	}
314
315	/ Unfortunately the states where not correctly ordered, when*
316	they where defined. therefore can not use max_t() here. /*
317	static enum drbd_role max_role(enum drbd_role role1, enum drbd_role role2)
318	{
319	if (role1 == R_PRIMARY \|\| role2 == R_PRIMARY)
320	return R_PRIMARY;
321	if (role1 == R_SECONDARY \|\| role2 == R_SECONDARY)
322	return R_SECONDARY;
323	return R_UNKNOWN;
324	}
325
326	static enum drbd_role min_role(enum drbd_role role1, enum drbd_role role2)
327	{
328	if (role1 == R_UNKNOWN \|\| role2 == R_UNKNOWN)
329	return R_UNKNOWN;
330	if (role1 == R_SECONDARY \|\| role2 == R_SECONDARY)
331	return R_SECONDARY;
332	return R_PRIMARY;
333	}
334
335	enum drbd_role conn_highest_role(struct drbd_connection *connection)
336	{
337	enum drbd_role role = R_SECONDARY;
338	struct drbd_peer_device *peer_device;
339	int vnr;
340
341	rcu_read_lock();
342	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
343	struct drbd_device *device = peer_device->device;
344	role = max_role(role1: role, role2: device->state.role);
345	}
346	rcu_read_unlock();
347
348	return role;
349	}
350
351	enum drbd_role conn_highest_peer(struct drbd_connection *connection)
352	{
353	enum drbd_role peer = R_UNKNOWN;
354	struct drbd_peer_device *peer_device;
355	int vnr;
356
357	rcu_read_lock();
358	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
359	struct drbd_device *device = peer_device->device;
360	peer = max_role(role1: peer, role2: device->state.peer);
361	}
362	rcu_read_unlock();
363
364	return peer;
365	}
366
367	enum drbd_disk_state conn_highest_disk(struct drbd_connection *connection)
368	{
369	enum drbd_disk_state disk_state = D_DISKLESS;
370	struct drbd_peer_device *peer_device;
371	int vnr;
372
373	rcu_read_lock();
374	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
375	struct drbd_device *device = peer_device->device;
376	disk_state = max_t(enum drbd_disk_state, disk_state, device->state.disk);
377	}
378	rcu_read_unlock();
379
380	return disk_state;
381	}
382
383	enum drbd_disk_state conn_lowest_disk(struct drbd_connection *connection)
384	{
385	enum drbd_disk_state disk_state = D_MASK;
386	struct drbd_peer_device *peer_device;
387	int vnr;
388
389	rcu_read_lock();
390	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
391	struct drbd_device *device = peer_device->device;
392	disk_state = min_t(enum drbd_disk_state, disk_state, device->state.disk);
393	}
394	rcu_read_unlock();
395
396	return disk_state;
397	}
398
399	enum drbd_disk_state conn_highest_pdsk(struct drbd_connection *connection)
400	{
401	enum drbd_disk_state disk_state = D_DISKLESS;
402	struct drbd_peer_device *peer_device;
403	int vnr;
404
405	rcu_read_lock();
406	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
407	struct drbd_device *device = peer_device->device;
408	disk_state = max_t(enum drbd_disk_state, disk_state, device->state.pdsk);
409	}
410	rcu_read_unlock();
411
412	return disk_state;
413	}
414
415	enum drbd_conns conn_lowest_conn(struct drbd_connection *connection)
416	{
417	enum drbd_conns conn = C_MASK;
418	struct drbd_peer_device *peer_device;
419	int vnr;
420
421	rcu_read_lock();
422	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
423	struct drbd_device *device = peer_device->device;
424	conn = min_t(enum drbd_conns, conn, device->state.conn);
425	}
426	rcu_read_unlock();
427
428	return conn;
429	}
430
431	static bool no_peer_wf_report_params(struct drbd_connection *connection)
432	{
433	struct drbd_peer_device *peer_device;
434	int vnr;
435	bool rv = true;
436
437	rcu_read_lock();
438	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
439	if (peer_device->device->state.conn == C_WF_REPORT_PARAMS) {
440	rv = false;
441	break;
442	}
443	rcu_read_unlock();
444
445	return rv;
446	}
447
448	static void wake_up_all_devices(struct drbd_connection *connection)
449	{
450	struct drbd_peer_device *peer_device;
451	int vnr;
452
453	rcu_read_lock();
454	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
455	wake_up(&peer_device->device->state_wait);
456	rcu_read_unlock();
457
458	}
459
460
461	/**
462	* cl_wide_st_chg() - true if the state change is a cluster wide one
463	* @device: DRBD device.
464	* @os: old (current) state.
465	* @ns: new (wanted) state.
466	*/
467	static int cl_wide_st_chg(struct drbd_device *device,
468	union drbd_state os, union drbd_state ns)
469	{
470	return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED &&
471	((os.role != R_PRIMARY && ns.role == R_PRIMARY) \|\|
472	(os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) \|\|
473	(os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) \|\|
474	(os.disk != D_FAILED && ns.disk == D_FAILED))) \|\|
475	(os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) \|\|
476	(os.conn == C_CONNECTED && ns.conn == C_VERIFY_S) \|\|
477	(os.conn == C_CONNECTED && ns.conn == C_WF_REPORT_PARAMS);
478	}
479
480	static union drbd_state
481	apply_mask_val(union drbd_state os, union drbd_state mask, union drbd_state val)
482	{
483	union drbd_state ns;
484	ns.i = (os.i & ~mask.i) \| val.i;
485	return ns;
486	}
487
488	enum drbd_state_rv
489	drbd_change_state(struct drbd_device device, enum* chg_state_flags f,
490	union drbd_state mask, union drbd_state val)
491	{
492	unsigned long flags;
493	union drbd_state ns;
494	enum drbd_state_rv rv;
495
496	spin_lock_irqsave(&device->resource->req_lock, flags);
497	ns = apply_mask_val(os: drbd_read_state(device), mask, val);
498	rv = _drbd_set_state(device, ns, f, NULL);
499	spin_unlock_irqrestore(lock: &device->resource->req_lock, flags);
500
501	return rv;
502	}
503
504	/**
505	* drbd_force_state() - Impose a change which happens outside our control on our state
506	* @device: DRBD device.
507	* @mask: mask of state bits to change.
508	* @val: value of new state bits.
509	*/
510	void drbd_force_state(struct drbd_device *device,
511	union drbd_state mask, union drbd_state val)
512	{
513	drbd_change_state(device, f: CS_HARD, mask, val);
514	}
515
516	static enum drbd_state_rv
517	_req_st_cond(struct drbd_device device, union* drbd_state mask,
518	union drbd_state val)
519	{
520	union drbd_state os, ns;
521	unsigned long flags;
522	enum drbd_state_rv rv;
523
524	if (test_and_clear_bit(nr: CL_ST_CHG_SUCCESS, addr: &device->flags))
525	return SS_CW_SUCCESS;
526
527	if (test_and_clear_bit(nr: CL_ST_CHG_FAIL, addr: &device->flags))
528	return SS_CW_FAILED_BY_PEER;
529
530	spin_lock_irqsave(&device->resource->req_lock, flags);
531	os = drbd_read_state(device);
532	ns = sanitize_state(device, os, ns: apply_mask_val(os, mask, val), NULL);
533	rv = is_valid_transition(os, ns);
534	if (rv >= SS_SUCCESS)
535	rv = SS_UNKNOWN_ERROR; / cont waiting, otherwise fail. /
536
537	if (!cl_wide_st_chg(device, os, ns))
538	rv = SS_CW_NO_NEED;
539	if (rv == SS_UNKNOWN_ERROR) {
540	rv = is_valid_state(device, ns);
541	if (rv >= SS_SUCCESS) {
542	rv = is_valid_soft_transition(os, ns, first_peer_device(device)->connection);
543	if (rv >= SS_SUCCESS)
544	rv = SS_UNKNOWN_ERROR; / cont waiting, otherwise fail. /
545	}
546	}
547	spin_unlock_irqrestore(lock: &device->resource->req_lock, flags);
548
549	return rv;
550	}
551
552	/**
553	* drbd_req_state() - Perform an eventually cluster wide state change
554	* @device: DRBD device.
555	* @mask: mask of state bits to change.
556	* @val: value of new state bits.
557	* @f: flags
558	*
559	* Should not be called directly, use drbd_request_state() or
560	* _drbd_request_state().
561	*/
562	static enum drbd_state_rv
563	drbd_req_state(struct drbd_device device, union* drbd_state mask,
564	union drbd_state val, enum chg_state_flags f)
565	{
566	struct completion done;
567	unsigned long flags;
568	union drbd_state os, ns;
569	enum drbd_state_rv rv;
570	void *buffer = NULL;
571
572	init_completion(x: &done);
573
574	if (f & CS_SERIALIZE)
575	mutex_lock(device->state_mutex);
576	if (f & CS_INHIBIT_MD_IO)
577	buffer = drbd_md_get_buffer(device, intent: __func__);
578
579	spin_lock_irqsave(&device->resource->req_lock, flags);
580	os = drbd_read_state(device);
581	ns = sanitize_state(device, os, ns: apply_mask_val(os, mask, val), NULL);
582	rv = is_valid_transition(os, ns);
583	if (rv < SS_SUCCESS) {
584	spin_unlock_irqrestore(lock: &device->resource->req_lock, flags);
585	goto abort;
586	}
587
588	if (cl_wide_st_chg(device, os, ns)) {
589	rv = is_valid_state(device, ns);
590	if (rv == SS_SUCCESS)
591	rv = is_valid_soft_transition(os, ns, first_peer_device(device)->connection);
592	spin_unlock_irqrestore(lock: &device->resource->req_lock, flags);
593
594	if (rv < SS_SUCCESS) {
595	if (f & CS_VERBOSE)
596	print_st_err(device, os, ns, rv);
597	goto abort;
598	}
599
600	if (drbd_send_state_req(first_peer_device(device), mask, val)) {
601	rv = SS_CW_FAILED_BY_PEER;
602	if (f & CS_VERBOSE)
603	print_st_err(device, os, ns, rv);
604	goto abort;
605	}
606
607	wait_event(device->state_wait,
608	(rv = _req_st_cond(device, mask, val)));
609
610	if (rv < SS_SUCCESS) {
611	if (f & CS_VERBOSE)
612	print_st_err(device, os, ns, rv);
613	goto abort;
614	}
615	spin_lock_irqsave(&device->resource->req_lock, flags);
616	ns = apply_mask_val(os: drbd_read_state(device), mask, val);
617	rv = _drbd_set_state(device, ns, f, done: &done);
618	} else {
619	rv = _drbd_set_state(device, ns, f, done: &done);
620	}
621
622	spin_unlock_irqrestore(lock: &device->resource->req_lock, flags);
623
624	if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) {
625	D_ASSERT(device, current != first_peer_device(device)->connection->worker.task);
626	wait_for_completion(&done);
627	}
628
629	abort:
630	if (buffer)
631	drbd_md_put_buffer(device);
632	if (f & CS_SERIALIZE)
633	mutex_unlock(lock: device->state_mutex);
634
635	return rv;
636	}
637
638	/**
639	* _drbd_request_state() - Request a state change (with flags)
640	* @device: DRBD device.
641	* @mask: mask of state bits to change.
642	* @val: value of new state bits.
643	* @f: flags
644	*
645	* Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
646	* flag, or when logging of failed state change requests is not desired.
647	*/
648	enum drbd_state_rv
649	_drbd_request_state(struct drbd_device device, union* drbd_state mask,
650	union drbd_state val, enum chg_state_flags f)
651	{
652	enum drbd_state_rv rv;
653
654	wait_event(device->state_wait,
655	(rv = drbd_req_state(device, mask, val, f)) != SS_IN_TRANSIENT_STATE);
656
657	return rv;
658	}
659
660	/*
661	* We grab drbd_md_get_buffer(), because we don't want to "fail" the disk while
662	* there is IO in-flight: the transition into D_FAILED for detach purposes
663	* may get misinterpreted as actual IO error in a confused endio function.
664	*
665	* We wrap it all into wait_event(), to retry in case the drbd_req_state()
666	* returns SS_IN_TRANSIENT_STATE.
667	*
668	* To avoid potential deadlock with e.g. the receiver thread trying to grab
669	* drbd_md_get_buffer() while trying to get out of the "transient state", we
670	* need to grab and release the meta data buffer inside of that wait_event loop.
671	*/
672	static enum drbd_state_rv
673	request_detach(struct drbd_device *device)
674	{
675	return drbd_req_state(device, NS(disk, D_FAILED),
676	f: CS_VERBOSE \| CS_ORDERED \| CS_INHIBIT_MD_IO);
677	}
678
679	int drbd_request_detach_interruptible(struct drbd_device *device)
680	{
681	int ret, rv;
682
683	drbd_suspend_io(device); / so no-one is stuck in drbd_al_begin_io /
684	wait_event_interruptible(device->state_wait,
685	(rv = request_detach(device)) != SS_IN_TRANSIENT_STATE);
686	drbd_resume_io(device);
687
688	ret = wait_event_interruptible(device->misc_wait,
689	device->state.disk != D_FAILED);
690
691	if (rv == SS_IS_DISKLESS)
692	rv = SS_NOTHING_TO_DO;
693	if (ret)
694	rv = ERR_INTR;
695
696	return rv;
697	}
698
699	enum drbd_state_rv
700	_drbd_request_state_holding_state_mutex(struct drbd_device device, union* drbd_state mask,
701	union drbd_state val, enum chg_state_flags f)
702	{
703	enum drbd_state_rv rv;
704
705	BUG_ON(f & CS_SERIALIZE);
706
707	wait_event_cmd(device->state_wait,
708	(rv = drbd_req_state(device, mask, val, f)) != SS_IN_TRANSIENT_STATE,
709	mutex_unlock(device->state_mutex),
710	mutex_lock(device->state_mutex));
711
712	return rv;
713	}
714
715	static void print_st(struct drbd_device device, const* char name, union* drbd_state ns)
716	{
717	drbd_err(device, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n",
718	name,
719	drbd_conn_str(ns.conn),
720	drbd_role_str(ns.role),
721	drbd_role_str(ns.peer),
722	drbd_disk_str(ns.disk),
723	drbd_disk_str(ns.pdsk),
724	is_susp(ns) ? `'s'` : `'r'`,
725	ns.aftr_isp ? `'a'` : `'-'`,
726	ns.peer_isp ? `'p'` : `'-'`,
727	ns.user_isp ? `'u'` : `'-'`,
728	ns.susp_fen ? `'F'` : `'-'`,
729	ns.susp_nod ? `'N'` : `'-'`
730	);
731	}
732
733	void print_st_err(struct drbd_device device, union* drbd_state os,
734	union drbd_state ns, enum drbd_state_rv err)
735	{
736	if (err == SS_IN_TRANSIENT_STATE)
737	return;
738	drbd_err(device, "State change failed: %s\n", drbd_set_st_err_str(err));
739	print_st(device, name: " state", ns: os);
740	print_st(device, name: "wanted", ns);
741	}
742
743	static long print_state_change(char pb, union* drbd_state os, union drbd_state ns,
744	enum chg_state_flags flags)
745	{
746	char *pbp;
747	pbp = pb;
748	*pbp = `0`;
749
750	if (ns.role != os.role && flags & CS_DC_ROLE)
751	pbp += sprintf(buf: pbp, fmt: "role( %s -> %s ) ",
752	drbd_role_str(os.role),
753	drbd_role_str(ns.role));
754	if (ns.peer != os.peer && flags & CS_DC_PEER)
755	pbp += sprintf(buf: pbp, fmt: "peer( %s -> %s ) ",
756	drbd_role_str(os.peer),
757	drbd_role_str(ns.peer));
758	if (ns.conn != os.conn && flags & CS_DC_CONN)
759	pbp += sprintf(buf: pbp, fmt: "conn( %s -> %s ) ",
760	drbd_conn_str(os.conn),
761	drbd_conn_str(ns.conn));
762	if (ns.disk != os.disk && flags & CS_DC_DISK)
763	pbp += sprintf(buf: pbp, fmt: "disk( %s -> %s ) ",
764	drbd_disk_str(os.disk),
765	drbd_disk_str(ns.disk));
766	if (ns.pdsk != os.pdsk && flags & CS_DC_PDSK)
767	pbp += sprintf(buf: pbp, fmt: "pdsk( %s -> %s ) ",
768	drbd_disk_str(os.pdsk),
769	drbd_disk_str(ns.pdsk));
770
771	return pbp - pb;
772	}
773
774	static void drbd_pr_state_change(struct drbd_device device, union* drbd_state os, union drbd_state ns,
775	enum chg_state_flags flags)
776	{
777	char pb[`300`];
778	char *pbp = pb;
779
780	pbp += print_state_change(pb: pbp, os, ns, flags: flags ^ CS_DC_MASK);
781
782	if (ns.aftr_isp != os.aftr_isp)
783	pbp += sprintf(buf: pbp, fmt: "aftr_isp( %d -> %d ) ",
784	os.aftr_isp,
785	ns.aftr_isp);
786	if (ns.peer_isp != os.peer_isp)
787	pbp += sprintf(buf: pbp, fmt: "peer_isp( %d -> %d ) ",
788	os.peer_isp,
789	ns.peer_isp);
790	if (ns.user_isp != os.user_isp)
791	pbp += sprintf(buf: pbp, fmt: "user_isp( %d -> %d ) ",
792	os.user_isp,
793	ns.user_isp);
794
795	if (pbp != pb)
796	drbd_info(device, "%s\n", pb);
797	}
798
799	static void conn_pr_state_change(struct drbd_connection connection, union* drbd_state os, union drbd_state ns,
800	enum chg_state_flags flags)
801	{
802	char pb[`300`];
803	char *pbp = pb;
804
805	pbp += print_state_change(pb: pbp, os, ns, flags);
806
807	if (is_susp(s: ns) != is_susp(s: os) && flags & CS_DC_SUSP)
808	pbp += sprintf(buf: pbp, fmt: "susp( %d -> %d ) ",
809	is_susp(s: os),
810	is_susp(s: ns));
811
812	if (pbp != pb)
813	drbd_info(connection, "%s\n", pb);
814	}
815
816
817	/**
818	* is_valid_state() - Returns an SS_ error code if ns is not valid
819	* @device: DRBD device.
820	* @ns: State to consider.
821	*/
822	static enum drbd_state_rv
823	is_valid_state(struct drbd_device device, union* drbd_state ns)
824	{
825	/ See drbd_state_sw_errors in drbd_strings.c /
826
827	enum drbd_fencing_p fp;
828	enum drbd_state_rv rv = SS_SUCCESS;
829	struct net_conf *nc;
830
831	rcu_read_lock();
832	fp = FP_DONT_CARE;
833	if (get_ldev(device)) {
834	fp = rcu_dereference(device->ldev->disk_conf)->fencing;
835	put_ldev(device);
836	}
837
838	nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
839	if (nc) {
840	if (!nc->two_primaries && ns.role == R_PRIMARY) {
841	if (ns.peer == R_PRIMARY)
842	rv = SS_TWO_PRIMARIES;
843	else if (conn_highest_peer(connection: first_peer_device(device)->connection) == R_PRIMARY)
844	rv = SS_O_VOL_PEER_PRI;
845	}
846	}
847
848	if (rv <= `0`)
849	goto out; / already found a reason to abort /
850	else if (ns.role == R_SECONDARY && device->open_cnt)
851	rv = SS_DEVICE_IN_USE;
852
853	else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE)
854	rv = SS_NO_UP_TO_DATE_DISK;
855
856	else if (fp >= FP_RESOURCE &&
857	ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN)
858	rv = SS_PRIMARY_NOP;
859
860	else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT)
861	rv = SS_NO_UP_TO_DATE_DISK;
862
863	else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT)
864	rv = SS_NO_LOCAL_DISK;
865
866	else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT)
867	rv = SS_NO_REMOTE_DISK;
868
869	else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)
870	rv = SS_NO_UP_TO_DATE_DISK;
871
872	else if ((ns.conn == C_CONNECTED \|\|
873	ns.conn == C_WF_BITMAP_S \|\|
874	ns.conn == C_SYNC_SOURCE \|\|
875	ns.conn == C_PAUSED_SYNC_S) &&
876	ns.disk == D_OUTDATED)
877	rv = SS_CONNECTED_OUTDATES;
878
879	else if ((ns.conn == C_VERIFY_S \|\| ns.conn == C_VERIFY_T) &&
880	(nc->verify_alg[`0`] == `0`))
881	rv = SS_NO_VERIFY_ALG;
882
883	else if ((ns.conn == C_VERIFY_S \|\| ns.conn == C_VERIFY_T) &&
884	first_peer_device(device)->connection->agreed_pro_version < `88`)
885	rv = SS_NOT_SUPPORTED;
886
887	else if (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)
888	rv = SS_NO_UP_TO_DATE_DISK;
889
890	else if ((ns.conn == C_STARTING_SYNC_S \|\| ns.conn == C_STARTING_SYNC_T) &&
891	ns.pdsk == D_UNKNOWN)
892	rv = SS_NEED_CONNECTION;
893
894	else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN)
895	rv = SS_CONNECTED_OUTDATES;
896
897	out:
898	rcu_read_unlock();
899
900	return rv;
901	}
902
903	/**
904	* is_valid_soft_transition() - Returns an SS_ error code if the state transition is not possible
905	* This function limits state transitions that may be declined by DRBD. I.e.
906	* user requests (aka soft transitions).
907	* @os: old state.
908	* @ns: new state.
909	* @connection: DRBD connection.
910	*/
911	static enum drbd_state_rv
912	is_valid_soft_transition(union drbd_state os, union drbd_state ns, struct drbd_connection *connection)
913	{
914	enum drbd_state_rv rv = SS_SUCCESS;
915
916	if ((ns.conn == C_STARTING_SYNC_T \|\| ns.conn == C_STARTING_SYNC_S) &&
917	os.conn > C_CONNECTED)
918	rv = SS_RESYNC_RUNNING;
919
920	if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE)
921	rv = SS_ALREADY_STANDALONE;
922
923	if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS)
924	rv = SS_IS_DISKLESS;
925
926	if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED)
927	rv = SS_NO_NET_CONFIG;
928
929	if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING)
930	rv = SS_LOWER_THAN_OUTDATED;
931
932	if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED)
933	rv = SS_IN_TRANSIENT_STATE;
934
935	/ While establishing a connection only allow cstate to change.*
936	Delay/refuse role changes, detach attach etc... (they do not touch cstate) /*
937	if (test_bit(STATE_SENT, &connection->flags) &&
938	!((ns.conn == C_WF_REPORT_PARAMS && os.conn == C_WF_CONNECTION) \|\|
939	(ns.conn >= C_CONNECTED && os.conn == C_WF_REPORT_PARAMS)))
940	rv = SS_IN_TRANSIENT_STATE;
941
942	/ Do not promote during resync handshake triggered by "force primary".*
943	* This is a hack. It should really be rejected by the peer during the
944	* cluster wide state change request. */
945	if (os.role != R_PRIMARY && ns.role == R_PRIMARY
946	&& ns.pdsk == D_UP_TO_DATE
947	&& ns.disk != D_UP_TO_DATE && ns.disk != D_DISKLESS
948	&& (ns.conn <= C_WF_SYNC_UUID \|\| ns.conn != os.conn))
949	rv = SS_IN_TRANSIENT_STATE;
950
951	if ((ns.conn == C_VERIFY_S \|\| ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED)
952	rv = SS_NEED_CONNECTION;
953
954	if ((ns.conn == C_VERIFY_S \|\| ns.conn == C_VERIFY_T) &&
955	ns.conn != os.conn && os.conn > C_CONNECTED)
956	rv = SS_RESYNC_RUNNING;
957
958	if ((ns.conn == C_STARTING_SYNC_S \|\| ns.conn == C_STARTING_SYNC_T) &&
959	os.conn < C_CONNECTED)
960	rv = SS_NEED_CONNECTION;
961
962	if ((ns.conn == C_SYNC_TARGET \|\| ns.conn == C_SYNC_SOURCE)
963	&& os.conn < C_WF_REPORT_PARAMS)
964	rv = SS_NEED_CONNECTION; / No NetworkFailure -> SyncTarget etc... /
965
966	if (ns.conn == C_DISCONNECTING && ns.pdsk == D_OUTDATED &&
967	os.conn < C_CONNECTED && os.pdsk > D_OUTDATED)
968	rv = SS_OUTDATE_WO_CONN;
969
970	return rv;
971	}
972
973	static enum drbd_state_rv
974	is_valid_conn_transition(enum drbd_conns oc, enum drbd_conns nc)
975	{
976	/ no change -> nothing to do, at least for the connection part /
977	if (oc == nc)
978	return SS_NOTHING_TO_DO;
979
980	/ disconnect of an unconfigured connection does not make sense /
981	if (oc == C_STANDALONE && nc == C_DISCONNECTING)
982	return SS_ALREADY_STANDALONE;
983
984	/ from C_STANDALONE, we start with C_UNCONNECTED /
985	if (oc == C_STANDALONE && nc != C_UNCONNECTED)
986	return SS_NEED_CONNECTION;
987
988	/ When establishing a connection we need to go through WF_REPORT_PARAMS!*
989	Necessary to do the right thing upon invalidate-remote on a disconnected resource /*
990	if (oc < C_WF_REPORT_PARAMS && nc >= C_CONNECTED)
991	return SS_NEED_CONNECTION;
992
993	/ After a network error only C_UNCONNECTED or C_DISCONNECTING may follow. /
994	if (oc >= C_TIMEOUT && oc <= C_TEAR_DOWN && nc != C_UNCONNECTED && nc != C_DISCONNECTING)
995	return SS_IN_TRANSIENT_STATE;
996
997	/ After C_DISCONNECTING only C_STANDALONE may follow /
998	if (oc == C_DISCONNECTING && nc != C_STANDALONE)
999	return SS_IN_TRANSIENT_STATE;
1000
1001	return SS_SUCCESS;
1002	}
1003
1004
1005	/**
1006	* is_valid_transition() - Returns an SS_ error code if the state transition is not possible
1007	* This limits hard state transitions. Hard state transitions are facts there are
1008	* imposed on DRBD by the environment. E.g. disk broke or network broke down.
1009	* But those hard state transitions are still not allowed to do everything.
1010	* @ns: new state.
1011	* @os: old state.
1012	*/
1013	static enum drbd_state_rv
1014	is_valid_transition(union drbd_state os, union drbd_state ns)
1015	{
1016	enum drbd_state_rv rv;
1017
1018	rv = is_valid_conn_transition(oc: os.conn, nc: ns.conn);
1019
1020	/ we cannot fail (again) if we already detached /
1021	if (ns.disk == D_FAILED && os.disk == D_DISKLESS)
1022	rv = SS_IS_DISKLESS;
1023
1024	return rv;
1025	}
1026
1027	static void print_sanitize_warnings(struct drbd_device device, enum* sanitize_state_warnings warn)
1028	{
1029	static const char *msg_table[] = {
1030	[NO_WARNING] = "",
1031	[ABORTED_ONLINE_VERIFY] = "Online-verify aborted.",
1032	[ABORTED_RESYNC] = "Resync aborted.",
1033	[CONNECTION_LOST_NEGOTIATING] = "Connection lost while negotiating, no data!",
1034	[IMPLICITLY_UPGRADED_DISK] = "Implicitly upgraded disk",
1035	[IMPLICITLY_UPGRADED_PDSK] = "Implicitly upgraded pdsk",
1036	};
1037
1038	if (warn != NO_WARNING)
1039	drbd_warn(device, "%s\n", msg_table[warn]);
1040	}
1041
1042	/**
1043	* sanitize_state() - Resolves implicitly necessary additional changes to a state transition
1044	* @device: DRBD device.
1045	* @os: old state.
1046	* @ns: new state.
1047	* @warn: placeholder for returned state warning.
1048	*
1049	* When we loose connection, we have to set the state of the peers disk (pdsk)
1050	* to D_UNKNOWN. This rule and many more along those lines are in this function.
1051	*/
1052	static union drbd_state sanitize_state(struct drbd_device device, union* drbd_state os,
1053	union drbd_state ns, enum sanitize_state_warnings *warn)
1054	{
1055	enum drbd_fencing_p fp;
1056	enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;
1057
1058	if (warn)
1059	*warn = NO_WARNING;
1060
1061	fp = FP_DONT_CARE;
1062	if (get_ldev(device)) {
1063	rcu_read_lock();
1064	fp = rcu_dereference(device->ldev->disk_conf)->fencing;
1065	rcu_read_unlock();
1066	put_ldev(device);
1067	}
1068
1069	/ Implications from connection to peer and peer_isp /
1070	if (ns.conn < C_CONNECTED) {
1071	ns.peer_isp = `0`;
1072	ns.peer = R_UNKNOWN;
1073	if (ns.pdsk > D_UNKNOWN \|\| ns.pdsk < D_INCONSISTENT)
1074	ns.pdsk = D_UNKNOWN;
1075	}
1076
1077	/ Clear the aftr_isp when becoming unconfigured /
1078	if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY)
1079	ns.aftr_isp = `0`;
1080
1081	/ An implication of the disk states onto the connection state /
1082	/ Abort resync if a disk fails/detaches /
1083	if (ns.conn > C_CONNECTED && (ns.disk <= D_FAILED \|\| ns.pdsk <= D_FAILED)) {
1084	if (warn)
1085	*warn = ns.conn == C_VERIFY_S \|\| ns.conn == C_VERIFY_T ?
1086	ABORTED_ONLINE_VERIFY : ABORTED_RESYNC;
1087	ns.conn = C_CONNECTED;
1088	}
1089
1090	/ Connection breaks down before we finished "Negotiating" /
1091	if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING &&
1092	get_ldev_if_state(device, D_NEGOTIATING)) {
1093	if (device->ed_uuid == device->ldev->md.uuid[UI_CURRENT]) {
1094	ns.disk = device->new_state_tmp.disk;
1095	ns.pdsk = device->new_state_tmp.pdsk;
1096	} else {
1097	if (warn)
1098	*warn = CONNECTION_LOST_NEGOTIATING;
1099	ns.disk = D_DISKLESS;
1100	ns.pdsk = D_UNKNOWN;
1101	}
1102	put_ldev(device);
1103	}
1104
1105	/ D_CONSISTENT and D_OUTDATED vanish when we get connected /
1106	if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) {
1107	if (ns.disk == D_CONSISTENT \|\| ns.disk == D_OUTDATED)
1108	ns.disk = D_UP_TO_DATE;
1109	if (ns.pdsk == D_CONSISTENT \|\| ns.pdsk == D_OUTDATED)
1110	ns.pdsk = D_UP_TO_DATE;
1111	}
1112
1113	/ Implications of the connection state on the disk states /
1114	disk_min = D_DISKLESS;
1115	disk_max = D_UP_TO_DATE;
1116	pdsk_min = D_INCONSISTENT;
1117	pdsk_max = D_UNKNOWN;
1118	switch ((enum drbd_conns)ns.conn) {
1119	case C_WF_BITMAP_T:
1120	case C_PAUSED_SYNC_T:
1121	case C_STARTING_SYNC_T:
1122	case C_WF_SYNC_UUID:
1123	case C_BEHIND:
1124	disk_min = D_INCONSISTENT;
1125	disk_max = D_OUTDATED;
1126	pdsk_min = D_UP_TO_DATE;
1127	pdsk_max = D_UP_TO_DATE;
1128	break;
1129	case C_VERIFY_S:
1130	case C_VERIFY_T:
1131	disk_min = D_UP_TO_DATE;
1132	disk_max = D_UP_TO_DATE;
1133	pdsk_min = D_UP_TO_DATE;
1134	pdsk_max = D_UP_TO_DATE;
1135	break;
1136	case C_CONNECTED:
1137	disk_min = D_DISKLESS;
1138	disk_max = D_UP_TO_DATE;
1139	pdsk_min = D_DISKLESS;
1140	pdsk_max = D_UP_TO_DATE;
1141	break;
1142	case C_WF_BITMAP_S:
1143	case C_PAUSED_SYNC_S:
1144	case C_STARTING_SYNC_S:
1145	case C_AHEAD:
1146	disk_min = D_UP_TO_DATE;
1147	disk_max = D_UP_TO_DATE;
1148	pdsk_min = D_INCONSISTENT;
1149	pdsk_max = D_CONSISTENT; / D_OUTDATED would be nice. But explicit outdate necessary/
1150	break;
1151	case C_SYNC_TARGET:
1152	disk_min = D_INCONSISTENT;
1153	disk_max = D_INCONSISTENT;
1154	pdsk_min = D_UP_TO_DATE;
1155	pdsk_max = D_UP_TO_DATE;
1156	break;
1157	case C_SYNC_SOURCE:
1158	disk_min = D_UP_TO_DATE;
1159	disk_max = D_UP_TO_DATE;
1160	pdsk_min = D_INCONSISTENT;
1161	pdsk_max = D_INCONSISTENT;
1162	break;
1163	case C_STANDALONE:
1164	case C_DISCONNECTING:
1165	case C_UNCONNECTED:
1166	case C_TIMEOUT:
1167	case C_BROKEN_PIPE:
1168	case C_NETWORK_FAILURE:
1169	case C_PROTOCOL_ERROR:
1170	case C_TEAR_DOWN:
1171	case C_WF_CONNECTION:
1172	case C_WF_REPORT_PARAMS:
1173	case C_MASK:
1174	break;
1175	}
1176	if (ns.disk > disk_max)
1177	ns.disk = disk_max;
1178
1179	if (ns.disk < disk_min) {
1180	if (warn)
1181	*warn = IMPLICITLY_UPGRADED_DISK;
1182	ns.disk = disk_min;
1183	}
1184	if (ns.pdsk > pdsk_max)
1185	ns.pdsk = pdsk_max;
1186
1187	if (ns.pdsk < pdsk_min) {
1188	if (warn)
1189	*warn = IMPLICITLY_UPGRADED_PDSK;
1190	ns.pdsk = pdsk_min;
1191	}
1192
1193	if (fp == FP_STONITH &&
1194	(ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
1195	!(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
1196	ns.susp_fen = `1`; / Suspend IO while fence-peer handler runs (peer lost) /
1197
1198	if (device->resource->res_opts.on_no_data == OND_SUSPEND_IO &&
1199	(ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) &&
1200	!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE))
1201	ns.susp_nod = `1`; / Suspend IO while no data available (no accessible data available) /
1202
1203	if (ns.aftr_isp \|\| ns.peer_isp \|\| ns.user_isp) {
1204	if (ns.conn == C_SYNC_SOURCE)
1205	ns.conn = C_PAUSED_SYNC_S;
1206	if (ns.conn == C_SYNC_TARGET)
1207	ns.conn = C_PAUSED_SYNC_T;
1208	} else {
1209	if (ns.conn == C_PAUSED_SYNC_S)
1210	ns.conn = C_SYNC_SOURCE;
1211	if (ns.conn == C_PAUSED_SYNC_T)
1212	ns.conn = C_SYNC_TARGET;
1213	}
1214
1215	return ns;
1216	}
1217
1218	void drbd_resume_al(struct drbd_device *device)
1219	{
1220	if (test_and_clear_bit(nr: AL_SUSPENDED, addr: &device->flags))
1221	drbd_info(device, "Resumed AL updates\n");
1222	}
1223
1224	/ helper for _drbd_set_state /
1225	static void set_ov_position(struct drbd_peer_device peer_device, enum* drbd_conns cs)
1226	{
1227	struct drbd_device *device = peer_device->device;
1228
1229	if (peer_device->connection->agreed_pro_version < `90`)
1230	device->ov_start_sector = `0`;
1231	device->rs_total = drbd_bm_bits(device);
1232	device->ov_position = `0`;
1233	if (cs == C_VERIFY_T) {
1234	/ starting online verify from an arbitrary position*
1235	* does not fit well into the existing protocol.
1236	* on C_VERIFY_T, we initialize ov_left and friends
1237	* implicitly in receive_DataRequest once the
1238	* first P_OV_REQUEST is received */
1239	device->ov_start_sector = ~(sector_t)`0`;
1240	} else {
1241	unsigned long bit = BM_SECT_TO_BIT(device->ov_start_sector);
1242	if (bit >= device->rs_total) {
1243	device->ov_start_sector =
1244	BM_BIT_TO_SECT(device->rs_total - `1`);
1245	device->rs_total = `1`;
1246	} else
1247	device->rs_total -= bit;
1248	device->ov_position = device->ov_start_sector;
1249	}
1250	device->ov_left = device->rs_total;
1251	}
1252
1253	/**
1254	* _drbd_set_state() - Set a new DRBD state
1255	* @device: DRBD device.
1256	* @ns: new state.
1257	* @flags: Flags
1258	* @done: Optional completion, that will get completed after the after_state_ch() finished
1259	*
1260	* Caller needs to hold req_lock. Do not call directly.
1261	*/
1262	enum drbd_state_rv
1263	_drbd_set_state(struct drbd_device device, union* drbd_state ns,
1264	enum chg_state_flags flags, struct completion *done)
1265	{
1266	struct drbd_peer_device *peer_device = first_peer_device(device);
1267	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
1268	union drbd_state os;
1269	enum drbd_state_rv rv = SS_SUCCESS;
1270	enum sanitize_state_warnings ssw;
1271	struct after_state_chg_work *ascw;
1272	struct drbd_state_change *state_change;
1273
1274	os = drbd_read_state(device);
1275
1276	ns = sanitize_state(device, os, ns, warn: &ssw);
1277	if (ns.i == os.i)
1278	return SS_NOTHING_TO_DO;
1279
1280	rv = is_valid_transition(os, ns);
1281	if (rv < SS_SUCCESS)
1282	return rv;
1283
1284	if (!(flags & CS_HARD)) {
1285	/ pre-state-change checks ; only look at ns /
1286	/ See drbd_state_sw_errors in drbd_strings.c /
1287
1288	rv = is_valid_state(device, ns);
1289	if (rv < SS_SUCCESS) {
1290	/ If the old state was illegal as well, then let*
1291	this happen.../*
1292
1293	if (is_valid_state(device, ns: os) == rv)
1294	rv = is_valid_soft_transition(os, ns, connection);
1295	} else
1296	rv = is_valid_soft_transition(os, ns, connection);
1297	}
1298
1299	if (rv < SS_SUCCESS) {
1300	if (flags & CS_VERBOSE)
1301	print_st_err(device, os, ns, err: rv);
1302	return rv;
1303	}
1304
1305	print_sanitize_warnings(device, warn: ssw);
1306
1307	drbd_pr_state_change(device, os, ns, flags);
1308
1309	/ Display changes to the susp* flags that where caused by the call to*
1310	sanitize_state(). Only display it here if we where not called from
1311	_conn_request_state() /*
1312	if (!(flags & CS_DC_SUSP))
1313	conn_pr_state_change(connection, os, ns,
1314	flags: (flags & ~CS_DC_MASK) \| CS_DC_SUSP);
1315
1316	/ if we are going -> D_FAILED or D_DISKLESS, grab one extra reference*
1317	* on the ldev here, to be sure the transition -> D_DISKLESS resp.
1318	* drbd_ldev_destroy() won't happen before our corresponding
1319	* after_state_ch works run, where we put_ldev again. */
1320	if ((os.disk != D_FAILED && ns.disk == D_FAILED) \|\|
1321	(os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
1322	atomic_inc(v: &device->local_cnt);
1323
1324	if (!is_sync_state(connection_state: os.conn) && is_sync_state(connection_state: ns.conn))
1325	clear_bit(nr: RS_DONE, addr: &device->flags);
1326
1327	/ FIXME: Have any flags been set earlier in this function already? /
1328	state_change = remember_old_state(resource: device->resource, GFP_ATOMIC);
1329
1330	/ changes to local_cnt and device flags should be visible before*
1331	* changes to state, which again should be visible before anything else
1332	* depending on that change happens. */
1333	smp_wmb();
1334	device->state.i = ns.i;
1335	device->resource->susp = ns.susp;
1336	device->resource->susp_nod = ns.susp_nod;
1337	device->resource->susp_fen = ns.susp_fen;
1338	smp_wmb();
1339
1340	remember_new_state(state_change);
1341
1342	/ put replicated vs not-replicated requests in seperate epochs /
1343	if (drbd_should_do_remote((union drbd_dev_state)os.i) !=
1344	drbd_should_do_remote((union drbd_dev_state)ns.i))
1345	start_new_tl_epoch(connection);
1346
1347	if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
1348	drbd_print_uuids(device, text: "attached to UUIDs");
1349
1350	/ Wake up role changes, that were delayed because of connection establishing /
1351	if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS &&
1352	no_peer_wf_report_params(connection)) {
1353	clear_bit(nr: STATE_SENT, addr: &connection->flags);
1354	wake_up_all_devices(connection);
1355	}
1356
1357	wake_up(&device->misc_wait);
1358	wake_up(&device->state_wait);
1359	wake_up(&connection->ping_wait);
1360
1361	/ Aborted verify run, or we reached the stop sector.*
1362	* Log the last position, unless end-of-device. */
1363	if ((os.conn == C_VERIFY_S \|\| os.conn == C_VERIFY_T) &&
1364	ns.conn <= C_CONNECTED) {
1365	device->ov_start_sector =
1366	BM_BIT_TO_SECT(drbd_bm_bits(device) - device->ov_left);
1367	if (device->ov_left)
1368	drbd_info(device, "Online Verify reached sector %llu\n",
1369	(unsigned long long)device->ov_start_sector);
1370	}
1371
1372	if ((os.conn == C_PAUSED_SYNC_T \|\| os.conn == C_PAUSED_SYNC_S) &&
1373	(ns.conn == C_SYNC_TARGET \|\| ns.conn == C_SYNC_SOURCE)) {
1374	drbd_info(device, "Syncer continues.\n");
1375	device->rs_paused += (long)jiffies
1376	-(long)device->rs_mark_time[device->rs_last_mark];
1377	if (ns.conn == C_SYNC_TARGET)
1378	mod_timer(timer: &device->resync_timer, expires: jiffies);
1379	}
1380
1381	if ((os.conn == C_SYNC_TARGET \|\| os.conn == C_SYNC_SOURCE) &&
1382	(ns.conn == C_PAUSED_SYNC_T \|\| ns.conn == C_PAUSED_SYNC_S)) {
1383	drbd_info(device, "Resync suspended\n");
1384	device->rs_mark_time[device->rs_last_mark] = jiffies;
1385	}
1386
1387	if (os.conn == C_CONNECTED &&
1388	(ns.conn == C_VERIFY_S \|\| ns.conn == C_VERIFY_T)) {
1389	unsigned long now = jiffies;
1390	int i;
1391
1392	set_ov_position(peer_device, cs: ns.conn);
1393	device->rs_start = now;
1394	device->rs_last_sect_ev = `0`;
1395	device->ov_last_oos_size = `0`;
1396	device->ov_last_oos_start = `0`;
1397
1398	for (i = `0`; i < DRBD_SYNC_MARKS; i++) {
1399	device->rs_mark_left[i] = device->ov_left;
1400	device->rs_mark_time[i] = now;
1401	}
1402
1403	drbd_rs_controller_reset(peer_device);
1404
1405	if (ns.conn == C_VERIFY_S) {
1406	drbd_info(device, "Starting Online Verify from sector %llu\n",
1407	(unsigned long long)device->ov_position);
1408	mod_timer(timer: &device->resync_timer, expires: jiffies);
1409	}
1410	}
1411
1412	if (get_ldev(device)) {
1413	u32 mdf = device->ldev->md.flags & ~(MDF_CONSISTENT\|MDF_PRIMARY_IND\|
1414	MDF_CONNECTED_IND\|MDF_WAS_UP_TO_DATE\|
1415	MDF_PEER_OUT_DATED\|MDF_CRASHED_PRIMARY);
1416
1417	mdf &= ~MDF_AL_CLEAN;
1418	if (test_bit(CRASHED_PRIMARY, &device->flags))
1419	mdf \|= MDF_CRASHED_PRIMARY;
1420	if (device->state.role == R_PRIMARY \|\|
1421	(device->state.pdsk < D_INCONSISTENT && device->state.peer == R_PRIMARY))
1422	mdf \|= MDF_PRIMARY_IND;
1423	if (device->state.conn > C_WF_REPORT_PARAMS)
1424	mdf \|= MDF_CONNECTED_IND;
1425	if (device->state.disk > D_INCONSISTENT)
1426	mdf \|= MDF_CONSISTENT;
1427	if (device->state.disk > D_OUTDATED)
1428	mdf \|= MDF_WAS_UP_TO_DATE;
1429	if (device->state.pdsk <= D_OUTDATED && device->state.pdsk >= D_INCONSISTENT)
1430	mdf \|= MDF_PEER_OUT_DATED;
1431	if (mdf != device->ldev->md.flags) {
1432	device->ldev->md.flags = mdf;
1433	drbd_md_mark_dirty(device);
1434	}
1435	if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT)
1436	drbd_set_ed_uuid(device, val: device->ldev->md.uuid[UI_CURRENT]);
1437	put_ldev(device);
1438	}
1439
1440	/ Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync /
1441	if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT &&
1442	os.peer == R_SECONDARY && ns.peer == R_PRIMARY)
1443	set_bit(nr: CONSIDER_RESYNC, addr: &device->flags);
1444
1445	/ Receiver should clean up itself /
1446	if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING)
1447	drbd_thread_stop_nowait(thi: &connection->receiver);
1448
1449	/ Now the receiver finished cleaning up itself, it should die /
1450	if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE)
1451	drbd_thread_stop_nowait(thi: &connection->receiver);
1452
1453	/ Upon network failure, we need to restart the receiver. /
1454	if (os.conn > C_WF_CONNECTION &&
1455	ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
1456	drbd_thread_restart_nowait(thi: &connection->receiver);
1457
1458	/ Resume AL writing if we get a connection /
1459	if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
1460	drbd_resume_al(device);
1461	connection->connect_cnt++;
1462	}
1463
1464	/ remember last attach time so request_timer_fn() won't*
1465	* kill newly established sessions while we are still trying to thaw
1466	* previously frozen IO */
1467	if ((os.disk == D_ATTACHING \|\| os.disk == D_NEGOTIATING) &&
1468	ns.disk > D_NEGOTIATING)
1469	device->last_reattach_jif = jiffies;
1470
1471	ascw = kmalloc(size: sizeof(*ascw), GFP_ATOMIC);
1472	if (ascw) {
1473	ascw->os = os;
1474	ascw->ns = ns;
1475	ascw->flags = flags;
1476	ascw->w.cb = w_after_state_ch;
1477	ascw->device = device;
1478	ascw->done = done;
1479	ascw->state_change = state_change;
1480	drbd_queue_work(q: &connection->sender_work,
1481	w: &ascw->w);
1482	} else {
1483	drbd_err(device, "Could not kmalloc an ascw\n");
1484	}
1485
1486	return rv;
1487	}
1488
1489	static int w_after_state_ch(struct drbd_work w, int* unused)
1490	{
1491	struct after_state_chg_work *ascw =
1492	container_of(w, struct after_state_chg_work, w);
1493	struct drbd_device *device = ascw->device;
1494
1495	after_state_ch(device, os: ascw->os, ns: ascw->ns, flags: ascw->flags, ascw->state_change);
1496	forget_state_change(state_change: ascw->state_change);
1497	if (ascw->flags & CS_WAIT_COMPLETE)
1498	complete(ascw->done);
1499	kfree(objp: ascw);
1500
1501	return `0`;
1502	}
1503
1504	static void abw_start_sync(struct drbd_device device, int* rv)
1505	{
1506	if (rv) {
1507	drbd_err(device, "Writing the bitmap failed not starting resync.\n");
1508	_drbd_request_state(device, NS(conn, C_CONNECTED), f: CS_VERBOSE);
1509	return;
1510	}
1511
1512	switch (device->state.conn) {
1513	case C_STARTING_SYNC_T:
1514	_drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), f: CS_VERBOSE);
1515	break;
1516	case C_STARTING_SYNC_S:
1517	drbd_start_resync(device, side: C_SYNC_SOURCE);
1518	break;
1519	}
1520	}
1521
1522	int drbd_bitmap_io_from_worker(struct drbd_device *device,
1523	int (io_fn)(struct* drbd_device , struct* drbd_peer_device *),
1524	char why, enum* bm_flag flags,
1525	struct drbd_peer_device *peer_device)
1526	{
1527	int rv;
1528
1529	D_ASSERT(device, current == first_peer_device(device)->connection->worker.task);
1530
1531	/ open coded non-blocking drbd_suspend_io(device); /
1532	atomic_inc(v: &device->suspend_cnt);
1533
1534	drbd_bm_lock(device, why, flags);
1535	rv = io_fn(device, peer_device);
1536	drbd_bm_unlock(device);
1537
1538	drbd_resume_io(device);
1539
1540	return rv;
1541	}
1542
1543	int notify_resource_state_change(struct sk_buff *skb,
1544	unsigned int seq,
1545	struct drbd_resource_state_change *resource_state_change,
1546	enum drbd_notification_type type)
1547	{
1548	struct drbd_resource *resource = resource_state_change->resource;
1549	struct resource_info resource_info = {
1550	.res_role = resource_state_change->role[NEW],
1551	.res_susp = resource_state_change->susp[NEW],
1552	.res_susp_nod = resource_state_change->susp_nod[NEW],
1553	.res_susp_fen = resource_state_change->susp_fen[NEW],
1554	};
1555
1556	return notify_resource_state(skb, seq, resource, &resource_info, type);
1557	}
1558
1559	int notify_connection_state_change(struct sk_buff *skb,
1560	unsigned int seq,
1561	struct drbd_connection_state_change *connection_state_change,
1562	enum drbd_notification_type type)
1563	{
1564	struct drbd_connection *connection = connection_state_change->connection;
1565	struct connection_info connection_info = {
1566	.conn_connection_state = connection_state_change->cstate[NEW],
1567	.conn_role = connection_state_change->peer_role[NEW],
1568	};
1569
1570	return notify_connection_state(skb, seq, connection, &connection_info, type);
1571	}
1572
1573	int notify_device_state_change(struct sk_buff *skb,
1574	unsigned int seq,
1575	struct drbd_device_state_change *device_state_change,
1576	enum drbd_notification_type type)
1577	{
1578	struct drbd_device *device = device_state_change->device;
1579	struct device_info device_info = {
1580	.dev_disk_state = device_state_change->disk_state[NEW],
1581	};
1582
1583	return notify_device_state(skb, seq, device, &device_info, type);
1584	}
1585
1586	int notify_peer_device_state_change(struct sk_buff *skb,
1587	unsigned int seq,
1588	struct drbd_peer_device_state_change *p,
1589	enum drbd_notification_type type)
1590	{
1591	struct drbd_peer_device *peer_device = p->peer_device;
1592	struct peer_device_info peer_device_info = {
1593	.peer_repl_state = p->repl_state[NEW],
1594	.peer_disk_state = p->disk_state[NEW],
1595	.peer_resync_susp_user = p->resync_susp_user[NEW],
1596	.peer_resync_susp_peer = p->resync_susp_peer[NEW],
1597	.peer_resync_susp_dependency = p->resync_susp_dependency[NEW],
1598	};
1599
1600	return notify_peer_device_state(skb, seq, peer_device, &peer_device_info, type);
1601	}
1602
1603	static void broadcast_state_change(struct drbd_state_change *state_change)
1604	{
1605	struct drbd_resource_state_change *resource_state_change = &state_change->resource[`0`];
1606	bool resource_state_has_changed;
1607	unsigned int n_device, n_connection, n_peer_device, n_peer_devices;
1608	int (last_func)(struct* sk_buff , unsigned* int, void *,
1609	enum drbd_notification_type) = NULL;
1610	void *last_arg = NULL;
1611
1612	#define HAS_CHANGED(state) ((state)[OLD] != (state)[NEW])
1613	#define FINAL_STATE_CHANGE(type) \
1614	({ if (last_func) \
1615	last_func(NULL, 0, last_arg, type); \
1616	})
1617	#define REMEMBER_STATE_CHANGE(func, arg, type) \
1618	({ FINAL_STATE_CHANGE(type \| NOTIFY_CONTINUES); \
1619	last_func = (typeof(last_func))func; \
1620	last_arg = arg; \
1621	})
1622
1623	mutex_lock(&notification_mutex);
1624
1625	resource_state_has_changed =
1626	HAS_CHANGED(resource_state_change->role) \|\|
1627	HAS_CHANGED(resource_state_change->susp) \|\|
1628	HAS_CHANGED(resource_state_change->susp_nod) \|\|
1629	HAS_CHANGED(resource_state_change->susp_fen);
1630
1631	if (resource_state_has_changed)
1632	REMEMBER_STATE_CHANGE(notify_resource_state_change,
1633	resource_state_change, NOTIFY_CHANGE);
1634
1635	for (n_connection = `0`; n_connection < state_change->n_connections; n_connection++) {
1636	struct drbd_connection_state_change *connection_state_change =
1637	&state_change->connections[n_connection];
1638
1639	if (HAS_CHANGED(connection_state_change->peer_role) \|\|
1640	HAS_CHANGED(connection_state_change->cstate))
1641	REMEMBER_STATE_CHANGE(notify_connection_state_change,
1642	connection_state_change, NOTIFY_CHANGE);
1643	}
1644
1645	for (n_device = `0`; n_device < state_change->n_devices; n_device++) {
1646	struct drbd_device_state_change *device_state_change =
1647	&state_change->devices[n_device];
1648
1649	if (HAS_CHANGED(device_state_change->disk_state))
1650	REMEMBER_STATE_CHANGE(notify_device_state_change,
1651	device_state_change, NOTIFY_CHANGE);
1652	}
1653
1654	n_peer_devices = state_change->n_devices * state_change->n_connections;
1655	for (n_peer_device = `0`; n_peer_device < n_peer_devices; n_peer_device++) {
1656	struct drbd_peer_device_state_change *p =
1657	&state_change->peer_devices[n_peer_device];
1658
1659	if (HAS_CHANGED(p->disk_state) \|\|
1660	HAS_CHANGED(p->repl_state) \|\|
1661	HAS_CHANGED(p->resync_susp_user) \|\|
1662	HAS_CHANGED(p->resync_susp_peer) \|\|
1663	HAS_CHANGED(p->resync_susp_dependency))
1664	REMEMBER_STATE_CHANGE(notify_peer_device_state_change,
1665	p, NOTIFY_CHANGE);
1666	}
1667
1668	FINAL_STATE_CHANGE(NOTIFY_CHANGE);
1669	mutex_unlock(lock: &notification_mutex);
1670
1671	#undef HAS_CHANGED
1672	#undef FINAL_STATE_CHANGE
1673	#undef REMEMBER_STATE_CHANGE
1674	}
1675
1676	/ takes old and new peer disk state /
1677	static bool lost_contact_to_peer_data(enum drbd_disk_state os, enum drbd_disk_state ns)
1678	{
1679	if ((os >= D_INCONSISTENT && os != D_UNKNOWN && os != D_OUTDATED)
1680	&& (ns < D_INCONSISTENT \|\| ns == D_UNKNOWN \|\| ns == D_OUTDATED))
1681	return true;
1682
1683	/ Scenario, starting with normal operation*
1684	* Connected Primary/Secondary UpToDate/UpToDate
1685	* NetworkFailure Primary/Unknown UpToDate/DUnknown (frozen)
1686	* ...
1687	* Connected Primary/Secondary UpToDate/Diskless (resumed; needs to bump uuid!)
1688	*/
1689	if (os == D_UNKNOWN
1690	&& (ns == D_DISKLESS \|\| ns == D_FAILED \|\| ns == D_OUTDATED))
1691	return true;
1692
1693	return false;
1694	}
1695
1696	/**
1697	* after_state_ch() - Perform after state change actions that may sleep
1698	* @device: DRBD device.
1699	* @os: old state.
1700	* @ns: new state.
1701	* @flags: Flags
1702	* @state_change: state change to broadcast
1703	*/
1704	static void after_state_ch(struct drbd_device device, union* drbd_state os,
1705	union drbd_state ns, enum chg_state_flags flags,
1706	struct drbd_state_change *state_change)
1707	{
1708	struct drbd_resource *resource = device->resource;
1709	struct drbd_peer_device *peer_device = first_peer_device(device);
1710	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
1711	struct sib_info sib;
1712
1713	broadcast_state_change(state_change);
1714
1715	sib.sib_reason = SIB_STATE_CHANGE;
1716	sib.os = os;
1717	sib.ns = ns;
1718
1719	if ((os.disk != D_UP_TO_DATE \|\| os.pdsk != D_UP_TO_DATE)
1720	&& (ns.disk == D_UP_TO_DATE && ns.pdsk == D_UP_TO_DATE)) {
1721	clear_bit(nr: CRASHED_PRIMARY, addr: &device->flags);
1722	if (device->p_uuid)
1723	device->p_uuid[UI_FLAGS] &= ~((u64)`2`);
1724	}
1725
1726	/ Inform userspace about the change... /
1727	drbd_bcast_event(device, sib: &sib);
1728
1729	if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) &&
1730	(ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
1731	drbd_khelper(device, cmd: "pri-on-incon-degr");
1732
1733	/ Here we have the actions that are performed after a*
1734	state change. This function might sleep /*
1735
1736	if (ns.susp_nod) {
1737	enum drbd_req_event what = NOTHING;
1738
1739	spin_lock_irq(lock: &device->resource->req_lock);
1740	if (os.conn < C_CONNECTED && conn_lowest_conn(connection) >= C_CONNECTED)
1741	what = RESEND;
1742
1743	if ((os.disk == D_ATTACHING \|\| os.disk == D_NEGOTIATING) &&
1744	conn_lowest_disk(connection) == D_UP_TO_DATE)
1745	what = RESTART_FROZEN_DISK_IO;
1746
1747	if (resource->susp_nod && what != NOTHING) {
1748	_tl_restart(connection, what);
1749	_conn_request_state(connection,
1750	mask: (union drbd_state) { { .susp_nod = `1` } },
1751	val: (union drbd_state) { { .susp_nod = `0` } },
1752	flags: CS_VERBOSE);
1753	}
1754	spin_unlock_irq(lock: &device->resource->req_lock);
1755	}
1756
1757	if (ns.susp_fen) {
1758	spin_lock_irq(lock: &device->resource->req_lock);
1759	if (resource->susp_fen && conn_lowest_conn(connection) >= C_CONNECTED) {
1760	/ case2: The connection was established again: /
1761	struct drbd_peer_device *peer_device;
1762	int vnr;
1763
1764	rcu_read_lock();
1765	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
1766	clear_bit(nr: NEW_CUR_UUID, addr: &peer_device->device->flags);
1767	rcu_read_unlock();
1768
1769	/ We should actively create a new uuid, _before_*
1770	* we resume/resent, if the peer is diskless
1771	* (recovery from a multiple error scenario).
1772	* Currently, this happens with a slight delay
1773	* below when checking lost_contact_to_peer_data() ...
1774	*/
1775	_tl_restart(connection, what: RESEND);
1776	_conn_request_state(connection,
1777	mask: (union drbd_state) { { .susp_fen = `1` } },
1778	val: (union drbd_state) { { .susp_fen = `0` } },
1779	flags: CS_VERBOSE);
1780	}
1781	spin_unlock_irq(lock: &device->resource->req_lock);
1782	}
1783
1784	/ Became sync source. With protocol >= 96, we still need to send out*
1785	* the sync uuid now. Need to do that before any drbd_send_state, or
1786	* the other side may go "paused sync" before receiving the sync uuids,
1787	* which is unexpected. */
1788	if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
1789	(ns.conn == C_SYNC_SOURCE \|\| ns.conn == C_PAUSED_SYNC_S) &&
1790	connection->agreed_pro_version >= `96` && get_ldev(device)) {
1791	drbd_gen_and_send_sync_uuid(peer_device);
1792	put_ldev(device);
1793	}
1794
1795	/ Do not change the order of the if above and the two below... /
1796	if (os.pdsk == D_DISKLESS &&
1797	ns.pdsk > D_DISKLESS && ns.pdsk != D_UNKNOWN) { / attach on the peer /
1798	/ we probably will start a resync soon.*
1799	* make sure those things are properly reset. */
1800	device->rs_total = `0`;
1801	device->rs_failed = `0`;
1802	atomic_set(v: &device->rs_pending_cnt, i: `0`);
1803	drbd_rs_cancel_all(device);
1804
1805	drbd_send_uuids(peer_device);
1806	drbd_send_state(peer_device, s: ns);
1807	}
1808	/ No point in queuing send_bitmap if we don't have a connection*
1809	* anymore, so check also the _current_ state, not only the new state
1810	* at the time this work was queued. */
1811	if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
1812	device->state.conn == C_WF_BITMAP_S)
1813	drbd_queue_bitmap_io(device, io_fn: &drbd_send_bitmap, NULL,
1814	why: "send_bitmap (WFBitMapS)",
1815	flags: BM_LOCKED_TEST_ALLOWED, peer_device);
1816
1817	/ Lost contact to peer's copy of the data /
1818	if (lost_contact_to_peer_data(os: os.pdsk, ns: ns.pdsk)) {
1819	if (get_ldev(device)) {
1820	if ((ns.role == R_PRIMARY \|\| ns.peer == R_PRIMARY) &&
1821	device->ldev->md.uuid[UI_BITMAP] == `0` && ns.disk >= D_UP_TO_DATE) {
1822	if (drbd_suspended(device)) {
1823	set_bit(nr: NEW_CUR_UUID, addr: &device->flags);
1824	} else {
1825	drbd_uuid_new_current(device);
1826	drbd_send_uuids(peer_device);
1827	}
1828	}
1829	put_ldev(device);
1830	}
1831	}
1832
1833	if (ns.pdsk < D_INCONSISTENT && get_ldev(device)) {
1834	if (os.peer != R_PRIMARY && ns.peer == R_PRIMARY &&
1835	device->ldev->md.uuid[UI_BITMAP] == `0` && ns.disk >= D_UP_TO_DATE) {
1836	drbd_uuid_new_current(device);
1837	drbd_send_uuids(peer_device);
1838	}
1839	/ D_DISKLESS Peer becomes secondary /
1840	if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
1841	/ We may still be Primary ourselves.*
1842	* No harm done if the bitmap still changes,
1843	* redirtied pages will follow later. */
1844	drbd_bitmap_io_from_worker(device, io_fn: &drbd_bm_write,
1845	why: "demote diskless peer", flags: BM_LOCKED_SET_ALLOWED, peer_device);
1846	put_ldev(device);
1847	}
1848
1849	/ Write out all changed bits on demote.*
1850	* Though, no need to da that just yet
1851	* if there is a resync going on still */
1852	if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
1853	device->state.conn <= C_CONNECTED && get_ldev(device)) {
1854	/ No changes to the bitmap expected this time, so assert that,*
1855	* even though no harm was done if it did change. */
1856	drbd_bitmap_io_from_worker(device, io_fn: &drbd_bm_write,
1857	why: "demote", flags: BM_LOCKED_TEST_ALLOWED, peer_device);
1858	put_ldev(device);
1859	}
1860
1861	/ Last part of the attaching process ... /
1862	if (ns.conn >= C_CONNECTED &&
1863	os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
1864	drbd_send_sizes(peer_device, trigger_reply: `0`, flags: `0`); / to start sync... /
1865	drbd_send_uuids(peer_device);
1866	drbd_send_state(peer_device, s: ns);
1867	}
1868
1869	/ We want to pause/continue resync, tell peer. /
1870	if (ns.conn >= C_CONNECTED &&
1871	((os.aftr_isp != ns.aftr_isp) \|\|
1872	(os.user_isp != ns.user_isp)))
1873	drbd_send_state(peer_device, s: ns);
1874
1875	/ In case one of the isp bits got set, suspend other devices. /
1876	if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
1877	(ns.aftr_isp \|\| ns.peer_isp \|\| ns.user_isp))
1878	suspend_other_sg(device);
1879
1880	/ Make sure the peer gets informed about eventual state*
1881	changes (ISP bits) while we were in WFReportParams. /*
1882	if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
1883	drbd_send_state(peer_device, s: ns);
1884
1885	if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
1886	drbd_send_state(peer_device, s: ns);
1887
1888	/ We are in the progress to start a full sync... /
1889	if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) \|\|
1890	(os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
1891	/ no other bitmap changes expected during this phase /
1892	drbd_queue_bitmap_io(device,
1893	io_fn: &drbd_bmio_set_n_write, done: &abw_start_sync,
1894	why: "set_n_write from StartingSync", flags: BM_LOCKED_TEST_ALLOWED,
1895	peer_device);
1896
1897	/ first half of local IO error, failure to attach,*
1898	* or administrative detach */
1899	if (os.disk != D_FAILED && ns.disk == D_FAILED) {
1900	enum drbd_io_error_p eh = EP_PASS_ON;
1901	int was_io_error = `0`;
1902	/ corresponding get_ldev was in _drbd_set_state, to serialize*
1903	* our cleanup here with the transition to D_DISKLESS.
1904	* But is is still not save to dreference ldev here, since
1905	* we might come from an failed Attach before ldev was set. */
1906	if (device->ldev) {
1907	rcu_read_lock();
1908	eh = rcu_dereference(device->ldev->disk_conf)->on_io_error;
1909	rcu_read_unlock();
1910
1911	was_io_error = test_and_clear_bit(nr: WAS_IO_ERROR, addr: &device->flags);
1912
1913	/ Intentionally call this handler first, before drbd_send_state().*
1914	* See: 2932204 drbd: call local-io-error handler early
1915	* People may chose to hard-reset the box from this handler.
1916	* It is useful if this looks like a "regular node crash". */
1917	if (was_io_error && eh == EP_CALL_HELPER)
1918	drbd_khelper(device, cmd: "local-io-error");
1919
1920	/ Immediately allow completion of all application IO,*
1921	* that waits for completion from the local disk,
1922	* if this was a force-detach due to disk_timeout
1923	* or administrator request (drbdsetup detach --force).
1924	* Do NOT abort otherwise.
1925	* Aborting local requests may cause serious problems,
1926	* if requests are completed to upper layers already,
1927	* and then later the already submitted local bio completes.
1928	* This can cause DMA into former bio pages that meanwhile
1929	* have been re-used for other things.
1930	* So aborting local requests may cause crashes,
1931	* or even worse, silent data corruption.
1932	*/
1933	if (test_and_clear_bit(nr: FORCE_DETACH, addr: &device->flags))
1934	tl_abort_disk_io(device);
1935
1936	/ current state still has to be D_FAILED,*
1937	* there is only one way out: to D_DISKLESS,
1938	* and that may only happen after our put_ldev below. */
1939	if (device->state.disk != D_FAILED)
1940	drbd_err(device,
1941	"ASSERT FAILED: disk is %s during detach\n",
1942	drbd_disk_str(device->state.disk));
1943
1944	if (ns.conn >= C_CONNECTED)
1945	drbd_send_state(peer_device, s: ns);
1946
1947	drbd_rs_cancel_all(device);
1948
1949	/ In case we want to get something to stable storage still,*
1950	* this may be the last chance.
1951	* Following put_ldev may transition to D_DISKLESS. */
1952	drbd_md_sync(device);
1953	}
1954	put_ldev(device);
1955	}
1956
1957	/ second half of local IO error, failure to attach,*
1958	* or administrative detach,
1959	* after local_cnt references have reached zero again */
1960	if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) {
1961	/ We must still be diskless,*
1962	* re-attach has to be serialized with this! */
1963	if (device->state.disk != D_DISKLESS)
1964	drbd_err(device,
1965	"ASSERT FAILED: disk is %s while going diskless\n",
1966	drbd_disk_str(device->state.disk));
1967
1968	if (ns.conn >= C_CONNECTED)
1969	drbd_send_state(peer_device, s: ns);
1970	/ corresponding get_ldev in __drbd_set_state*
1971	* this may finally trigger drbd_ldev_destroy. */
1972	put_ldev(device);
1973	}
1974
1975	/ Notify peer that I had a local IO error, and did not detached.. /
1976	if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED)
1977	drbd_send_state(peer_device, s: ns);
1978
1979	/ Disks got bigger while they were detached /
1980	if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
1981	test_and_clear_bit(nr: RESYNC_AFTER_NEG, addr: &device->flags)) {
1982	if (ns.conn == C_CONNECTED)
1983	resync_after_online_grow(device);
1984	}
1985
1986	/ A resync finished or aborted, wake paused devices... /
1987	if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) \|\|
1988	(os.peer_isp && !ns.peer_isp) \|\|
1989	(os.user_isp && !ns.user_isp))
1990	resume_next_sg(device);
1991
1992	/ sync target done with resync. Explicitly notify peer, even though*
1993	* it should (at least for non-empty resyncs) already know itself. */
1994	if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
1995	drbd_send_state(peer_device, s: ns);
1996
1997	/ Verify finished, or reached stop sector. Peer did not know about*
1998	* the stop sector, and we may even have changed the stop sector during
1999	* verify to interrupt/stop early. Send the new state. */
2000	if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED
2001	&& verify_can_do_stop_sector(device))
2002	drbd_send_state(peer_device, s: ns);
2003
2004	/ This triggers bitmap writeout of potentially still unwritten pages*
2005	* if the resync finished cleanly, or aborted because of peer disk
2006	* failure, or on transition from resync back to AHEAD/BEHIND.
2007	*
2008	* Connection loss is handled in drbd_disconnected() by the receiver.
2009	*
2010	* For resync aborted because of local disk failure, we cannot do
2011	* any bitmap writeout anymore.
2012	*
2013	* No harm done if some bits change during this phase.
2014	*/
2015	if ((os.conn > C_CONNECTED && os.conn < C_AHEAD) &&
2016	(ns.conn == C_CONNECTED \|\| ns.conn >= C_AHEAD) && get_ldev(device)) {
2017	drbd_queue_bitmap_io(device, io_fn: &drbd_bm_write_copy_pages, NULL,
2018	why: "write from resync_finished", flags: BM_LOCKED_CHANGE_ALLOWED,
2019	peer_device);
2020	put_ldev(device);
2021	}
2022
2023	if (ns.disk == D_DISKLESS &&
2024	ns.conn == C_STANDALONE &&
2025	ns.role == R_SECONDARY) {
2026	if (os.aftr_isp != ns.aftr_isp)
2027	resume_next_sg(device);
2028	}
2029
2030	drbd_md_sync(device);
2031	}
2032
2033	struct after_conn_state_chg_work {
2034	struct drbd_work w;
2035	enum drbd_conns oc;
2036	union drbd_state ns_min;
2037	union drbd_state ns_max; / new, max state, over all devices /
2038	enum chg_state_flags flags;
2039	struct drbd_connection *connection;
2040	struct drbd_state_change *state_change;
2041	};
2042
2043	static int w_after_conn_state_ch(struct drbd_work w, int* unused)
2044	{
2045	struct after_conn_state_chg_work *acscw =
2046	container_of(w, struct after_conn_state_chg_work, w);
2047	struct drbd_connection *connection = acscw->connection;
2048	enum drbd_conns oc = acscw->oc;
2049	union drbd_state ns_max = acscw->ns_max;
2050	struct drbd_peer_device *peer_device;
2051	int vnr;
2052
2053	broadcast_state_change(state_change: acscw->state_change);
2054	forget_state_change(state_change: acscw->state_change);
2055	kfree(objp: acscw);
2056
2057	/ Upon network configuration, we need to start the receiver /
2058	if (oc == C_STANDALONE && ns_max.conn == C_UNCONNECTED)
2059	drbd_thread_start(thi: &connection->receiver);
2060
2061	if (oc == C_DISCONNECTING && ns_max.conn == C_STANDALONE) {
2062	struct net_conf *old_conf;
2063
2064	mutex_lock(&notification_mutex);
2065	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
2066	notify_peer_device_state(NULL, `0`, peer_device, NULL,
2067	NOTIFY_DESTROY \| NOTIFY_CONTINUES);
2068	notify_connection_state(NULL, `0`, connection, NULL, NOTIFY_DESTROY);
2069	mutex_unlock(lock: &notification_mutex);
2070
2071	mutex_lock(&connection->resource->conf_update);
2072	old_conf = connection->net_conf;
2073	connection->my_addr_len = `0`;
2074	connection->peer_addr_len = `0`;
2075	RCU_INIT_POINTER(connection->net_conf, NULL);
2076	conn_free_crypto(connection);
2077	mutex_unlock(lock: &connection->resource->conf_update);
2078
2079	kvfree_rcu_mightsleep(old_conf);
2080	}
2081
2082	if (ns_max.susp_fen) {
2083	/ case1: The outdate peer handler is successful: /
2084	if (ns_max.pdsk <= D_OUTDATED) {
2085	rcu_read_lock();
2086	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
2087	struct drbd_device *device = peer_device->device;
2088	if (test_bit(NEW_CUR_UUID, &device->flags)) {
2089	drbd_uuid_new_current(device);
2090	clear_bit(nr: NEW_CUR_UUID, addr: &device->flags);
2091	}
2092	}
2093	rcu_read_unlock();
2094	spin_lock_irq(lock: &connection->resource->req_lock);
2095	_tl_restart(connection, what: CONNECTION_LOST_WHILE_PENDING);
2096	_conn_request_state(connection,
2097	mask: (union drbd_state) { { .susp_fen = `1` } },
2098	val: (union drbd_state) { { .susp_fen = `0` } },
2099	flags: CS_VERBOSE);
2100	spin_unlock_irq(lock: &connection->resource->req_lock);
2101	}
2102	}
2103	conn_md_sync(connection);
2104	kref_put(kref: &connection->kref, release: drbd_destroy_connection);
2105
2106	return `0`;
2107	}
2108
2109	static void conn_old_common_state(struct drbd_connection connection, union* drbd_state pcs, enum* chg_state_flags *pf)
2110	{
2111	enum chg_state_flags flags = ~`0`;
2112	struct drbd_peer_device *peer_device;
2113	int vnr, first_vol = `1`;
2114	union drbd_dev_state os, cs = {
2115	{ .role = R_SECONDARY,
2116	.peer = R_UNKNOWN,
2117	.conn = connection->cstate,
2118	.disk = D_DISKLESS,
2119	.pdsk = D_UNKNOWN,
2120	} };
2121
2122	rcu_read_lock();
2123	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
2124	struct drbd_device *device = peer_device->device;
2125	os = device->state;
2126
2127	if (first_vol) {
2128	cs = os;
2129	first_vol = `0`;
2130	continue;
2131	}
2132
2133	if (cs.role != os.role)
2134	flags &= ~CS_DC_ROLE;
2135
2136	if (cs.peer != os.peer)
2137	flags &= ~CS_DC_PEER;
2138
2139	if (cs.conn != os.conn)
2140	flags &= ~CS_DC_CONN;
2141
2142	if (cs.disk != os.disk)
2143	flags &= ~CS_DC_DISK;
2144
2145	if (cs.pdsk != os.pdsk)
2146	flags &= ~CS_DC_PDSK;
2147	}
2148	rcu_read_unlock();
2149
2150	*pf \|= CS_DC_MASK;
2151	*pf &= flags;
2152	(*pcs).i = cs.i;
2153	}
2154
2155	static enum drbd_state_rv
2156	conn_is_valid_transition(struct drbd_connection connection, union* drbd_state mask, union drbd_state val,
2157	enum chg_state_flags flags)
2158	{
2159	enum drbd_state_rv rv = SS_SUCCESS;
2160	union drbd_state ns, os;
2161	struct drbd_peer_device *peer_device;
2162	int vnr;
2163
2164	rcu_read_lock();
2165	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
2166	struct drbd_device *device = peer_device->device;
2167	os = drbd_read_state(device);
2168	ns = sanitize_state(device, os, ns: apply_mask_val(os, mask, val), NULL);
2169
2170	if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
2171	ns.disk = os.disk;
2172
2173	if (ns.i == os.i)
2174	continue;
2175
2176	rv = is_valid_transition(os, ns);
2177
2178	if (rv >= SS_SUCCESS && !(flags & CS_HARD)) {
2179	rv = is_valid_state(device, ns);
2180	if (rv < SS_SUCCESS) {
2181	if (is_valid_state(device, ns: os) == rv)
2182	rv = is_valid_soft_transition(os, ns, connection);
2183	} else
2184	rv = is_valid_soft_transition(os, ns, connection);
2185	}
2186
2187	if (rv < SS_SUCCESS) {
2188	if (flags & CS_VERBOSE)
2189	print_st_err(device, os, ns, err: rv);
2190	break;
2191	}
2192	}
2193	rcu_read_unlock();
2194
2195	return rv;
2196	}
2197
2198	static void
2199	conn_set_state(struct drbd_connection connection, union* drbd_state mask, union drbd_state val,
2200	union drbd_state pns_min, union* drbd_state pns_max, enum* chg_state_flags flags)
2201	{
2202	union drbd_state ns, os, ns_max = { };
2203	union drbd_state ns_min = {
2204	{ .role = R_MASK,
2205	.peer = R_MASK,
2206	.conn = val.conn,
2207	.disk = D_MASK,
2208	.pdsk = D_MASK
2209	} };
2210	struct drbd_peer_device *peer_device;
2211	enum drbd_state_rv rv;
2212	int vnr, number_of_volumes = `0`;
2213
2214	if (mask.conn == C_MASK) {
2215	/ remember last connect time so request_timer_fn() won't*
2216	* kill newly established sessions while we are still trying to thaw
2217	* previously frozen IO */
2218	if (connection->cstate != C_WF_REPORT_PARAMS && val.conn == C_WF_REPORT_PARAMS)
2219	connection->last_reconnect_jif = jiffies;
2220
2221	connection->cstate = val.conn;
2222	}
2223
2224	rcu_read_lock();
2225	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
2226	struct drbd_device *device = peer_device->device;
2227	number_of_volumes++;
2228	os = drbd_read_state(device);
2229	ns = apply_mask_val(os, mask, val);
2230	ns = sanitize_state(device, os, ns, NULL);
2231
2232	if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
2233	ns.disk = os.disk;
2234
2235	rv = _drbd_set_state(device, ns, flags, NULL);
2236	BUG_ON(rv < SS_SUCCESS);
2237	ns.i = device->state.i;
2238	ns_max.role = max_role(role1: ns.role, role2: ns_max.role);
2239	ns_max.peer = max_role(role1: ns.peer, role2: ns_max.peer);
2240	ns_max.conn = max_t(enum drbd_conns, ns.conn, ns_max.conn);
2241	ns_max.disk = max_t(enum drbd_disk_state, ns.disk, ns_max.disk);
2242	ns_max.pdsk = max_t(enum drbd_disk_state, ns.pdsk, ns_max.pdsk);
2243
2244	ns_min.role = min_role(role1: ns.role, role2: ns_min.role);
2245	ns_min.peer = min_role(role1: ns.peer, role2: ns_min.peer);
2246	ns_min.conn = min_t(enum drbd_conns, ns.conn, ns_min.conn);
2247	ns_min.disk = min_t(enum drbd_disk_state, ns.disk, ns_min.disk);
2248	ns_min.pdsk = min_t(enum drbd_disk_state, ns.pdsk, ns_min.pdsk);
2249	}
2250	rcu_read_unlock();
2251
2252	if (number_of_volumes == `0`) {
2253	ns_min = ns_max = (union drbd_state) { {
2254	.role = R_SECONDARY,
2255	.peer = R_UNKNOWN,
2256	.conn = val.conn,
2257	.disk = D_DISKLESS,
2258	.pdsk = D_UNKNOWN
2259	} };
2260	}
2261
2262	ns_min.susp = ns_max.susp = connection->resource->susp;
2263	ns_min.susp_nod = ns_max.susp_nod = connection->resource->susp_nod;
2264	ns_min.susp_fen = ns_max.susp_fen = connection->resource->susp_fen;
2265
2266	*pns_min = ns_min;
2267	*pns_max = ns_max;
2268	}
2269
2270	static enum drbd_state_rv
2271	_conn_rq_cond(struct drbd_connection connection, union* drbd_state mask, union drbd_state val)
2272	{
2273	enum drbd_state_rv err, rv = SS_UNKNOWN_ERROR; / continue waiting /;
2274
2275	if (test_and_clear_bit(nr: CONN_WD_ST_CHG_OKAY, addr: &connection->flags))
2276	rv = SS_CW_SUCCESS;
2277
2278	if (test_and_clear_bit(nr: CONN_WD_ST_CHG_FAIL, addr: &connection->flags))
2279	rv = SS_CW_FAILED_BY_PEER;
2280
2281	err = conn_is_valid_transition(connection, mask, val, flags: `0`);
2282	if (err == SS_SUCCESS && connection->cstate == C_WF_REPORT_PARAMS)
2283	return rv;
2284
2285	return err;
2286	}
2287
2288	enum drbd_state_rv
2289	_conn_request_state(struct drbd_connection connection, union* drbd_state mask, union drbd_state val,
2290	enum chg_state_flags flags)
2291	{
2292	enum drbd_state_rv rv = SS_SUCCESS;
2293	struct after_conn_state_chg_work *acscw;
2294	enum drbd_conns oc = connection->cstate;
2295	union drbd_state ns_max, ns_min, os;
2296	bool have_mutex = false;
2297	struct drbd_state_change *state_change;
2298
2299	if (mask.conn) {
2300	rv = is_valid_conn_transition(oc, nc: val.conn);
2301	if (rv < SS_SUCCESS)
2302	goto abort;
2303	}
2304
2305	rv = conn_is_valid_transition(connection, mask, val, flags);
2306	if (rv < SS_SUCCESS)
2307	goto abort;
2308
2309	if (oc == C_WF_REPORT_PARAMS && val.conn == C_DISCONNECTING &&
2310	!(flags & (CS_LOCAL_ONLY \| CS_HARD))) {
2311
2312	/ This will be a cluster-wide state change.*
2313	* Need to give up the spinlock, grab the mutex,
2314	* then send the state change request, ... */
2315	spin_unlock_irq(lock: &connection->resource->req_lock);
2316	mutex_lock(&connection->cstate_mutex);
2317	have_mutex = true;
2318
2319	set_bit(nr: CONN_WD_ST_CHG_REQ, addr: &connection->flags);
2320	if (conn_send_state_req(connection, mask, val)) {
2321	/ sending failed. /
2322	clear_bit(nr: CONN_WD_ST_CHG_REQ, addr: &connection->flags);
2323	rv = SS_CW_FAILED_BY_PEER;
2324	/ need to re-aquire the spin lock, though /
2325	goto abort_unlocked;
2326	}
2327
2328	if (val.conn == C_DISCONNECTING)
2329	set_bit(nr: DISCONNECT_SENT, addr: &connection->flags);
2330
2331	/ ... and re-aquire the spinlock.*
2332	* If _conn_rq_cond() returned >= SS_SUCCESS, we must call
2333	* conn_set_state() within the same spinlock. */
2334	spin_lock_irq(lock: &connection->resource->req_lock);
2335	wait_event_lock_irq(connection->ping_wait,
2336	(rv = _conn_rq_cond(connection, mask, val)),
2337	connection->resource->req_lock);
2338	clear_bit(nr: CONN_WD_ST_CHG_REQ, addr: &connection->flags);
2339	if (rv < SS_SUCCESS)
2340	goto abort;
2341	}
2342
2343	state_change = remember_old_state(resource: connection->resource, GFP_ATOMIC);
2344	conn_old_common_state(connection, pcs: &os, pf: &flags);
2345	flags \|= CS_DC_SUSP;
2346	conn_set_state(connection, mask, val, pns_min: &ns_min, pns_max: &ns_max, flags);
2347	conn_pr_state_change(connection, os, ns: ns_max, flags);
2348	remember_new_state(state_change);
2349
2350	acscw = kmalloc(size: sizeof(*acscw), GFP_ATOMIC);
2351	if (acscw) {
2352	acscw->oc = os.conn;
2353	acscw->ns_min = ns_min;
2354	acscw->ns_max = ns_max;
2355	acscw->flags = flags;
2356	acscw->w.cb = w_after_conn_state_ch;
2357	kref_get(kref: &connection->kref);
2358	acscw->connection = connection;
2359	acscw->state_change = state_change;
2360	drbd_queue_work(q: &connection->sender_work, w: &acscw->w);
2361	} else {
2362	drbd_err(connection, "Could not kmalloc an acscw\n");
2363	}
2364
2365	abort:
2366	if (have_mutex) {
2367	/ mutex_unlock() "... must not be used in interrupt context.",*
2368	* so give up the spinlock, then re-aquire it */
2369	spin_unlock_irq(lock: &connection->resource->req_lock);
2370	abort_unlocked:
2371	mutex_unlock(lock: &connection->cstate_mutex);
2372	spin_lock_irq(lock: &connection->resource->req_lock);
2373	}
2374	if (rv < SS_SUCCESS && flags & CS_VERBOSE) {
2375	drbd_err(connection, "State change failed: %s\n", drbd_set_st_err_str(rv));
2376	drbd_err(connection, " mask = 0x%x val = 0x%x\n", mask.i, val.i);
2377	drbd_err(connection, " old_conn:%s wanted_conn:%s\n", drbd_conn_str(oc), drbd_conn_str(val.conn));
2378	}
2379	return rv;
2380	}
2381
2382	enum drbd_state_rv
2383	conn_request_state(struct drbd_connection connection, union* drbd_state mask, union drbd_state val,
2384	enum chg_state_flags flags)
2385	{
2386	enum drbd_state_rv rv;
2387
2388	spin_lock_irq(lock: &connection->resource->req_lock);
2389	rv = _conn_request_state(connection, mask, val, flags);
2390	spin_unlock_irq(lock: &connection->resource->req_lock);
2391
2392	return rv;
2393	}
2394

/* source code of linux/drivers/block/drbd/drbd_state.c */