drbd_nl.c source code [linux/drivers/block/drbd/drbd_nl.c]

// SPDX-License-Identifier: GPL-2.0-only
/*
   drbd_nl.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.


 */
13
14	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15
16	#include <linux/module.h>
17	#include <linux/drbd.h>
18	#include <linux/in.h>
19	#include <linux/fs.h>
20	#include <linux/file.h>
21	#include <linux/slab.h>
22	#include <linux/blkpg.h>
23	#include <linux/cpumask.h>
24	#include "drbd_int.h"
25	#include "drbd_protocol.h"
26	#include "drbd_req.h"
27	#include "drbd_state_change.h"
28	#include <asm/unaligned.h>
29	#include <linux/drbd_limits.h>
30	#include <linux/kthread.h>
31
32	#include <net/genetlink.h>
33
/*
 * Forward declarations of the generic netlink handlers.  They are declared
 * ahead of the genl "magic" headers that are included right below.
 */

/* .doit */
// int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info);
// int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
/* .dumpit */
int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
int drbd_adm_dump_resources(struct sk_buff *skb, struct netlink_callback *cb);
int drbd_adm_dump_devices(struct sk_buff *skb, struct netlink_callback *cb);
int drbd_adm_dump_devices_done(struct netlink_callback *cb);
int drbd_adm_dump_connections(struct sk_buff *skb, struct netlink_callback *cb);
int drbd_adm_dump_connections_done(struct netlink_callback *cb);
int drbd_adm_dump_peer_devices(struct sk_buff *skb, struct netlink_callback *cb);
int drbd_adm_dump_peer_devices_done(struct netlink_callback *cb);
int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb);
75
76	#include <linux/drbd_genl_api.h>
77	#include "drbd_nla.h"
78	#include <linux/genl_magic_func.h>
79
80	static atomic_t drbd_genl_seq = ATOMIC_INIT(`2`); / two. /
81	static atomic_t notify_genl_seq = ATOMIC_INIT(`2`); / two. /
82
83	DEFINE_MUTEX(notification_mutex);
84
85	/ used bdev_open_by_path, to claim our meta data device(s) /
86	static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
87
/*
 * Finalize the generic netlink message held in @skb and send it as the reply
 * to the request described by @info.  A send failure is only logged.
 */
static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
{
	genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
	if (genlmsg_reply(skb, info))
		pr_err("error sending genl reply\n");
}
94
95	/ Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only*
96	* reason it could fail was no space in skb, and there are 4k available. */
97	static int drbd_msg_put_info(struct sk_buff skb, const* char *info)
98	{
99	struct nlattr *nla;
100	int err = -EMSGSIZE;
101
102	if (!info \|\| !info[`0`])
103	return `0`;
104
105	nla = nla_nest_start_noflag(skb, attrtype: DRBD_NLA_CFG_REPLY);
106	if (!nla)
107	return err;
108
109	err = nla_put_string(skb, attrtype: T_info_text, str: info);
110	if (err) {
111	nla_nest_cancel(skb, start: nla);
112	return err;
113	} else
114	nla_nest_end(skb, start: nla);
115	return `0`;
116	}
117
118	__printf(`2`, `3`)
119	static int drbd_msg_sprintf_info(struct sk_buff skb, const* char *fmt, ...)
120	{
121	va_list args;
122	struct nlattr nla, txt;
123	int err = -EMSGSIZE;
124	int len;
125
126	nla = nla_nest_start_noflag(skb, attrtype: DRBD_NLA_CFG_REPLY);
127	if (!nla)
128	return err;
129
130	txt = nla_reserve(skb, attrtype: T_info_text, attrlen: `256`);
131	if (!txt) {
132	nla_nest_cancel(skb, start: nla);
133	return err;
134	}
135	va_start(args, fmt);
136	len = vscnprintf(buf: nla_data(nla: txt), size: `256`, fmt, args);
137	va_end(args);
138
139	/ maybe: retry with larger reserve, if truncated /
140	txt->nla_len = nla_attr_size(payload: len+`1`);
141	nlmsg_trim(skb, mark: (char*)txt + NLA_ALIGN(txt->nla_len));
142	nla_nest_end(skb, start: nla);
143
144	return `0`;
145	}
146
147	/ This would be a good candidate for a "pre_doit" hook,*
148	* and per-family private info->pointers.
149	* But we need to stay compatible with older kernels.
150	* If it returns successfully, adm_ctx members are valid.
151	*
152	* At this point, we still rely on the global genl_lock().
153	* If we want to avoid that, and allow "genl_family.parallel_ops", we may need
154	* to add additional synchronization against object destruction/modification.
155	*/
156	#define DRBD_ADM_NEED_MINOR 1
157	#define DRBD_ADM_NEED_RESOURCE 2
158	#define DRBD_ADM_NEED_CONNECTION 4
159	static int drbd_adm_prepare(struct drbd_config_context *adm_ctx,
160	struct sk_buff skb, struct* genl_info info, unsigned* flags)
161	{
162	struct drbd_genlmsghdr *d_in = genl_info_userhdr(info);
163	const u8 cmd = info->genlhdr->cmd;
164	int err;
165
166	memset(adm_ctx, `0`, sizeof(*adm_ctx));
167
168	/ genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( /
169	if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
170	return -EPERM;
171
172	adm_ctx->reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
173	if (!adm_ctx->reply_skb) {
174	err = -ENOMEM;
175	goto fail;
176	}
177
178	adm_ctx->reply_dh = genlmsg_put_reply(skb: adm_ctx->reply_skb,
179	info, family: &drbd_genl_family, flags: `0`, cmd);
180	/ put of a few bytes into a fresh skb of >= 4k will always succeed.*
181	* but anyways */
182	if (!adm_ctx->reply_dh) {
183	err = -ENOMEM;
184	goto fail;
185	}
186
187	adm_ctx->reply_dh->minor = d_in->minor;
188	adm_ctx->reply_dh->ret_code = NO_ERROR;
189
190	adm_ctx->volume = VOLUME_UNSPECIFIED;
191	if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
192	struct nlattr *nla;
193	/ parse and validate only /
194	err = drbd_cfg_context_from_attrs(NULL, info);
195	if (err)
196	goto fail;
197
198	/ It was present, and valid,*
199	* copy it over to the reply skb. */
200	err = nla_put_nohdr(skb: adm_ctx->reply_skb,
201	attrlen: info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
202	data: info->attrs[DRBD_NLA_CFG_CONTEXT]);
203	if (err)
204	goto fail;
205
206	/ and assign stuff to the adm_ctx /
207	nla = nested_attr_tb[__nla_type(T_ctx_volume)];
208	if (nla)
209	adm_ctx->volume = nla_get_u32(nla);
210	nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
211	if (nla)
212	adm_ctx->resource_name = nla_data(nla);
213	adm_ctx->my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
214	adm_ctx->peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
215	if ((adm_ctx->my_addr &&
216	nla_len(nla: adm_ctx->my_addr) > sizeof(adm_ctx->connection->my_addr)) \|\|
217	(adm_ctx->peer_addr &&
218	nla_len(nla: adm_ctx->peer_addr) > sizeof(adm_ctx->connection->peer_addr))) {
219	err = -EINVAL;
220	goto fail;
221	}
222	}
223
224	adm_ctx->minor = d_in->minor;
225	adm_ctx->device = minor_to_device(minor: d_in->minor);
226
227	/ We are protected by the global genl_lock().*
228	* But we may explicitly drop it/retake it in drbd_adm_set_role(),
229	* so make sure this object stays around. */
230	if (adm_ctx->device)
231	kref_get(kref: &adm_ctx->device->kref);
232
233	if (adm_ctx->resource_name) {
234	adm_ctx->resource = drbd_find_resource(name: adm_ctx->resource_name);
235	}
236
237	if (!adm_ctx->device && (flags & DRBD_ADM_NEED_MINOR)) {
238	drbd_msg_put_info(skb: adm_ctx->reply_skb, info: "unknown minor");
239	return ERR_MINOR_INVALID;
240	}
241	if (!adm_ctx->resource && (flags & DRBD_ADM_NEED_RESOURCE)) {
242	drbd_msg_put_info(skb: adm_ctx->reply_skb, info: "unknown resource");
243	if (adm_ctx->resource_name)
244	return ERR_RES_NOT_KNOWN;
245	return ERR_INVALID_REQUEST;
246	}
247
248	if (flags & DRBD_ADM_NEED_CONNECTION) {
249	if (adm_ctx->resource) {
250	drbd_msg_put_info(skb: adm_ctx->reply_skb, info: "no resource name expected");
251	return ERR_INVALID_REQUEST;
252	}
253	if (adm_ctx->device) {
254	drbd_msg_put_info(skb: adm_ctx->reply_skb, info: "no minor number expected");
255	return ERR_INVALID_REQUEST;
256	}
257	if (adm_ctx->my_addr && adm_ctx->peer_addr)
258	adm_ctx->connection = conn_get_by_addrs(my_addr: nla_data(nla: adm_ctx->my_addr),
259	my_addr_len: nla_len(nla: adm_ctx->my_addr),
260	peer_addr: nla_data(nla: adm_ctx->peer_addr),
261	peer_addr_len: nla_len(nla: adm_ctx->peer_addr));
262	if (!adm_ctx->connection) {
263	drbd_msg_put_info(skb: adm_ctx->reply_skb, info: "unknown connection");
264	return ERR_INVALID_REQUEST;
265	}
266	}
267
268	/ some more paranoia, if the request was over-determined /
269	if (adm_ctx->device && adm_ctx->resource &&
270	adm_ctx->device->resource != adm_ctx->resource) {
271	pr_warn("request: minor=%u, resource=%s; but that minor belongs to resource %s\n",
272	adm_ctx->minor, adm_ctx->resource->name,
273	adm_ctx->device->resource->name);
274	drbd_msg_put_info(skb: adm_ctx->reply_skb, info: "minor exists in different resource");
275	return ERR_INVALID_REQUEST;
276	}
277	if (adm_ctx->device &&
278	adm_ctx->volume != VOLUME_UNSPECIFIED &&
279	adm_ctx->volume != adm_ctx->device->vnr) {
280	pr_warn("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
281	adm_ctx->minor, adm_ctx->volume,
282	adm_ctx->device->vnr, adm_ctx->device->resource->name);
283	drbd_msg_put_info(skb: adm_ctx->reply_skb, info: "minor exists as different volume");
284	return ERR_INVALID_REQUEST;
285	}
286
287	/ still, provide adm_ctx->resource always, if possible. /
288	if (!adm_ctx->resource) {
289	adm_ctx->resource = adm_ctx->device ? adm_ctx->device->resource
290	: adm_ctx->connection ? adm_ctx->connection->resource : NULL;
291	if (adm_ctx->resource)
292	kref_get(kref: &adm_ctx->resource->kref);
293	}
294
295	return NO_ERROR;
296
297	fail:
298	nlmsg_free(skb: adm_ctx->reply_skb);
299	adm_ctx->reply_skb = NULL;
300	return err;
301	}
302
303	static int drbd_adm_finish(struct drbd_config_context *adm_ctx,
304	struct genl_info info, int* retcode)
305	{
306	if (adm_ctx->device) {
307	kref_put(kref: &adm_ctx->device->kref, release: drbd_destroy_device);
308	adm_ctx->device = NULL;
309	}
310	if (adm_ctx->connection) {
311	kref_put(kref: &adm_ctx->connection->kref, release: &drbd_destroy_connection);
312	adm_ctx->connection = NULL;
313	}
314	if (adm_ctx->resource) {
315	kref_put(kref: &adm_ctx->resource->kref, release: drbd_destroy_resource);
316	adm_ctx->resource = NULL;
317	}
318
319	if (!adm_ctx->reply_skb)
320	return -ENOMEM;
321
322	adm_ctx->reply_dh->ret_code = retcode;
323	drbd_adm_send_reply(skb: adm_ctx->reply_skb, info);
324	return `0`;
325	}
326
327	static void setup_khelper_env(struct drbd_connection connection, char* **envp)
328	{
329	char *afs;
330
331	/ FIXME: A future version will not allow this case. /
332	if (connection->my_addr_len == `0` \|\| connection->peer_addr_len == `0`)
333	return;
334
335	switch (((struct sockaddr *)&connection->peer_addr)->sa_family) {
336	case AF_INET6:
337	afs = "ipv6";
338	snprintf(buf: envp[`4`], size: `60`, fmt: "DRBD_PEER_ADDRESS=%pI6",
339	&((struct sockaddr_in6 *)&connection->peer_addr)->sin6_addr);
340	break;
341	case AF_INET:
342	afs = "ipv4";
343	snprintf(buf: envp[`4`], size: `60`, fmt: "DRBD_PEER_ADDRESS=%pI4",
344	&((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
345	break;
346	default:
347	afs = "ssocks";
348	snprintf(buf: envp[`4`], size: `60`, fmt: "DRBD_PEER_ADDRESS=%pI4",
349	&((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
350	}
351	snprintf(buf: envp[`3`], size: `20`, fmt: "DRBD_PEER_AF=%s", afs);
352	}
353
354	int drbd_khelper(struct drbd_device device, char* *cmd)
355	{
356	char *envp[] = { "HOME=/",
357	"TERM=linux",
358	"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
359	(char[`20`]) { }, / address family /
360	(char[`60`]) { }, / address /
361	NULL };
362	char mb[`14`];
363	char *argv[] = {drbd_usermode_helper, cmd, mb, NULL };
364	struct drbd_connection *connection = first_peer_device(device)->connection;
365	struct sib_info sib;
366	int ret;
367
368	if (current == connection->worker.task)
369	set_bit(nr: CALLBACK_PENDING, addr: &connection->flags);
370
371	snprintf(buf: mb, size: `14`, fmt: "minor-%d", device_to_minor(device));
372	setup_khelper_env(connection, envp);
373
374	/ The helper may take some time.*
375	* write out any unsynced meta data changes now */
376	drbd_md_sync(device);
377
378	drbd_info(device, "helper command: %s %s %s\n", drbd_usermode_helper, cmd, mb);
379	sib.sib_reason = SIB_HELPER_PRE;
380	sib.helper_name = cmd;
381	drbd_bcast_event(device, sib: &sib);
382	notify_helper(NOTIFY_CALL, device, connection, cmd, `0`);
383	ret = call_usermodehelper(path: drbd_usermode_helper, argv, envp, UMH_WAIT_PROC);
384	if (ret)
385	drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n",
386	drbd_usermode_helper, cmd, mb,
387	(ret >> `8`) & `0xff`, ret);
388	else
389	drbd_info(device, "helper command: %s %s %s exit code %u (0x%x)\n",
390	drbd_usermode_helper, cmd, mb,
391	(ret >> `8`) & `0xff`, ret);
392	sib.sib_reason = SIB_HELPER_POST;
393	sib.helper_exit_code = ret;
394	drbd_bcast_event(device, sib: &sib);
395	notify_helper(NOTIFY_RESPONSE, device, connection, cmd, ret);
396
397	if (current == connection->worker.task)
398	clear_bit(nr: CALLBACK_PENDING, addr: &connection->flags);
399
400	if (ret < `0`) / Ignore any ERRNOs we got. /
401	ret = `0`;
402
403	return ret;
404	}
405
406	enum drbd_peer_state conn_khelper(struct drbd_connection connection, char* *cmd)
407	{
408	char *envp[] = { "HOME=/",
409	"TERM=linux",
410	"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
411	(char[`20`]) { }, / address family /
412	(char[`60`]) { }, / address /
413	NULL };
414	char *resource_name = connection->resource->name;
415	char *argv[] = {drbd_usermode_helper, cmd, resource_name, NULL };
416	int ret;
417
418	setup_khelper_env(connection, envp);
419	conn_md_sync(connection);
420
421	drbd_info(connection, "helper command: %s %s %s\n", drbd_usermode_helper, cmd, resource_name);
422	/ TODO: conn_bcast_event() ?? /
423	notify_helper(NOTIFY_CALL, NULL, connection, cmd, `0`);
424
425	ret = call_usermodehelper(path: drbd_usermode_helper, argv, envp, UMH_WAIT_PROC);
426	if (ret)
427	drbd_warn(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
428	drbd_usermode_helper, cmd, resource_name,
429	(ret >> `8`) & `0xff`, ret);
430	else
431	drbd_info(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
432	drbd_usermode_helper, cmd, resource_name,
433	(ret >> `8`) & `0xff`, ret);
434	/ TODO: conn_bcast_event() ?? /
435	notify_helper(NOTIFY_RESPONSE, NULL, connection, cmd, ret);
436
437	if (ret < `0`) / Ignore any ERRNOs we got. /
438	ret = `0`;
439
440	return ret;
441	}
442
443	static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connection)
444	{
445	enum drbd_fencing_p fp = FP_NOT_AVAIL;
446	struct drbd_peer_device *peer_device;
447	int vnr;
448
449	rcu_read_lock();
450	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
451	struct drbd_device *device = peer_device->device;
452	if (get_ldev_if_state(device, D_CONSISTENT)) {
453	struct disk_conf *disk_conf =
454	rcu_dereference(peer_device->device->ldev->disk_conf);
455	fp = max_t(enum drbd_fencing_p, fp, disk_conf->fencing);
456	put_ldev(device);
457	}
458	}
459	rcu_read_unlock();
460
461	return fp;
462	}
463
464	static bool resource_is_supended(struct drbd_resource *resource)
465	{
466	return resource->susp \|\| resource->susp_fen \|\| resource->susp_nod;
467	}
468
469	bool conn_try_outdate_peer(struct drbd_connection *connection)
470	{
471	struct drbd_resource * const resource = connection->resource;
472	unsigned int connect_cnt;
473	union drbd_state mask = { };
474	union drbd_state val = { };
475	enum drbd_fencing_p fp;
476	char *ex_to_string;
477	int r;
478
479	spin_lock_irq(lock: &resource->req_lock);
480	if (connection->cstate >= C_WF_REPORT_PARAMS) {
481	drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n");
482	spin_unlock_irq(lock: &resource->req_lock);
483	return false;
484	}
485
486	connect_cnt = connection->connect_cnt;
487	spin_unlock_irq(lock: &resource->req_lock);
488
489	fp = highest_fencing_policy(connection);
490	switch (fp) {
491	case FP_NOT_AVAIL:
492	drbd_warn(connection, "Not fencing peer, I'm not even Consistent myself.\n");
493	spin_lock_irq(lock: &resource->req_lock);
494	if (connection->cstate < C_WF_REPORT_PARAMS) {
495	_conn_request_state(connection,
496	mask: (union drbd_state) { { .susp_fen = `1` } },
497	val: (union drbd_state) { { .susp_fen = `0` } },
498	flags: CS_VERBOSE \| CS_HARD \| CS_DC_SUSP);
499	/ We are no longer suspended due to the fencing policy.*
500	* We may still be suspended due to the on-no-data-accessible policy.
501	* If that was OND_IO_ERROR, fail pending requests. */
502	if (!resource_is_supended(resource))
503	_tl_restart(connection, what: CONNECTION_LOST_WHILE_PENDING);
504	}
505	/ Else: in case we raced with a connection handshake,*
506	* let the handshake figure out if we maybe can RESEND,
507	* and do not resume/fail pending requests here.
508	* Worst case is we stay suspended for now, which may be
509	* resolved by either re-establishing the replication link, or
510	* the next link failure, or eventually the administrator. */
511	spin_unlock_irq(lock: &resource->req_lock);
512	return false;
513
514	case FP_DONT_CARE:
515	return true;
516	default: ;
517	}
518
519	r = conn_khelper(connection, cmd: "fence-peer");
520
521	switch ((r>>`8`) & `0xff`) {
522	case P_INCONSISTENT: / peer is inconsistent /
523	ex_to_string = "peer is inconsistent or worse";
524	mask.pdsk = D_MASK;
525	val.pdsk = D_INCONSISTENT;
526	break;
527	case P_OUTDATED: / peer got outdated, or was already outdated /
528	ex_to_string = "peer was fenced";
529	mask.pdsk = D_MASK;
530	val.pdsk = D_OUTDATED;
531	break;
532	case P_DOWN: / peer was down /
533	if (conn_highest_disk(connection) == D_UP_TO_DATE) {
534	/ we will(have) create(d) a new UUID anyways... /
535	ex_to_string = "peer is unreachable, assumed to be dead";
536	mask.pdsk = D_MASK;
537	val.pdsk = D_OUTDATED;
538	} else {
539	ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
540	}
541	break;
542	case P_PRIMARY: / Peer is primary, voluntarily outdate myself.*
543	* This is useful when an unconnected R_SECONDARY is asked to
544	* become R_PRIMARY, but finds the other peer being active. */
545	ex_to_string = "peer is active";
546	drbd_warn(connection, "Peer is primary, outdating myself.\n");
547	mask.disk = D_MASK;
548	val.disk = D_OUTDATED;
549	break;
550	case P_FENCING:
551	/ THINK: do we need to handle this*
552	* like case 4, or more like case 5? */
553	if (fp != FP_STONITH)
554	drbd_err(connection, "fence-peer() = 7 && fencing != Stonith !!!\n");
555	ex_to_string = "peer was stonithed";
556	mask.pdsk = D_MASK;
557	val.pdsk = D_OUTDATED;
558	break;
559	default:
560	/ The script is broken ... /
561	drbd_err(connection, "fence-peer helper broken, returned %d\n", (r>>`8`)&`0xff`);
562	return false; / Eventually leave IO frozen /
563	}
564
565	drbd_info(connection, "fence-peer helper returned %d (%s)\n",
566	(r>>`8`) & `0xff`, ex_to_string);
567
568	/ Not using*
569	conn_request_state(connection, mask, val, CS_VERBOSE);
570	here, because we might were able to re-establish the connection in the
571	meantime. /*
572	spin_lock_irq(lock: &resource->req_lock);
573	if (connection->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &connection->flags)) {
574	if (connection->connect_cnt != connect_cnt)
575	/ In case the connection was established and droped*
576	while the fence-peer handler was running, ignore it /*
577	drbd_info(connection, "Ignoring fence-peer exit code\n");
578	else
579	_conn_request_state(connection, mask, val, flags: CS_VERBOSE);
580	}
581	spin_unlock_irq(lock: &resource->req_lock);
582
583	return conn_highest_pdsk(connection) <= D_OUTDATED;
584	}
585
586	static int _try_outdate_peer_async(void *data)
587	{
588	struct drbd_connection connection = (struct* drbd_connection *)data;
589
590	conn_try_outdate_peer(connection);
591
592	kref_put(kref: &connection->kref, release: drbd_destroy_connection);
593	return `0`;
594	}
595
596	void conn_try_outdate_peer_async(struct drbd_connection *connection)
597	{
598	struct task_struct *opa;
599
600	kref_get(kref: &connection->kref);
601	/ We may have just sent a signal to this thread*
602	* to get it out of some blocking network function.
603	* Clear signals; otherwise kthread_run(), which internally uses
604	* wait_on_completion_killable(), will mistake our pending signal
605	* for a new fatal signal and fail. */
606	flush_signals(current);
607	opa = kthread_run(_try_outdate_peer_async, connection, "drbd_async_h");
608	if (IS_ERR(ptr: opa)) {
609	drbd_err(connection, "out of mem, failed to invoke fence-peer helper\n");
610	kref_put(kref: &connection->kref, release: drbd_destroy_connection);
611	}
612	}
613
614	enum drbd_state_rv
615	drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int force)
616	{
617	struct drbd_peer_device *const peer_device = first_peer_device(device);
618	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
619	const int max_tries = `4`;
620	enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
621	struct net_conf *nc;
622	int try = `0`;
623	int forced = `0`;
624	union drbd_state mask, val;
625
626	if (new_role == R_PRIMARY) {
627	struct drbd_connection *connection;
628
629	/ Detect dead peers as soon as possible. /
630
631	rcu_read_lock();
632	for_each_connection(connection, device->resource)
633	request_ping(connection);
634	rcu_read_unlock();
635	}
636
637	mutex_lock(device->state_mutex);
638
639	mask.i = `0`; mask.role = R_MASK;
640	val.i = `0`; val.role = new_role;
641
642	while (try++ < max_tries) {
643	rv = _drbd_request_state_holding_state_mutex(device, mask, val, CS_WAIT_COMPLETE);
644
645	/ in case we first succeeded to outdate,*
646	* but now suddenly could establish a connection */
647	if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != `0`) {
648	val.pdsk = `0`;
649	mask.pdsk = `0`;
650	continue;
651	}
652
653	if (rv == SS_NO_UP_TO_DATE_DISK && force &&
654	(device->state.disk < D_UP_TO_DATE &&
655	device->state.disk >= D_INCONSISTENT)) {
656	mask.disk = D_MASK;
657	val.disk = D_UP_TO_DATE;
658	forced = `1`;
659	continue;
660	}
661
662	if (rv == SS_NO_UP_TO_DATE_DISK &&
663	device->state.disk == D_CONSISTENT && mask.pdsk == `0`) {
664	D_ASSERT(device, device->state.pdsk == D_UNKNOWN);
665
666	if (conn_try_outdate_peer(connection)) {
667	val.disk = D_UP_TO_DATE;
668	mask.disk = D_MASK;
669	}
670	continue;
671	}
672
673	if (rv == SS_NOTHING_TO_DO)
674	goto out;
675	if (rv == SS_PRIMARY_NOP && mask.pdsk == `0`) {
676	if (!conn_try_outdate_peer(connection) && force) {
677	drbd_warn(device, "Forced into split brain situation!\n");
678	mask.pdsk = D_MASK;
679	val.pdsk = D_OUTDATED;
680
681	}
682	continue;
683	}
684	if (rv == SS_TWO_PRIMARIES) {
685	/ Maybe the peer is detected as dead very soon...*
686	retry at most once more in this case. /*
687	if (try < max_tries) {
688	int timeo;
689	try = max_tries - `1`;
690	rcu_read_lock();
691	nc = rcu_dereference(connection->net_conf);
692	timeo = nc ? (nc->ping_timeo + `1`) * HZ / `10` : `1`;
693	rcu_read_unlock();
694	schedule_timeout_interruptible(timeout: timeo);
695	}
696	continue;
697	}
698	if (rv < SS_SUCCESS) {
699	rv = _drbd_request_state(device, mask, val,
700	CS_VERBOSE + CS_WAIT_COMPLETE);
701	if (rv < SS_SUCCESS)
702	goto out;
703	}
704	break;
705	}
706
707	if (rv < SS_SUCCESS)
708	goto out;
709
710	if (forced)
711	drbd_warn(device, "Forced to consider local data as UpToDate!\n");
712
713	/ Wait until nothing is on the fly :) /
714	wait_event(device->misc_wait, atomic_read(&device->ap_pending_cnt) == `0`);
715
716	/ FIXME also wait for all pending P_BARRIER_ACK? /
717
718	if (new_role == R_SECONDARY) {
719	if (get_ldev(device)) {
720	device->ldev->md.uuid[UI_CURRENT] &= ~(u64)`1`;
721	put_ldev(device);
722	}
723	} else {
724	mutex_lock(&device->resource->conf_update);
725	nc = connection->net_conf;
726	if (nc)
727	nc->discard_my_data = `0`; / without copy; single bit op is atomic /
728	mutex_unlock(lock: &device->resource->conf_update);
729
730	if (get_ldev(device)) {
731	if (((device->state.conn < C_CONNECTED \|\|
732	device->state.pdsk <= D_FAILED)
733	&& device->ldev->md.uuid[UI_BITMAP] == `0`) \|\| forced)
734	drbd_uuid_new_current(device);
735
736	device->ldev->md.uuid[UI_CURRENT] \|= (u64)`1`;
737	put_ldev(device);
738	}
739	}
740
741	/ writeout of activity log covered areas of the bitmap*
742	* to stable storage done in after state change already */
743
744	if (device->state.conn >= C_WF_REPORT_PARAMS) {
745	/ if this was forced, we should consider sync /
746	if (forced)
747	drbd_send_uuids(peer_device);
748	drbd_send_current_state(peer_device);
749	}
750
751	drbd_md_sync(device);
752	set_disk_ro(disk: device->vdisk, read_only: new_role == R_SECONDARY);
753	kobject_uevent(kobj: &disk_to_dev(device->vdisk)->kobj, action: KOBJ_CHANGE);
754	out:
755	mutex_unlock(lock: device->state_mutex);
756	return rv;
757	}
758
/*
 * Map an error code returned by one of the *_from_attrs() parsers to a
 * human readable text.  Anything other than -ENOMSG, -EOPNOTSUPP and
 * -EEXIST is reported as "invalid attribute value".
 */
static const char *from_attrs_err_to_txt(int err)
{
	return	err == -ENOMSG ? "required attribute missing" :
		err == -EOPNOTSUPP ? "unknown mandatory attribute" :
		err == -EEXIST ? "can not change invariant setting" :
		"invalid attribute value";
}
766
767	int drbd_adm_set_role(struct sk_buff skb, struct* genl_info *info)
768	{
769	struct drbd_config_context adm_ctx;
770	struct set_role_parms parms;
771	int err;
772	enum drbd_ret_code retcode;
773	enum drbd_state_rv rv;
774
775	retcode = drbd_adm_prepare(adm_ctx: &adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
776	if (!adm_ctx.reply_skb)
777	return retcode;
778	if (retcode != NO_ERROR)
779	goto out;
780
781	memset(&parms, `0`, sizeof(parms));
782	if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
783	err = set_role_parms_from_attrs(s: &parms, info);
784	if (err) {
785	retcode = ERR_MANDATORY_TAG;
786	drbd_msg_put_info(skb: adm_ctx.reply_skb, info: from_attrs_err_to_txt(err));
787	goto out;
788	}
789	}
790	genl_unlock();
791	mutex_lock(&adm_ctx.resource->adm_mutex);
792
793	if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
794	rv = drbd_set_role(device: adm_ctx.device, new_role: R_PRIMARY, force: parms.assume_uptodate);
795	else
796	rv = drbd_set_role(device: adm_ctx.device, new_role: R_SECONDARY, force: `0`);
797
798	mutex_unlock(lock: &adm_ctx.resource->adm_mutex);
799	genl_lock();
800	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode: rv);
801	return `0`;
802	out:
803	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode);
804	return `0`;
805	}
806
807	/ Initializes the md._offset members, so we are able to find
808	* the on disk meta data.
809	*
810	* We currently have two possible layouts:
811	* external:
812	* \|----------- md_size_sect ------------------\|
813	* [ 4k superblock ][ activity log ][ Bitmap ]
814	* \| al_offset == 8 \|
815	* \| bm_offset = al_offset + X \|
816	* ==> bitmap sectors = md_size_sect - bm_offset
817	*
818	* internal:
819	* \|----------- md_size_sect ------------------\|
820	* [data.....][ Bitmap ][ activity log ][ 4k superblock ]
821	* \| al_offset < 0 \|
822	* \| bm_offset = al_offset - Y \|
823	* ==> bitmap sectors = Y = al_offset - bm_offset
824	*
825	* Activity log size used to be fixed 32kB,
826	* but is about to become configurable.
827	*/
828	static void drbd_md_set_sector_offsets(struct drbd_device *device,
829	struct drbd_backing_dev *bdev)
830	{
831	sector_t md_size_sect = `0`;
832	unsigned int al_size_sect = bdev->md.al_size_4k * `8`;
833
834	bdev->md.md_offset = drbd_md_ss(bdev);
835
836	switch (bdev->md.meta_dev_idx) {
837	default:
838	/ v07 style fixed size indexed meta data /
839	bdev->md.md_size_sect = MD_128MB_SECT;
840	bdev->md.al_offset = MD_4kB_SECT;
841	bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
842	break;
843	case DRBD_MD_INDEX_FLEX_EXT:
844	/ just occupy the full device; unit: sectors /
845	bdev->md.md_size_sect = drbd_get_capacity(bdev: bdev->md_bdev);
846	bdev->md.al_offset = MD_4kB_SECT;
847	bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
848	break;
849	case DRBD_MD_INDEX_INTERNAL:
850	case DRBD_MD_INDEX_FLEX_INT:
851	/ al size is still fixed /
852	bdev->md.al_offset = -al_size_sect;
853	/ we need (slightly less than) ~ this much bitmap sectors: /
854	md_size_sect = drbd_get_capacity(bdev: bdev->backing_bdev);
855	md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
856	md_size_sect = BM_SECT_TO_EXT(md_size_sect);
857	md_size_sect = ALIGN(md_size_sect, `8`);
858
859	/ plus the "drbd meta data super block",*
860	* and the activity log; */
861	md_size_sect += MD_4kB_SECT + al_size_sect;
862
863	bdev->md.md_size_sect = md_size_sect;
864	/ bitmap offset is adjusted by 'super' block size /
865	bdev->md.bm_offset = -md_size_sect + MD_4kB_SECT;
866	break;
867	}
868	}
869
/*
 * Pretty-print a size into @buf as "<number> <unit>B" and return @buf.
 *
 * input size is expected to be in KB.  While the value is >= 10000 it is
 * divided by 1024 (rounding on the highest dropped bit) and the unit is
 * advanced, up to 'E'.
 */
char *ppsize(char *buf, unsigned long long size)
{
	/* Needs 9 bytes at max including trailing NUL:
	 * -1ULL ==> "16384 EB" */
	static const char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
	int base = 0;
	while (size >= 10000 && base < sizeof(units)-1) {
		/* shift + round */
		size = (size >> 10) + !!(size & (1<<9));
		base++;
	}
	sprintf(buf, "%u %cB", (unsigned)size, units[base]);

	return buf;
}
886
887	/ there is still a theoretical deadlock when called from receiver*
888	* on an D_INCONSISTENT R_PRIMARY:
889	* remote READ does inc_ap_bio, receiver would need to receive answer
890	* packet from remote to dec_ap_bio again.
891	* receiver receive_sizes(), comes here,
892	* waits for ap_bio_cnt == 0. -> deadlock.
893	* but this cannot happen, actually, because:
894	* R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
895	* (not connected, or bad/no disk on peer):
896	* see drbd_fail_request_early, ap_bio_cnt is zero.
897	* R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
898	* peer may not initiate a resize.
899	*/
900	/ Note these are not to be confused with*
901	* drbd_adm_suspend_io/drbd_adm_resume_io,
902	* which are (sub) state changes triggered by admin (drbdsetup),
903	* and can be long lived.
904	* This changes an device->flag, is triggered by drbd internals,
905	* and should be short-lived. */
906	/ It needs to be a counter, since multiple threads might*
907	independently suspend and resume IO. /*
908	void drbd_suspend_io(struct drbd_device *device)
909	{
910	atomic_inc(v: &device->suspend_cnt);
911	if (drbd_suspended(device))
912	return;
913	wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt));
914	}
915
916	void drbd_resume_io(struct drbd_device *device)
917	{
918	if (atomic_dec_and_test(v: &device->suspend_cnt))
919	wake_up(&device->misc_wait);
920	}
921
922	/*
923	* drbd_determine_dev_size() - Sets the right device size obeying all constraints
924	* @device: DRBD device.
925	*
926	* Returns 0 on success, negative return values indicate errors.
927	* You should call drbd_md_sync() after calling this function.
928	*/
929	enum determine_dev_size
930	drbd_determine_dev_size(struct drbd_device device, enum* dds_flags flags, struct resize_parms *rs) __must_hold(local)
931	{
932	struct md_offsets_and_sizes {
933	u64 last_agreed_sect;
934	u64 md_offset;
935	s32 al_offset;
936	s32 bm_offset;
937	u32 md_size_sect;
938
939	u32 al_stripes;
940	u32 al_stripe_size_4k;
941	} prev;
942	sector_t u_size, size;
943	struct drbd_md *md = &device->ldev->md;
944	void *buffer;
945
946	int md_moved, la_size_changed;
947	enum determine_dev_size rv = DS_UNCHANGED;
948
949	/ We may change the on-disk offsets of our meta data below. Lock out*
950	* anything that may cause meta data IO, to avoid acting on incomplete
951	* layout changes or scribbling over meta data that is in the process
952	* of being moved.
953	*
954	* Move is not exactly correct, btw, currently we have all our meta
955	* data in core memory, to "move" it we just write it all out, there
956	* are no reads. */
957	drbd_suspend_io(device);
958	buffer = drbd_md_get_buffer(device, intent: __func__); / Lock meta-data IO /
959	if (!buffer) {
960	drbd_resume_io(device);
961	return DS_ERROR;
962	}
963
964	/ remember current offset and sizes /
965	prev.last_agreed_sect = md->la_size_sect;
966	prev.md_offset = md->md_offset;
967	prev.al_offset = md->al_offset;
968	prev.bm_offset = md->bm_offset;
969	prev.md_size_sect = md->md_size_sect;
970	prev.al_stripes = md->al_stripes;
971	prev.al_stripe_size_4k = md->al_stripe_size_4k;
972
973	if (rs) {
974	/ rs is non NULL if we should change the AL layout only /
975	md->al_stripes = rs->al_stripes;
976	md->al_stripe_size_4k = rs->al_stripe_size / `4`;
977	md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / `4`;
978	}
979
980	drbd_md_set_sector_offsets(device, bdev: device->ldev);
981
982	rcu_read_lock();
983	u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
984	rcu_read_unlock();
985	size = drbd_new_dev_size(device, device->ldev, u_size, flags & DDSF_FORCED);
986
987	if (size < prev.last_agreed_sect) {
988	if (rs && u_size == `0`) {
989	/ Remove "rs &&" later. This check should always be active, but*
990	right now the receiver expects the permissive behavior /*
991	drbd_warn(device, "Implicit shrink not allowed. "
992	"Use --size=%llus for explicit shrink.\n",
993	(unsigned long long)size);
994	rv = DS_ERROR_SHRINK;
995	}
996	if (u_size > size)
997	rv = DS_ERROR_SPACE_MD;
998	if (rv != DS_UNCHANGED)
999	goto err_out;
1000	}
1001
1002	if (get_capacity(disk: device->vdisk) != size \|\|
1003	drbd_bm_capacity(device) != size) {
1004	int err;
1005	err = drbd_bm_resize(device, sectors: size, set_new_bits: !(flags & DDSF_NO_RESYNC));
1006	if (unlikely(err)) {
1007	/ currently there is only one error: ENOMEM! /
1008	size = drbd_bm_capacity(device);
1009	if (size == `0`) {
1010	drbd_err(device, "OUT OF MEMORY! "
1011	"Could not allocate bitmap!\n");
1012	} else {
1013	drbd_err(device, "BM resizing failed. "
1014	"Leaving size unchanged\n");
1015	}
1016	rv = DS_ERROR;
1017	}
1018	/ racy, see comments above. /
1019	drbd_set_my_capacity(device, size);
1020	md->la_size_sect = size;
1021	}
1022	if (rv <= DS_ERROR)
1023	goto err_out;
1024
1025	la_size_changed = (prev.last_agreed_sect != md->la_size_sect);
1026
1027	md_moved = prev.md_offset != md->md_offset
1028	\|\| prev.md_size_sect != md->md_size_sect;
1029
1030	if (la_size_changed \|\| md_moved \|\| rs) {
1031	u32 prev_flags;
1032
1033	/ We do some synchronous IO below, which may take some time.*
1034	* Clear the timer, to avoid scary "timer expired!" messages,
1035	* "Superblock" is written out at least twice below, anyways. */
1036	del_timer(timer: &device->md_sync_timer);
1037
1038	/ We won't change the "al-extents" setting, we just may need*
1039	* to move the on-disk location of the activity log ringbuffer.
1040	* Lock for transaction is good enough, it may well be "dirty"
1041	* or even "starving". */
1042	wait_event(device->al_wait, lc_try_lock_for_transaction(device->act_log));
1043
1044	/ mark current on-disk bitmap and activity log as unreliable /
1045	prev_flags = md->flags;
1046	md->flags \|= MDF_FULL_SYNC \| MDF_AL_DISABLED;
1047	drbd_md_write(device, buffer);
1048
1049	drbd_al_initialize(device, buffer);
1050
1051	drbd_info(device, "Writing the whole bitmap, %s\n",
1052	la_size_changed && md_moved ? "size changed and md moved" :
1053	la_size_changed ? "size changed" : "md moved");
1054	/ next line implicitly does drbd_suspend_io()+drbd_resume_io() /
1055	drbd_bitmap_io(device, io_fn: md_moved ? &drbd_bm_write_all : &drbd_bm_write,
1056	why: "size changed", flags: BM_LOCKED_MASK, NULL);
1057
1058	/ on-disk bitmap and activity log is authoritative again*
1059	* (unless there was an IO error meanwhile...) */
1060	md->flags = prev_flags;
1061	drbd_md_write(device, buffer);
1062
1063	if (rs)
1064	drbd_info(device, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
1065	md->al_stripes, md->al_stripe_size_4k * `4`);
1066	}
1067
1068	if (size > prev.last_agreed_sect)
1069	rv = prev.last_agreed_sect ? DS_GREW : DS_GREW_FROM_ZERO;
1070	if (size < prev.last_agreed_sect)
1071	rv = DS_SHRUNK;
1072
1073	if (`0`) {
1074	err_out:
1075	/ restore previous offset and sizes /
1076	md->la_size_sect = prev.last_agreed_sect;
1077	md->md_offset = prev.md_offset;
1078	md->al_offset = prev.al_offset;
1079	md->bm_offset = prev.bm_offset;
1080	md->md_size_sect = prev.md_size_sect;
1081	md->al_stripes = prev.al_stripes;
1082	md->al_stripe_size_4k = prev.al_stripe_size_4k;
1083	md->al_size_4k = (u64)prev.al_stripes * prev.al_stripe_size_4k;
1084	}
1085	lc_unlock(lc: device->act_log);
1086	wake_up(&device->al_wait);
1087	drbd_md_put_buffer(device);
1088	drbd_resume_io(device);
1089
1090	return rv;
1091	}
1092
1093	sector_t
1094	drbd_new_dev_size(struct drbd_device device, struct* drbd_backing_dev *bdev,
1095	sector_t u_size, int assume_peer_has_space)
1096	{
1097	sector_t p_size = device->p_size; / partner's disk size. /
1098	sector_t la_size_sect = bdev->md.la_size_sect; / last agreed size. /
1099	sector_t m_size; / my size /
1100	sector_t size = `0`;
1101
1102	m_size = drbd_get_max_capacity(bdev);
1103
1104	if (device->state.conn < C_CONNECTED && assume_peer_has_space) {
1105	drbd_warn(device, "Resize while not connected was forced by the user!\n");
1106	p_size = m_size;
1107	}
1108
1109	if (p_size && m_size) {
1110	size = min_t(sector_t, p_size, m_size);
1111	} else {
1112	if (la_size_sect) {
1113	size = la_size_sect;
1114	if (m_size && m_size < size)
1115	size = m_size;
1116	if (p_size && p_size < size)
1117	size = p_size;
1118	} else {
1119	if (m_size)
1120	size = m_size;
1121	if (p_size)
1122	size = p_size;
1123	}
1124	}
1125
1126	if (size == `0`)
1127	drbd_err(device, "Both nodes diskless!\n");
1128
1129	if (u_size) {
1130	if (u_size > size)
1131	drbd_err(device, "Requested disk size is too big (%lu > %lu)\n",
1132	(unsigned long)u_size>>`1`, (unsigned long)size>>`1`);
1133	else
1134	size = u_size;
1135	}
1136
1137	return size;
1138	}
1139
1140	/*
1141	* drbd_check_al_size() - Ensures that the AL is of the right size
1142	* @device: DRBD device.
1143	*
1144	* Returns -EBUSY if current al lru is still used, -ENOMEM when allocation
1145	* failed, and 0 on success. You should call drbd_md_sync() after you called
1146	* this function.
1147	*/
1148	static int drbd_check_al_size(struct drbd_device device, struct* disk_conf *dc)
1149	{
1150	struct lru_cache n, t;
1151	struct lc_element *e;
1152	unsigned int in_use;
1153	int i;
1154
1155	if (device->act_log &&
1156	device->act_log->nr_elements == dc->al_extents)
1157	return `0`;
1158
1159	in_use = `0`;
1160	t = device->act_log;
1161	n = lc_create(name: "act_log", cache: drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
1162	e_count: dc->al_extents, e_size: sizeof(struct lc_element), e_off: `0`);
1163
1164	if (n == NULL) {
1165	drbd_err(device, "Cannot allocate act_log lru!\n");
1166	return -ENOMEM;
1167	}
1168	spin_lock_irq(lock: &device->al_lock);
1169	if (t) {
1170	for (i = `0`; i < t->nr_elements; i++) {
1171	e = lc_element_by_index(lc: t, i);
1172	if (e->refcnt)
1173	drbd_err(device, "refcnt(%d)==%d\n",
1174	e->lc_number, e->refcnt);
1175	in_use += e->refcnt;
1176	}
1177	}
1178	if (!in_use)
1179	device->act_log = n;
1180	spin_unlock_irq(lock: &device->al_lock);
1181	if (in_use) {
1182	drbd_err(device, "Activity log still in use!\n");
1183	lc_destroy(lc: n);
1184	return -EBUSY;
1185	} else {
1186	lc_destroy(lc: t);
1187	}
1188	drbd_md_mark_dirty(device); / we changed device->act_log->nr_elemens /
1189	return `0`;
1190	}
1191
1192	static void blk_queue_discard_granularity(struct request_queue q, unsigned* int granularity)
1193	{
1194	q->limits.discard_granularity = granularity;
1195	}
1196
1197	static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection)
1198	{
1199	/ when we introduced REQ_WRITE_SAME support, we also bumped*
1200	* our maximum supported batch bio size used for discards. */
1201	if (connection->agreed_features & DRBD_FF_WSAME)
1202	return DRBD_MAX_BBIO_SECTORS;
1203	/ before, with DRBD <= 8.4.6, we only allowed up to one AL_EXTENT_SIZE. /
1204	return AL_EXTENT_SIZE >> `9`;
1205	}
1206
1207	static void decide_on_discard_support(struct drbd_device *device,
1208	struct drbd_backing_dev *bdev)
1209	{
1210	struct drbd_connection *connection =
1211	first_peer_device(device)->connection;
1212	struct request_queue *q = device->rq_queue;
1213	unsigned int max_discard_sectors;
1214
1215	if (bdev && !bdev_max_discard_sectors(bdev: bdev->backing_bdev))
1216	goto not_supported;
1217
1218	if (connection->cstate >= C_CONNECTED &&
1219	!(connection->agreed_features & DRBD_FF_TRIM)) {
1220	drbd_info(connection,
1221	"peer DRBD too old, does not support TRIM: disabling discards\n");
1222	goto not_supported;
1223	}
1224
1225	/*
1226	* We don't care for the granularity, really.
1227	*
1228	* Stacking limits below should fix it for the local device. Whether or
1229	* not it is a suitable granularity on the remote device is not our
1230	* problem, really. If you care, you need to use devices with similar
1231	* topology on all peers.
1232	*/
1233	blk_queue_discard_granularity(q, granularity: `512`);
1234	max_discard_sectors = drbd_max_discard_sectors(connection);
1235	blk_queue_max_discard_sectors(q, max_discard_sectors);
1236	blk_queue_max_write_zeroes_sectors(q, max_write_same_sectors: max_discard_sectors);
1237	return;
1238
1239	not_supported:
1240	blk_queue_discard_granularity(q, granularity: `0`);
1241	blk_queue_max_discard_sectors(q, max_discard_sectors: `0`);
1242	}
1243
1244	static void fixup_write_zeroes(struct drbd_device device, struct* request_queue *q)
1245	{
1246	/ Fixup max_write_zeroes_sectors after blk_stack_limits():*
1247	* if we can handle "zeroes" efficiently on the protocol,
1248	* we want to do that, even if our backend does not announce
1249	* max_write_zeroes_sectors itself. */
1250	struct drbd_connection *connection = first_peer_device(device)->connection;
1251	/ If the peer announces WZEROES support, use it. Otherwise, rather*
1252	* send explicit zeroes than rely on some discard-zeroes-data magic. */
1253	if (connection->agreed_features & DRBD_FF_WZEROES)
1254	q->limits.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS;
1255	else
1256	q->limits.max_write_zeroes_sectors = `0`;
1257	}
1258
1259	static void fixup_discard_support(struct drbd_device device, struct* request_queue *q)
1260	{
1261	unsigned int max_discard = device->rq_queue->limits.max_discard_sectors;
1262	unsigned int discard_granularity =
1263	device->rq_queue->limits.discard_granularity >> SECTOR_SHIFT;
1264
1265	if (discard_granularity > max_discard) {
1266	blk_queue_discard_granularity(q, granularity: `0`);
1267	blk_queue_max_discard_sectors(q, max_discard_sectors: `0`);
1268	}
1269	}
1270
1271	static void drbd_setup_queue_param(struct drbd_device device, struct* drbd_backing_dev *bdev,
1272	unsigned int max_bio_size, struct o_qlim *o)
1273	{
1274	struct request_queue * const q = device->rq_queue;
1275	unsigned int max_hw_sectors = max_bio_size >> `9`;
1276	unsigned int max_segments = `0`;
1277	struct request_queue *b = NULL;
1278	struct disk_conf *dc;
1279
1280	if (bdev) {
1281	b = bdev->backing_bdev->bd_disk->queue;
1282
1283	max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> `9`);
1284	rcu_read_lock();
1285	dc = rcu_dereference(device->ldev->disk_conf);
1286	max_segments = dc->max_bio_bvecs;
1287	rcu_read_unlock();
1288
1289	blk_set_stacking_limits(lim: &q->limits);
1290	}
1291
1292	blk_queue_max_hw_sectors(q, max_hw_sectors);
1293	/ This is the workaround for "bio would need to, but cannot, be split" /
1294	blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
1295	blk_queue_segment_boundary(q, PAGE_SIZE-`1`);
1296	decide_on_discard_support(device, bdev);
1297
1298	if (b) {
1299	blk_stack_limits(t: &q->limits, b: &b->limits, offset: `0`);
1300	disk_update_readahead(disk: device->vdisk);
1301	}
1302	fixup_write_zeroes(device, q);
1303	fixup_discard_support(device, q);
1304	}
1305
1306	void drbd_reconsider_queue_parameters(struct drbd_device device, struct* drbd_backing_dev bdev, struct* o_qlim *o)
1307	{
1308	unsigned int now, new, local, peer;
1309
1310	now = queue_max_hw_sectors(q: device->rq_queue) << `9`;
1311	local = device->local_max_bio_size; / Eventually last known value, from volatile memory /
1312	peer = device->peer_max_bio_size; / Eventually last known value, from meta data /
1313
1314	if (bdev) {
1315	local = queue_max_hw_sectors(q: bdev->backing_bdev->bd_disk->queue) << `9`;
1316	device->local_max_bio_size = local;
1317	}
1318	local = min(local, DRBD_MAX_BIO_SIZE);
1319
1320	/ We may ignore peer limits if the peer is modern enough.*
1321	Because new from 8.3.8 onwards the peer can use multiple
1322	BIOs for a single peer_request /*
1323	if (device->state.conn >= C_WF_REPORT_PARAMS) {
1324	if (first_peer_device(device)->connection->agreed_pro_version < `94`)
1325	peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
1326	/ Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB /
1327	else if (first_peer_device(device)->connection->agreed_pro_version == `94`)
1328	peer = DRBD_MAX_SIZE_H80_PACKET;
1329	else if (first_peer_device(device)->connection->agreed_pro_version < `100`)
1330	peer = DRBD_MAX_BIO_SIZE_P95; / drbd 8.3.8 onwards, before 8.4.0 /
1331	else
1332	peer = DRBD_MAX_BIO_SIZE;
1333
1334	/ We may later detach and re-attach on a disconnected Primary.*
1335	* Avoid this setting to jump back in that case.
1336	* We want to store what we know the peer DRBD can handle,
1337	* not what the peer IO backend can handle. */
1338	if (peer > device->peer_max_bio_size)
1339	device->peer_max_bio_size = peer;
1340	}
1341	new = min(local, peer);
1342
1343	if (device->state.role == R_PRIMARY && new < now)
1344	drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now);
1345
1346	if (new != now)
1347	drbd_info(device, "max BIO size = %u\n", new);
1348
1349	drbd_setup_queue_param(device, bdev, max_bio_size: new, o);
1350	}
1351
1352	/ Starts the worker thread /
1353	static void conn_reconfig_start(struct drbd_connection *connection)
1354	{
1355	drbd_thread_start(thi: &connection->worker);
1356	drbd_flush_workqueue(work_queue: &connection->sender_work);
1357	}
1358
1359	/ if still unconfigured, stops worker again. /
1360	static void conn_reconfig_done(struct drbd_connection *connection)
1361	{
1362	bool stop_threads;
1363	spin_lock_irq(lock: &connection->resource->req_lock);
1364	stop_threads = conn_all_vols_unconf(connection) &&
1365	connection->cstate == C_STANDALONE;
1366	spin_unlock_irq(lock: &connection->resource->req_lock);
1367	if (stop_threads) {
1368	/ ack_receiver thread and ack_sender workqueue are implicitly*
1369	* stopped by receiver in conn_disconnect() */
1370	drbd_thread_stop(thi: &connection->receiver);
1371	drbd_thread_stop(thi: &connection->worker);
1372	}
1373	}
1374
1375	/ Make sure IO is suspended before calling this function(). /
1376	static void drbd_suspend_al(struct drbd_device *device)
1377	{
1378	int s = `0`;
1379
1380	if (!lc_try_lock(lc: device->act_log)) {
1381	drbd_warn(device, "Failed to lock al in drbd_suspend_al()\n");
1382	return;
1383	}
1384
1385	drbd_al_shrink(device);
1386	spin_lock_irq(lock: &device->resource->req_lock);
1387	if (device->state.conn < C_CONNECTED)
1388	s = !test_and_set_bit(nr: AL_SUSPENDED, addr: &device->flags);
1389	spin_unlock_irq(lock: &device->resource->req_lock);
1390	lc_unlock(lc: device->act_log);
1391
1392	if (s)
1393	drbd_info(device, "Suspended AL updates\n");
1394	}
1395
1396
1397	static bool should_set_defaults(struct genl_info *info)
1398	{
1399	struct drbd_genlmsghdr *dh = genl_info_userhdr(info);
1400
1401	return `0` != (dh->flags & DRBD_GENL_F_SET_DEFAULTS);
1402	}
1403
1404	static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev)
1405	{
1406	/ This is limited by 16 bit "slot" numbers,*
1407	* and by available on-disk context storage.
1408	*
1409	* Also (u16)~0 is special (denotes a "free" extent).
1410	*
1411	* One transaction occupies one 4kB on-disk block,
1412	* we have n such blocks in the on disk ring buffer,
1413	* the "current" transaction may fail (n-1),
1414	* and there is 919 slot numbers context information per transaction.
1415	*
1416	* 72 transaction blocks amounts to more than 2**16 context slots,
1417	* so cap there first.
1418	*/
1419	const unsigned int max_al_nr = DRBD_AL_EXTENTS_MAX;
1420	const unsigned int sufficient_on_disk =
1421	(max_al_nr + AL_CONTEXT_PER_TRANSACTION -`1`)
1422	/AL_CONTEXT_PER_TRANSACTION;
1423
1424	unsigned int al_size_4k = bdev->md.al_size_4k;
1425
1426	if (al_size_4k > sufficient_on_disk)
1427	return max_al_nr;
1428
1429	return (al_size_4k - `1`) * AL_CONTEXT_PER_TRANSACTION;
1430	}
1431
1432	static bool write_ordering_changed(struct disk_conf a, struct* disk_conf *b)
1433	{
1434	return a->disk_barrier != b->disk_barrier \|\|
1435	a->disk_flushes != b->disk_flushes \|\|
1436	a->disk_drain != b->disk_drain;
1437	}
1438
1439	static void sanitize_disk_conf(struct drbd_device device, struct* disk_conf *disk_conf,
1440	struct drbd_backing_dev *nbc)
1441	{
1442	struct block_device *bdev = nbc->backing_bdev;
1443
1444	if (disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
1445	disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
1446	if (disk_conf->al_extents > drbd_al_extents_max(bdev: nbc))
1447	disk_conf->al_extents = drbd_al_extents_max(bdev: nbc);
1448
1449	if (!bdev_max_discard_sectors(bdev)) {
1450	if (disk_conf->rs_discard_granularity) {
1451	disk_conf->rs_discard_granularity = `0`; / disable feature /
1452	drbd_info(device, "rs_discard_granularity feature disabled\n");
1453	}
1454	}
1455
1456	if (disk_conf->rs_discard_granularity) {
1457	int orig_value = disk_conf->rs_discard_granularity;
1458	sector_t discard_size = bdev_max_discard_sectors(bdev) << `9`;
1459	unsigned int discard_granularity = bdev_discard_granularity(bdev);
1460	int remainder;
1461
1462	if (discard_granularity > disk_conf->rs_discard_granularity)
1463	disk_conf->rs_discard_granularity = discard_granularity;
1464
1465	remainder = disk_conf->rs_discard_granularity %
1466	discard_granularity;
1467	disk_conf->rs_discard_granularity += remainder;
1468
1469	if (disk_conf->rs_discard_granularity > discard_size)
1470	disk_conf->rs_discard_granularity = discard_size;
1471
1472	if (disk_conf->rs_discard_granularity != orig_value)
1473	drbd_info(device, "rs_discard_granularity changed to %d\n",
1474	disk_conf->rs_discard_granularity);
1475	}
1476	}
1477
1478	static int disk_opts_check_al_size(struct drbd_device device, struct* disk_conf *dc)
1479	{
1480	int err = -EBUSY;
1481
1482	if (device->act_log &&
1483	device->act_log->nr_elements == dc->al_extents)
1484	return `0`;
1485
1486	drbd_suspend_io(device);
1487	/ If IO completion is currently blocked, we would likely wait*
1488	* "forever" for the activity log to become unused. So we don't. */
1489	if (atomic_read(v: &device->ap_bio_cnt))
1490	goto out;
1491
1492	wait_event(device->al_wait, lc_try_lock(device->act_log));
1493	drbd_al_shrink(device);
1494	err = drbd_check_al_size(device, dc);
1495	lc_unlock(lc: device->act_log);
1496	wake_up(&device->al_wait);
1497	out:
1498	drbd_resume_io(device);
1499	return err;
1500	}
1501
1502	int drbd_adm_disk_opts(struct sk_buff skb, struct* genl_info *info)
1503	{
1504	struct drbd_config_context adm_ctx;
1505	enum drbd_ret_code retcode;
1506	struct drbd_device *device;
1507	struct disk_conf new_disk_conf, old_disk_conf;
1508	struct fifo_buffer old_plan = NULL, new_plan = NULL;
1509	int err;
1510	unsigned int fifo_size;
1511
1512	retcode = drbd_adm_prepare(adm_ctx: &adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1513	if (!adm_ctx.reply_skb)
1514	return retcode;
1515	if (retcode != NO_ERROR)
1516	goto finish;
1517
1518	device = adm_ctx.device;
1519	mutex_lock(&adm_ctx.resource->adm_mutex);
1520
1521	/ we also need a disk*
1522	* to change the options on */
1523	if (!get_ldev(device)) {
1524	retcode = ERR_NO_DISK;
1525	goto out;
1526	}
1527
1528	new_disk_conf = kmalloc(size: sizeof(struct disk_conf), GFP_KERNEL);
1529	if (!new_disk_conf) {
1530	retcode = ERR_NOMEM;
1531	goto fail;
1532	}
1533
1534	mutex_lock(&device->resource->conf_update);
1535	old_disk_conf = device->ldev->disk_conf;
1536	new_disk_conf = old_disk_conf;
1537	if (should_set_defaults(info))
1538	set_disk_conf_defaults(new_disk_conf);
1539
1540	err = disk_conf_from_attrs_for_change(s: new_disk_conf, info);
1541	if (err && err != -ENOMSG) {
1542	retcode = ERR_MANDATORY_TAG;
1543	drbd_msg_put_info(skb: adm_ctx.reply_skb, info: from_attrs_err_to_txt(err));
1544	goto fail_unlock;
1545	}
1546
1547	if (!expect(device, new_disk_conf->resync_rate >= `1`))
1548	new_disk_conf->resync_rate = `1`;
1549
1550	sanitize_disk_conf(device, disk_conf: new_disk_conf, nbc: device->ldev);
1551
1552	if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1553	new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1554
1555	fifo_size = (new_disk_conf->c_plan_ahead * `10` * SLEEP_TIME) / HZ;
1556	if (fifo_size != device->rs_plan_s->size) {
1557	new_plan = fifo_alloc(fifo_size);
1558	if (!new_plan) {
1559	drbd_err(device, "kmalloc of fifo_buffer failed");
1560	retcode = ERR_NOMEM;
1561	goto fail_unlock;
1562	}
1563	}
1564
1565	err = disk_opts_check_al_size(device, dc: new_disk_conf);
1566	if (err) {
1567	/ Could be just "busy". Ignore?*
1568	* Introduce dedicated error code? */
1569	drbd_msg_put_info(skb: adm_ctx.reply_skb,
1570	info: "Try again without changing current al-extents setting");
1571	retcode = ERR_NOMEM;
1572	goto fail_unlock;
1573	}
1574
1575	lock_all_resources();
1576	retcode = drbd_resync_after_valid(device, o_minor: new_disk_conf->resync_after);
1577	if (retcode == NO_ERROR) {
1578	rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
1579	drbd_resync_after_changed(device);
1580	}
1581	unlock_all_resources();
1582
1583	if (retcode != NO_ERROR)
1584	goto fail_unlock;
1585
1586	if (new_plan) {
1587	old_plan = device->rs_plan_s;
1588	rcu_assign_pointer(device->rs_plan_s, new_plan);
1589	}
1590
1591	mutex_unlock(lock: &device->resource->conf_update);
1592
1593	if (new_disk_conf->al_updates)
1594	device->ldev->md.flags &= ~MDF_AL_DISABLED;
1595	else
1596	device->ldev->md.flags \|= MDF_AL_DISABLED;
1597
1598	if (new_disk_conf->md_flushes)
1599	clear_bit(nr: MD_NO_FUA, addr: &device->flags);
1600	else
1601	set_bit(nr: MD_NO_FUA, addr: &device->flags);
1602
1603	if (write_ordering_changed(a: old_disk_conf, b: new_disk_conf))
1604	drbd_bump_write_ordering(resource: device->resource, NULL, wo: WO_BDEV_FLUSH);
1605
1606	if (old_disk_conf->discard_zeroes_if_aligned !=
1607	new_disk_conf->discard_zeroes_if_aligned)
1608	drbd_reconsider_queue_parameters(device, bdev: device->ldev, NULL);
1609
1610	drbd_md_sync(device);
1611
1612	if (device->state.conn >= C_CONNECTED) {
1613	struct drbd_peer_device *peer_device;
1614
1615	for_each_peer_device(peer_device, device)
1616	drbd_send_sync_param(peer_device);
1617	}
1618
1619	kvfree_rcu_mightsleep(old_disk_conf);
1620	kfree(objp: old_plan);
1621	mod_timer(timer: &device->request_timer, expires: jiffies + HZ);
1622	goto success;
1623
1624	fail_unlock:
1625	mutex_unlock(lock: &device->resource->conf_update);
1626	fail:
1627	kfree(objp: new_disk_conf);
1628	kfree(objp: new_plan);
1629	success:
1630	put_ldev(device);
1631	out:
1632	mutex_unlock(lock: &adm_ctx.resource->adm_mutex);
1633	finish:
1634	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode);
1635	return `0`;
1636	}
1637
1638	static struct bdev_handle open_backing_dev(struct* drbd_device *device,
1639	const char bdev_path, void* *claim_ptr, bool do_bd_link)
1640	{
1641	struct bdev_handle *handle;
1642	int err = `0`;
1643
1644	handle = bdev_open_by_path(path: bdev_path, BLK_OPEN_READ \| BLK_OPEN_WRITE,
1645	holder: claim_ptr, NULL);
1646	if (IS_ERR(ptr: handle)) {
1647	drbd_err(device, "open(\"%s\") failed with %ld\n",
1648	bdev_path, PTR_ERR(handle));
1649	return handle;
1650	}
1651
1652	if (!do_bd_link)
1653	return handle;
1654
1655	err = bd_link_disk_holder(bdev: handle->bdev, disk: device->vdisk);
1656	if (err) {
1657	bdev_release(handle);
1658	drbd_err(device, "bd_link_disk_holder(\"%s\", ...) failed with %d\n",
1659	bdev_path, err);
1660	handle = ERR_PTR(error: err);
1661	}
1662	return handle;
1663	}
1664
1665	static int open_backing_devices(struct drbd_device *device,
1666	struct disk_conf *new_disk_conf,
1667	struct drbd_backing_dev *nbc)
1668	{
1669	struct bdev_handle *handle;
1670
1671	handle = open_backing_dev(device, bdev_path: new_disk_conf->backing_dev, claim_ptr: device,
1672	do_bd_link: true);
1673	if (IS_ERR(ptr: handle))
1674	return ERR_OPEN_DISK;
1675	nbc->backing_bdev = handle->bdev;
1676	nbc->backing_bdev_handle = handle;
1677
1678	/*
1679	* meta_dev_idx >= 0: external fixed size, possibly multiple
1680	* drbd sharing one meta device. TODO in that case, paranoia
1681	* check that [md_bdev, meta_dev_idx] is not yet used by some
1682	* other drbd minor! (if you use drbd.conf + drbdadm, that
1683	* should check it for you already; but if you don't, or
1684	* someone fooled it, we need to double check here)
1685	*/
1686	handle = open_backing_dev(device, bdev_path: new_disk_conf->meta_dev,
1687	/ claim ptr: device, if claimed exclusively; shared drbd_m_holder,*
1688	* if potentially shared with other drbd minors */
1689	claim_ptr: (new_disk_conf->meta_dev_idx < `0`) ? (void)device : (void**)drbd_m_holder,
1690	/ avoid double bd_claim_by_disk() for the same (source,target) tuple,*
1691	* as would happen with internal metadata. */
1692	do_bd_link: (new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_FLEX_INT &&
1693	new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_INTERNAL));
1694	if (IS_ERR(ptr: handle))
1695	return ERR_OPEN_MD_DISK;
1696	nbc->md_bdev = handle->bdev;
1697	nbc->md_bdev_handle = handle;
1698	return NO_ERROR;
1699	}
1700
1701	static void close_backing_dev(struct drbd_device *device,
1702	struct bdev_handle *handle, bool do_bd_unlink)
1703	{
1704	if (!handle)
1705	return;
1706	if (do_bd_unlink)
1707	bd_unlink_disk_holder(bdev: handle->bdev, disk: device->vdisk);
1708	bdev_release(handle);
1709	}
1710
1711	void drbd_backing_dev_free(struct drbd_device device, struct* drbd_backing_dev *ldev)
1712	{
1713	if (ldev == NULL)
1714	return;
1715
1716	close_backing_dev(device, handle: ldev->md_bdev_handle,
1717	do_bd_unlink: ldev->md_bdev != ldev->backing_bdev);
1718	close_backing_dev(device, handle: ldev->backing_bdev_handle, do_bd_unlink: true);
1719
1720	kfree(objp: ldev->disk_conf);
1721	kfree(objp: ldev);
1722	}
1723
1724	int drbd_adm_attach(struct sk_buff skb, struct* genl_info *info)
1725	{
1726	struct drbd_config_context adm_ctx;
1727	struct drbd_device *device;
1728	struct drbd_peer_device *peer_device;
1729	struct drbd_connection *connection;
1730	int err;
1731	enum drbd_ret_code retcode;
1732	enum determine_dev_size dd;
1733	sector_t max_possible_sectors;
1734	sector_t min_md_device_sectors;
1735	struct drbd_backing_dev nbc = NULL; /* new_backing_conf /
1736	struct disk_conf *new_disk_conf = NULL;
1737	struct lru_cache *resync_lru = NULL;
1738	struct fifo_buffer *new_plan = NULL;
1739	union drbd_state ns, os;
1740	enum drbd_state_rv rv;
1741	struct net_conf *nc;
1742
1743	retcode = drbd_adm_prepare(adm_ctx: &adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1744	if (!adm_ctx.reply_skb)
1745	return retcode;
1746	if (retcode != NO_ERROR)
1747	goto finish;
1748
1749	device = adm_ctx.device;
1750	mutex_lock(&adm_ctx.resource->adm_mutex);
1751	peer_device = first_peer_device(device);
1752	connection = peer_device->connection;
1753	conn_reconfig_start(connection);
1754
1755	/ if you want to reconfigure, please tear down first /
1756	if (device->state.disk > D_DISKLESS) {
1757	retcode = ERR_DISK_CONFIGURED;
1758	goto fail;
1759	}
1760	/ It may just now have detached because of IO error. Make sure*
1761	* drbd_ldev_destroy is done already, we may end up here very fast,
1762	* e.g. if someone calls attach from the on-io-error handler,
1763	* to realize a "hot spare" feature (not that I'd recommend that) */
1764	wait_event(device->misc_wait, !test_bit(GOING_DISKLESS, &device->flags));
1765
1766	/ make sure there is no leftover from previous force-detach attempts /
1767	clear_bit(nr: FORCE_DETACH, addr: &device->flags);
1768	clear_bit(nr: WAS_IO_ERROR, addr: &device->flags);
1769	clear_bit(nr: WAS_READ_ERROR, addr: &device->flags);
1770
1771	/ and no leftover from previously aborted resync or verify, either /
1772	device->rs_total = `0`;
1773	device->rs_failed = `0`;
1774	atomic_set(v: &device->rs_pending_cnt, i: `0`);
1775
1776	/ allocation not in the IO path, drbdsetup context /
1777	nbc = kzalloc(size: sizeof(struct drbd_backing_dev), GFP_KERNEL);
1778	if (!nbc) {
1779	retcode = ERR_NOMEM;
1780	goto fail;
1781	}
1782	spin_lock_init(&nbc->md.uuid_lock);
1783
1784	new_disk_conf = kzalloc(size: sizeof(struct disk_conf), GFP_KERNEL);
1785	if (!new_disk_conf) {
1786	retcode = ERR_NOMEM;
1787	goto fail;
1788	}
1789	nbc->disk_conf = new_disk_conf;
1790
1791	set_disk_conf_defaults(new_disk_conf);
1792	err = disk_conf_from_attrs(s: new_disk_conf, info);
1793	if (err) {
1794	retcode = ERR_MANDATORY_TAG;
1795	drbd_msg_put_info(skb: adm_ctx.reply_skb, info: from_attrs_err_to_txt(err));
1796	goto fail;
1797	}
1798
1799	if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1800	new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1801
1802	new_plan = fifo_alloc(fifo_size: (new_disk_conf->c_plan_ahead * `10` * SLEEP_TIME) / HZ);
1803	if (!new_plan) {
1804	retcode = ERR_NOMEM;
1805	goto fail;
1806	}
1807
1808	if (new_disk_conf->meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
1809	retcode = ERR_MD_IDX_INVALID;
1810	goto fail;
1811	}
1812
1813	rcu_read_lock();
1814	nc = rcu_dereference(connection->net_conf);
1815	if (nc) {
1816	if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
1817	rcu_read_unlock();
1818	retcode = ERR_STONITH_AND_PROT_A;
1819	goto fail;
1820	}
1821	}
1822	rcu_read_unlock();
1823
1824	retcode = open_backing_devices(device, new_disk_conf, nbc);
1825	if (retcode != NO_ERROR)
1826	goto fail;
1827
1828	if ((nbc->backing_bdev == nbc->md_bdev) !=
1829	(new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL \|\|
1830	new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
1831	retcode = ERR_MD_IDX_INVALID;
1832	goto fail;
1833	}
1834
1835	resync_lru = lc_create(name: "resync", cache: drbd_bm_ext_cache,
1836	max_pending_changes: `1`, e_count: `61`, e_size: sizeof(struct bm_extent),
1837	offsetof(struct bm_extent, lce));
1838	if (!resync_lru) {
1839	retcode = ERR_NOMEM;
1840	goto fail;
1841	}
1842
1843	/ Read our meta data super block early.*
1844	* This also sets other on-disk offsets. */
1845	retcode = drbd_md_read(device, bdev: nbc);
1846	if (retcode != NO_ERROR)
1847	goto fail;
1848
1849	sanitize_disk_conf(device, disk_conf: new_disk_conf, nbc);
1850
1851	if (drbd_get_max_capacity(bdev: nbc) < new_disk_conf->disk_size) {
1852	drbd_err(device, "max capacity %llu smaller than disk size %llu\n",
1853	(unsigned long long) drbd_get_max_capacity(nbc),
1854	(unsigned long long) new_disk_conf->disk_size);
1855	retcode = ERR_DISK_TOO_SMALL;
1856	goto fail;
1857	}
1858
1859	if (new_disk_conf->meta_dev_idx < `0`) {
1860	max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
1861	/ at least one MB, otherwise it does not make sense /
1862	min_md_device_sectors = (`2`<<`10`);
1863	} else {
1864	max_possible_sectors = DRBD_MAX_SECTORS;
1865	min_md_device_sectors = MD_128MB_SECT * (new_disk_conf->meta_dev_idx + `1`);
1866	}
1867
1868	if (drbd_get_capacity(bdev: nbc->md_bdev) < min_md_device_sectors) {
1869	retcode = ERR_MD_DISK_TOO_SMALL;
1870	drbd_warn(device, "refusing attach: md-device too small, "
1871	"at least %llu sectors needed for this meta-disk type\n",
1872	(unsigned long long) min_md_device_sectors);
1873	goto fail;
1874	}
1875
1876	/ Make sure the new disk is big enough*
1877	* (we may currently be R_PRIMARY with no local disk...) */
1878	if (drbd_get_max_capacity(bdev: nbc) < get_capacity(disk: device->vdisk)) {
1879	retcode = ERR_DISK_TOO_SMALL;
1880	goto fail;
1881	}
1882
1883	nbc->known_size = drbd_get_capacity(bdev: nbc->backing_bdev);
1884
1885	if (nbc->known_size > max_possible_sectors) {
1886	drbd_warn(device, "==> truncating very big lower level device "
1887	"to currently maximum possible %llu sectors <==\n",
1888	(unsigned long long) max_possible_sectors);
1889	if (new_disk_conf->meta_dev_idx >= `0`)
1890	drbd_warn(device, "==>> using internal or flexible "
1891	"meta data may help <<==\n");
1892	}
1893
1894	drbd_suspend_io(device);
1895	/ also wait for the last barrier ack. /
1896	/ FIXME see also https://daiquiri.linbit/cgi-bin/bugzilla/show_bug.cgi?id=171*
1897	* We need a way to either ignore barrier acks for barriers sent before a device
1898	* was attached, or a way to wait for all pending barrier acks to come in.
1899	* As barriers are counted per resource,
1900	* we'd need to suspend io on all devices of a resource.
1901	*/
1902	wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) \|\| drbd_suspended(device));
1903	/ and for any other previously queued work /
1904	drbd_flush_workqueue(work_queue: &connection->sender_work);
1905
1906	rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE);
1907	retcode = (enum drbd_ret_code)rv;
1908	drbd_resume_io(device);
1909	if (rv < SS_SUCCESS)
1910	goto fail;
1911
1912	if (!get_ldev_if_state(device, D_ATTACHING))
1913	goto force_diskless;
1914
1915	if (!device->bitmap) {
1916	if (drbd_bm_init(device)) {
1917	retcode = ERR_NOMEM;
1918	goto force_diskless_dec;
1919	}
1920	}
1921
1922	if (device->state.pdsk != D_UP_TO_DATE && device->ed_uuid &&
1923	(device->state.role == R_PRIMARY \|\| device->state.peer == R_PRIMARY) &&
1924	(device->ed_uuid & ~((u64)`1`)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)`1`))) {
1925	drbd_err(device, "Can only attach to data with current UUID=%016llX\n",
1926	(unsigned long long)device->ed_uuid);
1927	retcode = ERR_DATA_NOT_CURRENT;
1928	goto force_diskless_dec;
1929	}
1930
1931	/ Since we are diskless, fix the activity log first... /
1932	if (drbd_check_al_size(device, dc: new_disk_conf)) {
1933	retcode = ERR_NOMEM;
1934	goto force_diskless_dec;
1935	}
1936
1937	/ Prevent shrinking of consistent devices ! /
1938	{
1939	unsigned long long nsz = drbd_new_dev_size(device, bdev: nbc, u_size: nbc->disk_conf->disk_size, assume_peer_has_space: `0`);
1940	unsigned long long eff = nbc->md.la_size_sect;
1941	if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && nsz < eff) {
1942	if (nsz == nbc->disk_conf->disk_size) {
1943	drbd_warn(device, "truncating a consistent device during attach (%llu < %llu)\n", nsz, eff);
1944	} else {
1945	drbd_warn(device, "refusing to truncate a consistent device (%llu < %llu)\n", nsz, eff);
1946	drbd_msg_sprintf_info(skb: adm_ctx.reply_skb,
1947	fmt: "To-be-attached device has last effective > current size, and is consistent\n"
1948	"(%llu > %llu sectors). Refusing to attach.", eff, nsz);
1949	retcode = ERR_IMPLICIT_SHRINK;
1950	goto force_diskless_dec;
1951	}
1952	}
1953	}
1954
1955	lock_all_resources();
1956	retcode = drbd_resync_after_valid(device, o_minor: new_disk_conf->resync_after);
1957	if (retcode != NO_ERROR) {
1958	unlock_all_resources();
1959	goto force_diskless_dec;
1960	}
1961
1962	/ Reset the "barriers don't work" bits here, then force meta data to*
1963	* be written, to ensure we determine if barriers are supported. */
1964	if (new_disk_conf->md_flushes)
1965	clear_bit(nr: MD_NO_FUA, addr: &device->flags);
1966	else
1967	set_bit(nr: MD_NO_FUA, addr: &device->flags);
1968
1969	/ Point of no return reached.*
1970	* Devices and memory are no longer released by error cleanup below.
1971	* now device takes over responsibility, and the state engine should
1972	* clean it up somewhere. */
1973	D_ASSERT(device, device->ldev == NULL);
1974	device->ldev = nbc;
1975	device->resync = resync_lru;
1976	device->rs_plan_s = new_plan;
1977	nbc = NULL;
1978	resync_lru = NULL;
1979	new_disk_conf = NULL;
1980	new_plan = NULL;
1981
1982	drbd_resync_after_changed(device);
1983	drbd_bump_write_ordering(resource: device->resource, bdev: device->ldev, wo: WO_BDEV_FLUSH);
1984	unlock_all_resources();
1985
1986	if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
1987	set_bit(nr: CRASHED_PRIMARY, addr: &device->flags);
1988	else
1989	clear_bit(nr: CRASHED_PRIMARY, addr: &device->flags);
1990
1991	if (drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
1992	!(device->state.role == R_PRIMARY && device->resource->susp_nod))
1993	set_bit(nr: CRASHED_PRIMARY, addr: &device->flags);
1994
1995	device->send_cnt = `0`;
1996	device->recv_cnt = `0`;
1997	device->read_cnt = `0`;
1998	device->writ_cnt = `0`;
1999
2000	drbd_reconsider_queue_parameters(device, bdev: device->ldev, NULL);
2001
2002	/ If I am currently not R_PRIMARY,*
2003	* but meta data primary indicator is set,
2004	* I just now recover from a hard crash,
2005	* and have been R_PRIMARY before that crash.
2006	*
2007	* Now, if I had no connection before that crash
2008	* (have been degraded R_PRIMARY), chances are that
2009	* I won't find my peer now either.
2010	*
2011	* In that case, and _only_ in that case,
2012	* we use the degr-wfc-timeout instead of the default,
2013	* so we can automatically recover from a crash of a
2014	* degraded but active "cluster" after a certain timeout.
2015	*/
2016	clear_bit(nr: USE_DEGR_WFC_T, addr: &device->flags);
2017	if (device->state.role != R_PRIMARY &&
2018	drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
2019	!drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND))
2020	set_bit(nr: USE_DEGR_WFC_T, addr: &device->flags);
2021
2022	dd = drbd_determine_dev_size(device, flags: `0`, NULL);
2023	if (dd <= DS_ERROR) {
2024	retcode = ERR_NOMEM_BITMAP;
2025	goto force_diskless_dec;
2026	} else if (dd == DS_GREW)
2027	set_bit(nr: RESYNC_AFTER_NEG, addr: &device->flags);
2028
2029	if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) \|\|
2030	(test_bit(CRASHED_PRIMARY, &device->flags) &&
2031	drbd_md_test_flag(device->ldev, MDF_AL_DISABLED))) {
2032	drbd_info(device, "Assuming that all blocks are out of sync "
2033	"(aka FullSync)\n");
2034	if (drbd_bitmap_io(device, io_fn: &drbd_bmio_set_n_write,
2035	why: "set_n_write from attaching", flags: BM_LOCKED_MASK,
2036	NULL)) {
2037	retcode = ERR_IO_MD_DISK;
2038	goto force_diskless_dec;
2039	}
2040	} else {
2041	if (drbd_bitmap_io(device, io_fn: &drbd_bm_read,
2042	why: "read from attaching", flags: BM_LOCKED_MASK,
2043	NULL)) {
2044	retcode = ERR_IO_MD_DISK;
2045	goto force_diskless_dec;
2046	}
2047	}
2048
2049	if (_drbd_bm_total_weight(device) == drbd_bm_bits(device))
2050	drbd_suspend_al(device); / IO is still suspended here... /
2051
2052	spin_lock_irq(lock: &device->resource->req_lock);
2053	os = drbd_read_state(device);
2054	ns = os;
2055	/ If MDF_CONSISTENT is not set go into inconsistent state,*
2056	otherwise investigate MDF_WasUpToDate...
2057	If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state,
2058	otherwise into D_CONSISTENT state.
2059	*/
2060	if (drbd_md_test_flag(device->ldev, MDF_CONSISTENT)) {
2061	if (drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE))
2062	ns.disk = D_CONSISTENT;
2063	else
2064	ns.disk = D_OUTDATED;
2065	} else {
2066	ns.disk = D_INCONSISTENT;
2067	}
2068
2069	if (drbd_md_test_flag(device->ldev, MDF_PEER_OUT_DATED))
2070	ns.pdsk = D_OUTDATED;
2071
2072	rcu_read_lock();
2073	if (ns.disk == D_CONSISTENT &&
2074	(ns.pdsk == D_OUTDATED \|\| rcu_dereference(device->ldev->disk_conf)->fencing == FP_DONT_CARE))
2075	ns.disk = D_UP_TO_DATE;
2076
2077	/ All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,*
2078	MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
2079	this point, because drbd_request_state() modifies these
2080	flags. /*
2081
2082	if (rcu_dereference(device->ldev->disk_conf)->al_updates)
2083	device->ldev->md.flags &= ~MDF_AL_DISABLED;
2084	else
2085	device->ldev->md.flags \|= MDF_AL_DISABLED;
2086
2087	rcu_read_unlock();
2088
2089	/ In case we are C_CONNECTED postpone any decision on the new disk*
2090	state after the negotiation phase. /*
2091	if (device->state.conn == C_CONNECTED) {
2092	device->new_state_tmp.i = ns.i;
2093	ns.i = os.i;
2094	ns.disk = D_NEGOTIATING;
2095
2096	/ We expect to receive up-to-date UUIDs soon.*
2097	To avoid a race in receive_state, free p_uuid while
2098	holding req_lock. I.e. atomic with the state change /*
2099	kfree(objp: device->p_uuid);
2100	device->p_uuid = NULL;
2101	}
2102
2103	rv = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
2104	spin_unlock_irq(lock: &device->resource->req_lock);
2105
2106	if (rv < SS_SUCCESS)
2107	goto force_diskless_dec;
2108
2109	mod_timer(timer: &device->request_timer, expires: jiffies + HZ);
2110
2111	if (device->state.role == R_PRIMARY)
2112	device->ldev->md.uuid[UI_CURRENT] \|= (u64)`1`;
2113	else
2114	device->ldev->md.uuid[UI_CURRENT] &= ~(u64)`1`;
2115
2116	drbd_md_mark_dirty(device);
2117	drbd_md_sync(device);
2118
2119	kobject_uevent(kobj: &disk_to_dev(device->vdisk)->kobj, action: KOBJ_CHANGE);
2120	put_ldev(device);
2121	conn_reconfig_done(connection);
2122	mutex_unlock(lock: &adm_ctx.resource->adm_mutex);
2123	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode);
2124	return `0`;
2125
2126	force_diskless_dec:
2127	put_ldev(device);
2128	force_diskless:
2129	drbd_force_state(device, NS(disk, D_DISKLESS));
2130	drbd_md_sync(device);
2131	fail:
2132	conn_reconfig_done(connection);
2133	if (nbc) {
2134	close_backing_dev(device, handle: nbc->md_bdev_handle,
2135	do_bd_unlink: nbc->md_bdev != nbc->backing_bdev);
2136	close_backing_dev(device, handle: nbc->backing_bdev_handle, do_bd_unlink: true);
2137	kfree(objp: nbc);
2138	}
2139	kfree(objp: new_disk_conf);
2140	lc_destroy(lc: resync_lru);
2141	kfree(objp: new_plan);
2142	mutex_unlock(lock: &adm_ctx.resource->adm_mutex);
2143	finish:
2144	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode);
2145	return `0`;
2146	}
2147
2148	static int adm_detach(struct drbd_device device, int* force)
2149	{
2150	if (force) {
2151	set_bit(nr: FORCE_DETACH, addr: &device->flags);
2152	drbd_force_state(device, NS(disk, D_FAILED));
2153	return SS_SUCCESS;
2154	}
2155
2156	return drbd_request_detach_interruptible(device);
2157	}
2158
2159	/ Detaching the disk is a process in multiple stages. First we need to lock*
2160	* out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
2161	* Then we transition to D_DISKLESS, and wait for put_ldev() to return all
2162	* internal references as well.
2163	* Only then we have finally detached. */
2164	int drbd_adm_detach(struct sk_buff skb, struct* genl_info *info)
2165	{
2166	struct drbd_config_context adm_ctx;
2167	enum drbd_ret_code retcode;
2168	struct detach_parms parms = { };
2169	int err;
2170
2171	retcode = drbd_adm_prepare(adm_ctx: &adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2172	if (!adm_ctx.reply_skb)
2173	return retcode;
2174	if (retcode != NO_ERROR)
2175	goto out;
2176
2177	if (info->attrs[DRBD_NLA_DETACH_PARMS]) {
2178	err = detach_parms_from_attrs(s: &parms, info);
2179	if (err) {
2180	retcode = ERR_MANDATORY_TAG;
2181	drbd_msg_put_info(skb: adm_ctx.reply_skb, info: from_attrs_err_to_txt(err));
2182	goto out;
2183	}
2184	}
2185
2186	mutex_lock(&adm_ctx.resource->adm_mutex);
2187	retcode = adm_detach(device: adm_ctx.device, force: parms.force_detach);
2188	mutex_unlock(lock: &adm_ctx.resource->adm_mutex);
2189	out:
2190	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode);
2191	return `0`;
2192	}
2193
2194	static bool conn_resync_running(struct drbd_connection *connection)
2195	{
2196	struct drbd_peer_device *peer_device;
2197	bool rv = false;
2198	int vnr;
2199
2200	rcu_read_lock();
2201	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
2202	struct drbd_device *device = peer_device->device;
2203	if (device->state.conn == C_SYNC_SOURCE \|\|
2204	device->state.conn == C_SYNC_TARGET \|\|
2205	device->state.conn == C_PAUSED_SYNC_S \|\|
2206	device->state.conn == C_PAUSED_SYNC_T) {
2207	rv = true;
2208	break;
2209	}
2210	}
2211	rcu_read_unlock();
2212
2213	return rv;
2214	}
2215
2216	static bool conn_ov_running(struct drbd_connection *connection)
2217	{
2218	struct drbd_peer_device *peer_device;
2219	bool rv = false;
2220	int vnr;
2221
2222	rcu_read_lock();
2223	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
2224	struct drbd_device *device = peer_device->device;
2225	if (device->state.conn == C_VERIFY_S \|\|
2226	device->state.conn == C_VERIFY_T) {
2227	rv = true;
2228	break;
2229	}
2230	}
2231	rcu_read_unlock();
2232
2233	return rv;
2234	}
2235
2236	static enum drbd_ret_code
2237	_check_net_options(struct drbd_connection connection, struct* net_conf old_net_conf, struct* net_conf *new_net_conf)
2238	{
2239	struct drbd_peer_device *peer_device;
2240	int i;
2241
2242	if (old_net_conf && connection->cstate == C_WF_REPORT_PARAMS && connection->agreed_pro_version < `100`) {
2243	if (new_net_conf->wire_protocol != old_net_conf->wire_protocol)
2244	return ERR_NEED_APV_100;
2245
2246	if (new_net_conf->two_primaries != old_net_conf->two_primaries)
2247	return ERR_NEED_APV_100;
2248
2249	if (strcmp(new_net_conf->integrity_alg, old_net_conf->integrity_alg))
2250	return ERR_NEED_APV_100;
2251	}
2252
2253	if (!new_net_conf->two_primaries &&
2254	conn_highest_role(connection) == R_PRIMARY &&
2255	conn_highest_peer(connection) == R_PRIMARY)
2256	return ERR_NEED_ALLOW_TWO_PRI;
2257
2258	if (new_net_conf->two_primaries &&
2259	(new_net_conf->wire_protocol != DRBD_PROT_C))
2260	return ERR_NOT_PROTO_C;
2261
2262	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2263	struct drbd_device *device = peer_device->device;
2264	if (get_ldev(device)) {
2265	enum drbd_fencing_p fp = rcu_dereference(device->ldev->disk_conf)->fencing;
2266	put_ldev(device);
2267	if (new_net_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH)
2268	return ERR_STONITH_AND_PROT_A;
2269	}
2270	if (device->state.role == R_PRIMARY && new_net_conf->discard_my_data)
2271	return ERR_DISCARD_IMPOSSIBLE;
2272	}
2273
2274	if (new_net_conf->on_congestion != OC_BLOCK && new_net_conf->wire_protocol != DRBD_PROT_A)
2275	return ERR_CONG_NOT_PROTO_A;
2276
2277	return NO_ERROR;
2278	}
2279
2280	static enum drbd_ret_code
2281	check_net_options(struct drbd_connection connection, struct* net_conf *new_net_conf)
2282	{
2283	enum drbd_ret_code rv;
2284	struct drbd_peer_device *peer_device;
2285	int i;
2286
2287	rcu_read_lock();
2288	rv = _check_net_options(connection, rcu_dereference(connection->net_conf), new_net_conf);
2289	rcu_read_unlock();
2290
2291	/ connection->peer_devices protected by genl_lock() here /
2292	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2293	struct drbd_device *device = peer_device->device;
2294	if (!device->bitmap) {
2295	if (drbd_bm_init(device))
2296	return ERR_NOMEM;
2297	}
2298	}
2299
2300	return rv;
2301	}
2302
/*
 * Scratch set of hash transforms allocated by alloc_crypto() and released by
 * free_crypto().  A NULL member means "not allocated".
 */
struct crypto {
	/* from net_conf->verify_alg */
	struct crypto_shash *verify_tfm;
	/* from net_conf->csums_alg */
	struct crypto_shash *csums_tfm;
	/* "hmac(<net_conf->cram_hmac_alg>)" */
	struct crypto_shash *cram_hmac_tfm;
	/* from net_conf->integrity_alg */
	struct crypto_shash *integrity_tfm;
};
2309
2310	static int
2311	alloc_shash(struct crypto_shash *tfm, char* tfm_name, int* err_alg)
2312	{
2313	if (!tfm_name[`0`])
2314	return NO_ERROR;
2315
2316	*tfm = crypto_alloc_shash(alg_name: tfm_name, type: `0`, mask: `0`);
2317	if (IS_ERR(ptr: *tfm)) {
2318	*tfm = NULL;
2319	return err_alg;
2320	}
2321
2322	return NO_ERROR;
2323	}
2324
2325	static enum drbd_ret_code
2326	alloc_crypto(struct crypto crypto, struct* net_conf *new_net_conf)
2327	{
2328	char hmac_name[CRYPTO_MAX_ALG_NAME];
2329	enum drbd_ret_code rv;
2330
2331	rv = alloc_shash(tfm: &crypto->csums_tfm, tfm_name: new_net_conf->csums_alg,
2332	err_alg: ERR_CSUMS_ALG);
2333	if (rv != NO_ERROR)
2334	return rv;
2335	rv = alloc_shash(tfm: &crypto->verify_tfm, tfm_name: new_net_conf->verify_alg,
2336	err_alg: ERR_VERIFY_ALG);
2337	if (rv != NO_ERROR)
2338	return rv;
2339	rv = alloc_shash(tfm: &crypto->integrity_tfm, tfm_name: new_net_conf->integrity_alg,
2340	err_alg: ERR_INTEGRITY_ALG);
2341	if (rv != NO_ERROR)
2342	return rv;
2343	if (new_net_conf->cram_hmac_alg[`0`] != `0`) {
2344	snprintf(buf: hmac_name, CRYPTO_MAX_ALG_NAME, fmt: "hmac(%s)",
2345	new_net_conf->cram_hmac_alg);
2346
2347	rv = alloc_shash(tfm: &crypto->cram_hmac_tfm, tfm_name: hmac_name,
2348	err_alg: ERR_AUTH_ALG);
2349	}
2350
2351	return rv;
2352	}
2353
2354	static void free_crypto(struct crypto *crypto)
2355	{
2356	crypto_free_shash(tfm: crypto->cram_hmac_tfm);
2357	crypto_free_shash(tfm: crypto->integrity_tfm);
2358	crypto_free_shash(tfm: crypto->csums_tfm);
2359	crypto_free_shash(tfm: crypto->verify_tfm);
2360	}
2361
2362	int drbd_adm_net_opts(struct sk_buff skb, struct* genl_info *info)
2363	{
2364	struct drbd_config_context adm_ctx;
2365	enum drbd_ret_code retcode;
2366	struct drbd_connection *connection;
2367	struct net_conf old_net_conf, new_net_conf = NULL;
2368	int err;
2369	int ovr; / online verify running /
2370	int rsr; / re-sync running /
2371	struct crypto crypto = { };
2372
2373	retcode = drbd_adm_prepare(adm_ctx: &adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
2374	if (!adm_ctx.reply_skb)
2375	return retcode;
2376	if (retcode != NO_ERROR)
2377	goto finish;
2378
2379	connection = adm_ctx.connection;
2380	mutex_lock(&adm_ctx.resource->adm_mutex);
2381
2382	new_net_conf = kzalloc(size: sizeof(struct net_conf), GFP_KERNEL);
2383	if (!new_net_conf) {
2384	retcode = ERR_NOMEM;
2385	goto out;
2386	}
2387
2388	conn_reconfig_start(connection);
2389
2390	mutex_lock(&connection->data.mutex);
2391	mutex_lock(&connection->resource->conf_update);
2392	old_net_conf = connection->net_conf;
2393
2394	if (!old_net_conf) {
2395	drbd_msg_put_info(skb: adm_ctx.reply_skb, info: "net conf missing, try connect");
2396	retcode = ERR_INVALID_REQUEST;
2397	goto fail;
2398	}
2399
2400	new_net_conf = old_net_conf;
2401	if (should_set_defaults(info))
2402	set_net_conf_defaults(new_net_conf);
2403
2404	err = net_conf_from_attrs_for_change(s: new_net_conf, info);
2405	if (err && err != -ENOMSG) {
2406	retcode = ERR_MANDATORY_TAG;
2407	drbd_msg_put_info(skb: adm_ctx.reply_skb, info: from_attrs_err_to_txt(err));
2408	goto fail;
2409	}
2410
2411	retcode = check_net_options(connection, new_net_conf);
2412	if (retcode != NO_ERROR)
2413	goto fail;
2414
2415	/ re-sync running /
2416	rsr = conn_resync_running(connection);
2417	if (rsr && strcmp(new_net_conf->csums_alg, old_net_conf->csums_alg)) {
2418	retcode = ERR_CSUMS_RESYNC_RUNNING;
2419	goto fail;
2420	}
2421
2422	/ online verify running /
2423	ovr = conn_ov_running(connection);
2424	if (ovr && strcmp(new_net_conf->verify_alg, old_net_conf->verify_alg)) {
2425	retcode = ERR_VERIFY_RUNNING;
2426	goto fail;
2427	}
2428
2429	retcode = alloc_crypto(crypto: &crypto, new_net_conf);
2430	if (retcode != NO_ERROR)
2431	goto fail;
2432
2433	rcu_assign_pointer(connection->net_conf, new_net_conf);
2434
2435	if (!rsr) {
2436	crypto_free_shash(tfm: connection->csums_tfm);
2437	connection->csums_tfm = crypto.csums_tfm;
2438	crypto.csums_tfm = NULL;
2439	}
2440	if (!ovr) {
2441	crypto_free_shash(tfm: connection->verify_tfm);
2442	connection->verify_tfm = crypto.verify_tfm;
2443	crypto.verify_tfm = NULL;
2444	}
2445
2446	crypto_free_shash(tfm: connection->integrity_tfm);
2447	connection->integrity_tfm = crypto.integrity_tfm;
2448	if (connection->cstate >= C_WF_REPORT_PARAMS && connection->agreed_pro_version >= `100`)
2449	/ Do this without trying to take connection->data.mutex again. /
2450	__drbd_send_protocol(connection, cmd: P_PROTOCOL_UPDATE);
2451
2452	crypto_free_shash(tfm: connection->cram_hmac_tfm);
2453	connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2454
2455	mutex_unlock(lock: &connection->resource->conf_update);
2456	mutex_unlock(lock: &connection->data.mutex);
2457	kvfree_rcu_mightsleep(old_net_conf);
2458
2459	if (connection->cstate >= C_WF_REPORT_PARAMS) {
2460	struct drbd_peer_device *peer_device;
2461	int vnr;
2462
2463	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
2464	drbd_send_sync_param(peer_device);
2465	}
2466
2467	goto done;
2468
2469	fail:
2470	mutex_unlock(lock: &connection->resource->conf_update);
2471	mutex_unlock(lock: &connection->data.mutex);
2472	free_crypto(crypto: &crypto);
2473	kfree(objp: new_net_conf);
2474	done:
2475	conn_reconfig_done(connection);
2476	out:
2477	mutex_unlock(lock: &adm_ctx.resource->adm_mutex);
2478	finish:
2479	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode);
2480	return `0`;
2481	}
2482
2483	static void connection_to_info(struct connection_info *info,
2484	struct drbd_connection *connection)
2485	{
2486	info->conn_connection_state = connection->cstate;
2487	info->conn_role = conn_highest_peer(connection);
2488	}
2489
2490	static void peer_device_to_info(struct peer_device_info *info,
2491	struct drbd_peer_device *peer_device)
2492	{
2493	struct drbd_device *device = peer_device->device;
2494
2495	info->peer_repl_state =
2496	max_t(enum drbd_conns, C_WF_REPORT_PARAMS, device->state.conn);
2497	info->peer_disk_state = device->state.pdsk;
2498	info->peer_resync_susp_user = device->state.user_isp;
2499	info->peer_resync_susp_peer = device->state.peer_isp;
2500	info->peer_resync_susp_dependency = device->state.aftr_isp;
2501	}
2502
2503	int drbd_adm_connect(struct sk_buff skb, struct* genl_info *info)
2504	{
2505	struct connection_info connection_info;
2506	enum drbd_notification_type flags;
2507	unsigned int peer_devices = `0`;
2508	struct drbd_config_context adm_ctx;
2509	struct drbd_peer_device *peer_device;
2510	struct net_conf old_net_conf, new_net_conf = NULL;
2511	struct crypto crypto = { };
2512	struct drbd_resource *resource;
2513	struct drbd_connection *connection;
2514	enum drbd_ret_code retcode;
2515	enum drbd_state_rv rv;
2516	int i;
2517	int err;
2518
2519	retcode = drbd_adm_prepare(adm_ctx: &adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
2520
2521	if (!adm_ctx.reply_skb)
2522	return retcode;
2523	if (retcode != NO_ERROR)
2524	goto out;
2525	if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) {
2526	drbd_msg_put_info(skb: adm_ctx.reply_skb, info: "connection endpoint(s) missing");
2527	retcode = ERR_INVALID_REQUEST;
2528	goto out;
2529	}
2530
2531	/ No need for _rcu here. All reconfiguration is*
2532	* strictly serialized on genl_lock(). We are protected against
2533	* concurrent reconfiguration/addition/deletion */
2534	for_each_resource(resource, &drbd_resources) {
2535	for_each_connection(connection, resource) {
2536	if (nla_len(nla: adm_ctx.my_addr) == connection->my_addr_len &&
2537	!memcmp(p: nla_data(nla: adm_ctx.my_addr), q: &connection->my_addr,
2538	size: connection->my_addr_len)) {
2539	retcode = ERR_LOCAL_ADDR;
2540	goto out;
2541	}
2542
2543	if (nla_len(nla: adm_ctx.peer_addr) == connection->peer_addr_len &&
2544	!memcmp(p: nla_data(nla: adm_ctx.peer_addr), q: &connection->peer_addr,
2545	size: connection->peer_addr_len)) {
2546	retcode = ERR_PEER_ADDR;
2547	goto out;
2548	}
2549	}
2550	}
2551
2552	mutex_lock(&adm_ctx.resource->adm_mutex);
2553	connection = first_connection(resource: adm_ctx.resource);
2554	conn_reconfig_start(connection);
2555
2556	if (connection->cstate > C_STANDALONE) {
2557	retcode = ERR_NET_CONFIGURED;
2558	goto fail;
2559	}
2560
2561	/ allocation not in the IO path, drbdsetup / netlink process context /
2562	new_net_conf = kzalloc(size: sizeof(*new_net_conf), GFP_KERNEL);
2563	if (!new_net_conf) {
2564	retcode = ERR_NOMEM;
2565	goto fail;
2566	}
2567
2568	set_net_conf_defaults(new_net_conf);
2569
2570	err = net_conf_from_attrs(s: new_net_conf, info);
2571	if (err && err != -ENOMSG) {
2572	retcode = ERR_MANDATORY_TAG;
2573	drbd_msg_put_info(skb: adm_ctx.reply_skb, info: from_attrs_err_to_txt(err));
2574	goto fail;
2575	}
2576
2577	retcode = check_net_options(connection, new_net_conf);
2578	if (retcode != NO_ERROR)
2579	goto fail;
2580
2581	retcode = alloc_crypto(crypto: &crypto, new_net_conf);
2582	if (retcode != NO_ERROR)
2583	goto fail;
2584
2585	((char *)new_net_conf->shared_secret)[SHARED_SECRET_MAX-`1`] = `0`;
2586
2587	drbd_flush_workqueue(work_queue: &connection->sender_work);
2588
2589	mutex_lock(&adm_ctx.resource->conf_update);
2590	old_net_conf = connection->net_conf;
2591	if (old_net_conf) {
2592	retcode = ERR_NET_CONFIGURED;
2593	mutex_unlock(lock: &adm_ctx.resource->conf_update);
2594	goto fail;
2595	}
2596	rcu_assign_pointer(connection->net_conf, new_net_conf);
2597
2598	conn_free_crypto(connection);
2599	connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2600	connection->integrity_tfm = crypto.integrity_tfm;
2601	connection->csums_tfm = crypto.csums_tfm;
2602	connection->verify_tfm = crypto.verify_tfm;
2603
2604	connection->my_addr_len = nla_len(nla: adm_ctx.my_addr);
2605	memcpy(&connection->my_addr, nla_data(adm_ctx.my_addr), connection->my_addr_len);
2606	connection->peer_addr_len = nla_len(nla: adm_ctx.peer_addr);
2607	memcpy(&connection->peer_addr, nla_data(adm_ctx.peer_addr), connection->peer_addr_len);
2608
2609	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2610	peer_devices++;
2611	}
2612
2613	connection_to_info(info: &connection_info, connection);
2614	flags = (peer_devices--) ? NOTIFY_CONTINUES : `0`;
2615	mutex_lock(&notification_mutex);
2616	notify_connection_state(NULL, `0`, connection, &connection_info, NOTIFY_CREATE \| flags);
2617	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2618	struct peer_device_info peer_device_info;
2619
2620	peer_device_to_info(info: &peer_device_info, peer_device);
2621	flags = (peer_devices--) ? NOTIFY_CONTINUES : `0`;
2622	notify_peer_device_state(NULL, `0`, peer_device, &peer_device_info, NOTIFY_CREATE \| flags);
2623	}
2624	mutex_unlock(lock: &notification_mutex);
2625	mutex_unlock(lock: &adm_ctx.resource->conf_update);
2626
2627	rcu_read_lock();
2628	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2629	struct drbd_device *device = peer_device->device;
2630	device->send_cnt = `0`;
2631	device->recv_cnt = `0`;
2632	}
2633	rcu_read_unlock();
2634
2635	rv = conn_request_state(connection, NS(conn, C_UNCONNECTED), flags: CS_VERBOSE);
2636
2637	conn_reconfig_done(connection);
2638	mutex_unlock(lock: &adm_ctx.resource->adm_mutex);
2639	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode: rv);
2640	return `0`;
2641
2642	fail:
2643	free_crypto(crypto: &crypto);
2644	kfree(objp: new_net_conf);
2645
2646	conn_reconfig_done(connection);
2647	mutex_unlock(lock: &adm_ctx.resource->adm_mutex);
2648	out:
2649	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode);
2650	return `0`;
2651	}
2652
2653	static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force)
2654	{
2655	enum drbd_conns cstate;
2656	enum drbd_state_rv rv;
2657
2658	repeat:
2659	rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2660	flags: force ? CS_HARD : `0`);
2661
2662	switch (rv) {
2663	case SS_NOTHING_TO_DO:
2664	break;
2665	case SS_ALREADY_STANDALONE:
2666	return SS_SUCCESS;
2667	case SS_PRIMARY_NOP:
2668	/ Our state checking code wants to see the peer outdated. /
2669	rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, pdsk, D_OUTDATED), flags: `0`);
2670
2671	if (rv == SS_OUTDATE_WO_CONN) / lost connection before graceful disconnect succeeded /
2672	rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), flags: CS_VERBOSE);
2673
2674	break;
2675	case SS_CW_FAILED_BY_PEER:
2676	spin_lock_irq(lock: &connection->resource->req_lock);
2677	cstate = connection->cstate;
2678	spin_unlock_irq(lock: &connection->resource->req_lock);
2679	if (cstate <= C_WF_CONNECTION)
2680	goto repeat;
2681	/ The peer probably wants to see us outdated. /
2682	rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING,
2683	disk, D_OUTDATED), flags: `0`);
2684	if (rv == SS_IS_DISKLESS \|\| rv == SS_LOWER_THAN_OUTDATED) {
2685	rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2686	flags: CS_HARD);
2687	}
2688	break;
2689	default:;
2690	/ no special handling necessary /
2691	}
2692
2693	if (rv >= SS_SUCCESS) {
2694	enum drbd_state_rv rv2;
2695	/ No one else can reconfigure the network while I am here.*
2696	* The state handling only uses drbd_thread_stop_nowait(),
2697	* we want to really wait here until the receiver is no more.
2698	*/
2699	drbd_thread_stop(thi: &connection->receiver);
2700
2701	/ Race breaker. This additional state change request may be*
2702	* necessary, if this was a forced disconnect during a receiver
2703	* restart. We may have "killed" the receiver thread just
2704	* after drbd_receiver() returned. Typically, we should be
2705	* C_STANDALONE already, now, and this becomes a no-op.
2706	*/
2707	rv2 = conn_request_state(connection, NS(conn, C_STANDALONE),
2708	flags: CS_VERBOSE \| CS_HARD);
2709	if (rv2 < SS_SUCCESS)
2710	drbd_err(connection,
2711	"unexpected rv2=%d in conn_try_disconnect()\n",
2712	rv2);
2713	/ Unlike in DRBD 9, the state engine has generated*
2714	* NOTIFY_DESTROY events before clearing connection->net_conf. */
2715	}
2716	return rv;
2717	}
2718
2719	int drbd_adm_disconnect(struct sk_buff skb, struct* genl_info *info)
2720	{
2721	struct drbd_config_context adm_ctx;
2722	struct disconnect_parms parms;
2723	struct drbd_connection *connection;
2724	enum drbd_state_rv rv;
2725	enum drbd_ret_code retcode;
2726	int err;
2727
2728	retcode = drbd_adm_prepare(adm_ctx: &adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
2729	if (!adm_ctx.reply_skb)
2730	return retcode;
2731	if (retcode != NO_ERROR)
2732	goto fail;
2733
2734	connection = adm_ctx.connection;
2735	memset(&parms, `0`, sizeof(parms));
2736	if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
2737	err = disconnect_parms_from_attrs(s: &parms, info);
2738	if (err) {
2739	retcode = ERR_MANDATORY_TAG;
2740	drbd_msg_put_info(skb: adm_ctx.reply_skb, info: from_attrs_err_to_txt(err));
2741	goto fail;
2742	}
2743	}
2744
2745	mutex_lock(&adm_ctx.resource->adm_mutex);
2746	rv = conn_try_disconnect(connection, force: parms.force_disconnect);
2747	mutex_unlock(lock: &adm_ctx.resource->adm_mutex);
2748	if (rv < SS_SUCCESS) {
2749	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode: rv);
2750	return `0`;
2751	}
2752	retcode = NO_ERROR;
2753	fail:
2754	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode);
2755	return `0`;
2756	}
2757
2758	void resync_after_online_grow(struct drbd_device *device)
2759	{
2760	int iass; / I am sync source /
2761
2762	drbd_info(device, "Resync of new storage after online grow\n");
2763	if (device->state.role != device->state.peer)
2764	iass = (device->state.role == R_PRIMARY);
2765	else
2766	iass = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
2767
2768	if (iass)
2769	drbd_start_resync(device, side: C_SYNC_SOURCE);
2770	else
2771	_drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
2772	}
2773
2774	int drbd_adm_resize(struct sk_buff skb, struct* genl_info *info)
2775	{
2776	struct drbd_config_context adm_ctx;
2777	struct disk_conf old_disk_conf, new_disk_conf = NULL;
2778	struct resize_parms rs;
2779	struct drbd_device *device;
2780	enum drbd_ret_code retcode;
2781	enum determine_dev_size dd;
2782	bool change_al_layout = false;
2783	enum dds_flags ddsf;
2784	sector_t u_size;
2785	int err;
2786
2787	retcode = drbd_adm_prepare(adm_ctx: &adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2788	if (!adm_ctx.reply_skb)
2789	return retcode;
2790	if (retcode != NO_ERROR)
2791	goto finish;
2792
2793	mutex_lock(&adm_ctx.resource->adm_mutex);
2794	device = adm_ctx.device;
2795	if (!get_ldev(device)) {
2796	retcode = ERR_NO_DISK;
2797	goto fail;
2798	}
2799
2800	memset(&rs, `0`, sizeof(struct resize_parms));
2801	rs.al_stripes = device->ldev->md.al_stripes;
2802	rs.al_stripe_size = device->ldev->md.al_stripe_size_4k * `4`;
2803	if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
2804	err = resize_parms_from_attrs(s: &rs, info);
2805	if (err) {
2806	retcode = ERR_MANDATORY_TAG;
2807	drbd_msg_put_info(skb: adm_ctx.reply_skb, info: from_attrs_err_to_txt(err));
2808	goto fail_ldev;
2809	}
2810	}
2811
2812	if (device->state.conn > C_CONNECTED) {
2813	retcode = ERR_RESIZE_RESYNC;
2814	goto fail_ldev;
2815	}
2816
2817	if (device->state.role == R_SECONDARY &&
2818	device->state.peer == R_SECONDARY) {
2819	retcode = ERR_NO_PRIMARY;
2820	goto fail_ldev;
2821	}
2822
2823	if (rs.no_resync && first_peer_device(device)->connection->agreed_pro_version < `93`) {
2824	retcode = ERR_NEED_APV_93;
2825	goto fail_ldev;
2826	}
2827
2828	rcu_read_lock();
2829	u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
2830	rcu_read_unlock();
2831	if (u_size != (sector_t)rs.resize_size) {
2832	new_disk_conf = kmalloc(size: sizeof(struct disk_conf), GFP_KERNEL);
2833	if (!new_disk_conf) {
2834	retcode = ERR_NOMEM;
2835	goto fail_ldev;
2836	}
2837	}
2838
2839	if (device->ldev->md.al_stripes != rs.al_stripes \|\|
2840	device->ldev->md.al_stripe_size_4k != rs.al_stripe_size / `4`) {
2841	u32 al_size_k = rs.al_stripes * rs.al_stripe_size;
2842
2843	if (al_size_k > (`16` * `1024` * `1024`)) {
2844	retcode = ERR_MD_LAYOUT_TOO_BIG;
2845	goto fail_ldev;
2846	}
2847
2848	if (al_size_k < MD_32kB_SECT/`2`) {
2849	retcode = ERR_MD_LAYOUT_TOO_SMALL;
2850	goto fail_ldev;
2851	}
2852
2853	if (device->state.conn != C_CONNECTED && !rs.resize_force) {
2854	retcode = ERR_MD_LAYOUT_CONNECTED;
2855	goto fail_ldev;
2856	}
2857
2858	change_al_layout = true;
2859	}
2860
2861	if (device->ldev->known_size != drbd_get_capacity(bdev: device->ldev->backing_bdev))
2862	device->ldev->known_size = drbd_get_capacity(bdev: device->ldev->backing_bdev);
2863
2864	if (new_disk_conf) {
2865	mutex_lock(&device->resource->conf_update);
2866	old_disk_conf = device->ldev->disk_conf;
2867	new_disk_conf = old_disk_conf;
2868	new_disk_conf->disk_size = (sector_t)rs.resize_size;
2869	rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
2870	mutex_unlock(lock: &device->resource->conf_update);
2871	kvfree_rcu_mightsleep(old_disk_conf);
2872	new_disk_conf = NULL;
2873	}
2874
2875	ddsf = (rs.resize_force ? DDSF_FORCED : `0`) \| (rs.no_resync ? DDSF_NO_RESYNC : `0`);
2876	dd = drbd_determine_dev_size(device, flags: ddsf, rs: change_al_layout ? &rs : NULL);
2877	drbd_md_sync(device);
2878	put_ldev(device);
2879	if (dd == DS_ERROR) {
2880	retcode = ERR_NOMEM_BITMAP;
2881	goto fail;
2882	} else if (dd == DS_ERROR_SPACE_MD) {
2883	retcode = ERR_MD_LAYOUT_NO_FIT;
2884	goto fail;
2885	} else if (dd == DS_ERROR_SHRINK) {
2886	retcode = ERR_IMPLICIT_SHRINK;
2887	goto fail;
2888	}
2889
2890	if (device->state.conn == C_CONNECTED) {
2891	if (dd == DS_GREW)
2892	set_bit(nr: RESIZE_PENDING, addr: &device->flags);
2893
2894	drbd_send_uuids(first_peer_device(device));
2895	drbd_send_sizes(first_peer_device(device), trigger_reply: `1`, flags: ddsf);
2896	}
2897
2898	fail:
2899	mutex_unlock(lock: &adm_ctx.resource->adm_mutex);
2900	finish:
2901	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode);
2902	return `0`;
2903
2904	fail_ldev:
2905	put_ldev(device);
2906	kfree(objp: new_disk_conf);
2907	goto fail;
2908	}
2909
2910	int drbd_adm_resource_opts(struct sk_buff skb, struct* genl_info *info)
2911	{
2912	struct drbd_config_context adm_ctx;
2913	enum drbd_ret_code retcode;
2914	struct res_opts res_opts;
2915	int err;
2916
2917	retcode = drbd_adm_prepare(adm_ctx: &adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
2918	if (!adm_ctx.reply_skb)
2919	return retcode;
2920	if (retcode != NO_ERROR)
2921	goto fail;
2922
2923	res_opts = adm_ctx.resource->res_opts;
2924	if (should_set_defaults(info))
2925	set_res_opts_defaults(&res_opts);
2926
2927	err = res_opts_from_attrs(s: &res_opts, info);
2928	if (err && err != -ENOMSG) {
2929	retcode = ERR_MANDATORY_TAG;
2930	drbd_msg_put_info(skb: adm_ctx.reply_skb, info: from_attrs_err_to_txt(err));
2931	goto fail;
2932	}
2933
2934	mutex_lock(&adm_ctx.resource->adm_mutex);
2935	err = set_resource_options(resource: adm_ctx.resource, res_opts: &res_opts);
2936	if (err) {
2937	retcode = ERR_INVALID_REQUEST;
2938	if (err == -ENOMEM)
2939	retcode = ERR_NOMEM;
2940	}
2941	mutex_unlock(lock: &adm_ctx.resource->adm_mutex);
2942
2943	fail:
2944	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode);
2945	return `0`;
2946	}
2947
2948	int drbd_adm_invalidate(struct sk_buff skb, struct* genl_info *info)
2949	{
2950	struct drbd_config_context adm_ctx;
2951	struct drbd_device *device;
2952	int retcode; / enum drbd_ret_code rsp. enum drbd_state_rv /
2953
2954	retcode = drbd_adm_prepare(adm_ctx: &adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2955	if (!adm_ctx.reply_skb)
2956	return retcode;
2957	if (retcode != NO_ERROR)
2958	goto out;
2959
2960	device = adm_ctx.device;
2961	if (!get_ldev(device)) {
2962	retcode = ERR_NO_DISK;
2963	goto out;
2964	}
2965
2966	mutex_lock(&adm_ctx.resource->adm_mutex);
2967
2968	/ If there is still bitmap IO pending, probably because of a previous*
2969	* resync just being finished, wait for it before requesting a new resync.
2970	* Also wait for it's after_state_ch(). */
2971	drbd_suspend_io(device);
2972	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
2973	drbd_flush_workqueue(work_queue: &first_peer_device(device)->connection->sender_work);
2974
2975	/ If we happen to be C_STANDALONE R_SECONDARY, just change to*
2976	* D_INCONSISTENT, and set all bits in the bitmap. Otherwise,
2977	* try to start a resync handshake as sync target for full sync.
2978	*/
2979	if (device->state.conn == C_STANDALONE && device->state.role == R_SECONDARY) {
2980	retcode = drbd_request_state(device, NS(disk, D_INCONSISTENT));
2981	if (retcode >= SS_SUCCESS) {
2982	if (drbd_bitmap_io(device, io_fn: &drbd_bmio_set_n_write,
2983	why: "set_n_write from invalidate", flags: BM_LOCKED_MASK, NULL))
2984	retcode = ERR_IO_MD_DISK;
2985	}
2986	} else
2987	retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
2988	drbd_resume_io(device);
2989	mutex_unlock(lock: &adm_ctx.resource->adm_mutex);
2990	put_ldev(device);
2991	out:
2992	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode);
2993	return `0`;
2994	}
2995
2996	static int drbd_adm_simple_request_state(struct sk_buff skb, struct* genl_info *info,
2997	union drbd_state mask, union drbd_state val)
2998	{
2999	struct drbd_config_context adm_ctx;
3000	enum drbd_ret_code retcode;
3001
3002	retcode = drbd_adm_prepare(adm_ctx: &adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3003	if (!adm_ctx.reply_skb)
3004	return retcode;
3005	if (retcode != NO_ERROR)
3006	goto out;
3007
3008	mutex_lock(&adm_ctx.resource->adm_mutex);
3009	retcode = drbd_request_state(device: adm_ctx.device, mask, val);
3010	mutex_unlock(lock: &adm_ctx.resource->adm_mutex);
3011	out:
3012	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode);
3013	return `0`;
3014	}
3015
3016	static int drbd_bmio_set_susp_al(struct drbd_device *device,
3017	struct drbd_peer_device *peer_device) __must_hold(local)
3018	{
3019	int rv;
3020
3021	rv = drbd_bmio_set_n_write(device, peer_device);
3022	drbd_suspend_al(device);
3023	return rv;
3024	}
3025
3026	int drbd_adm_invalidate_peer(struct sk_buff skb, struct* genl_info *info)
3027	{
3028	struct drbd_config_context adm_ctx;
3029	int retcode; / drbd_ret_code, drbd_state_rv /
3030	struct drbd_device *device;
3031
3032	retcode = drbd_adm_prepare(adm_ctx: &adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3033	if (!adm_ctx.reply_skb)
3034	return retcode;
3035	if (retcode != NO_ERROR)
3036	goto out;
3037
3038	device = adm_ctx.device;
3039	if (!get_ldev(device)) {
3040	retcode = ERR_NO_DISK;
3041	goto out;
3042	}
3043
3044	mutex_lock(&adm_ctx.resource->adm_mutex);
3045
3046	/ If there is still bitmap IO pending, probably because of a previous*
3047	* resync just being finished, wait for it before requesting a new resync.
3048	* Also wait for it's after_state_ch(). */
3049	drbd_suspend_io(device);
3050	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
3051	drbd_flush_workqueue(work_queue: &first_peer_device(device)->connection->sender_work);
3052
3053	/ If we happen to be C_STANDALONE R_PRIMARY, just set all bits*
3054	* in the bitmap. Otherwise, try to start a resync handshake
3055	* as sync source for full sync.
3056	*/
3057	if (device->state.conn == C_STANDALONE && device->state.role == R_PRIMARY) {
3058	/ The peer will get a resync upon connect anyways. Just make that*
3059	into a full resync. /*
3060	retcode = drbd_request_state(device, NS(pdsk, D_INCONSISTENT));
3061	if (retcode >= SS_SUCCESS) {
3062	if (drbd_bitmap_io(device, io_fn: &drbd_bmio_set_susp_al,
3063	why: "set_n_write from invalidate_peer",
3064	flags: BM_LOCKED_SET_ALLOWED, NULL))
3065	retcode = ERR_IO_MD_DISK;
3066	}
3067	} else
3068	retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S));
3069	drbd_resume_io(device);
3070	mutex_unlock(lock: &adm_ctx.resource->adm_mutex);
3071	put_ldev(device);
3072	out:
3073	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode);
3074	return `0`;
3075	}
3076
3077	int drbd_adm_pause_sync(struct sk_buff skb, struct* genl_info *info)
3078	{
3079	struct drbd_config_context adm_ctx;
3080	enum drbd_ret_code retcode;
3081
3082	retcode = drbd_adm_prepare(adm_ctx: &adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3083	if (!adm_ctx.reply_skb)
3084	return retcode;
3085	if (retcode != NO_ERROR)
3086	goto out;
3087
3088	mutex_lock(&adm_ctx.resource->adm_mutex);
3089	if (drbd_request_state(device: adm_ctx.device, NS(user_isp, `1`)) == SS_NOTHING_TO_DO)
3090	retcode = ERR_PAUSE_IS_SET;
3091	mutex_unlock(lock: &adm_ctx.resource->adm_mutex);
3092	out:
3093	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode);
3094	return `0`;
3095	}
3096
3097	int drbd_adm_resume_sync(struct sk_buff skb, struct* genl_info *info)
3098	{
3099	struct drbd_config_context adm_ctx;
3100	union drbd_dev_state s;
3101	enum drbd_ret_code retcode;
3102
3103	retcode = drbd_adm_prepare(adm_ctx: &adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3104	if (!adm_ctx.reply_skb)
3105	return retcode;
3106	if (retcode != NO_ERROR)
3107	goto out;
3108
3109	mutex_lock(&adm_ctx.resource->adm_mutex);
3110	if (drbd_request_state(device: adm_ctx.device, NS(user_isp, `0`)) == SS_NOTHING_TO_DO) {
3111	s = adm_ctx.device->state;
3112	if (s.conn == C_PAUSED_SYNC_S \|\| s.conn == C_PAUSED_SYNC_T) {
3113	retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
3114	s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
3115	} else {
3116	retcode = ERR_PAUSE_IS_CLEAR;
3117	}
3118	}
3119	mutex_unlock(lock: &adm_ctx.resource->adm_mutex);
3120	out:
3121	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode);
3122	return `0`;
3123	}
3124
3125	int drbd_adm_suspend_io(struct sk_buff skb, struct* genl_info *info)
3126	{
3127	return drbd_adm_simple_request_state(skb, info, NS(susp, `1`));
3128	}
3129
3130	int drbd_adm_resume_io(struct sk_buff skb, struct* genl_info *info)
3131	{
3132	struct drbd_config_context adm_ctx;
3133	struct drbd_device *device;
3134	int retcode; / enum drbd_ret_code rsp. enum drbd_state_rv /
3135
3136	retcode = drbd_adm_prepare(adm_ctx: &adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3137	if (!adm_ctx.reply_skb)
3138	return retcode;
3139	if (retcode != NO_ERROR)
3140	goto out;
3141
3142	mutex_lock(&adm_ctx.resource->adm_mutex);
3143	device = adm_ctx.device;
3144	if (test_bit(NEW_CUR_UUID, &device->flags)) {
3145	if (get_ldev_if_state(device, D_ATTACHING)) {
3146	drbd_uuid_new_current(device);
3147	put_ldev(device);
3148	} else {
3149	/ This is effectively a multi-stage "forced down".*
3150	* The NEW_CUR_UUID bit is supposedly only set, if we
3151	* lost the replication connection, and are configured
3152	* to freeze IO and wait for some fence-peer handler.
3153	* So we still don't have a replication connection.
3154	* And now we don't have a local disk either. After
3155	* resume, we will fail all pending and new IO, because
3156	* we don't have any data anymore. Which means we will
3157	* eventually be able to terminate all users of this
3158	* device, and then take it down. By bumping the
3159	* "effective" data uuid, we make sure that you really
3160	* need to tear down before you reconfigure, we will
3161	* the refuse to re-connect or re-attach (because no
3162	* matching real data uuid exists).
3163	*/
3164	u64 val;
3165	get_random_bytes(buf: &val, len: sizeof(u64));
3166	drbd_set_ed_uuid(device, val);
3167	drbd_warn(device, "Resumed without access to data; please tear down before attempting to re-configure.\n");
3168	}
3169	clear_bit(nr: NEW_CUR_UUID, addr: &device->flags);
3170	}
3171	drbd_suspend_io(device);
3172	retcode = drbd_request_state(device, NS3(susp, `0`, susp_nod, `0`, susp_fen, `0`));
3173	if (retcode == SS_SUCCESS) {
3174	if (device->state.conn < C_CONNECTED)
3175	tl_clear(first_peer_device(device)->connection);
3176	if (device->state.disk == D_DISKLESS \|\| device->state.disk == D_FAILED)
3177	tl_restart(connection: first_peer_device(device)->connection, what: FAIL_FROZEN_DISK_IO);
3178	}
3179	drbd_resume_io(device);
3180	mutex_unlock(lock: &adm_ctx.resource->adm_mutex);
3181	out:
3182	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode);
3183	return `0`;
3184	}
3185
3186	int drbd_adm_outdate(struct sk_buff skb, struct* genl_info *info)
3187	{
3188	return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
3189	}
3190
3191	static int nla_put_drbd_cfg_context(struct sk_buff *skb,
3192	struct drbd_resource *resource,
3193	struct drbd_connection *connection,
3194	struct drbd_device *device)
3195	{
3196	struct nlattr *nla;
3197	nla = nla_nest_start_noflag(skb, attrtype: DRBD_NLA_CFG_CONTEXT);
3198	if (!nla)
3199	goto nla_put_failure;
3200	if (device &&
3201	nla_put_u32(skb, attrtype: T_ctx_volume, value: device->vnr))
3202	goto nla_put_failure;
3203	if (nla_put_string(skb, attrtype: T_ctx_resource_name, str: resource->name))
3204	goto nla_put_failure;
3205	if (connection) {
3206	if (connection->my_addr_len &&
3207	nla_put(skb, attrtype: T_ctx_my_addr, attrlen: connection->my_addr_len, data: &connection->my_addr))
3208	goto nla_put_failure;
3209	if (connection->peer_addr_len &&
3210	nla_put(skb, attrtype: T_ctx_peer_addr, attrlen: connection->peer_addr_len, data: &connection->peer_addr))
3211	goto nla_put_failure;
3212	}
3213	nla_nest_end(skb, start: nla);
3214	return `0`;
3215
3216	nla_put_failure:
3217	if (nla)
3218	nla_nest_cancel(skb, start: nla);
3219	return -EMSGSIZE;
3220	}
3221
3222	/*
3223	* The generic netlink dump callbacks are called outside the genl_lock(), so
3224	* they cannot use the simple attribute parsing code which uses global
3225	* attribute tables.
3226	*/
3227	static struct nlattr find_cfg_context_attr(const* struct nlmsghdr nlh, int* attr)
3228	{
3229	const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
3230	const int maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - `1`;
3231	struct nlattr *nla;
3232
3233	nla = nla_find(head: nlmsg_attrdata(nlh, hdrlen), len: nlmsg_attrlen(nlh, hdrlen),
3234	attrtype: DRBD_NLA_CFG_CONTEXT);
3235	if (!nla)
3236	return NULL;
3237	return drbd_nla_find_nested(maxtype, nla, __nla_type(attr));
3238	}
3239
/* Forward declaration; used by drbd_adm_dump_resources() below. */
static void resource_to_info(struct resource_info *, struct drbd_resource *);
3241
3242	int drbd_adm_dump_resources(struct sk_buff skb, struct* netlink_callback *cb)
3243	{
3244	struct drbd_genlmsghdr *dh;
3245	struct drbd_resource *resource;
3246	struct resource_info resource_info;
3247	struct resource_statistics resource_statistics;
3248	int err;
3249
3250	rcu_read_lock();
3251	if (cb->args[`0`]) {
3252	for_each_resource_rcu(resource, &drbd_resources)
3253	if (resource == (struct drbd_resource *)cb->args[`0`])
3254	goto found_resource;
3255	err = `0`; / resource was probably deleted /
3256	goto out;
3257	}
3258	resource = list_entry(&drbd_resources,
3259	struct drbd_resource, resources);
3260
3261	found_resource:
3262	list_for_each_entry_continue_rcu(resource, &drbd_resources, resources) {
3263	goto put_result;
3264	}
3265	err = `0`;
3266	goto out;
3267
3268	put_result:
3269	dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
3270	seq: cb->nlh->nlmsg_seq, family: &drbd_genl_family,
3271	NLM_F_MULTI, cmd: DRBD_ADM_GET_RESOURCES);
3272	err = -ENOMEM;
3273	if (!dh)
3274	goto out;
3275	dh->minor = -`1U`;
3276	dh->ret_code = NO_ERROR;
3277	err = nla_put_drbd_cfg_context(skb, resource, NULL, NULL);
3278	if (err)
3279	goto out;
3280	err = res_opts_to_skb(skb, s: &resource->res_opts, exclude_sensitive: !capable(CAP_SYS_ADMIN));
3281	if (err)
3282	goto out;
3283	resource_to_info(&resource_info, resource);
3284	err = resource_info_to_skb(skb, s: &resource_info, exclude_sensitive: !capable(CAP_SYS_ADMIN));
3285	if (err)
3286	goto out;
3287	resource_statistics.res_stat_write_ordering = resource->write_ordering;
3288	err = resource_statistics_to_skb(skb, s: &resource_statistics, exclude_sensitive: !capable(CAP_SYS_ADMIN));
3289	if (err)
3290	goto out;
3291	cb->args[`0`] = (long)resource;
3292	genlmsg_end(skb, hdr: dh);
3293	err = `0`;
3294
3295	out:
3296	rcu_read_unlock();
3297	if (err)
3298	return err;
3299	return skb->len;
3300	}
3301
3302	static void device_to_statistics(struct device_statistics *s,
3303	struct drbd_device *device)
3304	{
3305	memset(s, `0`, sizeof(*s));
3306	s->dev_upper_blocked = !may_inc_ap_bio(device);
3307	if (get_ldev(device)) {
3308	struct drbd_md *md = &device->ldev->md;
3309	u64 history_uuids = (u64 )s->history_uuids;
3310	int n;
3311
3312	spin_lock_irq(lock: &md->uuid_lock);
3313	s->dev_current_uuid = md->uuid[UI_CURRENT];
3314	BUILD_BUG_ON(sizeof(s->history_uuids) < UI_HISTORY_END - UI_HISTORY_START + `1`);
3315	for (n = `0`; n < UI_HISTORY_END - UI_HISTORY_START + `1`; n++)
3316	history_uuids[n] = md->uuid[UI_HISTORY_START + n];
3317	for (; n < HISTORY_UUIDS; n++)
3318	history_uuids[n] = `0`;
3319	s->history_uuids_len = HISTORY_UUIDS;
3320	spin_unlock_irq(lock: &md->uuid_lock);
3321
3322	s->dev_disk_flags = md->flags;
3323	put_ldev(device);
3324	}
3325	s->dev_size = get_capacity(disk: device->vdisk);
3326	s->dev_read = device->read_cnt;
3327	s->dev_write = device->writ_cnt;
3328	s->dev_al_writes = device->al_writ_cnt;
3329	s->dev_bm_writes = device->bm_writ_cnt;
3330	s->dev_upper_pending = atomic_read(v: &device->ap_bio_cnt);
3331	s->dev_lower_pending = atomic_read(v: &device->local_cnt);
3332	s->dev_al_suspended = test_bit(AL_SUSPENDED, &device->flags);
3333	s->dev_exposed_data_uuid = device->ed_uuid;
3334	}
3335
3336	static int put_resource_in_arg0(struct netlink_callback cb, int* holder_nr)
3337	{
3338	if (cb->args[`0`]) {
3339	struct drbd_resource *resource =
3340	(struct drbd_resource *)cb->args[`0`];
3341	kref_put(kref: &resource->kref, release: drbd_destroy_resource);
3342	}
3343
3344	return `0`;
3345	}
3346
/* Dump-done callback for the devices dump: release the resource kept in cb->args[0]. */
int drbd_adm_dump_devices_done(struct netlink_callback *cb)
{
	return put_resource_in_arg0(cb, 7);
}
3350
/* Forward declaration; used by drbd_adm_dump_devices() below. */
static void device_to_info(struct device_info *, struct drbd_device *);
3352
3353	int drbd_adm_dump_devices(struct sk_buff skb, struct* netlink_callback *cb)
3354	{
3355	struct nlattr *resource_filter;
3356	struct drbd_resource *resource;
3357	struct drbd_device *device;
3358	int minor, err, retcode;
3359	struct drbd_genlmsghdr *dh;
3360	struct device_info device_info;
3361	struct device_statistics device_statistics;
3362	struct idr *idr_to_search;
3363
3364	resource = (struct drbd_resource *)cb->args[`0`];
3365	if (!cb->args[`0`] && !cb->args[`1`]) {
3366	resource_filter = find_cfg_context_attr(nlh: cb->nlh, attr: T_ctx_resource_name);
3367	if (resource_filter) {
3368	retcode = ERR_RES_NOT_KNOWN;
3369	resource = drbd_find_resource(name: nla_data(nla: resource_filter));
3370	if (!resource)
3371	goto put_result;
3372	cb->args[`0`] = (long)resource;
3373	}
3374	}
3375
3376	rcu_read_lock();
3377	minor = cb->args[`1`];
3378	idr_to_search = resource ? &resource->devices : &drbd_devices;
3379	device = idr_get_next(idr_to_search, nextid: &minor);
3380	if (!device) {
3381	err = `0`;
3382	goto out;
3383	}
3384	idr_for_each_entry_continue(idr_to_search, device, minor) {
3385	retcode = NO_ERROR;
3386	goto put_result; / only one iteration /
3387	}
3388	err = `0`;
3389	goto out; / no more devices /
3390
3391	put_result:
3392	dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
3393	seq: cb->nlh->nlmsg_seq, family: &drbd_genl_family,
3394	NLM_F_MULTI, cmd: DRBD_ADM_GET_DEVICES);
3395	err = -ENOMEM;
3396	if (!dh)
3397	goto out;
3398	dh->ret_code = retcode;
3399	dh->minor = -`1U`;
3400	if (retcode == NO_ERROR) {
3401	dh->minor = device->minor;
3402	err = nla_put_drbd_cfg_context(skb, resource: device->resource, NULL, device);
3403	if (err)
3404	goto out;
3405	if (get_ldev(device)) {
3406	struct disk_conf *disk_conf =
3407	rcu_dereference(device->ldev->disk_conf);
3408
3409	err = disk_conf_to_skb(skb, s: disk_conf, exclude_sensitive: !capable(CAP_SYS_ADMIN));
3410	put_ldev(device);
3411	if (err)
3412	goto out;
3413	}
3414	device_to_info(&device_info, device);
3415	err = device_info_to_skb(skb, s: &device_info, exclude_sensitive: !capable(CAP_SYS_ADMIN));
3416	if (err)
3417	goto out;
3418
3419	device_to_statistics(s: &device_statistics, device);
3420	err = device_statistics_to_skb(skb, s: &device_statistics, exclude_sensitive: !capable(CAP_SYS_ADMIN));
3421	if (err)
3422	goto out;
3423	cb->args[`1`] = minor + `1`;
3424	}
3425	genlmsg_end(skb, hdr: dh);
3426	err = `0`;
3427
3428	out:
3429	rcu_read_unlock();
3430	if (err)
3431	return err;
3432	return skb->len;
3433	}
3434
/* Dump-done callback for the connections dump: release the resource kept in cb->args[0]. */
int drbd_adm_dump_connections_done(struct netlink_callback *cb)
{
	return put_resource_in_arg0(cb, 6);
}
3439
/* Iteration mode that drbd_adm_dump_connections() stores in cb->args[1]. */
enum {
	SINGLE_RESOURCE = 0,
	ITERATE_RESOURCES = 1,
};
3441
3442	int drbd_adm_dump_connections(struct sk_buff skb, struct* netlink_callback *cb)
3443	{
3444	struct nlattr *resource_filter;
3445	struct drbd_resource resource = NULL, next_resource;
3446	struct drbd_connection *connection;
3447	int err = `0`, retcode;
3448	struct drbd_genlmsghdr *dh;
3449	struct connection_info connection_info;
3450	struct connection_statistics connection_statistics;
3451
3452	rcu_read_lock();
3453	resource = (struct drbd_resource *)cb->args[`0`];
3454	if (!cb->args[`0`]) {
3455	resource_filter = find_cfg_context_attr(nlh: cb->nlh, attr: T_ctx_resource_name);
3456	if (resource_filter) {
3457	retcode = ERR_RES_NOT_KNOWN;
3458	resource = drbd_find_resource(name: nla_data(nla: resource_filter));
3459	if (!resource)
3460	goto put_result;
3461	cb->args[`0`] = (long)resource;
3462	cb->args[`1`] = SINGLE_RESOURCE;
3463	}
3464	}
3465	if (!resource) {
3466	if (list_empty(head: &drbd_resources))
3467	goto out;
3468	resource = list_first_entry(&drbd_resources, struct drbd_resource, resources);
3469	kref_get(kref: &resource->kref);
3470	cb->args[`0`] = (long)resource;
3471	cb->args[`1`] = ITERATE_RESOURCES;
3472	}
3473
3474	next_resource:
3475	rcu_read_unlock();
3476	mutex_lock(&resource->conf_update);
3477	rcu_read_lock();
3478	if (cb->args[`2`]) {
3479	for_each_connection_rcu(connection, resource)
3480	if (connection == (struct drbd_connection *)cb->args[`2`])
3481	goto found_connection;
3482	/ connection was probably deleted /
3483	goto no_more_connections;
3484	}
3485	connection = list_entry(&resource->connections, struct drbd_connection, connections);
3486
3487	found_connection:
3488	list_for_each_entry_continue_rcu(connection, &resource->connections, connections) {
3489	if (!has_net_conf(connection))
3490	continue;
3491	retcode = NO_ERROR;
3492	goto put_result; / only one iteration /
3493	}
3494
3495	no_more_connections:
3496	if (cb->args[`1`] == ITERATE_RESOURCES) {
3497	for_each_resource_rcu(next_resource, &drbd_resources) {
3498	if (next_resource == resource)
3499	goto found_resource;
3500	}
3501	/ resource was probably deleted /
3502	}
3503	goto out;
3504
3505	found_resource:
3506	list_for_each_entry_continue_rcu(next_resource, &drbd_resources, resources) {
3507	mutex_unlock(lock: &resource->conf_update);
3508	kref_put(kref: &resource->kref, release: drbd_destroy_resource);
3509	resource = next_resource;
3510	kref_get(kref: &resource->kref);
3511	cb->args[`0`] = (long)resource;
3512	cb->args[`2`] = `0`;
3513	goto next_resource;
3514	}
3515	goto out; / no more resources /
3516
3517	put_result:
3518	dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
3519	seq: cb->nlh->nlmsg_seq, family: &drbd_genl_family,
3520	NLM_F_MULTI, cmd: DRBD_ADM_GET_CONNECTIONS);
3521	err = -ENOMEM;
3522	if (!dh)
3523	goto out;
3524	dh->ret_code = retcode;
3525	dh->minor = -`1U`;
3526	if (retcode == NO_ERROR) {
3527	struct net_conf *net_conf;
3528
3529	err = nla_put_drbd_cfg_context(skb, resource, connection, NULL);
3530	if (err)
3531	goto out;
3532	net_conf = rcu_dereference(connection->net_conf);
3533	if (net_conf) {
3534	err = net_conf_to_skb(skb, s: net_conf, exclude_sensitive: !capable(CAP_SYS_ADMIN));
3535	if (err)
3536	goto out;
3537	}
3538	connection_to_info(info: &connection_info, connection);
3539	err = connection_info_to_skb(skb, s: &connection_info, exclude_sensitive: !capable(CAP_SYS_ADMIN));
3540	if (err)
3541	goto out;
3542	connection_statistics.conn_congested = test_bit(NET_CONGESTED, &connection->flags);
3543	err = connection_statistics_to_skb(skb, s: &connection_statistics, exclude_sensitive: !capable(CAP_SYS_ADMIN));
3544	if (err)
3545	goto out;
3546	cb->args[`2`] = (long)connection;
3547	}
3548	genlmsg_end(skb, hdr: dh);
3549	err = `0`;
3550
3551	out:
3552	rcu_read_unlock();
3553	if (resource)
3554	mutex_unlock(lock: &resource->conf_update);
3555	if (err)
3556	return err;
3557	return skb->len;
3558	}
3559
/* Bit flags combined into peer_dev_flags by peer_device_to_statistics(). */
enum mdf_peer_flag {
	MDF_PEER_CONNECTED = 1 << 0,
	MDF_PEER_OUTDATED = 1 << 1,
	MDF_PEER_FENCING = 1 << 2,
	MDF_PEER_FULL_SYNC = 1 << 3,
};
3566
3567	static void peer_device_to_statistics(struct peer_device_statistics *s,
3568	struct drbd_peer_device *peer_device)
3569	{
3570	struct drbd_device *device = peer_device->device;
3571
3572	memset(s, `0`, sizeof(*s));
3573	s->peer_dev_received = device->recv_cnt;
3574	s->peer_dev_sent = device->send_cnt;
3575	s->peer_dev_pending = atomic_read(v: &device->ap_pending_cnt) +
3576	atomic_read(v: &device->rs_pending_cnt);
3577	s->peer_dev_unacked = atomic_read(v: &device->unacked_cnt);
3578	s->peer_dev_out_of_sync = drbd_bm_total_weight(device) << (BM_BLOCK_SHIFT - `9`);
3579	s->peer_dev_resync_failed = device->rs_failed << (BM_BLOCK_SHIFT - `9`);
3580	if (get_ldev(device)) {
3581	struct drbd_md *md = &device->ldev->md;
3582
3583	spin_lock_irq(lock: &md->uuid_lock);
3584	s->peer_dev_bitmap_uuid = md->uuid[UI_BITMAP];
3585	spin_unlock_irq(lock: &md->uuid_lock);
3586	s->peer_dev_flags =
3587	(drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND) ?
3588	MDF_PEER_CONNECTED : `0`) +
3589	(drbd_md_test_flag(device->ldev, MDF_CONSISTENT) &&
3590	!drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE) ?
3591	MDF_PEER_OUTDATED : `0`) +
3592	/ FIXME: MDF_PEER_FENCING? /
3593	(drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ?
3594	MDF_PEER_FULL_SYNC : `0`);
3595	put_ldev(device);
3596	}
3597	}
3598
/* Dump-done callback for the peer-devices dump: release the resource kept in cb->args[0]. */
int drbd_adm_dump_peer_devices_done(struct netlink_callback *cb)
{
	return put_resource_in_arg0(cb, 9);
}
3603
3604	int drbd_adm_dump_peer_devices(struct sk_buff skb, struct* netlink_callback *cb)
3605	{
3606	struct nlattr *resource_filter;
3607	struct drbd_resource *resource;
3608	struct drbd_device *device;
3609	struct drbd_peer_device *peer_device = NULL;
3610	int minor, err, retcode;
3611	struct drbd_genlmsghdr *dh;
3612	struct idr *idr_to_search;
3613
3614	resource = (struct drbd_resource *)cb->args[`0`];
3615	if (!cb->args[`0`] && !cb->args[`1`]) {
3616	resource_filter = find_cfg_context_attr(nlh: cb->nlh, attr: T_ctx_resource_name);
3617	if (resource_filter) {
3618	retcode = ERR_RES_NOT_KNOWN;
3619	resource = drbd_find_resource(name: nla_data(nla: resource_filter));
3620	if (!resource)
3621	goto put_result;
3622	}
3623	cb->args[`0`] = (long)resource;
3624	}
3625
3626	rcu_read_lock();
3627	minor = cb->args[`1`];
3628	idr_to_search = resource ? &resource->devices : &drbd_devices;
3629	device = idr_find(idr_to_search, id: minor);
3630	if (!device) {
3631	next_device:
3632	minor++;
3633	cb->args[`2`] = `0`;
3634	device = idr_get_next(idr_to_search, nextid: &minor);
3635	if (!device) {
3636	err = `0`;
3637	goto out;
3638	}
3639	}
3640	if (cb->args[`2`]) {
3641	for_each_peer_device(peer_device, device)
3642	if (peer_device == (struct drbd_peer_device *)cb->args[`2`])
3643	goto found_peer_device;
3644	/ peer device was probably deleted /
3645	goto next_device;
3646	}
3647	/ Make peer_device point to the list head (not the first entry). /
3648	peer_device = list_entry(&device->peer_devices, struct drbd_peer_device, peer_devices);
3649
3650	found_peer_device:
3651	list_for_each_entry_continue_rcu(peer_device, &device->peer_devices, peer_devices) {
3652	if (!has_net_conf(connection: peer_device->connection))
3653	continue;
3654	retcode = NO_ERROR;
3655	goto put_result; / only one iteration /
3656	}
3657	goto next_device;
3658
3659	put_result:
3660	dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
3661	seq: cb->nlh->nlmsg_seq, family: &drbd_genl_family,
3662	NLM_F_MULTI, cmd: DRBD_ADM_GET_PEER_DEVICES);
3663	err = -ENOMEM;
3664	if (!dh)
3665	goto out;
3666	dh->ret_code = retcode;
3667	dh->minor = -`1U`;
3668	if (retcode == NO_ERROR) {
3669	struct peer_device_info peer_device_info;
3670	struct peer_device_statistics peer_device_statistics;
3671
3672	dh->minor = minor;
3673	err = nla_put_drbd_cfg_context(skb, resource: device->resource, connection: peer_device->connection, device);
3674	if (err)
3675	goto out;
3676	peer_device_to_info(info: &peer_device_info, peer_device);
3677	err = peer_device_info_to_skb(skb, s: &peer_device_info, exclude_sensitive: !capable(CAP_SYS_ADMIN));
3678	if (err)
3679	goto out;
3680	peer_device_to_statistics(s: &peer_device_statistics, peer_device);
3681	err = peer_device_statistics_to_skb(skb, s: &peer_device_statistics, exclude_sensitive: !capable(CAP_SYS_ADMIN));
3682	if (err)
3683	goto out;
3684	cb->args[`1`] = minor;
3685	cb->args[`2`] = (long)peer_device;
3686	}
3687	genlmsg_end(skb, hdr: dh);
3688	err = `0`;
3689
3690	out:
3691	rcu_read_unlock();
3692	if (err)
3693	return err;
3694	return skb->len;
3695	}
3696	/*
3697	* Return the connection of @resource if @resource has exactly one connection.
3698	*/
3699	static struct drbd_connection the_only_connection(struct* drbd_resource *resource)
3700	{
3701	struct list_head *connections = &resource->connections;
3702
3703	if (list_empty(head: connections) \|\| connections->next->next != connections)
3704	return NULL;
3705	return list_first_entry(&resource->connections, struct drbd_connection, connections);
3706	}
3707
3708	static int nla_put_status_info(struct sk_buff skb, struct* drbd_device *device,
3709	const struct sib_info *sib)
3710	{
3711	struct drbd_resource *resource = device->resource;
3712	struct state_info si = NULL; /* for sizeof(si->member); /
3713	struct nlattr *nla;
3714	int got_ldev;
3715	int err = `0`;
3716	int exclude_sensitive;
3717
3718	/ If sib != NULL, this is drbd_bcast_event, which anyone can listen*
3719	* to. So we better exclude_sensitive information.
3720	*
3721	* If sib == NULL, this is drbd_adm_get_status, executed synchronously
3722	* in the context of the requesting user process. Exclude sensitive
3723	* information, unless current has superuser.
3724	*
3725	* NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and
3726	* relies on the current implementation of netlink_dump(), which
3727	* executes the dump callback successively from netlink_recvmsg(),
3728	* always in the context of the receiving process */
3729	exclude_sensitive = sib \|\| !capable(CAP_SYS_ADMIN);
3730
3731	got_ldev = get_ldev(device);
3732
3733	/ We need to add connection name and volume number information still.*
3734	* Minor number is in drbd_genlmsghdr. */
3735	if (nla_put_drbd_cfg_context(skb, resource, connection: the_only_connection(resource), device))
3736	goto nla_put_failure;
3737
3738	if (res_opts_to_skb(skb, s: &device->resource->res_opts, exclude_sensitive))
3739	goto nla_put_failure;
3740
3741	rcu_read_lock();
3742	if (got_ldev) {
3743	struct disk_conf *disk_conf;
3744
3745	disk_conf = rcu_dereference(device->ldev->disk_conf);
3746	err = disk_conf_to_skb(skb, s: disk_conf, exclude_sensitive);
3747	}
3748	if (!err) {
3749	struct net_conf *nc;
3750
3751	nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
3752	if (nc)
3753	err = net_conf_to_skb(skb, s: nc, exclude_sensitive);
3754	}
3755	rcu_read_unlock();
3756	if (err)
3757	goto nla_put_failure;
3758
3759	nla = nla_nest_start_noflag(skb, attrtype: DRBD_NLA_STATE_INFO);
3760	if (!nla)
3761	goto nla_put_failure;
3762	if (nla_put_u32(skb, attrtype: T_sib_reason, value: sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) \|\|
3763	nla_put_u32(skb, attrtype: T_current_state, value: device->state.i) \|\|
3764	nla_put_u64_0pad(skb, attrtype: T_ed_uuid, value: device->ed_uuid) \|\|
3765	nla_put_u64_0pad(skb, attrtype: T_capacity, value: get_capacity(disk: device->vdisk)) \|\|
3766	nla_put_u64_0pad(skb, attrtype: T_send_cnt, value: device->send_cnt) \|\|
3767	nla_put_u64_0pad(skb, attrtype: T_recv_cnt, value: device->recv_cnt) \|\|
3768	nla_put_u64_0pad(skb, attrtype: T_read_cnt, value: device->read_cnt) \|\|
3769	nla_put_u64_0pad(skb, attrtype: T_writ_cnt, value: device->writ_cnt) \|\|
3770	nla_put_u64_0pad(skb, attrtype: T_al_writ_cnt, value: device->al_writ_cnt) \|\|
3771	nla_put_u64_0pad(skb, attrtype: T_bm_writ_cnt, value: device->bm_writ_cnt) \|\|
3772	nla_put_u32(skb, attrtype: T_ap_bio_cnt, value: atomic_read(v: &device->ap_bio_cnt)) \|\|
3773	nla_put_u32(skb, attrtype: T_ap_pending_cnt, value: atomic_read(v: &device->ap_pending_cnt)) \|\|
3774	nla_put_u32(skb, attrtype: T_rs_pending_cnt, value: atomic_read(v: &device->rs_pending_cnt)))
3775	goto nla_put_failure;
3776
3777	if (got_ldev) {
3778	int err;
3779
3780	spin_lock_irq(lock: &device->ldev->md.uuid_lock);
3781	err = nla_put(skb, attrtype: T_uuids, attrlen: sizeof(si->uuids), data: device->ldev->md.uuid);
3782	spin_unlock_irq(lock: &device->ldev->md.uuid_lock);
3783
3784	if (err)
3785	goto nla_put_failure;
3786
3787	if (nla_put_u32(skb, attrtype: T_disk_flags, value: device->ldev->md.flags) \|\|
3788	nla_put_u64_0pad(skb, attrtype: T_bits_total, value: drbd_bm_bits(device)) \|\|
3789	nla_put_u64_0pad(skb, attrtype: T_bits_oos,
3790	value: drbd_bm_total_weight(device)))
3791	goto nla_put_failure;
3792	if (C_SYNC_SOURCE <= device->state.conn &&
3793	C_PAUSED_SYNC_T >= device->state.conn) {
3794	if (nla_put_u64_0pad(skb, attrtype: T_bits_rs_total,
3795	value: device->rs_total) \|\|
3796	nla_put_u64_0pad(skb, attrtype: T_bits_rs_failed,
3797	value: device->rs_failed))
3798	goto nla_put_failure;
3799	}
3800	}
3801
3802	if (sib) {
3803	switch(sib->sib_reason) {
3804	case SIB_SYNC_PROGRESS:
3805	case SIB_GET_STATUS_REPLY:
3806	break;
3807	case SIB_STATE_CHANGE:
3808	if (nla_put_u32(skb, attrtype: T_prev_state, value: sib->os.i) \|\|
3809	nla_put_u32(skb, attrtype: T_new_state, value: sib->ns.i))
3810	goto nla_put_failure;
3811	break;
3812	case SIB_HELPER_POST:
3813	if (nla_put_u32(skb, attrtype: T_helper_exit_code,
3814	value: sib->helper_exit_code))
3815	goto nla_put_failure;
3816	fallthrough;
3817	case SIB_HELPER_PRE:
3818	if (nla_put_string(skb, attrtype: T_helper, str: sib->helper_name))
3819	goto nla_put_failure;
3820	break;
3821	}
3822	}
3823	nla_nest_end(skb, start: nla);
3824
3825	if (`0`)
3826	nla_put_failure:
3827	err = -EMSGSIZE;
3828	if (got_ldev)
3829	put_ldev(device);
3830	return err;
3831	}
3832
3833	int drbd_adm_get_status(struct sk_buff skb, struct* genl_info *info)
3834	{
3835	struct drbd_config_context adm_ctx;
3836	enum drbd_ret_code retcode;
3837	int err;
3838
3839	retcode = drbd_adm_prepare(adm_ctx: &adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3840	if (!adm_ctx.reply_skb)
3841	return retcode;
3842	if (retcode != NO_ERROR)
3843	goto out;
3844
3845	err = nla_put_status_info(skb: adm_ctx.reply_skb, device: adm_ctx.device, NULL);
3846	if (err) {
3847	nlmsg_free(skb: adm_ctx.reply_skb);
3848	return err;
3849	}
3850	out:
3851	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode);
3852	return `0`;
3853	}
3854
3855	static int get_one_status(struct sk_buff skb, struct* netlink_callback *cb)
3856	{
3857	struct drbd_device *device;
3858	struct drbd_genlmsghdr *dh;
3859	struct drbd_resource pos = (struct* drbd_resource *)cb->args[`0`];
3860	struct drbd_resource *resource = NULL;
3861	struct drbd_resource *tmp;
3862	unsigned volume = cb->args[`1`];
3863
3864	/ Open coded, deferred, iteration:*
3865	* for_each_resource_safe(resource, tmp, &drbd_resources) {
3866	* connection = "first connection of resource or undefined";
3867	* idr_for_each_entry(&resource->devices, device, i) {
3868	* ...
3869	* }
3870	* }
3871	* where resource is cb->args[0];
3872	* and i is cb->args[1];
3873	*
3874	* cb->args[2] indicates if we shall loop over all resources,
3875	* or just dump all volumes of a single resource.
3876	*
3877	* This may miss entries inserted after this dump started,
3878	* or entries deleted before they are reached.
3879	*
3880	* We need to make sure the device won't disappear while
3881	* we are looking at it, and revalidate our iterators
3882	* on each iteration.
3883	*/
3884
3885	/ synchronize with conn_create()/drbd_destroy_connection() /
3886	rcu_read_lock();
3887	/ revalidate iterator position /
3888	for_each_resource_rcu(tmp, &drbd_resources) {
3889	if (pos == NULL) {
3890	/ first iteration /
3891	pos = tmp;
3892	resource = pos;
3893	break;
3894	}
3895	if (tmp == pos) {
3896	resource = pos;
3897	break;
3898	}
3899	}
3900	if (resource) {
3901	next_resource:
3902	device = idr_get_next(&resource->devices, nextid: &volume);
3903	if (!device) {
3904	/ No more volumes to dump on this resource.*
3905	* Advance resource iterator. */
3906	pos = list_entry_rcu(resource->resources.next,
3907	struct drbd_resource, resources);
3908	/ Did we dump any volume of this resource yet? /
3909	if (volume != `0`) {
3910	/ If we reached the end of the list,*
3911	* or only a single resource dump was requested,
3912	* we are done. */
3913	if (&pos->resources == &drbd_resources \|\| cb->args[`2`])
3914	goto out;
3915	volume = `0`;
3916	resource = pos;
3917	goto next_resource;
3918	}
3919	}
3920
3921	dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
3922	seq: cb->nlh->nlmsg_seq, family: &drbd_genl_family,
3923	NLM_F_MULTI, cmd: DRBD_ADM_GET_STATUS);
3924	if (!dh)
3925	goto out;
3926
3927	if (!device) {
3928	/ This is a connection without a single volume.*
3929	* Suprisingly enough, it may have a network
3930	* configuration. */
3931	struct drbd_connection *connection;
3932
3933	dh->minor = -`1U`;
3934	dh->ret_code = NO_ERROR;
3935	connection = the_only_connection(resource);
3936	if (nla_put_drbd_cfg_context(skb, resource, connection, NULL))
3937	goto cancel;
3938	if (connection) {
3939	struct net_conf *nc;
3940
3941	nc = rcu_dereference(connection->net_conf);
3942	if (nc && net_conf_to_skb(skb, s: nc, exclude_sensitive: `1`) != `0`)
3943	goto cancel;
3944	}
3945	goto done;
3946	}
3947
3948	D_ASSERT(device, device->vnr == volume);
3949	D_ASSERT(device, device->resource == resource);
3950
3951	dh->minor = device_to_minor(device);
3952	dh->ret_code = NO_ERROR;
3953
3954	if (nla_put_status_info(skb, device, NULL)) {
3955	cancel:
3956	genlmsg_cancel(skb, hdr: dh);
3957	goto out;
3958	}
3959	done:
3960	genlmsg_end(skb, hdr: dh);
3961	}
3962
3963	out:
3964	rcu_read_unlock();
3965	/ where to start the next iteration /
3966	cb->args[`0`] = (long)pos;
3967	cb->args[`1`] = (pos == resource) ? volume + `1` : `0`;
3968
3969	/ No more resources/volumes/minors found results in an empty skb.*
3970	* Which will terminate the dump. */
3971	return skb->len;
3972	}
3973
3974	/*
3975	* Request status of all resources, or of all volumes within a single resource.
3976	*
3977	* This is a dump, as the answer may not fit in a single reply skb otherwise.
3978	* Which means we cannot use the family->attrbuf or other such members, because
3979	* dump is NOT protected by the genl_lock(). During dump, we only have access
3980	* to the incoming skb, and need to opencode "parsing" of the nlattr payload.
3981	*
3982	* Once things are setup properly, we call into get_one_status().
3983	*/
3984	int drbd_adm_get_status_all(struct sk_buff skb, struct* netlink_callback *cb)
3985	{
3986	const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
3987	struct nlattr *nla;
3988	const char *resource_name;
3989	struct drbd_resource *resource;
3990	int maxtype;
3991
3992	/ Is this a followup call? /
3993	if (cb->args[`0`]) {
3994	/ ... of a single resource dump,*
3995	* and the resource iterator has been advanced already? */
3996	if (cb->args[`2`] && cb->args[`2`] != cb->args[`0`])
3997	return `0`; / DONE. /
3998	goto dump;
3999	}
4000
4001	/ First call (from netlink_dump_start). We need to figure out*
4002	* which resource(s) the user wants us to dump. */
4003	nla = nla_find(head: nlmsg_attrdata(nlh: cb->nlh, hdrlen),
4004	len: nlmsg_attrlen(nlh: cb->nlh, hdrlen),
4005	attrtype: DRBD_NLA_CFG_CONTEXT);
4006
4007	/ No explicit context given. Dump all. /
4008	if (!nla)
4009	goto dump;
4010	maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - `1`;
4011	nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name));
4012	if (IS_ERR(ptr: nla))
4013	return PTR_ERR(ptr: nla);
4014	/ context given, but no name present? /
4015	if (!nla)
4016	return -EINVAL;
4017	resource_name = nla_data(nla);
4018	if (!*resource_name)
4019	return -ENODEV;
4020	resource = drbd_find_resource(name: resource_name);
4021	if (!resource)
4022	return -ENODEV;
4023
4024	kref_put(kref: &resource->kref, release: drbd_destroy_resource); / get_one_status() revalidates the resource /
4025
4026	/ prime iterators, and set "filter" mode mark:*
4027	* only dump this connection. */
4028	cb->args[`0`] = (long)resource;
4029	/ cb->args[1] = 0; passed in this way. /
4030	cb->args[`2`] = (long)resource;
4031
4032	dump:
4033	return get_one_status(skb, cb);
4034	}
4035
4036	int drbd_adm_get_timeout_type(struct sk_buff skb, struct* genl_info *info)
4037	{
4038	struct drbd_config_context adm_ctx;
4039	enum drbd_ret_code retcode;
4040	struct timeout_parms tp;
4041	int err;
4042
4043	retcode = drbd_adm_prepare(adm_ctx: &adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
4044	if (!adm_ctx.reply_skb)
4045	return retcode;
4046	if (retcode != NO_ERROR)
4047	goto out;
4048
4049	tp.timeout_type =
4050	adm_ctx.device->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
4051	test_bit(USE_DEGR_WFC_T, &adm_ctx.device->flags) ? UT_DEGRADED :
4052	UT_DEFAULT;
4053
4054	err = timeout_parms_to_priv_skb(skb: adm_ctx.reply_skb, s: &tp);
4055	if (err) {
4056	nlmsg_free(skb: adm_ctx.reply_skb);
4057	return err;
4058	}
4059	out:
4060	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode);
4061	return `0`;
4062	}
4063
4064	int drbd_adm_start_ov(struct sk_buff skb, struct* genl_info *info)
4065	{
4066	struct drbd_config_context adm_ctx;
4067	struct drbd_device *device;
4068	enum drbd_ret_code retcode;
4069	struct start_ov_parms parms;
4070
4071	retcode = drbd_adm_prepare(adm_ctx: &adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
4072	if (!adm_ctx.reply_skb)
4073	return retcode;
4074	if (retcode != NO_ERROR)
4075	goto out;
4076
4077	device = adm_ctx.device;
4078
4079	/ resume from last known position, if possible /
4080	parms.ov_start_sector = device->ov_start_sector;
4081	parms.ov_stop_sector = ULLONG_MAX;
4082	if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
4083	int err = start_ov_parms_from_attrs(s: &parms, info);
4084	if (err) {
4085	retcode = ERR_MANDATORY_TAG;
4086	drbd_msg_put_info(skb: adm_ctx.reply_skb, info: from_attrs_err_to_txt(err));
4087	goto out;
4088	}
4089	}
4090	mutex_lock(&adm_ctx.resource->adm_mutex);
4091
4092	/ w_make_ov_request expects position to be aligned /
4093	device->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-`1`);
4094	device->ov_stop_sector = parms.ov_stop_sector;
4095
4096	/ If there is still bitmap IO pending, e.g. previous resync or verify*
4097	* just being finished, wait for it before requesting a new resync. */
4098	drbd_suspend_io(device);
4099	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
4100	retcode = drbd_request_state(device, NS(conn, C_VERIFY_S));
4101	drbd_resume_io(device);
4102
4103	mutex_unlock(lock: &adm_ctx.resource->adm_mutex);
4104	out:
4105	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode);
4106	return `0`;
4107	}
4108
4109
4110	int drbd_adm_new_c_uuid(struct sk_buff skb, struct* genl_info *info)
4111	{
4112	struct drbd_config_context adm_ctx;
4113	struct drbd_device *device;
4114	enum drbd_ret_code retcode;
4115	int skip_initial_sync = `0`;
4116	int err;
4117	struct new_c_uuid_parms args;
4118
4119	retcode = drbd_adm_prepare(adm_ctx: &adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
4120	if (!adm_ctx.reply_skb)
4121	return retcode;
4122	if (retcode != NO_ERROR)
4123	goto out_nolock;
4124
4125	device = adm_ctx.device;
4126	memset(&args, `0`, sizeof(args));
4127	if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
4128	err = new_c_uuid_parms_from_attrs(s: &args, info);
4129	if (err) {
4130	retcode = ERR_MANDATORY_TAG;
4131	drbd_msg_put_info(skb: adm_ctx.reply_skb, info: from_attrs_err_to_txt(err));
4132	goto out_nolock;
4133	}
4134	}
4135
4136	mutex_lock(&adm_ctx.resource->adm_mutex);
4137	mutex_lock(device->state_mutex); / Protects us against serialized state changes. /
4138
4139	if (!get_ldev(device)) {
4140	retcode = ERR_NO_DISK;
4141	goto out;
4142	}
4143
4144	/ this is "skip initial sync", assume to be clean /
4145	if (device->state.conn == C_CONNECTED &&
4146	first_peer_device(device)->connection->agreed_pro_version >= `90` &&
4147	device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
4148	drbd_info(device, "Preparing to skip initial sync\n");
4149	skip_initial_sync = `1`;
4150	} else if (device->state.conn != C_STANDALONE) {
4151	retcode = ERR_CONNECTED;
4152	goto out_dec;
4153	}
4154
4155	drbd_uuid_set(device, idx: UI_BITMAP, val: `0`); / Rotate UI_BITMAP to History 1, etc... /
4156	drbd_uuid_new_current(device); / New current, previous to UI_BITMAP /
4157
4158	if (args.clear_bm) {
4159	err = drbd_bitmap_io(device, io_fn: &drbd_bmio_clear_n_write,
4160	why: "clear_n_write from new_c_uuid", flags: BM_LOCKED_MASK, NULL);
4161	if (err) {
4162	drbd_err(device, "Writing bitmap failed with %d\n", err);
4163	retcode = ERR_IO_MD_DISK;
4164	}
4165	if (skip_initial_sync) {
4166	drbd_send_uuids_skip_initial_sync(first_peer_device(device));
4167	_drbd_uuid_set(device, idx: UI_BITMAP, val: `0`);
4168	drbd_print_uuids(device, text: "cleared bitmap UUID");
4169	spin_lock_irq(lock: &device->resource->req_lock);
4170	_drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
4171	CS_VERBOSE, NULL);
4172	spin_unlock_irq(lock: &device->resource->req_lock);
4173	}
4174	}
4175
4176	drbd_md_sync(device);
4177	out_dec:
4178	put_ldev(device);
4179	out:
4180	mutex_unlock(lock: device->state_mutex);
4181	mutex_unlock(lock: &adm_ctx.resource->adm_mutex);
4182	out_nolock:
4183	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode);
4184	return `0`;
4185	}
4186
4187	static enum drbd_ret_code
4188	drbd_check_resource_name(struct drbd_config_context *adm_ctx)
4189	{
4190	const char *name = adm_ctx->resource_name;
4191	if (!name \|\| !name[`0`]) {
4192	drbd_msg_put_info(skb: adm_ctx->reply_skb, info: "resource name missing");
4193	return ERR_MANDATORY_TAG;
4194	}
4195	/ if we want to use these in sysfs/configfs/debugfs some day,*
4196	* we must not allow slashes */
4197	if (strchr(name, `'/'`)) {
4198	drbd_msg_put_info(skb: adm_ctx->reply_skb, info: "invalid resource name");
4199	return ERR_INVALID_REQUEST;
4200	}
4201	return NO_ERROR;
4202	}
4203
4204	static void resource_to_info(struct resource_info *info,
4205	struct drbd_resource *resource)
4206	{
4207	info->res_role = conn_highest_role(connection: first_connection(resource));
4208	info->res_susp = resource->susp;
4209	info->res_susp_nod = resource->susp_nod;
4210	info->res_susp_fen = resource->susp_fen;
4211	}
4212
4213	int drbd_adm_new_resource(struct sk_buff skb, struct* genl_info *info)
4214	{
4215	struct drbd_connection *connection;
4216	struct drbd_config_context adm_ctx;
4217	enum drbd_ret_code retcode;
4218	struct res_opts res_opts;
4219	int err;
4220
4221	retcode = drbd_adm_prepare(adm_ctx: &adm_ctx, skb, info, flags: `0`);
4222	if (!adm_ctx.reply_skb)
4223	return retcode;
4224	if (retcode != NO_ERROR)
4225	goto out;
4226
4227	set_res_opts_defaults(&res_opts);
4228	err = res_opts_from_attrs(s: &res_opts, info);
4229	if (err && err != -ENOMSG) {
4230	retcode = ERR_MANDATORY_TAG;
4231	drbd_msg_put_info(skb: adm_ctx.reply_skb, info: from_attrs_err_to_txt(err));
4232	goto out;
4233	}
4234
4235	retcode = drbd_check_resource_name(adm_ctx: &adm_ctx);
4236	if (retcode != NO_ERROR)
4237	goto out;
4238
4239	if (adm_ctx.resource) {
4240	if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
4241	retcode = ERR_INVALID_REQUEST;
4242	drbd_msg_put_info(skb: adm_ctx.reply_skb, info: "resource exists");
4243	}
4244	/ else: still NO_ERROR /
4245	goto out;
4246	}
4247
4248	/ not yet safe for genl_family.parallel_ops /
4249	mutex_lock(&resources_mutex);
4250	connection = conn_create(name: adm_ctx.resource_name, res_opts: &res_opts);
4251	mutex_unlock(lock: &resources_mutex);
4252
4253	if (connection) {
4254	struct resource_info resource_info;
4255
4256	mutex_lock(&notification_mutex);
4257	resource_to_info(info: &resource_info, resource: connection->resource);
4258	notify_resource_state(NULL, `0`, connection->resource,
4259	&resource_info, NOTIFY_CREATE);
4260	mutex_unlock(lock: &notification_mutex);
4261	} else
4262	retcode = ERR_NOMEM;
4263
4264	out:
4265	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode);
4266	return `0`;
4267	}
4268
4269	static void device_to_info(struct device_info *info,
4270	struct drbd_device *device)
4271	{
4272	info->dev_disk_state = device->state.disk;
4273	}
4274
4275
4276	int drbd_adm_new_minor(struct sk_buff skb, struct* genl_info *info)
4277	{
4278	struct drbd_config_context adm_ctx;
4279	struct drbd_genlmsghdr *dh = genl_info_userhdr(info);
4280	enum drbd_ret_code retcode;
4281
4282	retcode = drbd_adm_prepare(adm_ctx: &adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
4283	if (!adm_ctx.reply_skb)
4284	return retcode;
4285	if (retcode != NO_ERROR)
4286	goto out;
4287
4288	if (dh->minor > MINORMASK) {
4289	drbd_msg_put_info(skb: adm_ctx.reply_skb, info: "requested minor out of range");
4290	retcode = ERR_INVALID_REQUEST;
4291	goto out;
4292	}
4293	if (adm_ctx.volume > DRBD_VOLUME_MAX) {
4294	drbd_msg_put_info(skb: adm_ctx.reply_skb, info: "requested volume id out of range");
4295	retcode = ERR_INVALID_REQUEST;
4296	goto out;
4297	}
4298
4299	/ drbd_adm_prepare made sure already*
4300	* that first_peer_device(device)->connection and device->vnr match the request. */
4301	if (adm_ctx.device) {
4302	if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
4303	retcode = ERR_MINOR_OR_VOLUME_EXISTS;
4304	/ else: still NO_ERROR /
4305	goto out;
4306	}
4307
4308	mutex_lock(&adm_ctx.resource->adm_mutex);
4309	retcode = drbd_create_device(adm_ctx: &adm_ctx, minor: dh->minor);
4310	if (retcode == NO_ERROR) {
4311	struct drbd_device *device;
4312	struct drbd_peer_device *peer_device;
4313	struct device_info info;
4314	unsigned int peer_devices = `0`;
4315	enum drbd_notification_type flags;
4316
4317	device = minor_to_device(minor: dh->minor);
4318	for_each_peer_device(peer_device, device) {
4319	if (!has_net_conf(connection: peer_device->connection))
4320	continue;
4321	peer_devices++;
4322	}
4323
4324	device_to_info(info: &info, device);
4325	mutex_lock(&notification_mutex);
4326	flags = (peer_devices--) ? NOTIFY_CONTINUES : `0`;
4327	notify_device_state(NULL, `0`, device, &info, NOTIFY_CREATE \| flags);
4328	for_each_peer_device(peer_device, device) {
4329	struct peer_device_info peer_device_info;
4330
4331	if (!has_net_conf(connection: peer_device->connection))
4332	continue;
4333	peer_device_to_info(info: &peer_device_info, peer_device);
4334	flags = (peer_devices--) ? NOTIFY_CONTINUES : `0`;
4335	notify_peer_device_state(NULL, `0`, peer_device, &peer_device_info,
4336	NOTIFY_CREATE \| flags);
4337	}
4338	mutex_unlock(lock: &notification_mutex);
4339	}
4340	mutex_unlock(lock: &adm_ctx.resource->adm_mutex);
4341	out:
4342	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode);
4343	return `0`;
4344	}
4345
4346	static enum drbd_ret_code adm_del_minor(struct drbd_device *device)
4347	{
4348	struct drbd_peer_device *peer_device;
4349
4350	if (device->state.disk == D_DISKLESS &&
4351	/ no need to be device->state.conn == C_STANDALONE &&*
4352	* we may want to delete a minor from a live replication group.
4353	*/
4354	device->state.role == R_SECONDARY) {
4355	struct drbd_connection *connection =
4356	first_connection(resource: device->resource);
4357
4358	_drbd_request_state(device, NS(conn, C_WF_REPORT_PARAMS),
4359	CS_VERBOSE + CS_WAIT_COMPLETE);
4360
4361	/ If the state engine hasn't stopped the sender thread yet, we*
4362	* need to flush the sender work queue before generating the
4363	* DESTROY events here. */
4364	if (get_t_state(thi: &connection->worker) == RUNNING)
4365	drbd_flush_workqueue(work_queue: &connection->sender_work);
4366
4367	mutex_lock(&notification_mutex);
4368	for_each_peer_device(peer_device, device) {
4369	if (!has_net_conf(connection: peer_device->connection))
4370	continue;
4371	notify_peer_device_state(NULL, `0`, peer_device, NULL,
4372	NOTIFY_DESTROY \| NOTIFY_CONTINUES);
4373	}
4374	notify_device_state(NULL, `0`, device, NULL, NOTIFY_DESTROY);
4375	mutex_unlock(lock: &notification_mutex);
4376
4377	drbd_delete_device(device);
4378	return NO_ERROR;
4379	} else
4380	return ERR_MINOR_CONFIGURED;
4381	}
4382
4383	int drbd_adm_del_minor(struct sk_buff skb, struct* genl_info *info)
4384	{
4385	struct drbd_config_context adm_ctx;
4386	enum drbd_ret_code retcode;
4387
4388	retcode = drbd_adm_prepare(adm_ctx: &adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
4389	if (!adm_ctx.reply_skb)
4390	return retcode;
4391	if (retcode != NO_ERROR)
4392	goto out;
4393
4394	mutex_lock(&adm_ctx.resource->adm_mutex);
4395	retcode = adm_del_minor(device: adm_ctx.device);
4396	mutex_unlock(lock: &adm_ctx.resource->adm_mutex);
4397	out:
4398	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode);
4399	return `0`;
4400	}
4401
4402	static int adm_del_resource(struct drbd_resource *resource)
4403	{
4404	struct drbd_connection *connection;
4405
4406	for_each_connection(connection, resource) {
4407	if (connection->cstate > C_STANDALONE)
4408	return ERR_NET_CONFIGURED;
4409	}
4410	if (!idr_is_empty(idr: &resource->devices))
4411	return ERR_RES_IN_USE;
4412
4413	/ The state engine has stopped the sender thread, so we don't*
4414	* need to flush the sender work queue before generating the
4415	* DESTROY event here. */
4416	mutex_lock(&notification_mutex);
4417	notify_resource_state(NULL, `0`, resource, NULL, NOTIFY_DESTROY);
4418	mutex_unlock(lock: &notification_mutex);
4419
4420	mutex_lock(&resources_mutex);
4421	list_del_rcu(entry: &resource->resources);
4422	mutex_unlock(lock: &resources_mutex);
4423	/ Make sure all threads have actually stopped: state handling only*
4424	* does drbd_thread_stop_nowait(). */
4425	list_for_each_entry(connection, &resource->connections, connections)
4426	drbd_thread_stop(thi: &connection->worker);
4427	synchronize_rcu();
4428	drbd_free_resource(resource);
4429	return NO_ERROR;
4430	}
4431
4432	int drbd_adm_down(struct sk_buff skb, struct* genl_info *info)
4433	{
4434	struct drbd_config_context adm_ctx;
4435	struct drbd_resource *resource;
4436	struct drbd_connection *connection;
4437	struct drbd_device *device;
4438	int retcode; / enum drbd_ret_code rsp. enum drbd_state_rv /
4439	unsigned i;
4440
4441	retcode = drbd_adm_prepare(adm_ctx: &adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
4442	if (!adm_ctx.reply_skb)
4443	return retcode;
4444	if (retcode != NO_ERROR)
4445	goto finish;
4446
4447	resource = adm_ctx.resource;
4448	mutex_lock(&resource->adm_mutex);
4449	/ demote /
4450	for_each_connection(connection, resource) {
4451	struct drbd_peer_device *peer_device;
4452
4453	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
4454	retcode = drbd_set_role(device: peer_device->device, new_role: R_SECONDARY, force: `0`);
4455	if (retcode < SS_SUCCESS) {
4456	drbd_msg_put_info(skb: adm_ctx.reply_skb, info: "failed to demote");
4457	goto out;
4458	}
4459	}
4460
4461	retcode = conn_try_disconnect(connection, force: `0`);
4462	if (retcode < SS_SUCCESS) {
4463	drbd_msg_put_info(skb: adm_ctx.reply_skb, info: "failed to disconnect");
4464	goto out;
4465	}
4466	}
4467
4468	/ detach /
4469	idr_for_each_entry(&resource->devices, device, i) {
4470	retcode = adm_detach(device, force: `0`);
4471	if (retcode < SS_SUCCESS \|\| retcode > NO_ERROR) {
4472	drbd_msg_put_info(skb: adm_ctx.reply_skb, info: "failed to detach");
4473	goto out;
4474	}
4475	}
4476
4477	/ delete volumes /
4478	idr_for_each_entry(&resource->devices, device, i) {
4479	retcode = adm_del_minor(device);
4480	if (retcode != NO_ERROR) {
4481	/ "can not happen" /
4482	drbd_msg_put_info(skb: adm_ctx.reply_skb, info: "failed to delete volume");
4483	goto out;
4484	}
4485	}
4486
4487	retcode = adm_del_resource(resource);
4488	out:
4489	mutex_unlock(lock: &resource->adm_mutex);
4490	finish:
4491	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode);
4492	return `0`;
4493	}
4494
4495	int drbd_adm_del_resource(struct sk_buff skb, struct* genl_info *info)
4496	{
4497	struct drbd_config_context adm_ctx;
4498	struct drbd_resource *resource;
4499	enum drbd_ret_code retcode;
4500
4501	retcode = drbd_adm_prepare(adm_ctx: &adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
4502	if (!adm_ctx.reply_skb)
4503	return retcode;
4504	if (retcode != NO_ERROR)
4505	goto finish;
4506	resource = adm_ctx.resource;
4507
4508	mutex_lock(&resource->adm_mutex);
4509	retcode = adm_del_resource(resource);
4510	mutex_unlock(lock: &resource->adm_mutex);
4511	finish:
4512	drbd_adm_finish(adm_ctx: &adm_ctx, info, retcode);
4513	return `0`;
4514	}
4515
4516	void drbd_bcast_event(struct drbd_device device, const* struct sib_info *sib)
4517	{
4518	struct sk_buff *msg;
4519	struct drbd_genlmsghdr *d_out;
4520	unsigned seq;
4521	int err = -ENOMEM;
4522
4523	seq = atomic_inc_return(v: &drbd_genl_seq);
4524	msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
4525	if (!msg)
4526	goto failed;
4527
4528	err = -EMSGSIZE;
4529	d_out = genlmsg_put(skb: msg, portid: `0`, seq, family: &drbd_genl_family, flags: `0`, cmd: DRBD_EVENT);
4530	if (!d_out) / cannot happen, but anyways. /
4531	goto nla_put_failure;
4532	d_out->minor = device_to_minor(device);
4533	d_out->ret_code = NO_ERROR;
4534
4535	if (nla_put_status_info(skb: msg, device, sib))
4536	goto nla_put_failure;
4537	genlmsg_end(skb: msg, hdr: d_out);
4538	err = drbd_genl_multicast_events(skb: msg, GFP_NOWAIT);
4539	/ msg has been consumed or freed in netlink_broadcast() /
4540	if (err && err != -ESRCH)
4541	goto failed;
4542
4543	return;
4544
4545	nla_put_failure:
4546	nlmsg_free(skb: msg);
4547	failed:
4548	drbd_err(device, "Error %d while broadcasting event. "
4549	"Event seq:%u sib_reason:%u\n",
4550	err, seq, sib->sib_reason);
4551	}
4552
4553	static int nla_put_notification_header(struct sk_buff *msg,
4554	enum drbd_notification_type type)
4555	{
4556	struct drbd_notification_header nh = {
4557	.nh_type = type,
4558	};
4559
4560	return drbd_notification_header_to_skb(skb: msg, s: &nh, exclude_sensitive: true);
4561	}
4562
4563	int notify_resource_state(struct sk_buff *skb,
4564	unsigned int seq,
4565	struct drbd_resource *resource,
4566	struct resource_info *resource_info,
4567	enum drbd_notification_type type)
4568	{
4569	struct resource_statistics resource_statistics;
4570	struct drbd_genlmsghdr *dh;
4571	bool multicast = false;
4572	int err;
4573
4574	if (!skb) {
4575	seq = atomic_inc_return(v: &notify_genl_seq);
4576	skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
4577	err = -ENOMEM;
4578	if (!skb)
4579	goto failed;
4580	multicast = true;
4581	}
4582
4583	err = -EMSGSIZE;
4584	dh = genlmsg_put(skb, portid: `0`, seq, family: &drbd_genl_family, flags: `0`, cmd: DRBD_RESOURCE_STATE);
4585	if (!dh)
4586	goto nla_put_failure;
4587	dh->minor = -`1U`;
4588	dh->ret_code = NO_ERROR;
4589	if (nla_put_drbd_cfg_context(skb, resource, NULL, NULL) \|\|
4590	nla_put_notification_header(msg: skb, type) \|\|
4591	((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
4592	resource_info_to_skb(skb, s: resource_info, exclude_sensitive: true)))
4593	goto nla_put_failure;
4594	resource_statistics.res_stat_write_ordering = resource->write_ordering;
4595	err = resource_statistics_to_skb(skb, s: &resource_statistics, exclude_sensitive: !capable(CAP_SYS_ADMIN));
4596	if (err)
4597	goto nla_put_failure;
4598	genlmsg_end(skb, hdr: dh);
4599	if (multicast) {
4600	err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
4601	/ skb has been consumed or freed in netlink_broadcast() /
4602	if (err && err != -ESRCH)
4603	goto failed;
4604	}
4605	return `0`;
4606
4607	nla_put_failure:
4608	nlmsg_free(skb);
4609	failed:
4610	drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n",
4611	err, seq);
4612	return err;
4613	}
4614
4615	int notify_device_state(struct sk_buff *skb,
4616	unsigned int seq,
4617	struct drbd_device *device,
4618	struct device_info *device_info,
4619	enum drbd_notification_type type)
4620	{
4621	struct device_statistics device_statistics;
4622	struct drbd_genlmsghdr *dh;
4623	bool multicast = false;
4624	int err;
4625
4626	if (!skb) {
4627	seq = atomic_inc_return(v: &notify_genl_seq);
4628	skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
4629	err = -ENOMEM;
4630	if (!skb)
4631	goto failed;
4632	multicast = true;
4633	}
4634
4635	err = -EMSGSIZE;
4636	dh = genlmsg_put(skb, portid: `0`, seq, family: &drbd_genl_family, flags: `0`, cmd: DRBD_DEVICE_STATE);
4637	if (!dh)
4638	goto nla_put_failure;
4639	dh->minor = device->minor;
4640	dh->ret_code = NO_ERROR;
4641	if (nla_put_drbd_cfg_context(skb, resource: device->resource, NULL, device) \|\|
4642	nla_put_notification_header(msg: skb, type) \|\|
4643	((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
4644	device_info_to_skb(skb, s: device_info, exclude_sensitive: true)))
4645	goto nla_put_failure;
4646	device_to_statistics(s: &device_statistics, device);
4647	device_statistics_to_skb(skb, s: &device_statistics, exclude_sensitive: !capable(CAP_SYS_ADMIN));
4648	genlmsg_end(skb, hdr: dh);
4649	if (multicast) {
4650	err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
4651	/ skb has been consumed or freed in netlink_broadcast() /
4652	if (err && err != -ESRCH)
4653	goto failed;
4654	}
4655	return `0`;
4656
4657	nla_put_failure:
4658	nlmsg_free(skb);
4659	failed:
4660	drbd_err(device, "Error %d while broadcasting event. Event seq:%u\n",
4661	err, seq);
4662	return err;
4663	}
4664
4665	int notify_connection_state(struct sk_buff *skb,
4666	unsigned int seq,
4667	struct drbd_connection *connection,
4668	struct connection_info *connection_info,
4669	enum drbd_notification_type type)
4670	{
4671	struct connection_statistics connection_statistics;
4672	struct drbd_genlmsghdr *dh;
4673	bool multicast = false;
4674	int err;
4675
4676	if (!skb) {
4677	seq = atomic_inc_return(v: &notify_genl_seq);
4678	skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
4679	err = -ENOMEM;
4680	if (!skb)
4681	goto failed;
4682	multicast = true;
4683	}
4684
4685	err = -EMSGSIZE;
4686	dh = genlmsg_put(skb, portid: `0`, seq, family: &drbd_genl_family, flags: `0`, cmd: DRBD_CONNECTION_STATE);
4687	if (!dh)
4688	goto nla_put_failure;
4689	dh->minor = -`1U`;
4690	dh->ret_code = NO_ERROR;
4691	if (nla_put_drbd_cfg_context(skb, resource: connection->resource, connection, NULL) \|\|
4692	nla_put_notification_header(msg: skb, type) \|\|
4693	((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
4694	connection_info_to_skb(skb, s: connection_info, exclude_sensitive: true)))
4695	goto nla_put_failure;
4696	connection_statistics.conn_congested = test_bit(NET_CONGESTED, &connection->flags);
4697	connection_statistics_to_skb(skb, s: &connection_statistics, exclude_sensitive: !capable(CAP_SYS_ADMIN));
4698	genlmsg_end(skb, hdr: dh);
4699	if (multicast) {
4700	err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
4701	/ skb has been consumed or freed in netlink_broadcast() /
4702	if (err && err != -ESRCH)
4703	goto failed;
4704	}
4705	return `0`;
4706
4707	nla_put_failure:
4708	nlmsg_free(skb);
4709	failed:
4710	drbd_err(connection, "Error %d while broadcasting event. Event seq:%u\n",
4711	err, seq);
4712	return err;
4713	}
4714
4715	int notify_peer_device_state(struct sk_buff *skb,
4716	unsigned int seq,
4717	struct drbd_peer_device *peer_device,
4718	struct peer_device_info *peer_device_info,
4719	enum drbd_notification_type type)
4720	{
4721	struct peer_device_statistics peer_device_statistics;
4722	struct drbd_resource *resource = peer_device->device->resource;
4723	struct drbd_genlmsghdr *dh;
4724	bool multicast = false;
4725	int err;
4726
4727	if (!skb) {
4728	seq = atomic_inc_return(v: &notify_genl_seq);
4729	skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
4730	err = -ENOMEM;
4731	if (!skb)
4732	goto failed;
4733	multicast = true;
4734	}
4735
4736	err = -EMSGSIZE;
4737	dh = genlmsg_put(skb, portid: `0`, seq, family: &drbd_genl_family, flags: `0`, cmd: DRBD_PEER_DEVICE_STATE);
4738	if (!dh)
4739	goto nla_put_failure;
4740	dh->minor = -`1U`;
4741	dh->ret_code = NO_ERROR;
4742	if (nla_put_drbd_cfg_context(skb, resource, connection: peer_device->connection, device: peer_device->device) \|\|
4743	nla_put_notification_header(msg: skb, type) \|\|
4744	((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
4745	peer_device_info_to_skb(skb, s: peer_device_info, exclude_sensitive: true)))
4746	goto nla_put_failure;
4747	peer_device_to_statistics(s: &peer_device_statistics, peer_device);
4748	peer_device_statistics_to_skb(skb, s: &peer_device_statistics, exclude_sensitive: !capable(CAP_SYS_ADMIN));
4749	genlmsg_end(skb, hdr: dh);
4750	if (multicast) {
4751	err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
4752	/ skb has been consumed or freed in netlink_broadcast() /
4753	if (err && err != -ESRCH)
4754	goto failed;
4755	}
4756	return `0`;
4757
4758	nla_put_failure:
4759	nlmsg_free(skb);
4760	failed:
4761	drbd_err(peer_device, "Error %d while broadcasting event. Event seq:%u\n",
4762	err, seq);
4763	return err;
4764	}
4765
4766	void notify_helper(enum drbd_notification_type type,
4767	struct drbd_device device, struct* drbd_connection *connection,
4768	const char name, int* status)
4769	{
4770	struct drbd_resource *resource = device ? device->resource : connection->resource;
4771	struct drbd_helper_info helper_info;
4772	unsigned int seq = atomic_inc_return(v: &notify_genl_seq);
4773	struct sk_buff *skb = NULL;
4774	struct drbd_genlmsghdr *dh;
4775	int err;
4776
4777	strscpy(p: helper_info.helper_name, q: name, size: sizeof(helper_info.helper_name));
4778	helper_info.helper_name_len = min(strlen(name), sizeof(helper_info.helper_name));
4779	helper_info.helper_status = status;
4780
4781	skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
4782	err = -ENOMEM;
4783	if (!skb)
4784	goto fail;
4785
4786	err = -EMSGSIZE;
4787	dh = genlmsg_put(skb, portid: `0`, seq, family: &drbd_genl_family, flags: `0`, cmd: DRBD_HELPER);
4788	if (!dh)
4789	goto fail;
4790	dh->minor = device ? device->minor : -`1`;
4791	dh->ret_code = NO_ERROR;
4792	mutex_lock(&notification_mutex);
4793	if (nla_put_drbd_cfg_context(skb, resource, connection, device) \|\|
4794	nla_put_notification_header(msg: skb, type) \|\|
4795	drbd_helper_info_to_skb(skb, s: &helper_info, exclude_sensitive: true))
4796	goto unlock_fail;
4797	genlmsg_end(skb, hdr: dh);
4798	err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
4799	skb = NULL;
4800	/ skb has been consumed or freed in netlink_broadcast() /
4801	if (err && err != -ESRCH)
4802	goto unlock_fail;
4803	mutex_unlock(lock: &notification_mutex);
4804	return;
4805
4806	unlock_fail:
4807	mutex_unlock(lock: &notification_mutex);
4808	fail:
4809	nlmsg_free(skb);
4810	drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n",
4811	err, seq);
4812	}
4813
4814	static int notify_initial_state_done(struct sk_buff skb, unsigned* int seq)
4815	{
4816	struct drbd_genlmsghdr *dh;
4817	int err;
4818
4819	err = -EMSGSIZE;
4820	dh = genlmsg_put(skb, portid: `0`, seq, family: &drbd_genl_family, flags: `0`, cmd: DRBD_INITIAL_STATE_DONE);
4821	if (!dh)
4822	goto nla_put_failure;
4823	dh->minor = -`1U`;
4824	dh->ret_code = NO_ERROR;
4825	if (nla_put_notification_header(msg: skb, type: NOTIFY_EXISTS))
4826	goto nla_put_failure;
4827	genlmsg_end(skb, hdr: dh);
4828	return `0`;
4829
4830	nla_put_failure:
4831	nlmsg_free(skb);
4832	pr_err("Error %d sending event. Event seq:%u\n", err, seq);
4833	return err;
4834	}
4835
4836	static void free_state_changes(struct list_head *list)
4837	{
4838	while (!list_empty(head: list)) {
4839	struct drbd_state_change *state_change =
4840	list_first_entry(list, struct drbd_state_change, list);
4841	list_del(entry: &state_change->list);
4842	forget_state_change(state_change);
4843	}
4844	}
4845
4846	static unsigned int notifications_for_state_change(struct drbd_state_change *state_change)
4847	{
4848	return `1` +
4849	state_change->n_connections +
4850	state_change->n_devices +
4851	state_change->n_devices * state_change->n_connections;
4852	}
4853
4854	static int get_initial_state(struct sk_buff skb, struct* netlink_callback *cb)
4855	{
4856	struct drbd_state_change state_change = (struct* drbd_state_change *)cb->args[`0`];
4857	unsigned int seq = cb->args[`2`];
4858	unsigned int n;
4859	enum drbd_notification_type flags = `0`;
4860	int err = `0`;
4861
4862	/ There is no need for taking notification_mutex here: it doesn't*
4863	matter if the initial state events mix with later state chage
4864	events; we can always tell the events apart by the NOTIFY_EXISTS
4865	flag. /*
4866
4867	cb->args[`5`]--;
4868	if (cb->args[`5`] == `1`) {
4869	err = notify_initial_state_done(skb, seq);
4870	goto out;
4871	}
4872	n = cb->args[`4`]++;
4873	if (cb->args[`4`] < cb->args[`3`])
4874	flags \|= NOTIFY_CONTINUES;
4875	if (n < `1`) {
4876	err = notify_resource_state_change(skb, seq, state_change->resource,
4877	type: NOTIFY_EXISTS \| flags);
4878	goto next;
4879	}
4880	n--;
4881	if (n < state_change->n_connections) {
4882	err = notify_connection_state_change(skb, seq, &state_change->connections[n],
4883	type: NOTIFY_EXISTS \| flags);
4884	goto next;
4885	}
4886	n -= state_change->n_connections;
4887	if (n < state_change->n_devices) {
4888	err = notify_device_state_change(skb, seq, &state_change->devices[n],
4889	type: NOTIFY_EXISTS \| flags);
4890	goto next;
4891	}
4892	n -= state_change->n_devices;
4893	if (n < state_change->n_devices * state_change->n_connections) {
4894	err = notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n],
4895	type: NOTIFY_EXISTS \| flags);
4896	goto next;
4897	}
4898
4899	next:
4900	if (cb->args[`4`] == cb->args[`3`]) {
4901	struct drbd_state_change *next_state_change =
4902	list_entry(state_change->list.next,
4903	struct drbd_state_change, list);
4904	cb->args[`0`] = (long)next_state_change;
4905	cb->args[`3`] = notifications_for_state_change(state_change: next_state_change);
4906	cb->args[`4`] = `0`;
4907	}
4908	out:
4909	if (err)
4910	return err;
4911	else
4912	return skb->len;
4913	}
4914
4915	int drbd_adm_get_initial_state(struct sk_buff skb, struct* netlink_callback *cb)
4916	{
4917	struct drbd_resource *resource;
4918	LIST_HEAD(head);
4919
4920	if (cb->args[`5`] >= `1`) {
4921	if (cb->args[`5`] > `1`)
4922	return get_initial_state(skb, cb);
4923	if (cb->args[`0`]) {
4924	struct drbd_state_change *state_change =
4925	(struct drbd_state_change *)cb->args[`0`];
4926
4927	/ connect list to head /
4928	list_add(new: &head, head: &state_change->list);
4929	free_state_changes(list: &head);
4930	}
4931	return `0`;
4932	}
4933
4934	cb->args[`5`] = `2`; / number of iterations /
4935	mutex_lock(&resources_mutex);
4936	for_each_resource(resource, &drbd_resources) {
4937	struct drbd_state_change *state_change;
4938
4939	state_change = remember_old_state(resource, GFP_KERNEL);
4940	if (!state_change) {
4941	if (!list_empty(head: &head))
4942	free_state_changes(list: &head);
4943	mutex_unlock(lock: &resources_mutex);
4944	return -ENOMEM;
4945	}
4946	copy_old_to_new_state_change(state_change);
4947	list_add_tail(new: &state_change->list, head: &head);
4948	cb->args[`5`] += notifications_for_state_change(state_change);
4949	}
4950	mutex_unlock(lock: &resources_mutex);
4951
4952	if (!list_empty(head: &head)) {
4953	struct drbd_state_change *state_change =
4954	list_entry(head.next, struct drbd_state_change, list);
4955	cb->args[`0`] = (long)state_change;
4956	cb->args[`3`] = notifications_for_state_change(state_change);
4957	list_del(entry: &head); / detach list from head /
4958	}
4959
4960	cb->args[`2`] = cb->nlh->nlmsg_seq;
4961	return get_initial_state(skb, cb);
4962	}
4963

source code of linux/drivers/block/drbd/drbd_nl.c