cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

drbd_nl.c (147395B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3   drbd_nl.c
      4
      5   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
      6
      7   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
      8   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
      9   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
     10
     11
     12 */
     13
     14#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
     15
     16#include <linux/module.h>
     17#include <linux/drbd.h>
     18#include <linux/in.h>
     19#include <linux/fs.h>
     20#include <linux/file.h>
     21#include <linux/slab.h>
     22#include <linux/blkpg.h>
     23#include <linux/cpumask.h>
     24#include "drbd_int.h"
     25#include "drbd_protocol.h"
     26#include "drbd_req.h"
     27#include "drbd_state_change.h"
     28#include <asm/unaligned.h>
     29#include <linux/drbd_limits.h>
     30#include <linux/kthread.h>
     31
     32#include <net/genetlink.h>
     33
     34/* .doit */
     35// int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info);
     36// int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info);
     37
     38int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info);
     39int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info);
     40
     41int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info);
     42int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info);
     43int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);
     44
     45int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
     46int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
     47int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
     48int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
     49int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
     50int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
     51int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
     52int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
     53int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
     54int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);
     55int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
     56int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
     57int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
     58int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
     59int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
     60int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
     61int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
     62int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);
     63int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
     64int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
     65/* .dumpit */
     66int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
     67int drbd_adm_dump_resources(struct sk_buff *skb, struct netlink_callback *cb);
     68int drbd_adm_dump_devices(struct sk_buff *skb, struct netlink_callback *cb);
     69int drbd_adm_dump_devices_done(struct netlink_callback *cb);
     70int drbd_adm_dump_connections(struct sk_buff *skb, struct netlink_callback *cb);
     71int drbd_adm_dump_connections_done(struct netlink_callback *cb);
     72int drbd_adm_dump_peer_devices(struct sk_buff *skb, struct netlink_callback *cb);
     73int drbd_adm_dump_peer_devices_done(struct netlink_callback *cb);
     74int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb);
     75
     76#include <linux/drbd_genl_api.h>
     77#include "drbd_nla.h"
     78#include <linux/genl_magic_func.h>
     79
     80static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
     81static atomic_t notify_genl_seq = ATOMIC_INIT(2); /* two. */
     82
     83DEFINE_MUTEX(notification_mutex);
     84
     85/* used blkdev_get_by_path, to claim our meta data device(s) */
     86static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
     87
     88static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
     89{
     90	genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
     91	if (genlmsg_reply(skb, info))
     92		pr_err("error sending genl reply\n");
     93}
     94
     95/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
     96 * reason it could fail was no space in skb, and there are 4k available. */
     97static int drbd_msg_put_info(struct sk_buff *skb, const char *info)
     98{
     99	struct nlattr *nla;
    100	int err = -EMSGSIZE;
    101
    102	if (!info || !info[0])
    103		return 0;
    104
    105	nla = nla_nest_start_noflag(skb, DRBD_NLA_CFG_REPLY);
    106	if (!nla)
    107		return err;
    108
    109	err = nla_put_string(skb, T_info_text, info);
    110	if (err) {
    111		nla_nest_cancel(skb, nla);
    112		return err;
    113	} else
    114		nla_nest_end(skb, nla);
    115	return 0;
    116}
    117
    118__printf(2, 3)
    119static int drbd_msg_sprintf_info(struct sk_buff *skb, const char *fmt, ...)
    120{
    121	va_list args;
    122	struct nlattr *nla, *txt;
    123	int err = -EMSGSIZE;
    124	int len;
    125
    126	nla = nla_nest_start_noflag(skb, DRBD_NLA_CFG_REPLY);
    127	if (!nla)
    128		return err;
    129
    130	txt = nla_reserve(skb, T_info_text, 256);
    131	if (!txt) {
    132		nla_nest_cancel(skb, nla);
    133		return err;
    134	}
    135	va_start(args, fmt);
    136	len = vscnprintf(nla_data(txt), 256, fmt, args);
    137	va_end(args);
    138
    139	/* maybe: retry with larger reserve, if truncated */
    140	txt->nla_len = nla_attr_size(len+1);
    141	nlmsg_trim(skb, (char*)txt + NLA_ALIGN(txt->nla_len));
    142	nla_nest_end(skb, nla);
    143
    144	return 0;
    145}
    146
    147/* This would be a good candidate for a "pre_doit" hook,
    148 * and per-family private info->pointers.
    149 * But we need to stay compatible with older kernels.
    150 * If it returns successfully, adm_ctx members are valid.
    151 *
    152 * At this point, we still rely on the global genl_lock().
    153 * If we want to avoid that, and allow "genl_family.parallel_ops", we may need
    154 * to add additional synchronization against object destruction/modification.
    155 */
    156#define DRBD_ADM_NEED_MINOR	1
    157#define DRBD_ADM_NEED_RESOURCE	2
    158#define DRBD_ADM_NEED_CONNECTION 4
    159static int drbd_adm_prepare(struct drbd_config_context *adm_ctx,
    160	struct sk_buff *skb, struct genl_info *info, unsigned flags)
    161{
    162	struct drbd_genlmsghdr *d_in = info->userhdr;
    163	const u8 cmd = info->genlhdr->cmd;
    164	int err;
    165
    166	memset(adm_ctx, 0, sizeof(*adm_ctx));
    167
    168	/* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
    169	if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
    170	       return -EPERM;
    171
    172	adm_ctx->reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
    173	if (!adm_ctx->reply_skb) {
    174		err = -ENOMEM;
    175		goto fail;
    176	}
    177
    178	adm_ctx->reply_dh = genlmsg_put_reply(adm_ctx->reply_skb,
    179					info, &drbd_genl_family, 0, cmd);
    180	/* a put of a few bytes into a fresh skb of >= 4k will always succeed,
    181	 * but anyways */
    182	if (!adm_ctx->reply_dh) {
    183		err = -ENOMEM;
    184		goto fail;
    185	}
    186
    187	adm_ctx->reply_dh->minor = d_in->minor;
    188	adm_ctx->reply_dh->ret_code = NO_ERROR;
    189
    190	adm_ctx->volume = VOLUME_UNSPECIFIED;
    191	if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
    192		struct nlattr *nla;
    193		/* parse and validate only */
    194		err = drbd_cfg_context_from_attrs(NULL, info);
    195		if (err)
    196			goto fail;
    197
    198		/* It was present, and valid,
    199		 * copy it over to the reply skb. */
    200		err = nla_put_nohdr(adm_ctx->reply_skb,
    201				info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
    202				info->attrs[DRBD_NLA_CFG_CONTEXT]);
    203		if (err)
    204			goto fail;
    205
    206		/* and assign stuff to the adm_ctx */
    207		nla = nested_attr_tb[__nla_type(T_ctx_volume)];
    208		if (nla)
    209			adm_ctx->volume = nla_get_u32(nla);
    210		nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
    211		if (nla)
    212			adm_ctx->resource_name = nla_data(nla);
    213		adm_ctx->my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
    214		adm_ctx->peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
    215		if ((adm_ctx->my_addr &&
    216		     nla_len(adm_ctx->my_addr) > sizeof(adm_ctx->connection->my_addr)) ||
    217		    (adm_ctx->peer_addr &&
    218		     nla_len(adm_ctx->peer_addr) > sizeof(adm_ctx->connection->peer_addr))) {
    219			err = -EINVAL;
    220			goto fail;
    221		}
    222	}
    223
    224	adm_ctx->minor = d_in->minor;
    225	adm_ctx->device = minor_to_device(d_in->minor);
    226
    227	/* We are protected by the global genl_lock().
    228	 * But we may explicitly drop it/retake it in drbd_adm_set_role(),
    229	 * so make sure this object stays around. */
    230	if (adm_ctx->device)
    231		kref_get(&adm_ctx->device->kref);
    232
    233	if (adm_ctx->resource_name) {
    234		adm_ctx->resource = drbd_find_resource(adm_ctx->resource_name);
    235	}
    236
    237	if (!adm_ctx->device && (flags & DRBD_ADM_NEED_MINOR)) {
    238		drbd_msg_put_info(adm_ctx->reply_skb, "unknown minor");
    239		return ERR_MINOR_INVALID;
    240	}
    241	if (!adm_ctx->resource && (flags & DRBD_ADM_NEED_RESOURCE)) {
    242		drbd_msg_put_info(adm_ctx->reply_skb, "unknown resource");
    243		if (adm_ctx->resource_name)
    244			return ERR_RES_NOT_KNOWN;
    245		return ERR_INVALID_REQUEST;
    246	}
    247
    248	if (flags & DRBD_ADM_NEED_CONNECTION) {
    249		if (adm_ctx->resource) {
    250			drbd_msg_put_info(adm_ctx->reply_skb, "no resource name expected");
    251			return ERR_INVALID_REQUEST;
    252		}
    253		if (adm_ctx->device) {
    254			drbd_msg_put_info(adm_ctx->reply_skb, "no minor number expected");
    255			return ERR_INVALID_REQUEST;
    256		}
    257		if (adm_ctx->my_addr && adm_ctx->peer_addr)
    258			adm_ctx->connection = conn_get_by_addrs(nla_data(adm_ctx->my_addr),
    259							  nla_len(adm_ctx->my_addr),
    260							  nla_data(adm_ctx->peer_addr),
    261							  nla_len(adm_ctx->peer_addr));
    262		if (!adm_ctx->connection) {
    263			drbd_msg_put_info(adm_ctx->reply_skb, "unknown connection");
    264			return ERR_INVALID_REQUEST;
    265		}
    266	}
    267
    268	/* some more paranoia, if the request was over-determined */
    269	if (adm_ctx->device && adm_ctx->resource &&
    270	    adm_ctx->device->resource != adm_ctx->resource) {
    271		pr_warn("request: minor=%u, resource=%s; but that minor belongs to resource %s\n",
    272			adm_ctx->minor, adm_ctx->resource->name,
    273			adm_ctx->device->resource->name);
    274		drbd_msg_put_info(adm_ctx->reply_skb, "minor exists in different resource");
    275		return ERR_INVALID_REQUEST;
    276	}
    277	if (adm_ctx->device &&
    278	    adm_ctx->volume != VOLUME_UNSPECIFIED &&
    279	    adm_ctx->volume != adm_ctx->device->vnr) {
    280		pr_warn("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
    281			adm_ctx->minor, adm_ctx->volume,
    282			adm_ctx->device->vnr, adm_ctx->device->resource->name);
    283		drbd_msg_put_info(adm_ctx->reply_skb, "minor exists as different volume");
    284		return ERR_INVALID_REQUEST;
    285	}
    286
    287	/* still, provide adm_ctx->resource always, if possible. */
    288	if (!adm_ctx->resource) {
    289		adm_ctx->resource = adm_ctx->device ? adm_ctx->device->resource
    290			: adm_ctx->connection ? adm_ctx->connection->resource : NULL;
    291		if (adm_ctx->resource)
    292			kref_get(&adm_ctx->resource->kref);
    293	}
    294
    295	return NO_ERROR;
    296
    297fail:
    298	nlmsg_free(adm_ctx->reply_skb);
    299	adm_ctx->reply_skb = NULL;
    300	return err;
    301}
    302
    303static int drbd_adm_finish(struct drbd_config_context *adm_ctx,
    304	struct genl_info *info, int retcode)
    305{
    306	if (adm_ctx->device) {
    307		kref_put(&adm_ctx->device->kref, drbd_destroy_device);
    308		adm_ctx->device = NULL;
    309	}
    310	if (adm_ctx->connection) {
    311		kref_put(&adm_ctx->connection->kref, &drbd_destroy_connection);
    312		adm_ctx->connection = NULL;
    313	}
    314	if (adm_ctx->resource) {
    315		kref_put(&adm_ctx->resource->kref, drbd_destroy_resource);
    316		adm_ctx->resource = NULL;
    317	}
    318
    319	if (!adm_ctx->reply_skb)
    320		return -ENOMEM;
    321
    322	adm_ctx->reply_dh->ret_code = retcode;
    323	drbd_adm_send_reply(adm_ctx->reply_skb, info);
    324	return 0;
    325}
    326
    327static void setup_khelper_env(struct drbd_connection *connection, char **envp)
    328{
    329	char *afs;
    330
    331	/* FIXME: A future version will not allow this case. */
    332	if (connection->my_addr_len == 0 || connection->peer_addr_len == 0)
    333		return;
    334
    335	switch (((struct sockaddr *)&connection->peer_addr)->sa_family) {
    336	case AF_INET6:
    337		afs = "ipv6";
    338		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
    339			 &((struct sockaddr_in6 *)&connection->peer_addr)->sin6_addr);
    340		break;
    341	case AF_INET:
    342		afs = "ipv4";
    343		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
    344			 &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
    345		break;
    346	default:
    347		afs = "ssocks";
    348		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
    349			 &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
    350	}
    351	snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
    352}
    353
    354int drbd_khelper(struct drbd_device *device, char *cmd)
    355{
    356	char *envp[] = { "HOME=/",
    357			"TERM=linux",
    358			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
    359			 (char[20]) { }, /* address family */
    360			 (char[60]) { }, /* address */
    361			NULL };
    362	char mb[14];
    363	char *argv[] = {drbd_usermode_helper, cmd, mb, NULL };
    364	struct drbd_connection *connection = first_peer_device(device)->connection;
    365	struct sib_info sib;
    366	int ret;
    367
    368	if (current == connection->worker.task)
    369		set_bit(CALLBACK_PENDING, &connection->flags);
    370
    371	snprintf(mb, 14, "minor-%d", device_to_minor(device));
    372	setup_khelper_env(connection, envp);
    373
    374	/* The helper may take some time.
    375	 * write out any unsynced meta data changes now */
    376	drbd_md_sync(device);
    377
    378	drbd_info(device, "helper command: %s %s %s\n", drbd_usermode_helper, cmd, mb);
    379	sib.sib_reason = SIB_HELPER_PRE;
    380	sib.helper_name = cmd;
    381	drbd_bcast_event(device, &sib);
    382	notify_helper(NOTIFY_CALL, device, connection, cmd, 0);
    383	ret = call_usermodehelper(drbd_usermode_helper, argv, envp, UMH_WAIT_PROC);
    384	if (ret)
    385		drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n",
    386				drbd_usermode_helper, cmd, mb,
    387				(ret >> 8) & 0xff, ret);
    388	else
    389		drbd_info(device, "helper command: %s %s %s exit code %u (0x%x)\n",
    390				drbd_usermode_helper, cmd, mb,
    391				(ret >> 8) & 0xff, ret);
    392	sib.sib_reason = SIB_HELPER_POST;
    393	sib.helper_exit_code = ret;
    394	drbd_bcast_event(device, &sib);
    395	notify_helper(NOTIFY_RESPONSE, device, connection, cmd, ret);
    396
    397	if (current == connection->worker.task)
    398		clear_bit(CALLBACK_PENDING, &connection->flags);
    399
    400	if (ret < 0) /* Ignore any ERRNOs we got. */
    401		ret = 0;
    402
    403	return ret;
    404}
    405
    406enum drbd_peer_state conn_khelper(struct drbd_connection *connection, char *cmd)
    407{
    408	char *envp[] = { "HOME=/",
    409			"TERM=linux",
    410			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
    411			 (char[20]) { }, /* address family */
    412			 (char[60]) { }, /* address */
    413			NULL };
    414	char *resource_name = connection->resource->name;
    415	char *argv[] = {drbd_usermode_helper, cmd, resource_name, NULL };
    416	int ret;
    417
    418	setup_khelper_env(connection, envp);
    419	conn_md_sync(connection);
    420
    421	drbd_info(connection, "helper command: %s %s %s\n", drbd_usermode_helper, cmd, resource_name);
    422	/* TODO: conn_bcast_event() ?? */
    423	notify_helper(NOTIFY_CALL, NULL, connection, cmd, 0);
    424
    425	ret = call_usermodehelper(drbd_usermode_helper, argv, envp, UMH_WAIT_PROC);
    426	if (ret)
    427		drbd_warn(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
    428			  drbd_usermode_helper, cmd, resource_name,
    429			  (ret >> 8) & 0xff, ret);
    430	else
    431		drbd_info(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
    432			  drbd_usermode_helper, cmd, resource_name,
    433			  (ret >> 8) & 0xff, ret);
    434	/* TODO: conn_bcast_event() ?? */
    435	notify_helper(NOTIFY_RESPONSE, NULL, connection, cmd, ret);
    436
    437	if (ret < 0) /* Ignore any ERRNOs we got. */
    438		ret = 0;
    439
    440	return ret;
    441}
    442
    443static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connection)
    444{
    445	enum drbd_fencing_p fp = FP_NOT_AVAIL;
    446	struct drbd_peer_device *peer_device;
    447	int vnr;
    448
    449	rcu_read_lock();
    450	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
    451		struct drbd_device *device = peer_device->device;
    452		if (get_ldev_if_state(device, D_CONSISTENT)) {
    453			struct disk_conf *disk_conf =
    454				rcu_dereference(peer_device->device->ldev->disk_conf);
    455			fp = max_t(enum drbd_fencing_p, fp, disk_conf->fencing);
    456			put_ldev(device);
    457		}
    458	}
    459	rcu_read_unlock();
    460
    461	return fp;
    462}
    463
    464static bool resource_is_supended(struct drbd_resource *resource)
    465{
    466	return resource->susp || resource->susp_fen || resource->susp_nod;
    467}
    468
    469bool conn_try_outdate_peer(struct drbd_connection *connection)
    470{
    471	struct drbd_resource * const resource = connection->resource;
    472	unsigned int connect_cnt;
    473	union drbd_state mask = { };
    474	union drbd_state val = { };
    475	enum drbd_fencing_p fp;
    476	char *ex_to_string;
    477	int r;
    478
    479	spin_lock_irq(&resource->req_lock);
    480	if (connection->cstate >= C_WF_REPORT_PARAMS) {
    481		drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n");
    482		spin_unlock_irq(&resource->req_lock);
    483		return false;
    484	}
    485
    486	connect_cnt = connection->connect_cnt;
    487	spin_unlock_irq(&resource->req_lock);
    488
    489	fp = highest_fencing_policy(connection);
    490	switch (fp) {
    491	case FP_NOT_AVAIL:
    492		drbd_warn(connection, "Not fencing peer, I'm not even Consistent myself.\n");
    493		spin_lock_irq(&resource->req_lock);
    494		if (connection->cstate < C_WF_REPORT_PARAMS) {
    495			_conn_request_state(connection,
    496					    (union drbd_state) { { .susp_fen = 1 } },
    497					    (union drbd_state) { { .susp_fen = 0 } },
    498					    CS_VERBOSE | CS_HARD | CS_DC_SUSP);
    499			/* We are no longer suspended due to the fencing policy.
    500			 * We may still be suspended due to the on-no-data-accessible policy.
    501			 * If that was OND_IO_ERROR, fail pending requests. */
    502			if (!resource_is_supended(resource))
    503				_tl_restart(connection, CONNECTION_LOST_WHILE_PENDING);
    504		}
    505		/* Else: in case we raced with a connection handshake,
    506		 * let the handshake figure out if we maybe can RESEND,
    507		 * and do not resume/fail pending requests here.
    508		 * Worst case is we stay suspended for now, which may be
    509		 * resolved by either re-establishing the replication link, or
    510		 * the next link failure, or eventually the administrator.  */
    511		spin_unlock_irq(&resource->req_lock);
    512		return false;
    513
    514	case FP_DONT_CARE:
    515		return true;
    516	default: ;
    517	}
    518
    519	r = conn_khelper(connection, "fence-peer");
    520
    521	switch ((r>>8) & 0xff) {
    522	case P_INCONSISTENT: /* peer is inconsistent */
    523		ex_to_string = "peer is inconsistent or worse";
    524		mask.pdsk = D_MASK;
    525		val.pdsk = D_INCONSISTENT;
    526		break;
    527	case P_OUTDATED: /* peer got outdated, or was already outdated */
    528		ex_to_string = "peer was fenced";
    529		mask.pdsk = D_MASK;
    530		val.pdsk = D_OUTDATED;
    531		break;
    532	case P_DOWN: /* peer was down */
    533		if (conn_highest_disk(connection) == D_UP_TO_DATE) {
    534			/* we will(have) create(d) a new UUID anyways... */
    535			ex_to_string = "peer is unreachable, assumed to be dead";
    536			mask.pdsk = D_MASK;
    537			val.pdsk = D_OUTDATED;
    538		} else {
    539			ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
    540		}
    541		break;
    542	case P_PRIMARY: /* Peer is primary, voluntarily outdate myself.
    543		 * This is useful when an unconnected R_SECONDARY is asked to
    544		 * become R_PRIMARY, but finds the other peer being active. */
    545		ex_to_string = "peer is active";
    546		drbd_warn(connection, "Peer is primary, outdating myself.\n");
    547		mask.disk = D_MASK;
    548		val.disk = D_OUTDATED;
    549		break;
    550	case P_FENCING:
    551		/* THINK: do we need to handle this
    552		 * like case 4, or more like case 5? */
    553		if (fp != FP_STONITH)
    554			drbd_err(connection, "fence-peer() = 7 && fencing != Stonith !!!\n");
    555		ex_to_string = "peer was stonithed";
    556		mask.pdsk = D_MASK;
    557		val.pdsk = D_OUTDATED;
    558		break;
    559	default:
    560		/* The script is broken ... */
    561		drbd_err(connection, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
    562		return false; /* Eventually leave IO frozen */
    563	}
    564
    565	drbd_info(connection, "fence-peer helper returned %d (%s)\n",
    566		  (r>>8) & 0xff, ex_to_string);
    567
    568	/* Not using
    569	   conn_request_state(connection, mask, val, CS_VERBOSE);
    570	   here, because we might have been able to re-establish the connection in the
    571	   meantime. */
    572	spin_lock_irq(&resource->req_lock);
    573	if (connection->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &connection->flags)) {
    574		if (connection->connect_cnt != connect_cnt)
    575			/* In case the connection was established and dropped
    576			   while the fence-peer handler was running, ignore it */
    577			drbd_info(connection, "Ignoring fence-peer exit code\n");
    578		else
    579			_conn_request_state(connection, mask, val, CS_VERBOSE);
    580	}
    581	spin_unlock_irq(&resource->req_lock);
    582
    583	return conn_highest_pdsk(connection) <= D_OUTDATED;
    584}
    585
    586static int _try_outdate_peer_async(void *data)
    587{
    588	struct drbd_connection *connection = (struct drbd_connection *)data;
    589
    590	conn_try_outdate_peer(connection);
    591
    592	kref_put(&connection->kref, drbd_destroy_connection);
    593	return 0;
    594}
    595
    596void conn_try_outdate_peer_async(struct drbd_connection *connection)
    597{
    598	struct task_struct *opa;
    599
    600	kref_get(&connection->kref);
    601	/* We may have just sent a signal to this thread
    602	 * to get it out of some blocking network function.
    603	 * Clear signals; otherwise kthread_run(), which internally uses
    604	 * wait_on_completion_killable(), will mistake our pending signal
    605	 * for a new fatal signal and fail. */
    606	flush_signals(current);
    607	opa = kthread_run(_try_outdate_peer_async, connection, "drbd_async_h");
    608	if (IS_ERR(opa)) {
    609		drbd_err(connection, "out of mem, failed to invoke fence-peer helper\n");
    610		kref_put(&connection->kref, drbd_destroy_connection);
    611	}
    612}
    613
    614enum drbd_state_rv
    615drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int force)
    616{
    617	struct drbd_peer_device *const peer_device = first_peer_device(device);
    618	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
    619	const int max_tries = 4;
    620	enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
    621	struct net_conf *nc;
    622	int try = 0;
    623	int forced = 0;
    624	union drbd_state mask, val;
    625
    626	if (new_role == R_PRIMARY) {
    627		struct drbd_connection *connection;
    628
    629		/* Detect dead peers as soon as possible.  */
    630
    631		rcu_read_lock();
    632		for_each_connection(connection, device->resource)
    633			request_ping(connection);
    634		rcu_read_unlock();
    635	}
    636
    637	mutex_lock(device->state_mutex);
    638
    639	mask.i = 0; mask.role = R_MASK;
    640	val.i  = 0; val.role  = new_role;
    641
    642	while (try++ < max_tries) {
    643		rv = _drbd_request_state_holding_state_mutex(device, mask, val, CS_WAIT_COMPLETE);
    644
    645		/* in case we first succeeded to outdate,
    646		 * but now suddenly could establish a connection */
    647		if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
    648			val.pdsk = 0;
    649			mask.pdsk = 0;
    650			continue;
    651		}
    652
    653		if (rv == SS_NO_UP_TO_DATE_DISK && force &&
    654		    (device->state.disk < D_UP_TO_DATE &&
    655		     device->state.disk >= D_INCONSISTENT)) {
    656			mask.disk = D_MASK;
    657			val.disk  = D_UP_TO_DATE;
    658			forced = 1;
    659			continue;
    660		}
    661
    662		if (rv == SS_NO_UP_TO_DATE_DISK &&
    663		    device->state.disk == D_CONSISTENT && mask.pdsk == 0) {
    664			D_ASSERT(device, device->state.pdsk == D_UNKNOWN);
    665
    666			if (conn_try_outdate_peer(connection)) {
    667				val.disk = D_UP_TO_DATE;
    668				mask.disk = D_MASK;
    669			}
    670			continue;
    671		}
    672
    673		if (rv == SS_NOTHING_TO_DO)
    674			goto out;
    675		if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
    676			if (!conn_try_outdate_peer(connection) && force) {
    677				drbd_warn(device, "Forced into split brain situation!\n");
    678				mask.pdsk = D_MASK;
    679				val.pdsk  = D_OUTDATED;
    680
    681			}
    682			continue;
    683		}
    684		if (rv == SS_TWO_PRIMARIES) {
    685			/* Maybe the peer is detected as dead very soon...
    686			   retry at most once more in this case. */
    687			if (try < max_tries) {
    688				int timeo;
    689				try = max_tries - 1;
    690				rcu_read_lock();
    691				nc = rcu_dereference(connection->net_conf);
    692				timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
    693				rcu_read_unlock();
    694				schedule_timeout_interruptible(timeo);
    695			}
    696			continue;
    697		}
    698		if (rv < SS_SUCCESS) {
    699			rv = _drbd_request_state(device, mask, val,
    700						CS_VERBOSE + CS_WAIT_COMPLETE);
    701			if (rv < SS_SUCCESS)
    702				goto out;
    703		}
    704		break;
    705	}
    706
    707	if (rv < SS_SUCCESS)
    708		goto out;
    709
    710	if (forced)
    711		drbd_warn(device, "Forced to consider local data as UpToDate!\n");
    712
    713	/* Wait until nothing is on the fly :) */
    714	wait_event(device->misc_wait, atomic_read(&device->ap_pending_cnt) == 0);
    715
    716	/* FIXME also wait for all pending P_BARRIER_ACK? */
    717
    718	if (new_role == R_SECONDARY) {
    719		if (get_ldev(device)) {
    720			device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
    721			put_ldev(device);
    722		}
    723	} else {
    724		mutex_lock(&device->resource->conf_update);
    725		nc = connection->net_conf;
    726		if (nc)
    727			nc->discard_my_data = 0; /* without copy; single bit op is atomic */
    728		mutex_unlock(&device->resource->conf_update);
    729
    730		if (get_ldev(device)) {
    731			if (((device->state.conn < C_CONNECTED ||
    732			       device->state.pdsk <= D_FAILED)
    733			      && device->ldev->md.uuid[UI_BITMAP] == 0) || forced)
    734				drbd_uuid_new_current(device);
    735
    736			device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
    737			put_ldev(device);
    738		}
    739	}
    740
    741	/* writeout of activity log covered areas of the bitmap
    742	 * to stable storage done in after state change already */
    743
    744	if (device->state.conn >= C_WF_REPORT_PARAMS) {
    745		/* if this was forced, we should consider sync */
    746		if (forced)
    747			drbd_send_uuids(peer_device);
    748		drbd_send_current_state(peer_device);
    749	}
    750
    751	drbd_md_sync(device);
    752	set_disk_ro(device->vdisk, new_role == R_SECONDARY);
    753	kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
    754out:
    755	mutex_unlock(device->state_mutex);
    756	return rv;
    757}
    758
    759static const char *from_attrs_err_to_txt(int err)
    760{
    761	return	err == -ENOMSG ? "required attribute missing" :
    762		err == -EOPNOTSUPP ? "unknown mandatory attribute" :
    763		err == -EEXIST ? "can not change invariant setting" :
    764		"invalid attribute value";
    765}
    766
    767int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
    768{
    769	struct drbd_config_context adm_ctx;
    770	struct set_role_parms parms;
    771	int err;
    772	enum drbd_ret_code retcode;
    773	enum drbd_state_rv rv;
    774
    775	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
    776	if (!adm_ctx.reply_skb)
    777		return retcode;
    778	if (retcode != NO_ERROR)
    779		goto out;
    780
    781	memset(&parms, 0, sizeof(parms));
    782	if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
    783		err = set_role_parms_from_attrs(&parms, info);
    784		if (err) {
    785			retcode = ERR_MANDATORY_TAG;
    786			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
    787			goto out;
    788		}
    789	}
    790	genl_unlock();
    791	mutex_lock(&adm_ctx.resource->adm_mutex);
    792
    793	if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
    794		rv = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
    795	else
    796		rv = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);
    797
    798	mutex_unlock(&adm_ctx.resource->adm_mutex);
    799	genl_lock();
    800	drbd_adm_finish(&adm_ctx, info, rv);
    801	return 0;
    802out:
    803	drbd_adm_finish(&adm_ctx, info, retcode);
    804	return 0;
    805}
    806
    807/* Initializes the md.*_offset members, so we are able to find
    808 * the on disk meta data.
    809 *
    810 * We currently have two possible layouts:
    811 * external:
    812 *   |----------- md_size_sect ------------------|
    813 *   [ 4k superblock ][ activity log ][  Bitmap  ]
    814 *   | al_offset == 8 |
    815 *   | bm_offset = al_offset + X      |
    816 *  ==> bitmap sectors = md_size_sect - bm_offset
    817 *
    818 * internal:
    819 *            |----------- md_size_sect ------------------|
    820 * [data.....][  Bitmap  ][ activity log ][ 4k superblock ]
    821 *                        | al_offset < 0 |
    822 *            | bm_offset = al_offset - Y |
    823 *  ==> bitmap sectors = Y = al_offset - bm_offset
    824 *
    825 *  Activity log size used to be fixed 32kB,
    826 *  but is about to become configurable.
    827 */
    828static void drbd_md_set_sector_offsets(struct drbd_device *device,
    829				       struct drbd_backing_dev *bdev)
    830{
    831	sector_t md_size_sect = 0;
    832	unsigned int al_size_sect = bdev->md.al_size_4k * 8;
    833
    834	bdev->md.md_offset = drbd_md_ss(bdev);
    835
    836	switch (bdev->md.meta_dev_idx) {
    837	default:
    838		/* v07 style fixed size indexed meta data */
    839		bdev->md.md_size_sect = MD_128MB_SECT;
    840		bdev->md.al_offset = MD_4kB_SECT;
    841		bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
    842		break;
    843	case DRBD_MD_INDEX_FLEX_EXT:
    844		/* just occupy the full device; unit: sectors */
    845		bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
    846		bdev->md.al_offset = MD_4kB_SECT;
    847		bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
    848		break;
    849	case DRBD_MD_INDEX_INTERNAL:
    850	case DRBD_MD_INDEX_FLEX_INT:
    851		/* al size is still fixed */
    852		bdev->md.al_offset = -al_size_sect;
    853		/* we need (slightly less than) ~ this many bitmap sectors: */
    854		md_size_sect = drbd_get_capacity(bdev->backing_bdev);
    855		md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
    856		md_size_sect = BM_SECT_TO_EXT(md_size_sect);
    857		md_size_sect = ALIGN(md_size_sect, 8);
    858
    859		/* plus the "drbd meta data super block",
    860		 * and the activity log; */
    861		md_size_sect += MD_4kB_SECT + al_size_sect;
    862
    863		bdev->md.md_size_sect = md_size_sect;
    864		/* bitmap offset is adjusted by 'super' block size */
    865		bdev->md.bm_offset   = -md_size_sect + MD_4kB_SECT;
    866		break;
    867	}
    868}
    869
    870/* input size is expected to be in KB */
    871char *ppsize(char *buf, unsigned long long size)
    872{
    873	/* Needs 9 bytes at max including trailing NUL:
    874	 * -1ULL ==> "16384 EB" */
    875	static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
    876	int base = 0;
    877	while (size >= 10000 && base < sizeof(units)-1) {
    878		/* shift + round */
    879		size = (size >> 10) + !!(size & (1<<9));
    880		base++;
    881	}
    882	sprintf(buf, "%u %cB", (unsigned)size, units[base]);
    883
    884	return buf;
    885}
    886
    887/* there is still a theoretical deadlock when called from receiver
    888 * on a D_INCONSISTENT R_PRIMARY:
    889 *  remote READ does inc_ap_bio, receiver would need to receive answer
    890 *  packet from remote to dec_ap_bio again.
    891 *  receiver receive_sizes(), comes here,
    892 *  waits for ap_bio_cnt == 0. -> deadlock.
    893 * but this cannot happen, actually, because:
    894 *  R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
    895 *  (not connected, or bad/no disk on peer):
    896 *  see drbd_fail_request_early, ap_bio_cnt is zero.
    897 *  R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
    898 *  peer may not initiate a resize.
    899 */
    900/* Note these are not to be confused with
    901 * drbd_adm_suspend_io/drbd_adm_resume_io,
    902 * which are (sub) state changes triggered by admin (drbdsetup),
    903 * and can be long lived.
    904 * This changes a device->flag, is triggered by drbd internals,
    905 * and should be short-lived. */
    906/* It needs to be a counter, since multiple threads might
    907   independently suspend and resume IO. */
    908void drbd_suspend_io(struct drbd_device *device)
    909{
    910	atomic_inc(&device->suspend_cnt);
    911	if (drbd_suspended(device))
    912		return;
    913	wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt));
    914}
    915
    916void drbd_resume_io(struct drbd_device *device)
    917{
    918	if (atomic_dec_and_test(&device->suspend_cnt))
    919		wake_up(&device->misc_wait);
    920}
    921
    922/*
    923 * drbd_determine_dev_size() -  Sets the right device size obeying all constraints
    924 * @device:	DRBD device.
    925 *
    926 * Returns 0 on success, negative return values indicate errors.
    927 * You should call drbd_md_sync() after calling this function.
    928 */
    929enum determine_dev_size
    930drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
    931{
    932	struct md_offsets_and_sizes {
    933		u64 last_agreed_sect;
    934		u64 md_offset;
    935		s32 al_offset;
    936		s32 bm_offset;
    937		u32 md_size_sect;
    938
    939		u32 al_stripes;
    940		u32 al_stripe_size_4k;
    941	} prev;
    942	sector_t u_size, size;
    943	struct drbd_md *md = &device->ldev->md;
    944	void *buffer;
    945
    946	int md_moved, la_size_changed;
    947	enum determine_dev_size rv = DS_UNCHANGED;
    948
    949	/* We may change the on-disk offsets of our meta data below.  Lock out
    950	 * anything that may cause meta data IO, to avoid acting on incomplete
    951	 * layout changes or scribbling over meta data that is in the process
    952	 * of being moved.
    953	 *
    954	 * Move is not exactly correct, btw, currently we have all our meta
    955	 * data in core memory, to "move" it we just write it all out, there
    956	 * are no reads. */
    957	drbd_suspend_io(device);
    958	buffer = drbd_md_get_buffer(device, __func__); /* Lock meta-data IO */
    959	if (!buffer) {
    960		drbd_resume_io(device);
    961		return DS_ERROR;
    962	}
    963
    964	/* remember current offset and sizes */
    965	prev.last_agreed_sect = md->la_size_sect;
    966	prev.md_offset = md->md_offset;
    967	prev.al_offset = md->al_offset;
    968	prev.bm_offset = md->bm_offset;
    969	prev.md_size_sect = md->md_size_sect;
    970	prev.al_stripes = md->al_stripes;
    971	prev.al_stripe_size_4k = md->al_stripe_size_4k;
    972
    973	if (rs) {
    974		/* rs is non NULL if we should change the AL layout only */
    975		md->al_stripes = rs->al_stripes;
    976		md->al_stripe_size_4k = rs->al_stripe_size / 4;
    977		md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
    978	}
    979
    980	drbd_md_set_sector_offsets(device, device->ldev);
    981
    982	rcu_read_lock();
    983	u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
    984	rcu_read_unlock();
    985	size = drbd_new_dev_size(device, device->ldev, u_size, flags & DDSF_FORCED);
    986
    987	if (size < prev.last_agreed_sect) {
    988		if (rs && u_size == 0) {
    989			/* Remove "rs &&" later. This check should always be active, but
    990			   right now the receiver expects the permissive behavior */
    991			drbd_warn(device, "Implicit shrink not allowed. "
    992				 "Use --size=%llus for explicit shrink.\n",
    993				 (unsigned long long)size);
    994			rv = DS_ERROR_SHRINK;
    995		}
    996		if (u_size > size)
    997			rv = DS_ERROR_SPACE_MD;
    998		if (rv != DS_UNCHANGED)
    999			goto err_out;
   1000	}
   1001
   1002	if (get_capacity(device->vdisk) != size ||
   1003	    drbd_bm_capacity(device) != size) {
   1004		int err;
   1005		err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC));
   1006		if (unlikely(err)) {
   1007			/* currently there is only one error: ENOMEM! */
   1008			size = drbd_bm_capacity(device);
   1009			if (size == 0) {
   1010				drbd_err(device, "OUT OF MEMORY! "
   1011				    "Could not allocate bitmap!\n");
   1012			} else {
   1013				drbd_err(device, "BM resizing failed. "
   1014				    "Leaving size unchanged\n");
   1015			}
   1016			rv = DS_ERROR;
   1017		}
   1018		/* racy, see comments above. */
   1019		drbd_set_my_capacity(device, size);
   1020		md->la_size_sect = size;
   1021	}
   1022	if (rv <= DS_ERROR)
   1023		goto err_out;
   1024
   1025	la_size_changed = (prev.last_agreed_sect != md->la_size_sect);
   1026
   1027	md_moved = prev.md_offset    != md->md_offset
   1028		|| prev.md_size_sect != md->md_size_sect;
   1029
   1030	if (la_size_changed || md_moved || rs) {
   1031		u32 prev_flags;
   1032
   1033		/* We do some synchronous IO below, which may take some time.
   1034		 * Clear the timer, to avoid scary "timer expired!" messages,
   1035		 * "Superblock" is written out at least twice below, anyways. */
   1036		del_timer(&device->md_sync_timer);
   1037
   1038		/* We won't change the "al-extents" setting, we just may need
   1039		 * to move the on-disk location of the activity log ringbuffer.
   1040		 * Lock for transaction is good enough, it may well be "dirty"
   1041		 * or even "starving". */
   1042		wait_event(device->al_wait, lc_try_lock_for_transaction(device->act_log));
   1043
   1044		/* mark current on-disk bitmap and activity log as unreliable */
   1045		prev_flags = md->flags;
   1046		md->flags |= MDF_FULL_SYNC | MDF_AL_DISABLED;
   1047		drbd_md_write(device, buffer);
   1048
   1049		drbd_al_initialize(device, buffer);
   1050
   1051		drbd_info(device, "Writing the whole bitmap, %s\n",
   1052			 la_size_changed && md_moved ? "size changed and md moved" :
   1053			 la_size_changed ? "size changed" : "md moved");
   1054		/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
   1055		drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
   1056			       "size changed", BM_LOCKED_MASK);
   1057
   1058		/* on-disk bitmap and activity log is authoritative again
   1059		 * (unless there was an IO error meanwhile...) */
   1060		md->flags = prev_flags;
   1061		drbd_md_write(device, buffer);
   1062
   1063		if (rs)
   1064			drbd_info(device, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
   1065				  md->al_stripes, md->al_stripe_size_4k * 4);
   1066	}
   1067
   1068	if (size > prev.last_agreed_sect)
   1069		rv = prev.last_agreed_sect ? DS_GREW : DS_GREW_FROM_ZERO;
   1070	if (size < prev.last_agreed_sect)
   1071		rv = DS_SHRUNK;
   1072
   1073	if (0) {
   1074	err_out:
   1075		/* restore previous offset and sizes */
   1076		md->la_size_sect = prev.last_agreed_sect;
   1077		md->md_offset = prev.md_offset;
   1078		md->al_offset = prev.al_offset;
   1079		md->bm_offset = prev.bm_offset;
   1080		md->md_size_sect = prev.md_size_sect;
   1081		md->al_stripes = prev.al_stripes;
   1082		md->al_stripe_size_4k = prev.al_stripe_size_4k;
   1083		md->al_size_4k = (u64)prev.al_stripes * prev.al_stripe_size_4k;
   1084	}
   1085	lc_unlock(device->act_log);
   1086	wake_up(&device->al_wait);
   1087	drbd_md_put_buffer(device);
   1088	drbd_resume_io(device);
   1089
   1090	return rv;
   1091}
   1092
   1093sector_t
   1094drbd_new_dev_size(struct drbd_device *device, struct drbd_backing_dev *bdev,
   1095		  sector_t u_size, int assume_peer_has_space)
   1096{
   1097	sector_t p_size = device->p_size;   /* partner's disk size. */
   1098	sector_t la_size_sect = bdev->md.la_size_sect; /* last agreed size. */
   1099	sector_t m_size; /* my size */
   1100	sector_t size = 0;
   1101
   1102	m_size = drbd_get_max_capacity(bdev);
   1103
   1104	if (device->state.conn < C_CONNECTED && assume_peer_has_space) {
   1105		drbd_warn(device, "Resize while not connected was forced by the user!\n");
   1106		p_size = m_size;
   1107	}
   1108
   1109	if (p_size && m_size) {
   1110		size = min_t(sector_t, p_size, m_size);
   1111	} else {
   1112		if (la_size_sect) {
   1113			size = la_size_sect;
   1114			if (m_size && m_size < size)
   1115				size = m_size;
   1116			if (p_size && p_size < size)
   1117				size = p_size;
   1118		} else {
   1119			if (m_size)
   1120				size = m_size;
   1121			if (p_size)
   1122				size = p_size;
   1123		}
   1124	}
   1125
   1126	if (size == 0)
   1127		drbd_err(device, "Both nodes diskless!\n");
   1128
   1129	if (u_size) {
   1130		if (u_size > size)
   1131			drbd_err(device, "Requested disk size is too big (%lu > %lu)\n",
   1132			    (unsigned long)u_size>>1, (unsigned long)size>>1);
   1133		else
   1134			size = u_size;
   1135	}
   1136
   1137	return size;
   1138}
   1139
   1140/*
   1141 * drbd_check_al_size() - Ensures that the AL is of the right size
   1142 * @device:	DRBD device.
   1143 *
   1144 * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation
   1145 * failed, and 0 on success. You should call drbd_md_sync() after you called
   1146 * this function.
   1147 */
   1148static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
   1149{
   1150	struct lru_cache *n, *t;
   1151	struct lc_element *e;
   1152	unsigned int in_use;
   1153	int i;
   1154
   1155	if (device->act_log &&
   1156	    device->act_log->nr_elements == dc->al_extents)
   1157		return 0;
   1158
   1159	in_use = 0;
   1160	t = device->act_log;
   1161	n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
   1162		dc->al_extents, sizeof(struct lc_element), 0);
   1163
   1164	if (n == NULL) {
   1165		drbd_err(device, "Cannot allocate act_log lru!\n");
   1166		return -ENOMEM;
   1167	}
   1168	spin_lock_irq(&device->al_lock);
   1169	if (t) {
   1170		for (i = 0; i < t->nr_elements; i++) {
   1171			e = lc_element_by_index(t, i);
   1172			if (e->refcnt)
   1173				drbd_err(device, "refcnt(%d)==%d\n",
   1174				    e->lc_number, e->refcnt);
   1175			in_use += e->refcnt;
   1176		}
   1177	}
   1178	if (!in_use)
   1179		device->act_log = n;
   1180	spin_unlock_irq(&device->al_lock);
   1181	if (in_use) {
   1182		drbd_err(device, "Activity log still in use!\n");
   1183		lc_destroy(n);
   1184		return -EBUSY;
   1185	} else {
   1186		lc_destroy(t);
   1187	}
   1188	drbd_md_mark_dirty(device); /* we changed device->act_log->nr_elements */
   1189	return 0;
   1190}
   1191
   1192static void blk_queue_discard_granularity(struct request_queue *q, unsigned int granularity)
   1193{
   1194	q->limits.discard_granularity = granularity;
   1195}
   1196
   1197static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection)
   1198{
   1199	/* when we introduced REQ_WRITE_SAME support, we also bumped
   1200	 * our maximum supported batch bio size used for discards. */
   1201	if (connection->agreed_features & DRBD_FF_WSAME)
   1202		return DRBD_MAX_BBIO_SECTORS;
   1203	/* before, with DRBD <= 8.4.6, we only allowed up to one AL_EXTENT_SIZE. */
   1204	return AL_EXTENT_SIZE >> 9;
   1205}
   1206
   1207static void decide_on_discard_support(struct drbd_device *device,
   1208		struct drbd_backing_dev *bdev)
   1209{
   1210	struct drbd_connection *connection =
   1211		first_peer_device(device)->connection;
   1212	struct request_queue *q = device->rq_queue;
   1213
   1214	if (bdev && !bdev_max_discard_sectors(bdev->backing_bdev))
   1215		goto not_supported;
   1216
   1217	if (connection->cstate >= C_CONNECTED &&
   1218	    !(connection->agreed_features & DRBD_FF_TRIM)) {
   1219		drbd_info(connection,
   1220			"peer DRBD too old, does not support TRIM: disabling discards\n");
   1221		goto not_supported;
   1222	}
   1223
   1224	/*
   1225	 * We don't care for the granularity, really.
   1226	 *
   1227	 * Stacking limits below should fix it for the local device.  Whether or
   1228	 * not it is a suitable granularity on the remote device is not our
   1229	 * problem, really. If you care, you need to use devices with similar
   1230	 * topology on all peers.
   1231	 */
   1232	blk_queue_discard_granularity(q, 512);
   1233	q->limits.max_discard_sectors = drbd_max_discard_sectors(connection);
   1234	q->limits.max_write_zeroes_sectors =
   1235		drbd_max_discard_sectors(connection);
   1236	return;
   1237
   1238not_supported:
   1239	blk_queue_discard_granularity(q, 0);
   1240	q->limits.max_discard_sectors = 0;
   1241	q->limits.max_write_zeroes_sectors = 0;
   1242}
   1243
   1244static void fixup_write_zeroes(struct drbd_device *device, struct request_queue *q)
   1245{
   1246	/* Fixup max_write_zeroes_sectors after blk_stack_limits():
   1247	 * if we can handle "zeroes" efficiently on the protocol,
   1248	 * we want to do that, even if our backend does not announce
   1249	 * max_write_zeroes_sectors itself. */
   1250	struct drbd_connection *connection = first_peer_device(device)->connection;
   1251	/* If the peer announces WZEROES support, use it.  Otherwise, rather
   1252	 * send explicit zeroes than rely on some discard-zeroes-data magic. */
   1253	if (connection->agreed_features & DRBD_FF_WZEROES)
   1254		q->limits.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS;
   1255	else
   1256		q->limits.max_write_zeroes_sectors = 0;
   1257}
   1258
   1259static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev,
   1260				   unsigned int max_bio_size, struct o_qlim *o)
   1261{
   1262	struct request_queue * const q = device->rq_queue;
   1263	unsigned int max_hw_sectors = max_bio_size >> 9;
   1264	unsigned int max_segments = 0;
   1265	struct request_queue *b = NULL;
   1266	struct disk_conf *dc;
   1267
   1268	if (bdev) {
   1269		b = bdev->backing_bdev->bd_disk->queue;
   1270
   1271		max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
   1272		rcu_read_lock();
   1273		dc = rcu_dereference(device->ldev->disk_conf);
   1274		max_segments = dc->max_bio_bvecs;
   1275		rcu_read_unlock();
   1276
   1277		blk_set_stacking_limits(&q->limits);
   1278	}
   1279
   1280	blk_queue_max_hw_sectors(q, max_hw_sectors);
   1281	/* This is the workaround for "bio would need to, but cannot, be split" */
   1282	blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
   1283	blk_queue_segment_boundary(q, PAGE_SIZE-1);
   1284	decide_on_discard_support(device, bdev);
   1285
   1286	if (b) {
   1287		blk_stack_limits(&q->limits, &b->limits, 0);
   1288		disk_update_readahead(device->vdisk);
   1289	}
   1290	fixup_write_zeroes(device, q);
   1291}
   1292
   1293void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o)
   1294{
   1295	unsigned int now, new, local, peer;
   1296
   1297	now = queue_max_hw_sectors(device->rq_queue) << 9;
   1298	local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */
   1299	peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */
   1300
   1301	if (bdev) {
   1302		local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9;
   1303		device->local_max_bio_size = local;
   1304	}
   1305	local = min(local, DRBD_MAX_BIO_SIZE);
   1306
   1307	/* We may ignore peer limits if the peer is modern enough.
   1308	   From 8.3.8 onwards, the peer can use multiple
   1309	   BIOs for a single peer_request */
   1310	if (device->state.conn >= C_WF_REPORT_PARAMS) {
   1311		if (first_peer_device(device)->connection->agreed_pro_version < 94)
   1312			peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
   1313			/* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
   1314		else if (first_peer_device(device)->connection->agreed_pro_version == 94)
   1315			peer = DRBD_MAX_SIZE_H80_PACKET;
   1316		else if (first_peer_device(device)->connection->agreed_pro_version < 100)
   1317			peer = DRBD_MAX_BIO_SIZE_P95;  /* drbd 8.3.8 onwards, before 8.4.0 */
   1318		else
   1319			peer = DRBD_MAX_BIO_SIZE;
   1320
   1321		/* We may later detach and re-attach on a disconnected Primary.
   1322		 * Avoid this setting to jump back in that case.
   1323		 * We want to store what we know the peer DRBD can handle,
   1324		 * not what the peer IO backend can handle. */
   1325		if (peer > device->peer_max_bio_size)
   1326			device->peer_max_bio_size = peer;
   1327	}
   1328	new = min(local, peer);
   1329
   1330	if (device->state.role == R_PRIMARY && new < now)
   1331		drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now);
   1332
   1333	if (new != now)
   1334		drbd_info(device, "max BIO size = %u\n", new);
   1335
   1336	drbd_setup_queue_param(device, bdev, new, o);
   1337}
   1338
   1339/* Starts the worker thread */
   1340static void conn_reconfig_start(struct drbd_connection *connection)
   1341{
   1342	drbd_thread_start(&connection->worker);
   1343	drbd_flush_workqueue(&connection->sender_work);
   1344}
   1345
   1346/* if still unconfigured, stops worker again. */
   1347static void conn_reconfig_done(struct drbd_connection *connection)
   1348{
   1349	bool stop_threads;
   1350	spin_lock_irq(&connection->resource->req_lock);
   1351	stop_threads = conn_all_vols_unconf(connection) &&
   1352		connection->cstate == C_STANDALONE;
   1353	spin_unlock_irq(&connection->resource->req_lock);
   1354	if (stop_threads) {
   1355		/* ack_receiver thread and ack_sender workqueue are implicitly
   1356		 * stopped by receiver in conn_disconnect() */
   1357		drbd_thread_stop(&connection->receiver);
   1358		drbd_thread_stop(&connection->worker);
   1359	}
   1360}
   1361
   1362/* Make sure IO is suspended before calling this function. */
   1363static void drbd_suspend_al(struct drbd_device *device)
   1364{
   1365	int s = 0;
   1366
   1367	if (!lc_try_lock(device->act_log)) {
   1368		drbd_warn(device, "Failed to lock al in drbd_suspend_al()\n");
   1369		return;
   1370	}
   1371
   1372	drbd_al_shrink(device);
   1373	spin_lock_irq(&device->resource->req_lock);
   1374	if (device->state.conn < C_CONNECTED)
   1375		s = !test_and_set_bit(AL_SUSPENDED, &device->flags);
   1376	spin_unlock_irq(&device->resource->req_lock);
   1377	lc_unlock(device->act_log);
   1378
   1379	if (s)
   1380		drbd_info(device, "Suspended AL updates\n");
   1381}
   1382
   1383
   1384static bool should_set_defaults(struct genl_info *info)
   1385{
   1386	unsigned flags = ((struct drbd_genlmsghdr*)info->userhdr)->flags;
   1387	return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS);
   1388}
   1389
   1390static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev)
   1391{
   1392	/* This is limited by 16 bit "slot" numbers,
   1393	 * and by available on-disk context storage.
   1394	 *
   1395	 * Also (u16)~0 is special (denotes a "free" extent).
   1396	 *
   1397	 * One transaction occupies one 4kB on-disk block,
   1398	 * we have n such blocks in the on disk ring buffer,
   1399	 * the "current" transaction may fail (n-1),
   1400	 * and there is 919 slot numbers context information per transaction.
   1401	 *
   1402	 * 72 transaction blocks amounts to more than 2**16 context slots,
   1403	 * so cap there first.
   1404	 */
   1405	const unsigned int max_al_nr = DRBD_AL_EXTENTS_MAX;
   1406	const unsigned int sufficient_on_disk =
   1407		(max_al_nr + AL_CONTEXT_PER_TRANSACTION -1)
   1408		/AL_CONTEXT_PER_TRANSACTION;
   1409
   1410	unsigned int al_size_4k = bdev->md.al_size_4k;
   1411
   1412	if (al_size_4k > sufficient_on_disk)
   1413		return max_al_nr;
   1414
   1415	return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION;
   1416}
   1417
   1418static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b)
   1419{
   1420	return	a->disk_barrier != b->disk_barrier ||
   1421		a->disk_flushes != b->disk_flushes ||
   1422		a->disk_drain != b->disk_drain;
   1423}
   1424
   1425static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *disk_conf,
   1426			       struct drbd_backing_dev *nbc)
   1427{
   1428	struct block_device *bdev = nbc->backing_bdev;
   1429
   1430	if (disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
   1431		disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
   1432	if (disk_conf->al_extents > drbd_al_extents_max(nbc))
   1433		disk_conf->al_extents = drbd_al_extents_max(nbc);
   1434
   1435	if (!bdev_max_discard_sectors(bdev)) {
   1436		if (disk_conf->rs_discard_granularity) {
   1437			disk_conf->rs_discard_granularity = 0; /* disable feature */
   1438			drbd_info(device, "rs_discard_granularity feature disabled\n");
   1439		}
   1440	}
   1441
   1442	if (disk_conf->rs_discard_granularity) {
   1443		int orig_value = disk_conf->rs_discard_granularity;
   1444		sector_t discard_size = bdev_max_discard_sectors(bdev) << 9;
   1445		unsigned int discard_granularity = bdev_discard_granularity(bdev);
   1446		int remainder;
   1447
   1448		if (discard_granularity > disk_conf->rs_discard_granularity)
   1449			disk_conf->rs_discard_granularity = discard_granularity;
   1450
   1451		remainder = disk_conf->rs_discard_granularity %
   1452				discard_granularity;
   1453		disk_conf->rs_discard_granularity += remainder;
   1454
   1455		if (disk_conf->rs_discard_granularity > discard_size)
   1456			disk_conf->rs_discard_granularity = discard_size;
   1457
   1458		if (disk_conf->rs_discard_granularity != orig_value)
   1459			drbd_info(device, "rs_discard_granularity changed to %d\n",
   1460				  disk_conf->rs_discard_granularity);
   1461	}
   1462}
   1463
   1464static int disk_opts_check_al_size(struct drbd_device *device, struct disk_conf *dc)
   1465{
   1466	int err = -EBUSY;
   1467
   1468	if (device->act_log &&
   1469	    device->act_log->nr_elements == dc->al_extents)
   1470		return 0;
   1471
   1472	drbd_suspend_io(device);
   1473	/* If IO completion is currently blocked, we would likely wait
   1474	 * "forever" for the activity log to become unused. So we don't. */
   1475	if (atomic_read(&device->ap_bio_cnt))
   1476		goto out;
   1477
   1478	wait_event(device->al_wait, lc_try_lock(device->act_log));
   1479	drbd_al_shrink(device);
   1480	err = drbd_check_al_size(device, dc);
   1481	lc_unlock(device->act_log);
   1482	wake_up(&device->al_wait);
   1483out:
   1484	drbd_resume_io(device);
   1485	return err;
   1486}
   1487
   1488int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
   1489{
   1490	struct drbd_config_context adm_ctx;
   1491	enum drbd_ret_code retcode;
   1492	struct drbd_device *device;
   1493	struct disk_conf *new_disk_conf, *old_disk_conf;
   1494	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
   1495	int err;
   1496	unsigned int fifo_size;
   1497
   1498	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
   1499	if (!adm_ctx.reply_skb)
   1500		return retcode;
   1501	if (retcode != NO_ERROR)
   1502		goto finish;
   1503
   1504	device = adm_ctx.device;
   1505	mutex_lock(&adm_ctx.resource->adm_mutex);
   1506
   1507	/* we also need a disk
   1508	 * to change the options on */
   1509	if (!get_ldev(device)) {
   1510		retcode = ERR_NO_DISK;
   1511		goto out;
   1512	}
   1513
   1514	new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
   1515	if (!new_disk_conf) {
   1516		retcode = ERR_NOMEM;
   1517		goto fail;
   1518	}
   1519
   1520	mutex_lock(&device->resource->conf_update);
   1521	old_disk_conf = device->ldev->disk_conf;
   1522	*new_disk_conf = *old_disk_conf;
   1523	if (should_set_defaults(info))
   1524		set_disk_conf_defaults(new_disk_conf);
   1525
   1526	err = disk_conf_from_attrs_for_change(new_disk_conf, info);
   1527	if (err && err != -ENOMSG) {
   1528		retcode = ERR_MANDATORY_TAG;
   1529		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
   1530		goto fail_unlock;
   1531	}
   1532
   1533	if (!expect(new_disk_conf->resync_rate >= 1))
   1534		new_disk_conf->resync_rate = 1;
   1535
   1536	sanitize_disk_conf(device, new_disk_conf, device->ldev);
   1537
   1538	if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
   1539		new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
   1540
   1541	fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
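	/* Sketch of the units, assuming SLEEP_TIME is HZ/10 and c_plan_ahead
	 * is configured in 0.1s steps (both defined elsewhere in drbd): the
	 * expression then reduces to fifo_size == c_plan_ahead, i.e. one fifo
	 * slot per 0.1s of planning horizon. */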
   1542	if (fifo_size != device->rs_plan_s->size) {
   1543		new_plan = fifo_alloc(fifo_size);
   1544		if (!new_plan) {
    1545			drbd_err(device, "kmalloc of fifo_buffer failed\n");
   1546			retcode = ERR_NOMEM;
   1547			goto fail_unlock;
   1548		}
   1549	}
   1550
   1551	err = disk_opts_check_al_size(device, new_disk_conf);
   1552	if (err) {
   1553		/* Could be just "busy". Ignore?
   1554		 * Introduce dedicated error code? */
   1555		drbd_msg_put_info(adm_ctx.reply_skb,
   1556			"Try again without changing current al-extents setting");
   1557		retcode = ERR_NOMEM;
   1558		goto fail_unlock;
   1559	}
   1560
   1561	lock_all_resources();
   1562	retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
   1563	if (retcode == NO_ERROR) {
   1564		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
   1565		drbd_resync_after_changed(device);
   1566	}
   1567	unlock_all_resources();
   1568
   1569	if (retcode != NO_ERROR)
   1570		goto fail_unlock;
   1571
   1572	if (new_plan) {
   1573		old_plan = device->rs_plan_s;
   1574		rcu_assign_pointer(device->rs_plan_s, new_plan);
   1575	}
   1576
   1577	mutex_unlock(&device->resource->conf_update);
   1578
   1579	if (new_disk_conf->al_updates)
   1580		device->ldev->md.flags &= ~MDF_AL_DISABLED;
   1581	else
   1582		device->ldev->md.flags |= MDF_AL_DISABLED;
   1583
   1584	if (new_disk_conf->md_flushes)
   1585		clear_bit(MD_NO_FUA, &device->flags);
   1586	else
   1587		set_bit(MD_NO_FUA, &device->flags);
   1588
   1589	if (write_ordering_changed(old_disk_conf, new_disk_conf))
   1590		drbd_bump_write_ordering(device->resource, NULL, WO_BDEV_FLUSH);
   1591
   1592	if (old_disk_conf->discard_zeroes_if_aligned !=
   1593	    new_disk_conf->discard_zeroes_if_aligned)
   1594		drbd_reconsider_queue_parameters(device, device->ldev, NULL);
   1595
   1596	drbd_md_sync(device);
   1597
   1598	if (device->state.conn >= C_CONNECTED) {
   1599		struct drbd_peer_device *peer_device;
   1600
   1601		for_each_peer_device(peer_device, device)
   1602			drbd_send_sync_param(peer_device);
   1603	}
   1604
   1605	kvfree_rcu(old_disk_conf);
   1606	kfree(old_plan);
   1607	mod_timer(&device->request_timer, jiffies + HZ);
   1608	goto success;
   1609
   1610fail_unlock:
   1611	mutex_unlock(&device->resource->conf_update);
   1612 fail:
   1613	kfree(new_disk_conf);
   1614	kfree(new_plan);
   1615success:
   1616	put_ldev(device);
   1617 out:
   1618	mutex_unlock(&adm_ctx.resource->adm_mutex);
   1619 finish:
   1620	drbd_adm_finish(&adm_ctx, info, retcode);
   1621	return 0;
   1622}
   1623
   1624static struct block_device *open_backing_dev(struct drbd_device *device,
   1625		const char *bdev_path, void *claim_ptr, bool do_bd_link)
   1626{
   1627	struct block_device *bdev;
   1628	int err = 0;
   1629
   1630	bdev = blkdev_get_by_path(bdev_path,
   1631				  FMODE_READ | FMODE_WRITE | FMODE_EXCL, claim_ptr);
   1632	if (IS_ERR(bdev)) {
   1633		drbd_err(device, "open(\"%s\") failed with %ld\n",
   1634				bdev_path, PTR_ERR(bdev));
   1635		return bdev;
   1636	}
   1637
   1638	if (!do_bd_link)
   1639		return bdev;
   1640
   1641	err = bd_link_disk_holder(bdev, device->vdisk);
   1642	if (err) {
   1643		blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
   1644		drbd_err(device, "bd_link_disk_holder(\"%s\", ...) failed with %d\n",
   1645				bdev_path, err);
   1646		bdev = ERR_PTR(err);
   1647	}
   1648	return bdev;
   1649}
   1650
   1651static int open_backing_devices(struct drbd_device *device,
   1652		struct disk_conf *new_disk_conf,
   1653		struct drbd_backing_dev *nbc)
   1654{
   1655	struct block_device *bdev;
   1656
   1657	bdev = open_backing_dev(device, new_disk_conf->backing_dev, device, true);
   1658	if (IS_ERR(bdev))
   1659		return ERR_OPEN_DISK;
   1660	nbc->backing_bdev = bdev;
   1661
   1662	/*
   1663	 * meta_dev_idx >= 0: external fixed size, possibly multiple
   1664	 * drbd sharing one meta device.  TODO in that case, paranoia
   1665	 * check that [md_bdev, meta_dev_idx] is not yet used by some
   1666	 * other drbd minor!  (if you use drbd.conf + drbdadm, that
   1667	 * should check it for you already; but if you don't, or
   1668	 * someone fooled it, we need to double check here)
   1669	 */
   1670	bdev = open_backing_dev(device, new_disk_conf->meta_dev,
   1671		/* claim ptr: device, if claimed exclusively; shared drbd_m_holder,
   1672		 * if potentially shared with other drbd minors */
   1673			(new_disk_conf->meta_dev_idx < 0) ? (void*)device : (void*)drbd_m_holder,
   1674		/* avoid double bd_claim_by_disk() for the same (source,target) tuple,
   1675		 * as would happen with internal metadata. */
   1676			(new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_FLEX_INT &&
   1677			 new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_INTERNAL));
   1678	if (IS_ERR(bdev))
   1679		return ERR_OPEN_MD_DISK;
   1680	nbc->md_bdev = bdev;
   1681	return NO_ERROR;
   1682}
   1683
   1684static void close_backing_dev(struct drbd_device *device, struct block_device *bdev,
   1685	bool do_bd_unlink)
   1686{
   1687	if (!bdev)
   1688		return;
   1689	if (do_bd_unlink)
   1690		bd_unlink_disk_holder(bdev, device->vdisk);
   1691	blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
   1692}
   1693
   1694void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev)
   1695{
   1696	if (ldev == NULL)
   1697		return;
   1698
   1699	close_backing_dev(device, ldev->md_bdev, ldev->md_bdev != ldev->backing_bdev);
   1700	close_backing_dev(device, ldev->backing_bdev, true);
   1701
   1702	kfree(ldev->disk_conf);
   1703	kfree(ldev);
   1704}
   1705
   1706int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
   1707{
   1708	struct drbd_config_context adm_ctx;
   1709	struct drbd_device *device;
   1710	struct drbd_peer_device *peer_device;
   1711	struct drbd_connection *connection;
   1712	int err;
   1713	enum drbd_ret_code retcode;
   1714	enum determine_dev_size dd;
   1715	sector_t max_possible_sectors;
   1716	sector_t min_md_device_sectors;
   1717	struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
   1718	struct disk_conf *new_disk_conf = NULL;
   1719	struct lru_cache *resync_lru = NULL;
   1720	struct fifo_buffer *new_plan = NULL;
   1721	union drbd_state ns, os;
   1722	enum drbd_state_rv rv;
   1723	struct net_conf *nc;
   1724
   1725	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
   1726	if (!adm_ctx.reply_skb)
   1727		return retcode;
   1728	if (retcode != NO_ERROR)
   1729		goto finish;
   1730
   1731	device = adm_ctx.device;
   1732	mutex_lock(&adm_ctx.resource->adm_mutex);
   1733	peer_device = first_peer_device(device);
   1734	connection = peer_device->connection;
   1735	conn_reconfig_start(connection);
   1736
   1737	/* if you want to reconfigure, please tear down first */
   1738	if (device->state.disk > D_DISKLESS) {
   1739		retcode = ERR_DISK_CONFIGURED;
   1740		goto fail;
   1741	}
   1742	/* It may just now have detached because of IO error.  Make sure
    1743	 * drbd_ldev_destroy is done already; we may end up here very fast,
   1744	 * e.g. if someone calls attach from the on-io-error handler,
   1745	 * to realize a "hot spare" feature (not that I'd recommend that) */
   1746	wait_event(device->misc_wait, !test_bit(GOING_DISKLESS, &device->flags));
   1747
   1748	/* make sure there is no leftover from previous force-detach attempts */
   1749	clear_bit(FORCE_DETACH, &device->flags);
   1750	clear_bit(WAS_IO_ERROR, &device->flags);
   1751	clear_bit(WAS_READ_ERROR, &device->flags);
   1752
   1753	/* and no leftover from previously aborted resync or verify, either */
   1754	device->rs_total = 0;
   1755	device->rs_failed = 0;
   1756	atomic_set(&device->rs_pending_cnt, 0);
   1757
   1758	/* allocation not in the IO path, drbdsetup context */
   1759	nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
   1760	if (!nbc) {
   1761		retcode = ERR_NOMEM;
   1762		goto fail;
   1763	}
   1764	spin_lock_init(&nbc->md.uuid_lock);
   1765
   1766	new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
   1767	if (!new_disk_conf) {
   1768		retcode = ERR_NOMEM;
   1769		goto fail;
   1770	}
   1771	nbc->disk_conf = new_disk_conf;
   1772
   1773	set_disk_conf_defaults(new_disk_conf);
   1774	err = disk_conf_from_attrs(new_disk_conf, info);
   1775	if (err) {
   1776		retcode = ERR_MANDATORY_TAG;
   1777		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
   1778		goto fail;
   1779	}
   1780
   1781	if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
   1782		new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
   1783
   1784	new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ);
   1785	if (!new_plan) {
   1786		retcode = ERR_NOMEM;
   1787		goto fail;
   1788	}
   1789
   1790	if (new_disk_conf->meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
   1791		retcode = ERR_MD_IDX_INVALID;
   1792		goto fail;
   1793	}
   1794
   1795	rcu_read_lock();
   1796	nc = rcu_dereference(connection->net_conf);
   1797	if (nc) {
   1798		if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
   1799			rcu_read_unlock();
   1800			retcode = ERR_STONITH_AND_PROT_A;
   1801			goto fail;
   1802		}
   1803	}
   1804	rcu_read_unlock();
   1805
   1806	retcode = open_backing_devices(device, new_disk_conf, nbc);
   1807	if (retcode != NO_ERROR)
   1808		goto fail;
   1809
   1810	if ((nbc->backing_bdev == nbc->md_bdev) !=
   1811	    (new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
   1812	     new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
   1813		retcode = ERR_MD_IDX_INVALID;
   1814		goto fail;
   1815	}
   1816
   1817	resync_lru = lc_create("resync", drbd_bm_ext_cache,
   1818			1, 61, sizeof(struct bm_extent),
   1819			offsetof(struct bm_extent, lce));
   1820	if (!resync_lru) {
   1821		retcode = ERR_NOMEM;
   1822		goto fail;
   1823	}
   1824
   1825	/* Read our meta data super block early.
   1826	 * This also sets other on-disk offsets. */
   1827	retcode = drbd_md_read(device, nbc);
   1828	if (retcode != NO_ERROR)
   1829		goto fail;
   1830
   1831	sanitize_disk_conf(device, new_disk_conf, nbc);
   1832
   1833	if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) {
   1834		drbd_err(device, "max capacity %llu smaller than disk size %llu\n",
   1835			(unsigned long long) drbd_get_max_capacity(nbc),
   1836			(unsigned long long) new_disk_conf->disk_size);
   1837		retcode = ERR_DISK_TOO_SMALL;
   1838		goto fail;
   1839	}
   1840
   1841	if (new_disk_conf->meta_dev_idx < 0) {
   1842		max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
   1843		/* at least one MB, otherwise it does not make sense */
   1844		min_md_device_sectors = (2<<10);
   1845	} else {
   1846		max_possible_sectors = DRBD_MAX_SECTORS;
   1847		min_md_device_sectors = MD_128MB_SECT * (new_disk_conf->meta_dev_idx + 1);
   1848	}
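	/* Rough numbers, for orientation: 2<<10 == 2048 sectors, which at
	 * 512 bytes per sector is the "one MB" mentioned above; for external
	 * meta data, MD_128MB_SECT (presumably 128MB worth of sectors, going
	 * by its name) is required per index slot up to and including
	 * meta_dev_idx. */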
   1849
   1850	if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
   1851		retcode = ERR_MD_DISK_TOO_SMALL;
   1852		drbd_warn(device, "refusing attach: md-device too small, "
   1853		     "at least %llu sectors needed for this meta-disk type\n",
   1854		     (unsigned long long) min_md_device_sectors);
   1855		goto fail;
   1856	}
   1857
   1858	/* Make sure the new disk is big enough
   1859	 * (we may currently be R_PRIMARY with no local disk...) */
   1860	if (drbd_get_max_capacity(nbc) < get_capacity(device->vdisk)) {
   1861		retcode = ERR_DISK_TOO_SMALL;
   1862		goto fail;
   1863	}
   1864
   1865	nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
   1866
   1867	if (nbc->known_size > max_possible_sectors) {
   1868		drbd_warn(device, "==> truncating very big lower level device "
   1869			"to currently maximum possible %llu sectors <==\n",
   1870			(unsigned long long) max_possible_sectors);
   1871		if (new_disk_conf->meta_dev_idx >= 0)
   1872			drbd_warn(device, "==>> using internal or flexible "
   1873				      "meta data may help <<==\n");
   1874	}
   1875
   1876	drbd_suspend_io(device);
   1877	/* also wait for the last barrier ack. */
   1878	/* FIXME see also https://daiquiri.linbit/cgi-bin/bugzilla/show_bug.cgi?id=171
   1879	 * We need a way to either ignore barrier acks for barriers sent before a device
   1880	 * was attached, or a way to wait for all pending barrier acks to come in.
   1881	 * As barriers are counted per resource,
   1882	 * we'd need to suspend io on all devices of a resource.
   1883	 */
   1884	wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device));
   1885	/* and for any other previously queued work */
   1886	drbd_flush_workqueue(&connection->sender_work);
   1887
   1888	rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE);
   1889	retcode = (enum drbd_ret_code)rv;
   1890	drbd_resume_io(device);
   1891	if (rv < SS_SUCCESS)
   1892		goto fail;
   1893
   1894	if (!get_ldev_if_state(device, D_ATTACHING))
   1895		goto force_diskless;
   1896
   1897	if (!device->bitmap) {
   1898		if (drbd_bm_init(device)) {
   1899			retcode = ERR_NOMEM;
   1900			goto force_diskless_dec;
   1901		}
   1902	}
   1903
   1904	if (device->state.pdsk != D_UP_TO_DATE && device->ed_uuid &&
   1905	    (device->state.role == R_PRIMARY || device->state.peer == R_PRIMARY) &&
    1906	    (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
   1907		drbd_err(device, "Can only attach to data with current UUID=%016llX\n",
   1908		    (unsigned long long)device->ed_uuid);
   1909		retcode = ERR_DATA_NOT_CURRENT;
   1910		goto force_diskless_dec;
   1911	}
   1912
   1913	/* Since we are diskless, fix the activity log first... */
   1914	if (drbd_check_al_size(device, new_disk_conf)) {
   1915		retcode = ERR_NOMEM;
   1916		goto force_diskless_dec;
   1917	}
   1918
    1919	/* Prevent shrinking of consistent devices! */
   1920	{
   1921	unsigned long long nsz = drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0);
   1922	unsigned long long eff = nbc->md.la_size_sect;
   1923	if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && nsz < eff) {
   1924		if (nsz == nbc->disk_conf->disk_size) {
   1925			drbd_warn(device, "truncating a consistent device during attach (%llu < %llu)\n", nsz, eff);
   1926		} else {
   1927			drbd_warn(device, "refusing to truncate a consistent device (%llu < %llu)\n", nsz, eff);
   1928			drbd_msg_sprintf_info(adm_ctx.reply_skb,
   1929				"To-be-attached device has last effective > current size, and is consistent\n"
   1930				"(%llu > %llu sectors). Refusing to attach.", eff, nsz);
   1931			retcode = ERR_IMPLICIT_SHRINK;
   1932			goto force_diskless_dec;
   1933		}
   1934	}
   1935	}
   1936
   1937	lock_all_resources();
   1938	retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
   1939	if (retcode != NO_ERROR) {
   1940		unlock_all_resources();
   1941		goto force_diskless_dec;
   1942	}
   1943
   1944	/* Reset the "barriers don't work" bits here, then force meta data to
   1945	 * be written, to ensure we determine if barriers are supported. */
   1946	if (new_disk_conf->md_flushes)
   1947		clear_bit(MD_NO_FUA, &device->flags);
   1948	else
   1949		set_bit(MD_NO_FUA, &device->flags);
   1950
   1951	/* Point of no return reached.
   1952	 * Devices and memory are no longer released by error cleanup below.
    1953	 * Now the device takes over responsibility, and the state engine should
   1954	 * clean it up somewhere.  */
   1955	D_ASSERT(device, device->ldev == NULL);
   1956	device->ldev = nbc;
   1957	device->resync = resync_lru;
   1958	device->rs_plan_s = new_plan;
   1959	nbc = NULL;
   1960	resync_lru = NULL;
   1961	new_disk_conf = NULL;
   1962	new_plan = NULL;
   1963
   1964	drbd_resync_after_changed(device);
   1965	drbd_bump_write_ordering(device->resource, device->ldev, WO_BDEV_FLUSH);
   1966	unlock_all_resources();
   1967
   1968	if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
   1969		set_bit(CRASHED_PRIMARY, &device->flags);
   1970	else
   1971		clear_bit(CRASHED_PRIMARY, &device->flags);
   1972
   1973	if (drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
   1974	    !(device->state.role == R_PRIMARY && device->resource->susp_nod))
   1975		set_bit(CRASHED_PRIMARY, &device->flags);
   1976
   1977	device->send_cnt = 0;
   1978	device->recv_cnt = 0;
   1979	device->read_cnt = 0;
   1980	device->writ_cnt = 0;
   1981
   1982	drbd_reconsider_queue_parameters(device, device->ldev, NULL);
   1983
   1984	/* If I am currently not R_PRIMARY,
   1985	 * but meta data primary indicator is set,
   1986	 * I just now recover from a hard crash,
   1987	 * and have been R_PRIMARY before that crash.
   1988	 *
   1989	 * Now, if I had no connection before that crash
   1990	 * (have been degraded R_PRIMARY), chances are that
   1991	 * I won't find my peer now either.
   1992	 *
   1993	 * In that case, and _only_ in that case,
   1994	 * we use the degr-wfc-timeout instead of the default,
   1995	 * so we can automatically recover from a crash of a
   1996	 * degraded but active "cluster" after a certain timeout.
   1997	 */
   1998	clear_bit(USE_DEGR_WFC_T, &device->flags);
   1999	if (device->state.role != R_PRIMARY &&
   2000	     drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
   2001	    !drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND))
   2002		set_bit(USE_DEGR_WFC_T, &device->flags);
   2003
   2004	dd = drbd_determine_dev_size(device, 0, NULL);
   2005	if (dd <= DS_ERROR) {
   2006		retcode = ERR_NOMEM_BITMAP;
   2007		goto force_diskless_dec;
   2008	} else if (dd == DS_GREW)
   2009		set_bit(RESYNC_AFTER_NEG, &device->flags);
   2010
   2011	if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ||
   2012	    (test_bit(CRASHED_PRIMARY, &device->flags) &&
   2013	     drbd_md_test_flag(device->ldev, MDF_AL_DISABLED))) {
   2014		drbd_info(device, "Assuming that all blocks are out of sync "
   2015		     "(aka FullSync)\n");
   2016		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
   2017			"set_n_write from attaching", BM_LOCKED_MASK)) {
   2018			retcode = ERR_IO_MD_DISK;
   2019			goto force_diskless_dec;
   2020		}
   2021	} else {
   2022		if (drbd_bitmap_io(device, &drbd_bm_read,
   2023			"read from attaching", BM_LOCKED_MASK)) {
   2024			retcode = ERR_IO_MD_DISK;
   2025			goto force_diskless_dec;
   2026		}
   2027	}
   2028
   2029	if (_drbd_bm_total_weight(device) == drbd_bm_bits(device))
   2030		drbd_suspend_al(device); /* IO is still suspended here... */
   2031
   2032	spin_lock_irq(&device->resource->req_lock);
   2033	os = drbd_read_state(device);
   2034	ns = os;
    2035	/* If MDF_CONSISTENT is not set, go into D_INCONSISTENT disk state;
    2036	   otherwise investigate MDF_WAS_UP_TO_DATE...
    2037	   If MDF_WAS_UP_TO_DATE is not set, go into D_OUTDATED disk state,
    2038	   otherwise into D_CONSISTENT state.
   2039	*/
   2040	if (drbd_md_test_flag(device->ldev, MDF_CONSISTENT)) {
   2041		if (drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE))
   2042			ns.disk = D_CONSISTENT;
   2043		else
   2044			ns.disk = D_OUTDATED;
   2045	} else {
   2046		ns.disk = D_INCONSISTENT;
   2047	}
   2048
   2049	if (drbd_md_test_flag(device->ldev, MDF_PEER_OUT_DATED))
   2050		ns.pdsk = D_OUTDATED;
   2051
   2052	rcu_read_lock();
   2053	if (ns.disk == D_CONSISTENT &&
   2054	    (ns.pdsk == D_OUTDATED || rcu_dereference(device->ldev->disk_conf)->fencing == FP_DONT_CARE))
   2055		ns.disk = D_UP_TO_DATE;
   2056
   2057	/* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
   2058	   MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
   2059	   this point, because drbd_request_state() modifies these
   2060	   flags. */
   2061
   2062	if (rcu_dereference(device->ldev->disk_conf)->al_updates)
   2063		device->ldev->md.flags &= ~MDF_AL_DISABLED;
   2064	else
   2065		device->ldev->md.flags |= MDF_AL_DISABLED;
   2066
   2067	rcu_read_unlock();
   2068
    2069	/* In case we are C_CONNECTED, postpone any decision on the new disk
    2070	   state until after the negotiation phase. */
   2071	if (device->state.conn == C_CONNECTED) {
   2072		device->new_state_tmp.i = ns.i;
   2073		ns.i = os.i;
   2074		ns.disk = D_NEGOTIATING;
   2075
   2076		/* We expect to receive up-to-date UUIDs soon.
   2077		   To avoid a race in receive_state, free p_uuid while
   2078		   holding req_lock. I.e. atomic with the state change */
   2079		kfree(device->p_uuid);
   2080		device->p_uuid = NULL;
   2081	}
   2082
   2083	rv = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
   2084	spin_unlock_irq(&device->resource->req_lock);
   2085
   2086	if (rv < SS_SUCCESS)
   2087		goto force_diskless_dec;
   2088
   2089	mod_timer(&device->request_timer, jiffies + HZ);
   2090
   2091	if (device->state.role == R_PRIMARY)
   2092		device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
   2093	else
   2094		device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
   2095
   2096	drbd_md_mark_dirty(device);
   2097	drbd_md_sync(device);
   2098
   2099	kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
   2100	put_ldev(device);
   2101	conn_reconfig_done(connection);
   2102	mutex_unlock(&adm_ctx.resource->adm_mutex);
   2103	drbd_adm_finish(&adm_ctx, info, retcode);
   2104	return 0;
   2105
   2106 force_diskless_dec:
   2107	put_ldev(device);
   2108 force_diskless:
   2109	drbd_force_state(device, NS(disk, D_DISKLESS));
   2110	drbd_md_sync(device);
   2111 fail:
   2112	conn_reconfig_done(connection);
   2113	if (nbc) {
   2114		close_backing_dev(device, nbc->md_bdev, nbc->md_bdev != nbc->backing_bdev);
   2115		close_backing_dev(device, nbc->backing_bdev, true);
   2116		kfree(nbc);
   2117	}
   2118	kfree(new_disk_conf);
   2119	lc_destroy(resync_lru);
   2120	kfree(new_plan);
   2121	mutex_unlock(&adm_ctx.resource->adm_mutex);
   2122 finish:
   2123	drbd_adm_finish(&adm_ctx, info, retcode);
   2124	return 0;
   2125}
   2126
   2127static int adm_detach(struct drbd_device *device, int force)
   2128{
   2129	if (force) {
   2130		set_bit(FORCE_DETACH, &device->flags);
   2131		drbd_force_state(device, NS(disk, D_FAILED));
   2132		return SS_SUCCESS;
   2133	}
   2134
   2135	return drbd_request_detach_interruptible(device);
   2136}
   2137
   2138/* Detaching the disk is a process in multiple stages.  First we need to lock
   2139 * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
   2140 * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
   2141 * internal references as well.
    2142	 * Only then have we finally detached. */
   2143int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
   2144{
   2145	struct drbd_config_context adm_ctx;
   2146	enum drbd_ret_code retcode;
   2147	struct detach_parms parms = { };
   2148	int err;
   2149
   2150	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
   2151	if (!adm_ctx.reply_skb)
   2152		return retcode;
   2153	if (retcode != NO_ERROR)
   2154		goto out;
   2155
   2156	if (info->attrs[DRBD_NLA_DETACH_PARMS]) {
   2157		err = detach_parms_from_attrs(&parms, info);
   2158		if (err) {
   2159			retcode = ERR_MANDATORY_TAG;
   2160			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
   2161			goto out;
   2162		}
   2163	}
   2164
   2165	mutex_lock(&adm_ctx.resource->adm_mutex);
   2166	retcode = adm_detach(adm_ctx.device, parms.force_detach);
   2167	mutex_unlock(&adm_ctx.resource->adm_mutex);
   2168out:
   2169	drbd_adm_finish(&adm_ctx, info, retcode);
   2170	return 0;
   2171}
   2172
   2173static bool conn_resync_running(struct drbd_connection *connection)
   2174{
   2175	struct drbd_peer_device *peer_device;
   2176	bool rv = false;
   2177	int vnr;
   2178
   2179	rcu_read_lock();
   2180	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
   2181		struct drbd_device *device = peer_device->device;
   2182		if (device->state.conn == C_SYNC_SOURCE ||
   2183		    device->state.conn == C_SYNC_TARGET ||
   2184		    device->state.conn == C_PAUSED_SYNC_S ||
   2185		    device->state.conn == C_PAUSED_SYNC_T) {
   2186			rv = true;
   2187			break;
   2188		}
   2189	}
   2190	rcu_read_unlock();
   2191
   2192	return rv;
   2193}
   2194
   2195static bool conn_ov_running(struct drbd_connection *connection)
   2196{
   2197	struct drbd_peer_device *peer_device;
   2198	bool rv = false;
   2199	int vnr;
   2200
   2201	rcu_read_lock();
   2202	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
   2203		struct drbd_device *device = peer_device->device;
   2204		if (device->state.conn == C_VERIFY_S ||
   2205		    device->state.conn == C_VERIFY_T) {
   2206			rv = true;
   2207			break;
   2208		}
   2209	}
   2210	rcu_read_unlock();
   2211
   2212	return rv;
   2213}
   2214
   2215static enum drbd_ret_code
   2216_check_net_options(struct drbd_connection *connection, struct net_conf *old_net_conf, struct net_conf *new_net_conf)
   2217{
   2218	struct drbd_peer_device *peer_device;
   2219	int i;
   2220
   2221	if (old_net_conf && connection->cstate == C_WF_REPORT_PARAMS && connection->agreed_pro_version < 100) {
   2222		if (new_net_conf->wire_protocol != old_net_conf->wire_protocol)
   2223			return ERR_NEED_APV_100;
   2224
   2225		if (new_net_conf->two_primaries != old_net_conf->two_primaries)
   2226			return ERR_NEED_APV_100;
   2227
   2228		if (strcmp(new_net_conf->integrity_alg, old_net_conf->integrity_alg))
   2229			return ERR_NEED_APV_100;
   2230	}
   2231
   2232	if (!new_net_conf->two_primaries &&
   2233	    conn_highest_role(connection) == R_PRIMARY &&
   2234	    conn_highest_peer(connection) == R_PRIMARY)
   2235		return ERR_NEED_ALLOW_TWO_PRI;
   2236
   2237	if (new_net_conf->two_primaries &&
   2238	    (new_net_conf->wire_protocol != DRBD_PROT_C))
   2239		return ERR_NOT_PROTO_C;
   2240
   2241	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
   2242		struct drbd_device *device = peer_device->device;
   2243		if (get_ldev(device)) {
   2244			enum drbd_fencing_p fp = rcu_dereference(device->ldev->disk_conf)->fencing;
   2245			put_ldev(device);
   2246			if (new_net_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH)
   2247				return ERR_STONITH_AND_PROT_A;
   2248		}
   2249		if (device->state.role == R_PRIMARY && new_net_conf->discard_my_data)
   2250			return ERR_DISCARD_IMPOSSIBLE;
   2251	}
   2252
   2253	if (new_net_conf->on_congestion != OC_BLOCK && new_net_conf->wire_protocol != DRBD_PROT_A)
   2254		return ERR_CONG_NOT_PROTO_A;
   2255
   2256	return NO_ERROR;
   2257}
   2258
   2259static enum drbd_ret_code
   2260check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf)
   2261{
   2262	enum drbd_ret_code rv;
   2263	struct drbd_peer_device *peer_device;
   2264	int i;
   2265
   2266	rcu_read_lock();
   2267	rv = _check_net_options(connection, rcu_dereference(connection->net_conf), new_net_conf);
   2268	rcu_read_unlock();
   2269
   2270	/* connection->peer_devices protected by genl_lock() here */
   2271	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
   2272		struct drbd_device *device = peer_device->device;
   2273		if (!device->bitmap) {
   2274			if (drbd_bm_init(device))
   2275				return ERR_NOMEM;
   2276		}
   2277	}
   2278
   2279	return rv;
   2280}
   2281
   2282struct crypto {
   2283	struct crypto_shash *verify_tfm;
   2284	struct crypto_shash *csums_tfm;
   2285	struct crypto_shash *cram_hmac_tfm;
   2286	struct crypto_shash *integrity_tfm;
   2287};
   2288
   2289static int
   2290alloc_shash(struct crypto_shash **tfm, char *tfm_name, int err_alg)
   2291{
   2292	if (!tfm_name[0])
   2293		return NO_ERROR;
   2294
   2295	*tfm = crypto_alloc_shash(tfm_name, 0, 0);
   2296	if (IS_ERR(*tfm)) {
   2297		*tfm = NULL;
   2298		return err_alg;
   2299	}
   2300
   2301	return NO_ERROR;
   2302}
   2303
   2304static enum drbd_ret_code
   2305alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf)
   2306{
   2307	char hmac_name[CRYPTO_MAX_ALG_NAME];
   2308	enum drbd_ret_code rv;
   2309
   2310	rv = alloc_shash(&crypto->csums_tfm, new_net_conf->csums_alg,
   2311			 ERR_CSUMS_ALG);
   2312	if (rv != NO_ERROR)
   2313		return rv;
   2314	rv = alloc_shash(&crypto->verify_tfm, new_net_conf->verify_alg,
   2315			 ERR_VERIFY_ALG);
   2316	if (rv != NO_ERROR)
   2317		return rv;
   2318	rv = alloc_shash(&crypto->integrity_tfm, new_net_conf->integrity_alg,
   2319			 ERR_INTEGRITY_ALG);
   2320	if (rv != NO_ERROR)
   2321		return rv;
   2322	if (new_net_conf->cram_hmac_alg[0] != 0) {
   2323		snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
   2324			 new_net_conf->cram_hmac_alg);
   2325
   2326		rv = alloc_shash(&crypto->cram_hmac_tfm, hmac_name,
   2327				 ERR_AUTH_ALG);
   2328	}
   2329
   2330	return rv;
   2331}
   2332
   2333static void free_crypto(struct crypto *crypto)
   2334{
   2335	crypto_free_shash(crypto->cram_hmac_tfm);
   2336	crypto_free_shash(crypto->integrity_tfm);
   2337	crypto_free_shash(crypto->csums_tfm);
   2338	crypto_free_shash(crypto->verify_tfm);
   2339}
   2340
   2341int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
   2342{
   2343	struct drbd_config_context adm_ctx;
   2344	enum drbd_ret_code retcode;
   2345	struct drbd_connection *connection;
   2346	struct net_conf *old_net_conf, *new_net_conf = NULL;
   2347	int err;
   2348	int ovr; /* online verify running */
   2349	int rsr; /* re-sync running */
   2350	struct crypto crypto = { };
   2351
   2352	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
   2353	if (!adm_ctx.reply_skb)
   2354		return retcode;
   2355	if (retcode != NO_ERROR)
   2356		goto finish;
   2357
   2358	connection = adm_ctx.connection;
   2359	mutex_lock(&adm_ctx.resource->adm_mutex);
   2360
   2361	new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
   2362	if (!new_net_conf) {
   2363		retcode = ERR_NOMEM;
   2364		goto out;
   2365	}
   2366
   2367	conn_reconfig_start(connection);
   2368
   2369	mutex_lock(&connection->data.mutex);
   2370	mutex_lock(&connection->resource->conf_update);
   2371	old_net_conf = connection->net_conf;
   2372
   2373	if (!old_net_conf) {
   2374		drbd_msg_put_info(adm_ctx.reply_skb, "net conf missing, try connect");
   2375		retcode = ERR_INVALID_REQUEST;
   2376		goto fail;
   2377	}
   2378
   2379	*new_net_conf = *old_net_conf;
   2380	if (should_set_defaults(info))
   2381		set_net_conf_defaults(new_net_conf);
   2382
   2383	err = net_conf_from_attrs_for_change(new_net_conf, info);
   2384	if (err && err != -ENOMSG) {
   2385		retcode = ERR_MANDATORY_TAG;
   2386		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
   2387		goto fail;
   2388	}
   2389
   2390	retcode = check_net_options(connection, new_net_conf);
   2391	if (retcode != NO_ERROR)
   2392		goto fail;
   2393
   2394	/* re-sync running */
   2395	rsr = conn_resync_running(connection);
   2396	if (rsr && strcmp(new_net_conf->csums_alg, old_net_conf->csums_alg)) {
   2397		retcode = ERR_CSUMS_RESYNC_RUNNING;
   2398		goto fail;
   2399	}
   2400
   2401	/* online verify running */
   2402	ovr = conn_ov_running(connection);
   2403	if (ovr && strcmp(new_net_conf->verify_alg, old_net_conf->verify_alg)) {
   2404		retcode = ERR_VERIFY_RUNNING;
   2405		goto fail;
   2406	}
   2407
   2408	retcode = alloc_crypto(&crypto, new_net_conf);
   2409	if (retcode != NO_ERROR)
   2410		goto fail;
   2411
   2412	rcu_assign_pointer(connection->net_conf, new_net_conf);
   2413
   2414	if (!rsr) {
   2415		crypto_free_shash(connection->csums_tfm);
   2416		connection->csums_tfm = crypto.csums_tfm;
   2417		crypto.csums_tfm = NULL;
   2418	}
   2419	if (!ovr) {
   2420		crypto_free_shash(connection->verify_tfm);
   2421		connection->verify_tfm = crypto.verify_tfm;
   2422		crypto.verify_tfm = NULL;
   2423	}
   2424
   2425	crypto_free_shash(connection->integrity_tfm);
   2426	connection->integrity_tfm = crypto.integrity_tfm;
   2427	if (connection->cstate >= C_WF_REPORT_PARAMS && connection->agreed_pro_version >= 100)
   2428		/* Do this without trying to take connection->data.mutex again.  */
   2429		__drbd_send_protocol(connection, P_PROTOCOL_UPDATE);
   2430
   2431	crypto_free_shash(connection->cram_hmac_tfm);
   2432	connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
   2433
   2434	mutex_unlock(&connection->resource->conf_update);
   2435	mutex_unlock(&connection->data.mutex);
   2436	kvfree_rcu(old_net_conf);
   2437
   2438	if (connection->cstate >= C_WF_REPORT_PARAMS) {
   2439		struct drbd_peer_device *peer_device;
   2440		int vnr;
   2441
   2442		idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
   2443			drbd_send_sync_param(peer_device);
   2444	}
   2445
   2446	goto done;
   2447
   2448 fail:
   2449	mutex_unlock(&connection->resource->conf_update);
   2450	mutex_unlock(&connection->data.mutex);
   2451	free_crypto(&crypto);
   2452	kfree(new_net_conf);
   2453 done:
   2454	conn_reconfig_done(connection);
   2455 out:
   2456	mutex_unlock(&adm_ctx.resource->adm_mutex);
   2457 finish:
   2458	drbd_adm_finish(&adm_ctx, info, retcode);
   2459	return 0;
   2460}
   2461
   2462static void connection_to_info(struct connection_info *info,
   2463			       struct drbd_connection *connection)
   2464{
   2465	info->conn_connection_state = connection->cstate;
   2466	info->conn_role = conn_highest_peer(connection);
   2467}
   2468
   2469static void peer_device_to_info(struct peer_device_info *info,
   2470				struct drbd_peer_device *peer_device)
   2471{
   2472	struct drbd_device *device = peer_device->device;
   2473
   2474	info->peer_repl_state =
   2475		max_t(enum drbd_conns, C_WF_REPORT_PARAMS, device->state.conn);
   2476	info->peer_disk_state = device->state.pdsk;
   2477	info->peer_resync_susp_user = device->state.user_isp;
   2478	info->peer_resync_susp_peer = device->state.peer_isp;
   2479	info->peer_resync_susp_dependency = device->state.aftr_isp;
   2480}
   2481
   2482int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
   2483{
   2484	struct connection_info connection_info;
   2485	enum drbd_notification_type flags;
   2486	unsigned int peer_devices = 0;
   2487	struct drbd_config_context adm_ctx;
   2488	struct drbd_peer_device *peer_device;
   2489	struct net_conf *old_net_conf, *new_net_conf = NULL;
   2490	struct crypto crypto = { };
   2491	struct drbd_resource *resource;
   2492	struct drbd_connection *connection;
   2493	enum drbd_ret_code retcode;
   2494	enum drbd_state_rv rv;
   2495	int i;
   2496	int err;
   2497
   2498	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
   2499
   2500	if (!adm_ctx.reply_skb)
   2501		return retcode;
   2502	if (retcode != NO_ERROR)
   2503		goto out;
   2504	if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) {
   2505		drbd_msg_put_info(adm_ctx.reply_skb, "connection endpoint(s) missing");
   2506		retcode = ERR_INVALID_REQUEST;
   2507		goto out;
   2508	}
   2509
   2510	/* No need for _rcu here. All reconfiguration is
   2511	 * strictly serialized on genl_lock(). We are protected against
   2512	 * concurrent reconfiguration/addition/deletion */
   2513	for_each_resource(resource, &drbd_resources) {
   2514		for_each_connection(connection, resource) {
   2515			if (nla_len(adm_ctx.my_addr) == connection->my_addr_len &&
   2516			    !memcmp(nla_data(adm_ctx.my_addr), &connection->my_addr,
   2517				    connection->my_addr_len)) {
   2518				retcode = ERR_LOCAL_ADDR;
   2519				goto out;
   2520			}
   2521
   2522			if (nla_len(adm_ctx.peer_addr) == connection->peer_addr_len &&
   2523			    !memcmp(nla_data(adm_ctx.peer_addr), &connection->peer_addr,
   2524				    connection->peer_addr_len)) {
   2525				retcode = ERR_PEER_ADDR;
   2526				goto out;
   2527			}
   2528		}
   2529	}
   2530
   2531	mutex_lock(&adm_ctx.resource->adm_mutex);
   2532	connection = first_connection(adm_ctx.resource);
   2533	conn_reconfig_start(connection);
   2534
   2535	if (connection->cstate > C_STANDALONE) {
   2536		retcode = ERR_NET_CONFIGURED;
   2537		goto fail;
   2538	}
   2539
   2540	/* allocation not in the IO path, drbdsetup / netlink process context */
   2541	new_net_conf = kzalloc(sizeof(*new_net_conf), GFP_KERNEL);
   2542	if (!new_net_conf) {
   2543		retcode = ERR_NOMEM;
   2544		goto fail;
   2545	}
   2546
   2547	set_net_conf_defaults(new_net_conf);
   2548
   2549	err = net_conf_from_attrs(new_net_conf, info);
   2550	if (err && err != -ENOMSG) {
   2551		retcode = ERR_MANDATORY_TAG;
   2552		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
   2553		goto fail;
   2554	}
   2555
   2556	retcode = check_net_options(connection, new_net_conf);
   2557	if (retcode != NO_ERROR)
   2558		goto fail;
   2559
   2560	retcode = alloc_crypto(&crypto, new_net_conf);
   2561	if (retcode != NO_ERROR)
   2562		goto fail;
   2563
   2564	((char *)new_net_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;
   2565
   2566	drbd_flush_workqueue(&connection->sender_work);
   2567
   2568	mutex_lock(&adm_ctx.resource->conf_update);
   2569	old_net_conf = connection->net_conf;
   2570	if (old_net_conf) {
   2571		retcode = ERR_NET_CONFIGURED;
   2572		mutex_unlock(&adm_ctx.resource->conf_update);
   2573		goto fail;
   2574	}
   2575	rcu_assign_pointer(connection->net_conf, new_net_conf);
   2576
   2577	conn_free_crypto(connection);
   2578	connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
   2579	connection->integrity_tfm = crypto.integrity_tfm;
   2580	connection->csums_tfm = crypto.csums_tfm;
   2581	connection->verify_tfm = crypto.verify_tfm;
   2582
   2583	connection->my_addr_len = nla_len(adm_ctx.my_addr);
   2584	memcpy(&connection->my_addr, nla_data(adm_ctx.my_addr), connection->my_addr_len);
   2585	connection->peer_addr_len = nla_len(adm_ctx.peer_addr);
   2586	memcpy(&connection->peer_addr, nla_data(adm_ctx.peer_addr), connection->peer_addr_len);
   2587
   2588	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
   2589		peer_devices++;
   2590	}
   2591
   2592	connection_to_info(&connection_info, connection);
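	/* One notification for the connection itself, then one per peer
	 * device; NOTIFY_CONTINUES flags every event in the series except
	 * the last, so readers can tell when the batch is complete. */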
   2593	flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
   2594	mutex_lock(&notification_mutex);
   2595	notify_connection_state(NULL, 0, connection, &connection_info, NOTIFY_CREATE | flags);
   2596	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
   2597		struct peer_device_info peer_device_info;
   2598
   2599		peer_device_to_info(&peer_device_info, peer_device);
   2600		flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
   2601		notify_peer_device_state(NULL, 0, peer_device, &peer_device_info, NOTIFY_CREATE | flags);
   2602	}
   2603	mutex_unlock(&notification_mutex);
   2604	mutex_unlock(&adm_ctx.resource->conf_update);
   2605
   2606	rcu_read_lock();
   2607	idr_for_each_entry(&connection->peer_devices, peer_device, i) {
   2608		struct drbd_device *device = peer_device->device;
   2609		device->send_cnt = 0;
   2610		device->recv_cnt = 0;
   2611	}
   2612	rcu_read_unlock();
   2613
   2614	rv = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
   2615
   2616	conn_reconfig_done(connection);
   2617	mutex_unlock(&adm_ctx.resource->adm_mutex);
   2618	drbd_adm_finish(&adm_ctx, info, rv);
   2619	return 0;
   2620
   2621fail:
   2622	free_crypto(&crypto);
   2623	kfree(new_net_conf);
   2624
   2625	conn_reconfig_done(connection);
   2626	mutex_unlock(&adm_ctx.resource->adm_mutex);
   2627out:
   2628	drbd_adm_finish(&adm_ctx, info, retcode);
   2629	return 0;
   2630}
   2631
   2632static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force)
   2633{
   2634	enum drbd_conns cstate;
   2635	enum drbd_state_rv rv;
   2636
   2637repeat:
   2638	rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
   2639			force ? CS_HARD : 0);
   2640
   2641	switch (rv) {
   2642	case SS_NOTHING_TO_DO:
   2643		break;
   2644	case SS_ALREADY_STANDALONE:
   2645		return SS_SUCCESS;
   2646	case SS_PRIMARY_NOP:
   2647		/* Our state checking code wants to see the peer outdated. */
   2648		rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, pdsk, D_OUTDATED), 0);
   2649
   2650		if (rv == SS_OUTDATE_WO_CONN) /* lost connection before graceful disconnect succeeded */
   2651			rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_VERBOSE);
   2652
   2653		break;
   2654	case SS_CW_FAILED_BY_PEER:
   2655		spin_lock_irq(&connection->resource->req_lock);
   2656		cstate = connection->cstate;
   2657		spin_unlock_irq(&connection->resource->req_lock);
   2658		if (cstate <= C_WF_CONNECTION)
   2659			goto repeat;
   2660		/* The peer probably wants to see us outdated. */
   2661		rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING,
   2662							disk, D_OUTDATED), 0);
   2663		if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) {
   2664			rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
   2665					CS_HARD);
   2666		}
   2667		break;
   2668	default:;
   2669		/* no special handling necessary */
   2670	}
   2671
   2672	if (rv >= SS_SUCCESS) {
   2673		enum drbd_state_rv rv2;
   2674		/* No one else can reconfigure the network while I am here.
    2675		 * The state handling only uses drbd_thread_stop_nowait();
    2676		 * we want to really wait here until the receiver is no more.
   2677		 */
   2678		drbd_thread_stop(&connection->receiver);
   2679
   2680		/* Race breaker.  This additional state change request may be
   2681		 * necessary, if this was a forced disconnect during a receiver
   2682		 * restart.  We may have "killed" the receiver thread just
   2683		 * after drbd_receiver() returned.  Typically, we should be
   2684		 * C_STANDALONE already, now, and this becomes a no-op.
   2685		 */
   2686		rv2 = conn_request_state(connection, NS(conn, C_STANDALONE),
   2687				CS_VERBOSE | CS_HARD);
   2688		if (rv2 < SS_SUCCESS)
   2689			drbd_err(connection,
   2690				"unexpected rv2=%d in conn_try_disconnect()\n",
   2691				rv2);
   2692		/* Unlike in DRBD 9, the state engine has generated
   2693		 * NOTIFY_DESTROY events before clearing connection->net_conf. */
   2694	}
   2695	return rv;
   2696}
   2697
   2698int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
   2699{
   2700	struct drbd_config_context adm_ctx;
   2701	struct disconnect_parms parms;
   2702	struct drbd_connection *connection;
   2703	enum drbd_state_rv rv;
   2704	enum drbd_ret_code retcode;
   2705	int err;
   2706
   2707	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
   2708	if (!adm_ctx.reply_skb)
   2709		return retcode;
   2710	if (retcode != NO_ERROR)
   2711		goto fail;
   2712
   2713	connection = adm_ctx.connection;
   2714	memset(&parms, 0, sizeof(parms));
   2715	if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
   2716		err = disconnect_parms_from_attrs(&parms, info);
   2717		if (err) {
   2718			retcode = ERR_MANDATORY_TAG;
   2719			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
   2720			goto fail;
   2721		}
   2722	}
   2723
   2724	mutex_lock(&adm_ctx.resource->adm_mutex);
   2725	rv = conn_try_disconnect(connection, parms.force_disconnect);
   2726	mutex_unlock(&adm_ctx.resource->adm_mutex);
   2727	if (rv < SS_SUCCESS) {
   2728		drbd_adm_finish(&adm_ctx, info, rv);
   2729		return 0;
   2730	}
   2731	retcode = NO_ERROR;
   2732 fail:
   2733	drbd_adm_finish(&adm_ctx, info, retcode);
   2734	return 0;
   2735}
   2736
   2737void resync_after_online_grow(struct drbd_device *device)
   2738{
   2739	int iass; /* I am sync source */
   2740
   2741	drbd_info(device, "Resync of new storage after online grow\n");
   2742	if (device->state.role != device->state.peer)
   2743		iass = (device->state.role == R_PRIMARY);
   2744	else
   2745		iass = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
   2746
   2747	if (iass)
   2748		drbd_start_resync(device, C_SYNC_SOURCE);
   2749	else
   2750		_drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
   2751}
   2752
   2753int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
   2754{
   2755	struct drbd_config_context adm_ctx;
   2756	struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
   2757	struct resize_parms rs;
   2758	struct drbd_device *device;
   2759	enum drbd_ret_code retcode;
   2760	enum determine_dev_size dd;
   2761	bool change_al_layout = false;
   2762	enum dds_flags ddsf;
   2763	sector_t u_size;
   2764	int err;
   2765
   2766	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
   2767	if (!adm_ctx.reply_skb)
   2768		return retcode;
   2769	if (retcode != NO_ERROR)
   2770		goto finish;
   2771
   2772	mutex_lock(&adm_ctx.resource->adm_mutex);
   2773	device = adm_ctx.device;
   2774	if (!get_ldev(device)) {
   2775		retcode = ERR_NO_DISK;
   2776		goto fail;
   2777	}
   2778
   2779	memset(&rs, 0, sizeof(struct resize_parms));
   2780	rs.al_stripes = device->ldev->md.al_stripes;
   2781	rs.al_stripe_size = device->ldev->md.al_stripe_size_4k * 4;
   2782	if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
   2783		err = resize_parms_from_attrs(&rs, info);
   2784		if (err) {
   2785			retcode = ERR_MANDATORY_TAG;
   2786			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
   2787			goto fail_ldev;
   2788		}
   2789	}
   2790
   2791	if (device->state.conn > C_CONNECTED) {
   2792		retcode = ERR_RESIZE_RESYNC;
   2793		goto fail_ldev;
   2794	}
   2795
   2796	if (device->state.role == R_SECONDARY &&
   2797	    device->state.peer == R_SECONDARY) {
   2798		retcode = ERR_NO_PRIMARY;
   2799		goto fail_ldev;
   2800	}
   2801
   2802	if (rs.no_resync && first_peer_device(device)->connection->agreed_pro_version < 93) {
   2803		retcode = ERR_NEED_APV_93;
   2804		goto fail_ldev;
   2805	}
   2806
   2807	rcu_read_lock();
   2808	u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
   2809	rcu_read_unlock();
   2810	if (u_size != (sector_t)rs.resize_size) {
   2811		new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
   2812		if (!new_disk_conf) {
   2813			retcode = ERR_NOMEM;
   2814			goto fail_ldev;
   2815		}
   2816	}
   2817
   2818	if (device->ldev->md.al_stripes != rs.al_stripes ||
   2819	    device->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) {
   2820		u32 al_size_k = rs.al_stripes * rs.al_stripe_size;
   2821
   2822		if (al_size_k > (16 * 1024 * 1024)) {
   2823			retcode = ERR_MD_LAYOUT_TOO_BIG;
   2824			goto fail_ldev;
   2825		}
   2826
   2827		if (al_size_k < MD_32kB_SECT/2) {
   2828			retcode = ERR_MD_LAYOUT_TOO_SMALL;
   2829			goto fail_ldev;
   2830		}
   2831
   2832		if (device->state.conn != C_CONNECTED && !rs.resize_force) {
   2833			retcode = ERR_MD_LAYOUT_CONNECTED;
   2834			goto fail_ldev;
   2835		}
   2836
   2837		change_al_layout = true;
   2838	}
   2839
   2840	if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev))
   2841		device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
   2842
   2843	if (new_disk_conf) {
   2844		mutex_lock(&device->resource->conf_update);
   2845		old_disk_conf = device->ldev->disk_conf;
   2846		*new_disk_conf = *old_disk_conf;
   2847		new_disk_conf->disk_size = (sector_t)rs.resize_size;
   2848		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
   2849		mutex_unlock(&device->resource->conf_update);
   2850		kvfree_rcu(old_disk_conf);
   2851		new_disk_conf = NULL;
   2852	}
   2853
   2854	ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
   2855	dd = drbd_determine_dev_size(device, ddsf, change_al_layout ? &rs : NULL);
   2856	drbd_md_sync(device);
   2857	put_ldev(device);
   2858	if (dd == DS_ERROR) {
   2859		retcode = ERR_NOMEM_BITMAP;
   2860		goto fail;
   2861	} else if (dd == DS_ERROR_SPACE_MD) {
   2862		retcode = ERR_MD_LAYOUT_NO_FIT;
   2863		goto fail;
   2864	} else if (dd == DS_ERROR_SHRINK) {
   2865		retcode = ERR_IMPLICIT_SHRINK;
   2866		goto fail;
   2867	}
   2868
   2869	if (device->state.conn == C_CONNECTED) {
   2870		if (dd == DS_GREW)
   2871			set_bit(RESIZE_PENDING, &device->flags);
   2872
   2873		drbd_send_uuids(first_peer_device(device));
   2874		drbd_send_sizes(first_peer_device(device), 1, ddsf);
   2875	}
   2876
   2877 fail:
   2878	mutex_unlock(&adm_ctx.resource->adm_mutex);
   2879 finish:
   2880	drbd_adm_finish(&adm_ctx, info, retcode);
   2881	return 0;
   2882
   2883 fail_ldev:
   2884	put_ldev(device);
   2885	kfree(new_disk_conf);
   2886	goto fail;
   2887}
   2888
   2889int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
   2890{
   2891	struct drbd_config_context adm_ctx;
   2892	enum drbd_ret_code retcode;
   2893	struct res_opts res_opts;
   2894	int err;
   2895
   2896	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
   2897	if (!adm_ctx.reply_skb)
   2898		return retcode;
   2899	if (retcode != NO_ERROR)
   2900		goto fail;
   2901
   2902	res_opts = adm_ctx.resource->res_opts;
   2903	if (should_set_defaults(info))
   2904		set_res_opts_defaults(&res_opts);
   2905
   2906	err = res_opts_from_attrs(&res_opts, info);
   2907	if (err && err != -ENOMSG) {
   2908		retcode = ERR_MANDATORY_TAG;
   2909		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
   2910		goto fail;
   2911	}
   2912
   2913	mutex_lock(&adm_ctx.resource->adm_mutex);
   2914	err = set_resource_options(adm_ctx.resource, &res_opts);
   2915	if (err) {
   2916		retcode = ERR_INVALID_REQUEST;
   2917		if (err == -ENOMEM)
   2918			retcode = ERR_NOMEM;
   2919	}
   2920	mutex_unlock(&adm_ctx.resource->adm_mutex);
   2921
   2922fail:
   2923	drbd_adm_finish(&adm_ctx, info, retcode);
   2924	return 0;
   2925}
   2926
   2927int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
   2928{
   2929	struct drbd_config_context adm_ctx;
   2930	struct drbd_device *device;
    2931	int retcode; /* enum drbd_ret_code resp. enum drbd_state_rv */
   2932
   2933	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
   2934	if (!adm_ctx.reply_skb)
   2935		return retcode;
   2936	if (retcode != NO_ERROR)
   2937		goto out;
   2938
   2939	device = adm_ctx.device;
   2940	if (!get_ldev(device)) {
   2941		retcode = ERR_NO_DISK;
   2942		goto out;
   2943	}
   2944
   2945	mutex_lock(&adm_ctx.resource->adm_mutex);
   2946
   2947	/* If there is still bitmap IO pending, probably because of a previous
    2948	 * resync having just finished, wait for it before requesting a new resync.
    2949	 * Also wait for its after_state_ch(). */
   2950	drbd_suspend_io(device);
   2951	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
   2952	drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
   2953
   2954	/* If we happen to be C_STANDALONE R_SECONDARY, just change to
   2955	 * D_INCONSISTENT, and set all bits in the bitmap.  Otherwise,
   2956	 * try to start a resync handshake as sync target for full sync.
   2957	 */
   2958	if (device->state.conn == C_STANDALONE && device->state.role == R_SECONDARY) {
   2959		retcode = drbd_request_state(device, NS(disk, D_INCONSISTENT));
   2960		if (retcode >= SS_SUCCESS) {
   2961			if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
   2962				"set_n_write from invalidate", BM_LOCKED_MASK))
   2963				retcode = ERR_IO_MD_DISK;
   2964		}
   2965	} else
   2966		retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
   2967	drbd_resume_io(device);
   2968	mutex_unlock(&adm_ctx.resource->adm_mutex);
   2969	put_ldev(device);
   2970out:
   2971	drbd_adm_finish(&adm_ctx, info, retcode);
   2972	return 0;
   2973}
   2974
   2975static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
   2976		union drbd_state mask, union drbd_state val)
   2977{
   2978	struct drbd_config_context adm_ctx;
   2979	enum drbd_ret_code retcode;
   2980
   2981	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
   2982	if (!adm_ctx.reply_skb)
   2983		return retcode;
   2984	if (retcode != NO_ERROR)
   2985		goto out;
   2986
   2987	mutex_lock(&adm_ctx.resource->adm_mutex);
   2988	retcode = drbd_request_state(adm_ctx.device, mask, val);
   2989	mutex_unlock(&adm_ctx.resource->adm_mutex);
   2990out:
   2991	drbd_adm_finish(&adm_ctx, info, retcode);
   2992	return 0;
   2993}
   2994
   2995static int drbd_bmio_set_susp_al(struct drbd_device *device) __must_hold(local)
   2996{
   2997	int rv;
   2998
   2999	rv = drbd_bmio_set_n_write(device);
   3000	drbd_suspend_al(device);
   3001	return rv;
   3002}
   3003
   3004int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
   3005{
   3006	struct drbd_config_context adm_ctx;
   3007	int retcode; /* drbd_ret_code, drbd_state_rv */
   3008	struct drbd_device *device;
   3009
   3010	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
   3011	if (!adm_ctx.reply_skb)
   3012		return retcode;
   3013	if (retcode != NO_ERROR)
   3014		goto out;
   3015
   3016	device = adm_ctx.device;
   3017	if (!get_ldev(device)) {
   3018		retcode = ERR_NO_DISK;
   3019		goto out;
   3020	}
   3021
   3022	mutex_lock(&adm_ctx.resource->adm_mutex);
   3023
   3024	/* If there is still bitmap IO pending, probably because of a previous
    3025	 * resync having just finished, wait for it before requesting a new resync.
    3026	 * Also wait for its after_state_ch(). */
   3027	drbd_suspend_io(device);
   3028	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
   3029	drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
   3030
   3031	/* If we happen to be C_STANDALONE R_PRIMARY, just set all bits
   3032	 * in the bitmap.  Otherwise, try to start a resync handshake
   3033	 * as sync source for full sync.
   3034	 */
   3035	if (device->state.conn == C_STANDALONE && device->state.role == R_PRIMARY) {
    3036		/* The peer will get a resync upon connect anyway. Just make that
   3037		   into a full resync. */
   3038		retcode = drbd_request_state(device, NS(pdsk, D_INCONSISTENT));
   3039		if (retcode >= SS_SUCCESS) {
   3040			if (drbd_bitmap_io(device, &drbd_bmio_set_susp_al,
   3041				"set_n_write from invalidate_peer",
   3042				BM_LOCKED_SET_ALLOWED))
   3043				retcode = ERR_IO_MD_DISK;
   3044		}
   3045	} else
   3046		retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S));
   3047	drbd_resume_io(device);
   3048	mutex_unlock(&adm_ctx.resource->adm_mutex);
   3049	put_ldev(device);
   3050out:
   3051	drbd_adm_finish(&adm_ctx, info, retcode);
   3052	return 0;
   3053}
   3054
   3055int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
   3056{
   3057	struct drbd_config_context adm_ctx;
   3058	enum drbd_ret_code retcode;
   3059
   3060	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
   3061	if (!adm_ctx.reply_skb)
   3062		return retcode;
   3063	if (retcode != NO_ERROR)
   3064		goto out;
   3065
   3066	mutex_lock(&adm_ctx.resource->adm_mutex);
   3067	if (drbd_request_state(adm_ctx.device, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
   3068		retcode = ERR_PAUSE_IS_SET;
   3069	mutex_unlock(&adm_ctx.resource->adm_mutex);
   3070out:
   3071	drbd_adm_finish(&adm_ctx, info, retcode);
   3072	return 0;
   3073}
   3074
   3075int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
   3076{
   3077	struct drbd_config_context adm_ctx;
   3078	union drbd_dev_state s;
   3079	enum drbd_ret_code retcode;
   3080
   3081	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
   3082	if (!adm_ctx.reply_skb)
   3083		return retcode;
   3084	if (retcode != NO_ERROR)
   3085		goto out;
   3086
   3087	mutex_lock(&adm_ctx.resource->adm_mutex);
   3088	if (drbd_request_state(adm_ctx.device, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
   3089		s = adm_ctx.device->state;
   3090		if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
   3091			retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
   3092				  s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
   3093		} else {
   3094			retcode = ERR_PAUSE_IS_CLEAR;
   3095		}
   3096	}
   3097	mutex_unlock(&adm_ctx.resource->adm_mutex);
   3098out:
   3099	drbd_adm_finish(&adm_ctx, info, retcode);
   3100	return 0;
   3101}
   3102
   3103int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
   3104{
   3105	return drbd_adm_simple_request_state(skb, info, NS(susp, 1));
   3106}
   3107
   3108int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
   3109{
   3110	struct drbd_config_context adm_ctx;
   3111	struct drbd_device *device;
   3112	int retcode; /* enum drbd_ret_code resp. enum drbd_state_rv */
   3113
   3114	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
   3115	if (!adm_ctx.reply_skb)
   3116		return retcode;
   3117	if (retcode != NO_ERROR)
   3118		goto out;
   3119
   3120	mutex_lock(&adm_ctx.resource->adm_mutex);
   3121	device = adm_ctx.device;
   3122	if (test_bit(NEW_CUR_UUID, &device->flags)) {
   3123		if (get_ldev_if_state(device, D_ATTACHING)) {
   3124			drbd_uuid_new_current(device);
   3125			put_ldev(device);
   3126		} else {
   3127			/* This is effectively a multi-stage "forced down".
   3128			 * The NEW_CUR_UUID bit is supposedly only set if we
   3129			 * lost the replication connection, and are configured
   3130			 * to freeze IO and wait for some fence-peer handler.
   3131			 * So we still don't have a replication connection.
   3132			 * And now we don't have a local disk either.  After
   3133			 * resume, we will fail all pending and new IO, because
   3134			 * we don't have any data anymore.  Which means we will
   3135			 * eventually be able to terminate all users of this
   3136			 * device, and then take it down.  By bumping the
   3137			 * "effective" data uuid, we make sure that you really
   3138			 * need to tear down before you reconfigure: we will
   3139			 * then refuse to re-connect or re-attach (because no
   3140			 * matching real data uuid exists).
   3141			 */
   3142			u64 val;
   3143			get_random_bytes(&val, sizeof(u64));
   3144			drbd_set_ed_uuid(device, val);
   3145			drbd_warn(device, "Resumed without access to data; please tear down before attempting to re-configure.\n");
   3146		}
   3147		clear_bit(NEW_CUR_UUID, &device->flags);
   3148	}
   3149	drbd_suspend_io(device);
   3150	retcode = drbd_request_state(device, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
   3151	if (retcode == SS_SUCCESS) {
   3152		if (device->state.conn < C_CONNECTED)
   3153			tl_clear(first_peer_device(device)->connection);
   3154		if (device->state.disk == D_DISKLESS || device->state.disk == D_FAILED)
   3155			tl_restart(first_peer_device(device)->connection, FAIL_FROZEN_DISK_IO);
   3156	}
   3157	drbd_resume_io(device);
   3158	mutex_unlock(&adm_ctx.resource->adm_mutex);
   3159out:
   3160	drbd_adm_finish(&adm_ctx, info, retcode);
   3161	return 0;
   3162}
   3163
   3164int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
   3165{
   3166	return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
   3167}
   3168
   3169static int nla_put_drbd_cfg_context(struct sk_buff *skb,
   3170				    struct drbd_resource *resource,
   3171				    struct drbd_connection *connection,
   3172				    struct drbd_device *device)
   3173{
   3174	struct nlattr *nla;
   3175	nla = nla_nest_start_noflag(skb, DRBD_NLA_CFG_CONTEXT);
   3176	if (!nla)
   3177		goto nla_put_failure;
   3178	if (device &&
   3179	    nla_put_u32(skb, T_ctx_volume, device->vnr))
   3180		goto nla_put_failure;
   3181	if (nla_put_string(skb, T_ctx_resource_name, resource->name))
   3182		goto nla_put_failure;
   3183	if (connection) {
   3184		if (connection->my_addr_len &&
   3185		    nla_put(skb, T_ctx_my_addr, connection->my_addr_len, &connection->my_addr))
   3186			goto nla_put_failure;
   3187		if (connection->peer_addr_len &&
   3188		    nla_put(skb, T_ctx_peer_addr, connection->peer_addr_len, &connection->peer_addr))
   3189			goto nla_put_failure;
   3190	}
   3191	nla_nest_end(skb, nla);
   3192	return 0;
   3193
   3194nla_put_failure:
   3195	if (nla)
   3196		nla_nest_cancel(skb, nla);
   3197	return -EMSGSIZE;
   3198}
   3199
   3200/*
   3201 * The generic netlink dump callbacks are called outside the genl_lock(), so
   3202 * they cannot use the simple attribute parsing code which uses global
   3203 * attribute tables.
   3204 */
   3205static struct nlattr *find_cfg_context_attr(const struct nlmsghdr *nlh, int attr)
   3206{
   3207	const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
   3208	const int maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
   3209	struct nlattr *nla;
   3210
   3211	nla = nla_find(nlmsg_attrdata(nlh, hdrlen), nlmsg_attrlen(nlh, hdrlen),
   3212		       DRBD_NLA_CFG_CONTEXT);
   3213	if (!nla)
   3214		return NULL;
   3215	return drbd_nla_find_nested(maxtype, nla, __nla_type(attr));
   3216}
   3217
   3218static void resource_to_info(struct resource_info *, struct drbd_resource *);
   3219
   3220int drbd_adm_dump_resources(struct sk_buff *skb, struct netlink_callback *cb)
   3221{
   3222	struct drbd_genlmsghdr *dh;
   3223	struct drbd_resource *resource;
   3224	struct resource_info resource_info;
   3225	struct resource_statistics resource_statistics;
   3226	int err;
   3227
   3228	rcu_read_lock();
   3229	if (cb->args[0]) {
   3230		for_each_resource_rcu(resource, &drbd_resources)
   3231			if (resource == (struct drbd_resource *)cb->args[0])
   3232				goto found_resource;
   3233		err = 0;  /* resource was probably deleted */
   3234		goto out;
   3235	}
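       	/* Make resource point to the list head (not a real entry), so that
       	 * the continue-iteration below starts at the first resource. */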
   3236	resource = list_entry(&drbd_resources,
   3237			      struct drbd_resource, resources);
   3238
   3239found_resource:
   3240	list_for_each_entry_continue_rcu(resource, &drbd_resources, resources) {
   3241		goto put_result;
   3242	}
   3243	err = 0;
   3244	goto out;
   3245
   3246put_result:
   3247	dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
   3248			cb->nlh->nlmsg_seq, &drbd_genl_family,
   3249			NLM_F_MULTI, DRBD_ADM_GET_RESOURCES);
   3250	err = -ENOMEM;
   3251	if (!dh)
   3252		goto out;
   3253	dh->minor = -1U;
   3254	dh->ret_code = NO_ERROR;
   3255	err = nla_put_drbd_cfg_context(skb, resource, NULL, NULL);
   3256	if (err)
   3257		goto out;
   3258	err = res_opts_to_skb(skb, &resource->res_opts, !capable(CAP_SYS_ADMIN));
   3259	if (err)
   3260		goto out;
   3261	resource_to_info(&resource_info, resource);
   3262	err = resource_info_to_skb(skb, &resource_info, !capable(CAP_SYS_ADMIN));
   3263	if (err)
   3264		goto out;
   3265	resource_statistics.res_stat_write_ordering = resource->write_ordering;
   3266	err = resource_statistics_to_skb(skb, &resource_statistics, !capable(CAP_SYS_ADMIN));
   3267	if (err)
   3268		goto out;
   3269	cb->args[0] = (long)resource;
   3270	genlmsg_end(skb, dh);
   3271	err = 0;
   3272
   3273out:
   3274	rcu_read_unlock();
   3275	if (err)
   3276		return err;
   3277	return skb->len;
   3278}
   3279
   3280static void device_to_statistics(struct device_statistics *s,
   3281				 struct drbd_device *device)
   3282{
   3283	memset(s, 0, sizeof(*s));
   3284	s->dev_upper_blocked = !may_inc_ap_bio(device);
   3285	if (get_ldev(device)) {
   3286		struct drbd_md *md = &device->ldev->md;
   3287		u64 *history_uuids = (u64 *)s->history_uuids;
   3288		int n;
   3289
   3290		spin_lock_irq(&md->uuid_lock);
   3291		s->dev_current_uuid = md->uuid[UI_CURRENT];
   3292		BUILD_BUG_ON(sizeof(s->history_uuids) < UI_HISTORY_END - UI_HISTORY_START + 1);
   3293		for (n = 0; n < UI_HISTORY_END - UI_HISTORY_START + 1; n++)
   3294			history_uuids[n] = md->uuid[UI_HISTORY_START + n];
   3295		for (; n < HISTORY_UUIDS; n++)
   3296			history_uuids[n] = 0;
   3297		s->history_uuids_len = HISTORY_UUIDS;
   3298		spin_unlock_irq(&md->uuid_lock);
   3299
   3300		s->dev_disk_flags = md->flags;
   3301		put_ldev(device);
   3302	}
   3303	s->dev_size = get_capacity(device->vdisk);
   3304	s->dev_read = device->read_cnt;
   3305	s->dev_write = device->writ_cnt;
   3306	s->dev_al_writes = device->al_writ_cnt;
   3307	s->dev_bm_writes = device->bm_writ_cnt;
   3308	s->dev_upper_pending = atomic_read(&device->ap_bio_cnt);
   3309	s->dev_lower_pending = atomic_read(&device->local_cnt);
   3310	s->dev_al_suspended = test_bit(AL_SUSPENDED, &device->flags);
   3311	s->dev_exposed_data_uuid = device->ed_uuid;
   3312}
   3313
   3314static int put_resource_in_arg0(struct netlink_callback *cb, int holder_nr)
   3315{
   3316	if (cb->args[0]) {
   3317		struct drbd_resource *resource =
   3318			(struct drbd_resource *)cb->args[0];
   3319		kref_put(&resource->kref, drbd_destroy_resource);
   3320	}
   3321
   3322	return 0;
   3323}
   3324
   3325int drbd_adm_dump_devices_done(struct netlink_callback *cb) {
   3326	return put_resource_in_arg0(cb, 7);
   3327}
   3328
   3329static void device_to_info(struct device_info *, struct drbd_device *);
   3330
   3331int drbd_adm_dump_devices(struct sk_buff *skb, struct netlink_callback *cb)
   3332{
   3333	struct nlattr *resource_filter;
   3334	struct drbd_resource *resource;
   3335	struct drbd_device *device;
   3336	int minor, err, retcode;
   3337	struct drbd_genlmsghdr *dh;
   3338	struct device_info device_info;
   3339	struct device_statistics device_statistics;
   3340	struct idr *idr_to_search;
   3341
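       	/* cb->args[0]: resource to restrict the dump to (0: dump all devices);
       	 * cb->args[1]: next minor number to look at. */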
   3342	resource = (struct drbd_resource *)cb->args[0];
   3343	if (!cb->args[0] && !cb->args[1]) {
   3344		resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name);
   3345		if (resource_filter) {
   3346			retcode = ERR_RES_NOT_KNOWN;
   3347			resource = drbd_find_resource(nla_data(resource_filter));
   3348			if (!resource)
   3349				goto put_result;
   3350			cb->args[0] = (long)resource;
   3351		}
   3352	}
   3353
   3354	rcu_read_lock();
   3355	minor = cb->args[1];
   3356	idr_to_search = resource ? &resource->devices : &drbd_devices;
   3357	device = idr_get_next(idr_to_search, &minor);
   3358	if (!device) {
   3359		err = 0;
   3360		goto out;
   3361	}
   3362	idr_for_each_entry_continue(idr_to_search, device, minor) {
   3363		retcode = NO_ERROR;
   3364		goto put_result;  /* only one iteration */
   3365	}
   3366	err = 0;
   3367	goto out;  /* no more devices */
   3368
   3369put_result:
   3370	dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
   3371			cb->nlh->nlmsg_seq, &drbd_genl_family,
   3372			NLM_F_MULTI, DRBD_ADM_GET_DEVICES);
   3373	err = -ENOMEM;
   3374	if (!dh)
   3375		goto out;
   3376	dh->ret_code = retcode;
   3377	dh->minor = -1U;
   3378	if (retcode == NO_ERROR) {
   3379		dh->minor = device->minor;
   3380		err = nla_put_drbd_cfg_context(skb, device->resource, NULL, device);
   3381		if (err)
   3382			goto out;
   3383		if (get_ldev(device)) {
   3384			struct disk_conf *disk_conf =
   3385				rcu_dereference(device->ldev->disk_conf);
   3386
   3387			err = disk_conf_to_skb(skb, disk_conf, !capable(CAP_SYS_ADMIN));
   3388			put_ldev(device);
   3389			if (err)
   3390				goto out;
   3391		}
   3392		device_to_info(&device_info, device);
   3393		err = device_info_to_skb(skb, &device_info, !capable(CAP_SYS_ADMIN));
   3394		if (err)
   3395			goto out;
   3396
   3397		device_to_statistics(&device_statistics, device);
   3398		err = device_statistics_to_skb(skb, &device_statistics, !capable(CAP_SYS_ADMIN));
   3399		if (err)
   3400			goto out;
   3401		cb->args[1] = minor + 1;
   3402	}
   3403	genlmsg_end(skb, dh);
   3404	err = 0;
   3405
   3406out:
   3407	rcu_read_unlock();
   3408	if (err)
   3409		return err;
   3410	return skb->len;
   3411}
   3412
   3413int drbd_adm_dump_connections_done(struct netlink_callback *cb)
   3414{
   3415	return put_resource_in_arg0(cb, 6);
   3416}
   3417
   3418enum { SINGLE_RESOURCE, ITERATE_RESOURCES };
   3419
   3420int drbd_adm_dump_connections(struct sk_buff *skb, struct netlink_callback *cb)
   3421{
   3422	struct nlattr *resource_filter;
   3423	struct drbd_resource *resource = NULL, *next_resource;
   3424	struct drbd_connection *connection;
   3425	int err = 0, retcode;
   3426	struct drbd_genlmsghdr *dh;
   3427	struct connection_info connection_info;
   3428	struct connection_statistics connection_statistics;
   3429
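       	/* cb->args[0]: current resource (a kref is held on it);
       	 * cb->args[1]: SINGLE_RESOURCE or ITERATE_RESOURCES;
       	 * cb->args[2]: last connection dumped. */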
   3430	rcu_read_lock();
   3431	resource = (struct drbd_resource *)cb->args[0];
   3432	if (!cb->args[0]) {
   3433		resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name);
   3434		if (resource_filter) {
   3435			retcode = ERR_RES_NOT_KNOWN;
   3436			resource = drbd_find_resource(nla_data(resource_filter));
   3437			if (!resource)
   3438				goto put_result;
   3439			cb->args[0] = (long)resource;
   3440			cb->args[1] = SINGLE_RESOURCE;
   3441		}
   3442	}
   3443	if (!resource) {
   3444		if (list_empty(&drbd_resources))
   3445			goto out;
   3446		resource = list_first_entry(&drbd_resources, struct drbd_resource, resources);
   3447		kref_get(&resource->kref);
   3448		cb->args[0] = (long)resource;
   3449		cb->args[1] = ITERATE_RESOURCES;
   3450	}
   3451
   3452next_resource:
   3453	rcu_read_unlock();
   3454	mutex_lock(&resource->conf_update);
   3455	rcu_read_lock();
   3456	if (cb->args[2]) {
   3457		for_each_connection_rcu(connection, resource)
   3458			if (connection == (struct drbd_connection *)cb->args[2])
   3459				goto found_connection;
   3460		/* connection was probably deleted */
   3461		goto no_more_connections;
   3462	}
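       	/* Make connection point to the list head (not a real entry). */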
   3463	connection = list_entry(&resource->connections, struct drbd_connection, connections);
   3464
   3465found_connection:
   3466	list_for_each_entry_continue_rcu(connection, &resource->connections, connections) {
   3467		if (!has_net_conf(connection))
   3468			continue;
   3469		retcode = NO_ERROR;
   3470		goto put_result;  /* only one iteration */
   3471	}
   3472
   3473no_more_connections:
   3474	if (cb->args[1] == ITERATE_RESOURCES) {
   3475		for_each_resource_rcu(next_resource, &drbd_resources) {
   3476			if (next_resource == resource)
   3477				goto found_resource;
   3478		}
   3479		/* resource was probably deleted */
   3480	}
   3481	goto out;
   3482
   3483found_resource:
   3484	list_for_each_entry_continue_rcu(next_resource, &drbd_resources, resources) {
   3485		mutex_unlock(&resource->conf_update);
   3486		kref_put(&resource->kref, drbd_destroy_resource);
   3487		resource = next_resource;
   3488		kref_get(&resource->kref);
   3489		cb->args[0] = (long)resource;
   3490		cb->args[2] = 0;
   3491		goto next_resource;
   3492	}
   3493	goto out;  /* no more resources */
   3494
   3495put_result:
   3496	dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
   3497			cb->nlh->nlmsg_seq, &drbd_genl_family,
   3498			NLM_F_MULTI, DRBD_ADM_GET_CONNECTIONS);
   3499	err = -ENOMEM;
   3500	if (!dh)
   3501		goto out;
   3502	dh->ret_code = retcode;
   3503	dh->minor = -1U;
   3504	if (retcode == NO_ERROR) {
   3505		struct net_conf *net_conf;
   3506
   3507		err = nla_put_drbd_cfg_context(skb, resource, connection, NULL);
   3508		if (err)
   3509			goto out;
   3510		net_conf = rcu_dereference(connection->net_conf);
   3511		if (net_conf) {
   3512			err = net_conf_to_skb(skb, net_conf, !capable(CAP_SYS_ADMIN));
   3513			if (err)
   3514				goto out;
   3515		}
   3516		connection_to_info(&connection_info, connection);
   3517		err = connection_info_to_skb(skb, &connection_info, !capable(CAP_SYS_ADMIN));
   3518		if (err)
   3519			goto out;
   3520		connection_statistics.conn_congested = test_bit(NET_CONGESTED, &connection->flags);
   3521		err = connection_statistics_to_skb(skb, &connection_statistics, !capable(CAP_SYS_ADMIN));
   3522		if (err)
   3523			goto out;
   3524		cb->args[2] = (long)connection;
   3525	}
   3526	genlmsg_end(skb, dh);
   3527	err = 0;
   3528
   3529out:
   3530	rcu_read_unlock();
   3531	if (resource)
   3532		mutex_unlock(&resource->conf_update);
   3533	if (err)
   3534		return err;
   3535	return skb->len;
   3536}
   3537
   3538enum mdf_peer_flag {
   3539	MDF_PEER_CONNECTED =	1 << 0,
   3540	MDF_PEER_OUTDATED =	1 << 1,
   3541	MDF_PEER_FENCING =	1 << 2,
   3542	MDF_PEER_FULL_SYNC =	1 << 3,
   3543};
   3544
   3545static void peer_device_to_statistics(struct peer_device_statistics *s,
   3546				      struct drbd_peer_device *peer_device)
   3547{
   3548	struct drbd_device *device = peer_device->device;
   3549
   3550	memset(s, 0, sizeof(*s));
   3551	s->peer_dev_received = device->recv_cnt;
   3552	s->peer_dev_sent = device->send_cnt;
   3553	s->peer_dev_pending = atomic_read(&device->ap_pending_cnt) +
   3554			      atomic_read(&device->rs_pending_cnt);
   3555	s->peer_dev_unacked = atomic_read(&device->unacked_cnt);
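       	/* These counters are kept in bitmap-block (BM_BLOCK_SIZE) units;
       	 * shift converts them to 512-byte sectors. */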
   3556	s->peer_dev_out_of_sync = drbd_bm_total_weight(device) << (BM_BLOCK_SHIFT - 9);
   3557	s->peer_dev_resync_failed = device->rs_failed << (BM_BLOCK_SHIFT - 9);
   3558	if (get_ldev(device)) {
   3559		struct drbd_md *md = &device->ldev->md;
   3560
   3561		spin_lock_irq(&md->uuid_lock);
   3562		s->peer_dev_bitmap_uuid = md->uuid[UI_BITMAP];
   3563		spin_unlock_irq(&md->uuid_lock);
   3564		s->peer_dev_flags =
   3565			(drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND) ?
   3566				MDF_PEER_CONNECTED : 0) +
   3567			(drbd_md_test_flag(device->ldev, MDF_CONSISTENT) &&
   3568			 !drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE) ?
   3569				MDF_PEER_OUTDATED : 0) +
   3570			/* FIXME: MDF_PEER_FENCING? */
   3571			(drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ?
   3572				MDF_PEER_FULL_SYNC : 0);
   3573		put_ldev(device);
   3574	}
   3575}
   3576
   3577int drbd_adm_dump_peer_devices_done(struct netlink_callback *cb)
   3578{
   3579	return put_resource_in_arg0(cb, 9);
   3580}
   3581
   3582int drbd_adm_dump_peer_devices(struct sk_buff *skb, struct netlink_callback *cb)
   3583{
   3584	struct nlattr *resource_filter;
   3585	struct drbd_resource *resource;
   3586	struct drbd_device *device;
   3587	struct drbd_peer_device *peer_device = NULL;
   3588	int minor, err, retcode;
   3589	struct drbd_genlmsghdr *dh;
   3590	struct idr *idr_to_search;
   3591
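       	/* cb->args[0]: resource to restrict the dump to (may be 0);
       	 * cb->args[1]: minor of the device currently being dumped;
       	 * cb->args[2]: last peer device dumped for that device. */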
   3592	resource = (struct drbd_resource *)cb->args[0];
   3593	if (!cb->args[0] && !cb->args[1]) {
   3594		resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name);
   3595		if (resource_filter) {
   3596			retcode = ERR_RES_NOT_KNOWN;
   3597			resource = drbd_find_resource(nla_data(resource_filter));
   3598			if (!resource)
   3599				goto put_result;
   3600		}
   3601		cb->args[0] = (long)resource;
   3602	}
   3603
   3604	rcu_read_lock();
   3605	minor = cb->args[1];
   3606	idr_to_search = resource ? &resource->devices : &drbd_devices;
   3607	device = idr_find(idr_to_search, minor);
   3608	if (!device) {
   3609next_device:
   3610		minor++;
   3611		cb->args[2] = 0;
   3612		device = idr_get_next(idr_to_search, &minor);
   3613		if (!device) {
   3614			err = 0;
   3615			goto out;
   3616		}
   3617	}
   3618	if (cb->args[2]) {
   3619		for_each_peer_device(peer_device, device)
   3620			if (peer_device == (struct drbd_peer_device *)cb->args[2])
   3621				goto found_peer_device;
   3622		/* peer device was probably deleted */
   3623		goto next_device;
   3624	}
   3625	/* Make peer_device point to the list head (not the first entry). */
   3626	peer_device = list_entry(&device->peer_devices, struct drbd_peer_device, peer_devices);
   3627
   3628found_peer_device:
   3629	list_for_each_entry_continue_rcu(peer_device, &device->peer_devices, peer_devices) {
   3630		if (!has_net_conf(peer_device->connection))
   3631			continue;
   3632		retcode = NO_ERROR;
   3633		goto put_result;  /* only one iteration */
   3634	}
   3635	goto next_device;
   3636
   3637put_result:
   3638	dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
   3639			cb->nlh->nlmsg_seq, &drbd_genl_family,
   3640			NLM_F_MULTI, DRBD_ADM_GET_PEER_DEVICES);
   3641	err = -ENOMEM;
   3642	if (!dh)
   3643		goto out;
   3644	dh->ret_code = retcode;
   3645	dh->minor = -1U;
   3646	if (retcode == NO_ERROR) {
   3647		struct peer_device_info peer_device_info;
   3648		struct peer_device_statistics peer_device_statistics;
   3649
   3650		dh->minor = minor;
   3651		err = nla_put_drbd_cfg_context(skb, device->resource, peer_device->connection, device);
   3652		if (err)
   3653			goto out;
   3654		peer_device_to_info(&peer_device_info, peer_device);
   3655		err = peer_device_info_to_skb(skb, &peer_device_info, !capable(CAP_SYS_ADMIN));
   3656		if (err)
   3657			goto out;
   3658		peer_device_to_statistics(&peer_device_statistics, peer_device);
   3659		err = peer_device_statistics_to_skb(skb, &peer_device_statistics, !capable(CAP_SYS_ADMIN));
   3660		if (err)
   3661			goto out;
   3662		cb->args[1] = minor;
   3663		cb->args[2] = (long)peer_device;
   3664	}
   3665	genlmsg_end(skb, dh);
   3666	err = 0;
   3667
   3668out:
   3669	rcu_read_unlock();
   3670	if (err)
   3671		return err;
   3672	return skb->len;
   3673}
   3674/*
   3675 * Return the connection of @resource if @resource has exactly one connection.
   3676 */
   3677static struct drbd_connection *the_only_connection(struct drbd_resource *resource)
   3678{
   3679	struct list_head *connections = &resource->connections;
   3680
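       	/* Not exactly one connection: either the list is empty,
       	 * or it has more than one entry. */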
   3681	if (list_empty(connections) || connections->next->next != connections)
   3682		return NULL;
   3683	return list_first_entry(&resource->connections, struct drbd_connection, connections);
   3684}
   3685
   3686static int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
   3687		const struct sib_info *sib)
   3688{
   3689	struct drbd_resource *resource = device->resource;
   3690	struct state_info *si = NULL; /* for sizeof(si->member); */
   3691	struct nlattr *nla;
   3692	int got_ldev;
   3693	int err = 0;
   3694	int exclude_sensitive;
   3695
   3696	/* If sib != NULL, this is drbd_bcast_event, which anyone can listen
   3697	 * to.  So we better exclude_sensitive information.
   3698	 *
   3699	 * If sib == NULL, this is drbd_adm_get_status, executed synchronously
   3700	 * in the context of the requesting user process. Exclude sensitive
   3701	 * information, unless current has superuser.
   3702	 *
   3703	 * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and
   3704	 * relies on the current implementation of netlink_dump(), which
   3705	 * executes the dump callback successively from netlink_recvmsg(),
   3706	 * always in the context of the receiving process */
   3707	exclude_sensitive = sib || !capable(CAP_SYS_ADMIN);
   3708
   3709	got_ldev = get_ldev(device);
   3710
   3711	/* We still need to add connection name and volume number information.
   3712	 * Minor number is in drbd_genlmsghdr. */
   3713	if (nla_put_drbd_cfg_context(skb, resource, the_only_connection(resource), device))
   3714		goto nla_put_failure;
   3715
   3716	if (res_opts_to_skb(skb, &device->resource->res_opts, exclude_sensitive))
   3717		goto nla_put_failure;
   3718
   3719	rcu_read_lock();
   3720	if (got_ldev) {
   3721		struct disk_conf *disk_conf;
   3722
   3723		disk_conf = rcu_dereference(device->ldev->disk_conf);
   3724		err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive);
   3725	}
   3726	if (!err) {
   3727		struct net_conf *nc;
   3728
   3729		nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
   3730		if (nc)
   3731			err = net_conf_to_skb(skb, nc, exclude_sensitive);
   3732	}
   3733	rcu_read_unlock();
   3734	if (err)
   3735		goto nla_put_failure;
   3736
   3737	nla = nla_nest_start_noflag(skb, DRBD_NLA_STATE_INFO);
   3738	if (!nla)
   3739		goto nla_put_failure;
   3740	if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
   3741	    nla_put_u32(skb, T_current_state, device->state.i) ||
   3742	    nla_put_u64_0pad(skb, T_ed_uuid, device->ed_uuid) ||
   3743	    nla_put_u64_0pad(skb, T_capacity, get_capacity(device->vdisk)) ||
   3744	    nla_put_u64_0pad(skb, T_send_cnt, device->send_cnt) ||
   3745	    nla_put_u64_0pad(skb, T_recv_cnt, device->recv_cnt) ||
   3746	    nla_put_u64_0pad(skb, T_read_cnt, device->read_cnt) ||
   3747	    nla_put_u64_0pad(skb, T_writ_cnt, device->writ_cnt) ||
   3748	    nla_put_u64_0pad(skb, T_al_writ_cnt, device->al_writ_cnt) ||
   3749	    nla_put_u64_0pad(skb, T_bm_writ_cnt, device->bm_writ_cnt) ||
   3750	    nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&device->ap_bio_cnt)) ||
   3751	    nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&device->ap_pending_cnt)) ||
   3752	    nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&device->rs_pending_cnt)))
   3753		goto nla_put_failure;
   3754
   3755	if (got_ldev) {
   3756		int err;
   3757
   3758		spin_lock_irq(&device->ldev->md.uuid_lock);
   3759		err = nla_put(skb, T_uuids, sizeof(si->uuids), device->ldev->md.uuid);
   3760		spin_unlock_irq(&device->ldev->md.uuid_lock);
   3761
   3762		if (err)
   3763			goto nla_put_failure;
   3764
   3765		if (nla_put_u32(skb, T_disk_flags, device->ldev->md.flags) ||
   3766		    nla_put_u64_0pad(skb, T_bits_total, drbd_bm_bits(device)) ||
   3767		    nla_put_u64_0pad(skb, T_bits_oos,
   3768				     drbd_bm_total_weight(device)))
   3769			goto nla_put_failure;
   3770		if (C_SYNC_SOURCE <= device->state.conn &&
   3771		    C_PAUSED_SYNC_T >= device->state.conn) {
   3772			if (nla_put_u64_0pad(skb, T_bits_rs_total,
   3773					     device->rs_total) ||
   3774			    nla_put_u64_0pad(skb, T_bits_rs_failed,
   3775					     device->rs_failed))
   3776				goto nla_put_failure;
   3777		}
   3778	}
   3779
   3780	if (sib) {
   3781		switch(sib->sib_reason) {
   3782		case SIB_SYNC_PROGRESS:
   3783		case SIB_GET_STATUS_REPLY:
   3784			break;
   3785		case SIB_STATE_CHANGE:
   3786			if (nla_put_u32(skb, T_prev_state, sib->os.i) ||
   3787			    nla_put_u32(skb, T_new_state, sib->ns.i))
   3788				goto nla_put_failure;
   3789			break;
   3790		case SIB_HELPER_POST:
   3791			if (nla_put_u32(skb, T_helper_exit_code,
   3792					sib->helper_exit_code))
   3793				goto nla_put_failure;
   3794			fallthrough;
   3795		case SIB_HELPER_PRE:
   3796			if (nla_put_string(skb, T_helper, sib->helper_name))
   3797				goto nla_put_failure;
   3798			break;
   3799		}
   3800	}
   3801	nla_nest_end(skb, nla);
   3802
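       	/* The if (0) skips the error assignment on the success path;
       	 * gotos to nla_put_failure land on it. */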
   3803	if (0)
   3804nla_put_failure:
   3805		err = -EMSGSIZE;
   3806	if (got_ldev)
   3807		put_ldev(device);
   3808	return err;
   3809}
   3810
   3811int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
   3812{
   3813	struct drbd_config_context adm_ctx;
   3814	enum drbd_ret_code retcode;
   3815	int err;
   3816
   3817	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
   3818	if (!adm_ctx.reply_skb)
   3819		return retcode;
   3820	if (retcode != NO_ERROR)
   3821		goto out;
   3822
   3823	err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.device, NULL);
   3824	if (err) {
   3825		nlmsg_free(adm_ctx.reply_skb);
   3826		return err;
   3827	}
   3828out:
   3829	drbd_adm_finish(&adm_ctx, info, retcode);
   3830	return 0;
   3831}
   3832
   3833static int get_one_status(struct sk_buff *skb, struct netlink_callback *cb)
   3834{
   3835	struct drbd_device *device;
   3836	struct drbd_genlmsghdr *dh;
   3837	struct drbd_resource *pos = (struct drbd_resource *)cb->args[0];
   3838	struct drbd_resource *resource = NULL;
   3839	struct drbd_resource *tmp;
   3840	unsigned volume = cb->args[1];
   3841
   3842	/* Open coded, deferred, iteration:
   3843	 * for_each_resource_safe(resource, tmp, &drbd_resources) {
   3844	 *      connection = "first connection of resource or undefined";
   3845	 *	idr_for_each_entry(&resource->devices, device, i) {
   3846	 *	  ...
   3847	 *	}
   3848	 * }
   3849	 * where resource is cb->args[0];
   3850	 * and i is cb->args[1];
   3851	 *
   3852	 * cb->args[2] indicates if we shall loop over all resources,
   3853	 * or just dump all volumes of a single resource.
   3854	 *
   3855	 * This may miss entries inserted after this dump started,
   3856	 * or entries deleted before they are reached.
   3857	 *
   3858	 * We need to make sure the device won't disappear while
   3859	 * we are looking at it, and revalidate our iterators
   3860	 * on each iteration.
   3861	 */
   3862
   3863	/* synchronize with conn_create()/drbd_destroy_connection() */
   3864	rcu_read_lock();
   3865	/* revalidate iterator position */
   3866	for_each_resource_rcu(tmp, &drbd_resources) {
   3867		if (pos == NULL) {
   3868			/* first iteration */
   3869			pos = tmp;
   3870			resource = pos;
   3871			break;
   3872		}
   3873		if (tmp == pos) {
   3874			resource = pos;
   3875			break;
   3876		}
   3877	}
   3878	if (resource) {
   3879next_resource:
   3880		device = idr_get_next(&resource->devices, &volume);
   3881		if (!device) {
   3882			/* No more volumes to dump on this resource.
   3883			 * Advance resource iterator. */
   3884			pos = list_entry_rcu(resource->resources.next,
   3885					     struct drbd_resource, resources);
   3886			/* Did we dump any volume of this resource yet? */
   3887			if (volume != 0) {
   3888				/* If we reached the end of the list,
   3889				 * or only a single resource dump was requested,
   3890				 * we are done. */
   3891				if (&pos->resources == &drbd_resources || cb->args[2])
   3892					goto out;
   3893				volume = 0;
   3894				resource = pos;
   3895				goto next_resource;
   3896			}
   3897		}
   3898
   3899		dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
   3900				cb->nlh->nlmsg_seq, &drbd_genl_family,
   3901				NLM_F_MULTI, DRBD_ADM_GET_STATUS);
   3902		if (!dh)
   3903			goto out;
   3904
   3905		if (!device) {
   3906			/* This is a connection without a single volume.
   3907			 * Surprisingly enough, it may have a network
   3908			 * configuration. */
   3909			struct drbd_connection *connection;
   3910
   3911			dh->minor = -1U;
   3912			dh->ret_code = NO_ERROR;
   3913			connection = the_only_connection(resource);
   3914			if (nla_put_drbd_cfg_context(skb, resource, connection, NULL))
   3915				goto cancel;
   3916			if (connection) {
   3917				struct net_conf *nc;
   3918
   3919				nc = rcu_dereference(connection->net_conf);
   3920				if (nc && net_conf_to_skb(skb, nc, 1) != 0)
   3921					goto cancel;
   3922			}
   3923			goto done;
   3924		}
   3925
   3926		D_ASSERT(device, device->vnr == volume);
   3927		D_ASSERT(device, device->resource == resource);
   3928
   3929		dh->minor = device_to_minor(device);
   3930		dh->ret_code = NO_ERROR;
   3931
   3932		if (nla_put_status_info(skb, device, NULL)) {
   3933cancel:
   3934			genlmsg_cancel(skb, dh);
   3935			goto out;
   3936		}
   3937done:
   3938		genlmsg_end(skb, dh);
   3939	}
   3940
   3941out:
   3942	rcu_read_unlock();
   3943	/* where to start the next iteration */
   3944	cb->args[0] = (long)pos;
   3945	cb->args[1] = (pos == resource) ? volume + 1 : 0;
   3946
   3947	/* Finding no more resources/volumes/minors results in an empty skb,
   3948	 * which will terminate the dump. */
   3949	return skb->len;
   3950}
   3951
   3952/*
   3953 * Request status of all resources, or of all volumes within a single resource.
   3954 *
   3955 * This is a dump, as the answer may not fit in a single reply skb otherwise.
   3956 * Which means we cannot use the family->attrbuf or other such members, because
   3957 * dump is NOT protected by the genl_lock().  During dump, we only have access
   3958 * to the incoming skb, and need to opencode "parsing" of the nlattr payload.
   3959 *
   3960 * Once things are setup properly, we call into get_one_status().
   3961 */
   3962int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
   3963{
   3964	const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
   3965	struct nlattr *nla;
   3966	const char *resource_name;
   3967	struct drbd_resource *resource;
   3968	int maxtype;
   3969
   3970	/* Is this a followup call? */
   3971	if (cb->args[0]) {
   3972		/* ... of a single resource dump,
   3973		 * and the resource iterator has been advanced already? */
   3974		if (cb->args[2] && cb->args[2] != cb->args[0])
   3975			return 0; /* DONE. */
   3976		goto dump;
   3977	}
   3978
   3979	/* First call (from netlink_dump_start).  We need to figure out
   3980	 * which resource(s) the user wants us to dump. */
   3981	nla = nla_find(nlmsg_attrdata(cb->nlh, hdrlen),
   3982			nlmsg_attrlen(cb->nlh, hdrlen),
   3983			DRBD_NLA_CFG_CONTEXT);
   3984
   3985	/* No explicit context given.  Dump all. */
   3986	if (!nla)
   3987		goto dump;
   3988	maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
   3989	nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name));
   3990	if (IS_ERR(nla))
   3991		return PTR_ERR(nla);
   3992	/* context given, but no name present? */
   3993	if (!nla)
   3994		return -EINVAL;
   3995	resource_name = nla_data(nla);
   3996	if (!*resource_name)
   3997		return -ENODEV;
   3998	resource = drbd_find_resource(resource_name);
   3999	if (!resource)
   4000		return -ENODEV;
   4001
   4002	kref_put(&resource->kref, drbd_destroy_resource); /* get_one_status() revalidates the resource */
   4003
   4004	/* prime iterators, and set "filter" mode mark:
   4005	 * only dump this resource. */
   4006	cb->args[0] = (long)resource;
   4007	/* cb->args[1] = 0; passed in this way. */
   4008	cb->args[2] = (long)resource;
   4009
   4010dump:
   4011	return get_one_status(skb, cb);
   4012}
   4013
   4014int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
   4015{
   4016	struct drbd_config_context adm_ctx;
   4017	enum drbd_ret_code retcode;
   4018	struct timeout_parms tp;
   4019	int err;
   4020
   4021	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
   4022	if (!adm_ctx.reply_skb)
   4023		return retcode;
   4024	if (retcode != NO_ERROR)
   4025		goto out;
   4026
   4027	tp.timeout_type =
   4028		adm_ctx.device->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
   4029		test_bit(USE_DEGR_WFC_T, &adm_ctx.device->flags) ? UT_DEGRADED :
   4030		UT_DEFAULT;
   4031
   4032	err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp);
   4033	if (err) {
   4034		nlmsg_free(adm_ctx.reply_skb);
   4035		return err;
   4036	}
   4037out:
   4038	drbd_adm_finish(&adm_ctx, info, retcode);
   4039	return 0;
   4040}
   4041
   4042int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
   4043{
   4044	struct drbd_config_context adm_ctx;
   4045	struct drbd_device *device;
   4046	enum drbd_ret_code retcode;
   4047	struct start_ov_parms parms;
   4048
   4049	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
   4050	if (!adm_ctx.reply_skb)
   4051		return retcode;
   4052	if (retcode != NO_ERROR)
   4053		goto out;
   4054
   4055	device = adm_ctx.device;
   4056
   4057	/* resume from last known position, if possible */
   4058	parms.ov_start_sector = device->ov_start_sector;
   4059	parms.ov_stop_sector = ULLONG_MAX;
   4060	if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
   4061		int err = start_ov_parms_from_attrs(&parms, info);
   4062		if (err) {
   4063			retcode = ERR_MANDATORY_TAG;
   4064			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
   4065			goto out;
   4066		}
   4067	}
   4068	mutex_lock(&adm_ctx.resource->adm_mutex);
   4069
   4070	/* w_make_ov_request expects position to be aligned */
   4071	device->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
   4072	device->ov_stop_sector = parms.ov_stop_sector;
   4073
   4074	/* If there is still bitmap IO pending, e.g. previous resync or verify
   4075	 * just being finished, wait for it before requesting a new resync. */
   4076	drbd_suspend_io(device);
   4077	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
   4078	retcode = drbd_request_state(device, NS(conn, C_VERIFY_S));
   4079	drbd_resume_io(device);
   4080
   4081	mutex_unlock(&adm_ctx.resource->adm_mutex);
   4082out:
   4083	drbd_adm_finish(&adm_ctx, info, retcode);
   4084	return 0;
   4085}
   4086
   4087
   4088int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
   4089{
   4090	struct drbd_config_context adm_ctx;
   4091	struct drbd_device *device;
   4092	enum drbd_ret_code retcode;
   4093	int skip_initial_sync = 0;
   4094	int err;
   4095	struct new_c_uuid_parms args;
   4096
   4097	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
   4098	if (!adm_ctx.reply_skb)
   4099		return retcode;
   4100	if (retcode != NO_ERROR)
   4101		goto out_nolock;
   4102
   4103	device = adm_ctx.device;
   4104	memset(&args, 0, sizeof(args));
   4105	if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
   4106		err = new_c_uuid_parms_from_attrs(&args, info);
   4107		if (err) {
   4108			retcode = ERR_MANDATORY_TAG;
   4109			drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
   4110			goto out_nolock;
   4111		}
   4112	}
   4113
   4114	mutex_lock(&adm_ctx.resource->adm_mutex);
   4115	mutex_lock(device->state_mutex); /* Protects us against serialized state changes. */
   4116
   4117	if (!get_ldev(device)) {
   4118		retcode = ERR_NO_DISK;
   4119		goto out;
   4120	}
   4121
   4122	/* this is "skip initial sync", assumed to be clean */
   4123	if (device->state.conn == C_CONNECTED &&
   4124	    first_peer_device(device)->connection->agreed_pro_version >= 90 &&
   4125	    device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
   4126		drbd_info(device, "Preparing to skip initial sync\n");
   4127		skip_initial_sync = 1;
   4128	} else if (device->state.conn != C_STANDALONE) {
   4129		retcode = ERR_CONNECTED;
   4130		goto out_dec;
   4131	}
   4132
   4133	drbd_uuid_set(device, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */
   4134	drbd_uuid_new_current(device); /* New current, previous to UI_BITMAP */
   4135
   4136	if (args.clear_bm) {
   4137		err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
   4138			"clear_n_write from new_c_uuid", BM_LOCKED_MASK);
   4139		if (err) {
   4140			drbd_err(device, "Writing bitmap failed with %d\n", err);
   4141			retcode = ERR_IO_MD_DISK;
   4142		}
   4143		if (skip_initial_sync) {
   4144			drbd_send_uuids_skip_initial_sync(first_peer_device(device));
   4145			_drbd_uuid_set(device, UI_BITMAP, 0);
   4146			drbd_print_uuids(device, "cleared bitmap UUID");
   4147			spin_lock_irq(&device->resource->req_lock);
   4148			_drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
   4149					CS_VERBOSE, NULL);
   4150			spin_unlock_irq(&device->resource->req_lock);
   4151		}
   4152	}
   4153
   4154	drbd_md_sync(device);
   4155out_dec:
   4156	put_ldev(device);
   4157out:
   4158	mutex_unlock(device->state_mutex);
   4159	mutex_unlock(&adm_ctx.resource->adm_mutex);
   4160out_nolock:
   4161	drbd_adm_finish(&adm_ctx, info, retcode);
   4162	return 0;
   4163}
   4164
   4165static enum drbd_ret_code
   4166drbd_check_resource_name(struct drbd_config_context *adm_ctx)
   4167{
   4168	const char *name = adm_ctx->resource_name;
   4169	if (!name || !name[0]) {
   4170		drbd_msg_put_info(adm_ctx->reply_skb, "resource name missing");
   4171		return ERR_MANDATORY_TAG;
   4172	}
   4173	/* if we want to use these in sysfs/configfs/debugfs some day,
   4174	 * we must not allow slashes */
   4175	if (strchr(name, '/')) {
   4176		drbd_msg_put_info(adm_ctx->reply_skb, "invalid resource name");
   4177		return ERR_INVALID_REQUEST;
   4178	}
   4179	return NO_ERROR;
   4180}
   4181
   4182static void resource_to_info(struct resource_info *info,
   4183			     struct drbd_resource *resource)
   4184{
   4185	info->res_role = conn_highest_role(first_connection(resource));
   4186	info->res_susp = resource->susp;
   4187	info->res_susp_nod = resource->susp_nod;
   4188	info->res_susp_fen = resource->susp_fen;
   4189}
   4190
   4191int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
   4192{
   4193	struct drbd_connection *connection;
   4194	struct drbd_config_context adm_ctx;
   4195	enum drbd_ret_code retcode;
   4196	struct res_opts res_opts;
   4197	int err;
   4198
   4199	retcode = drbd_adm_prepare(&adm_ctx, skb, info, 0);
   4200	if (!adm_ctx.reply_skb)
   4201		return retcode;
   4202	if (retcode != NO_ERROR)
   4203		goto out;
   4204
   4205	set_res_opts_defaults(&res_opts);
   4206	err = res_opts_from_attrs(&res_opts, info);
   4207	if (err && err != -ENOMSG) {
   4208		retcode = ERR_MANDATORY_TAG;
   4209		drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
   4210		goto out;
   4211	}
   4212
   4213	retcode = drbd_check_resource_name(&adm_ctx);
   4214	if (retcode != NO_ERROR)
   4215		goto out;
   4216
   4217	if (adm_ctx.resource) {
   4218		if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
   4219			retcode = ERR_INVALID_REQUEST;
   4220			drbd_msg_put_info(adm_ctx.reply_skb, "resource exists");
   4221		}
   4222		/* else: still NO_ERROR */
   4223		goto out;
   4224	}
   4225
   4226	/* not yet safe for genl_family.parallel_ops */
   4227	mutex_lock(&resources_mutex);
   4228	connection = conn_create(adm_ctx.resource_name, &res_opts);
   4229	mutex_unlock(&resources_mutex);
   4230
   4231	if (connection) {
   4232		struct resource_info resource_info;
   4233
   4234		mutex_lock(&notification_mutex);
   4235		resource_to_info(&resource_info, connection->resource);
   4236		notify_resource_state(NULL, 0, connection->resource,
   4237				      &resource_info, NOTIFY_CREATE);
   4238		mutex_unlock(&notification_mutex);
   4239	} else
   4240		retcode = ERR_NOMEM;
   4241
   4242out:
   4243	drbd_adm_finish(&adm_ctx, info, retcode);
   4244	return 0;
   4245}
   4246
   4247static void device_to_info(struct device_info *info,
   4248			   struct drbd_device *device)
   4249{
   4250	info->dev_disk_state = device->state.disk;
   4251}
   4252
   4253
   4254int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
   4255{
   4256	struct drbd_config_context adm_ctx;
   4257	struct drbd_genlmsghdr *dh = info->userhdr;
   4258	enum drbd_ret_code retcode;
   4259
   4260	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
   4261	if (!adm_ctx.reply_skb)
   4262		return retcode;
   4263	if (retcode != NO_ERROR)
   4264		goto out;
   4265
   4266	if (dh->minor > MINORMASK) {
   4267		drbd_msg_put_info(adm_ctx.reply_skb, "requested minor out of range");
   4268		retcode = ERR_INVALID_REQUEST;
   4269		goto out;
   4270	}
   4271	if (adm_ctx.volume > DRBD_VOLUME_MAX) {
   4272		drbd_msg_put_info(adm_ctx.reply_skb, "requested volume id out of range");
   4273		retcode = ERR_INVALID_REQUEST;
   4274		goto out;
   4275	}
   4276
   4277	/* drbd_adm_prepare made sure already
   4278	 * that first_peer_device(device)->connection and device->vnr match the request. */
   4279	if (adm_ctx.device) {
   4280		if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
   4281			retcode = ERR_MINOR_OR_VOLUME_EXISTS;
   4282		/* else: still NO_ERROR */
   4283		goto out;
   4284	}
   4285
   4286	mutex_lock(&adm_ctx.resource->adm_mutex);
   4287	retcode = drbd_create_device(&adm_ctx, dh->minor);
   4288	if (retcode == NO_ERROR) {
   4289		struct drbd_device *device;
   4290		struct drbd_peer_device *peer_device;
   4291		struct device_info info;
   4292		unsigned int peer_devices = 0;
   4293		enum drbd_notification_type flags;
   4294
   4295		device = minor_to_device(dh->minor);
   4296		for_each_peer_device(peer_device, device) {
   4297			if (!has_net_conf(peer_device->connection))
   4298				continue;
   4299			peer_devices++;
   4300		}
   4301
   4302		device_to_info(&info, device);
   4303		mutex_lock(&notification_mutex);
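       		/* Post-decrement: NOTIFY_CONTINUES stays set as long as further
       		 * notifications follow for this newly created device. */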
   4304		flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
   4305		notify_device_state(NULL, 0, device, &info, NOTIFY_CREATE | flags);
   4306		for_each_peer_device(peer_device, device) {
   4307			struct peer_device_info peer_device_info;
   4308
   4309			if (!has_net_conf(peer_device->connection))
   4310				continue;
   4311			peer_device_to_info(&peer_device_info, peer_device);
   4312			flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
   4313			notify_peer_device_state(NULL, 0, peer_device, &peer_device_info,
   4314						 NOTIFY_CREATE | flags);
   4315		}
   4316		mutex_unlock(&notification_mutex);
   4317	}
   4318	mutex_unlock(&adm_ctx.resource->adm_mutex);
   4319out:
   4320	drbd_adm_finish(&adm_ctx, info, retcode);
   4321	return 0;
   4322}
   4323
   4324static enum drbd_ret_code adm_del_minor(struct drbd_device *device)
   4325{
   4326	struct drbd_peer_device *peer_device;
   4327
   4328	if (device->state.disk == D_DISKLESS &&
   4329	    /* no need to be device->state.conn == C_STANDALONE &&
   4330	     * we may want to delete a minor from a live replication group.
   4331	     */
   4332	    device->state.role == R_SECONDARY) {
   4333		struct drbd_connection *connection =
   4334			first_connection(device->resource);
   4335
   4336		_drbd_request_state(device, NS(conn, C_WF_REPORT_PARAMS),
   4337				    CS_VERBOSE + CS_WAIT_COMPLETE);
   4338
   4339		/* If the state engine hasn't stopped the sender thread yet, we
   4340		 * need to flush the sender work queue before generating the
   4341		 * DESTROY events here. */
   4342		if (get_t_state(&connection->worker) == RUNNING)
   4343			drbd_flush_workqueue(&connection->sender_work);
   4344
   4345		mutex_lock(&notification_mutex);
   4346		for_each_peer_device(peer_device, device) {
   4347			if (!has_net_conf(peer_device->connection))
   4348				continue;
   4349			notify_peer_device_state(NULL, 0, peer_device, NULL,
   4350						 NOTIFY_DESTROY | NOTIFY_CONTINUES);
   4351		}
   4352		notify_device_state(NULL, 0, device, NULL, NOTIFY_DESTROY);
   4353		mutex_unlock(&notification_mutex);
   4354
   4355		drbd_delete_device(device);
   4356		return NO_ERROR;
   4357	} else
   4358		return ERR_MINOR_CONFIGURED;
   4359}
   4360
   4361int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info)
   4362{
   4363	struct drbd_config_context adm_ctx;
   4364	enum drbd_ret_code retcode;
   4365
   4366	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
   4367	if (!adm_ctx.reply_skb)
   4368		return retcode;
   4369	if (retcode != NO_ERROR)
   4370		goto out;
   4371
   4372	mutex_lock(&adm_ctx.resource->adm_mutex);
   4373	retcode = adm_del_minor(adm_ctx.device);
   4374	mutex_unlock(&adm_ctx.resource->adm_mutex);
   4375out:
   4376	drbd_adm_finish(&adm_ctx, info, retcode);
   4377	return 0;
   4378}
   4379
   4380static int adm_del_resource(struct drbd_resource *resource)
   4381{
   4382	struct drbd_connection *connection;
   4383
   4384	for_each_connection(connection, resource) {
   4385		if (connection->cstate > C_STANDALONE)
   4386			return ERR_NET_CONFIGURED;
   4387	}
   4388	if (!idr_is_empty(&resource->devices))
   4389		return ERR_RES_IN_USE;
   4390
   4391	/* The state engine has stopped the sender thread, so we don't
   4392	 * need to flush the sender work queue before generating the
   4393	 * DESTROY event here. */
   4394	mutex_lock(&notification_mutex);
   4395	notify_resource_state(NULL, 0, resource, NULL, NOTIFY_DESTROY);
   4396	mutex_unlock(&notification_mutex);
   4397
   4398	mutex_lock(&resources_mutex);
   4399	list_del_rcu(&resource->resources);
   4400	mutex_unlock(&resources_mutex);
   4401	/* Make sure all threads have actually stopped: state handling only
   4402	 * does drbd_thread_stop_nowait(). */
   4403	list_for_each_entry(connection, &resource->connections, connections)
   4404		drbd_thread_stop(&connection->worker);
   4405	synchronize_rcu();
   4406	drbd_free_resource(resource);
   4407	return NO_ERROR;
   4408}
   4409
   4410int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
   4411{
   4412	struct drbd_config_context adm_ctx;
   4413	struct drbd_resource *resource;
   4414	struct drbd_connection *connection;
   4415	struct drbd_device *device;
   4416	int retcode; /* enum drbd_ret_code resp. enum drbd_state_rv */
   4417	unsigned i;
   4418
   4419	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
   4420	if (!adm_ctx.reply_skb)
   4421		return retcode;
   4422	if (retcode != NO_ERROR)
   4423		goto finish;
   4424
   4425	resource = adm_ctx.resource;
   4426	mutex_lock(&resource->adm_mutex);
   4427	/* demote */
   4428	for_each_connection(connection, resource) {
   4429		struct drbd_peer_device *peer_device;
   4430
   4431		idr_for_each_entry(&connection->peer_devices, peer_device, i) {
   4432			retcode = drbd_set_role(peer_device->device, R_SECONDARY, 0);
   4433			if (retcode < SS_SUCCESS) {
   4434				drbd_msg_put_info(adm_ctx.reply_skb, "failed to demote");
   4435				goto out;
   4436			}
   4437		}
   4438
   4439		retcode = conn_try_disconnect(connection, 0);
   4440		if (retcode < SS_SUCCESS) {
   4441			drbd_msg_put_info(adm_ctx.reply_skb, "failed to disconnect");
   4442			goto out;
   4443		}
   4444	}
   4445
   4446	/* detach */
   4447	idr_for_each_entry(&resource->devices, device, i) {
   4448		retcode = adm_detach(device, 0);
   4449		if (retcode < SS_SUCCESS || retcode > NO_ERROR) {
   4450			drbd_msg_put_info(adm_ctx.reply_skb, "failed to detach");
   4451			goto out;
   4452		}
   4453	}
   4454
   4455	/* delete volumes */
   4456	idr_for_each_entry(&resource->devices, device, i) {
   4457		retcode = adm_del_minor(device);
   4458		if (retcode != NO_ERROR) {
   4459			/* "can not happen" */
   4460			drbd_msg_put_info(adm_ctx.reply_skb, "failed to delete volume");
   4461			goto out;
   4462		}
   4463	}
   4464
   4465	retcode = adm_del_resource(resource);
   4466out:
   4467	mutex_unlock(&resource->adm_mutex);
   4468finish:
   4469	drbd_adm_finish(&adm_ctx, info, retcode);
   4470	return 0;
   4471}
   4472
   4473int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info)
   4474{
   4475	struct drbd_config_context adm_ctx;
   4476	struct drbd_resource *resource;
   4477	enum drbd_ret_code retcode;
   4478
   4479	retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
   4480	if (!adm_ctx.reply_skb)
   4481		return retcode;
   4482	if (retcode != NO_ERROR)
   4483		goto finish;
   4484	resource = adm_ctx.resource;
   4485
   4486	mutex_lock(&resource->adm_mutex);
   4487	retcode = adm_del_resource(resource);
   4488	mutex_unlock(&resource->adm_mutex);
   4489finish:
   4490	drbd_adm_finish(&adm_ctx, info, retcode);
   4491	return 0;
   4492}
   4493
   4494void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib)
   4495{
   4496	struct sk_buff *msg;
   4497	struct drbd_genlmsghdr *d_out;
   4498	unsigned seq;
   4499	int err = -ENOMEM;
   4500
   4501	seq = atomic_inc_return(&drbd_genl_seq);
   4502	msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
   4503	if (!msg)
   4504		goto failed;
   4505
   4506	err = -EMSGSIZE;
   4507	d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT);
   4508	if (!d_out) /* cannot happen, but anyway. */
   4509		goto nla_put_failure;
   4510	d_out->minor = device_to_minor(device);
   4511	d_out->ret_code = NO_ERROR;
   4512
   4513	if (nla_put_status_info(msg, device, sib))
   4514		goto nla_put_failure;
   4515	genlmsg_end(msg, d_out);
   4516	err = drbd_genl_multicast_events(msg, GFP_NOWAIT);
   4517	/* msg has been consumed or freed in netlink_broadcast() */
   4518	if (err && err != -ESRCH)
   4519		goto failed;
   4520
   4521	return;
   4522
   4523nla_put_failure:
   4524	nlmsg_free(msg);
   4525failed:
   4526	drbd_err(device, "Error %d while broadcasting event. "
   4527			"Event seq:%u sib_reason:%u\n",
   4528			err, seq, sib->sib_reason);
   4529}
   4530
   4531static int nla_put_notification_header(struct sk_buff *msg,
   4532				       enum drbd_notification_type type)
   4533{
   4534	struct drbd_notification_header nh = {
   4535		.nh_type = type,
   4536	};
   4537
   4538	return drbd_notification_header_to_skb(msg, &nh, true);
   4539}
   4540
   4541int notify_resource_state(struct sk_buff *skb,
   4542			   unsigned int seq,
   4543			   struct drbd_resource *resource,
   4544			   struct resource_info *resource_info,
   4545			   enum drbd_notification_type type)
   4546{
   4547	struct resource_statistics resource_statistics;
   4548	struct drbd_genlmsghdr *dh;
   4549	bool multicast = false;
   4550	int err;
   4551
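       	/* No skb supplied by the caller: allocate one and multicast the event. */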
   4552	if (!skb) {
   4553		seq = atomic_inc_return(&notify_genl_seq);
   4554		skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
   4555		err = -ENOMEM;
   4556		if (!skb)
   4557			goto failed;
   4558		multicast = true;
   4559	}
   4560
   4561	err = -EMSGSIZE;
   4562	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_RESOURCE_STATE);
   4563	if (!dh)
   4564		goto nla_put_failure;
   4565	dh->minor = -1U;
   4566	dh->ret_code = NO_ERROR;
   4567	if (nla_put_drbd_cfg_context(skb, resource, NULL, NULL) ||
   4568	    nla_put_notification_header(skb, type) ||
   4569	    ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
   4570	     resource_info_to_skb(skb, resource_info, true)))
   4571		goto nla_put_failure;
   4572	resource_statistics.res_stat_write_ordering = resource->write_ordering;
   4573	err = resource_statistics_to_skb(skb, &resource_statistics, !capable(CAP_SYS_ADMIN));
   4574	if (err)
   4575		goto nla_put_failure;
   4576	genlmsg_end(skb, dh);
   4577	if (multicast) {
   4578		err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
   4579		/* skb has been consumed or freed in netlink_broadcast() */
   4580		if (err && err != -ESRCH)
   4581			goto failed;
   4582	}
   4583	return 0;
   4584
   4585nla_put_failure:
   4586	nlmsg_free(skb);
   4587failed:
   4588	drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n",
   4589			err, seq);
   4590	return err;
   4591}
   4592
   4593int notify_device_state(struct sk_buff *skb,
   4594			 unsigned int seq,
   4595			 struct drbd_device *device,
   4596			 struct device_info *device_info,
   4597			 enum drbd_notification_type type)
   4598{
   4599	struct device_statistics device_statistics;
   4600	struct drbd_genlmsghdr *dh;
   4601	bool multicast = false;
   4602	int err;
   4603
   4604	if (!skb) {
   4605		seq = atomic_inc_return(&notify_genl_seq);
   4606		skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
   4607		err = -ENOMEM;
   4608		if (!skb)
   4609			goto failed;
   4610		multicast = true;
   4611	}
   4612
   4613	err = -EMSGSIZE;
   4614	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_DEVICE_STATE);
   4615	if (!dh)
   4616		goto nla_put_failure;
   4617	dh->minor = device->minor;
   4618	dh->ret_code = NO_ERROR;
   4619	if (nla_put_drbd_cfg_context(skb, device->resource, NULL, device) ||
   4620	    nla_put_notification_header(skb, type) ||
   4621	    ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
   4622	     device_info_to_skb(skb, device_info, true)))
   4623		goto nla_put_failure;
   4624	device_to_statistics(&device_statistics, device);
   4625	device_statistics_to_skb(skb, &device_statistics, !capable(CAP_SYS_ADMIN));
   4626	genlmsg_end(skb, dh);
   4627	if (multicast) {
   4628		err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
   4629		/* skb has been consumed or freed in netlink_broadcast() */
   4630		if (err && err != -ESRCH)
   4631			goto failed;
   4632	}
   4633	return 0;
   4634
   4635nla_put_failure:
   4636	nlmsg_free(skb);
   4637failed:
   4638	drbd_err(device, "Error %d while broadcasting event. Event seq:%u\n",
   4639		 err, seq);
   4640	return err;
   4641}
   4642
   4643int notify_connection_state(struct sk_buff *skb,
   4644			     unsigned int seq,
   4645			     struct drbd_connection *connection,
   4646			     struct connection_info *connection_info,
   4647			     enum drbd_notification_type type)
   4648{
   4649	struct connection_statistics connection_statistics;
   4650	struct drbd_genlmsghdr *dh;
   4651	bool multicast = false;
   4652	int err;
   4653
   4654	if (!skb) {
   4655		seq = atomic_inc_return(&notify_genl_seq);
   4656		skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
   4657		err = -ENOMEM;
   4658		if (!skb)
   4659			goto failed;
   4660		multicast = true;
   4661	}
   4662
   4663	err = -EMSGSIZE;
   4664	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_CONNECTION_STATE);
   4665	if (!dh)
   4666		goto nla_put_failure;
   4667	dh->minor = -1U;
   4668	dh->ret_code = NO_ERROR;
   4669	if (nla_put_drbd_cfg_context(skb, connection->resource, connection, NULL) ||
   4670	    nla_put_notification_header(skb, type) ||
   4671	    ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
   4672	     connection_info_to_skb(skb, connection_info, true)))
   4673		goto nla_put_failure;
   4674	connection_statistics.conn_congested = test_bit(NET_CONGESTED, &connection->flags);
   4675	connection_statistics_to_skb(skb, &connection_statistics, !capable(CAP_SYS_ADMIN));
   4676	genlmsg_end(skb, dh);
   4677	if (multicast) {
   4678		err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
   4679		/* skb has been consumed or freed in netlink_broadcast() */
   4680		if (err && err != -ESRCH)
   4681			goto failed;
   4682	}
   4683	return 0;
   4684
   4685nla_put_failure:
   4686	nlmsg_free(skb);
   4687failed:
   4688	drbd_err(connection, "Error %d while broadcasting event. Event seq:%u\n",
   4689		 err, seq);
   4690	return err;
   4691}
   4692
   4693int notify_peer_device_state(struct sk_buff *skb,
   4694			      unsigned int seq,
   4695			      struct drbd_peer_device *peer_device,
   4696			      struct peer_device_info *peer_device_info,
   4697			      enum drbd_notification_type type)
   4698{
   4699	struct peer_device_statistics peer_device_statistics;
   4700	struct drbd_resource *resource = peer_device->device->resource;
   4701	struct drbd_genlmsghdr *dh;
   4702	bool multicast = false;
   4703	int err;
   4704
   4705	if (!skb) {
   4706		seq = atomic_inc_return(&notify_genl_seq);
   4707		skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
   4708		err = -ENOMEM;
   4709		if (!skb)
   4710			goto failed;
   4711		multicast = true;
   4712	}
   4713
   4714	err = -EMSGSIZE;
   4715	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_PEER_DEVICE_STATE);
   4716	if (!dh)
   4717		goto nla_put_failure;
   4718	dh->minor = -1U;
   4719	dh->ret_code = NO_ERROR;
   4720	if (nla_put_drbd_cfg_context(skb, resource, peer_device->connection, peer_device->device) ||
   4721	    nla_put_notification_header(skb, type) ||
   4722	    ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
   4723	     peer_device_info_to_skb(skb, peer_device_info, true)))
   4724		goto nla_put_failure;
   4725	peer_device_to_statistics(&peer_device_statistics, peer_device);
   4726	peer_device_statistics_to_skb(skb, &peer_device_statistics, !capable(CAP_SYS_ADMIN));
   4727	genlmsg_end(skb, dh);
   4728	if (multicast) {
   4729		err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
   4730		/* skb has been consumed or freed in netlink_broadcast() */
   4731		if (err && err != -ESRCH)
   4732			goto failed;
   4733	}
   4734	return 0;
   4735
   4736nla_put_failure:
   4737	nlmsg_free(skb);
   4738failed:
   4739	drbd_err(peer_device, "Error %d while broadcasting event. Event seq:%u\n",
   4740		 err, seq);
   4741	return err;
   4742}
   4743
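       /*
        * Broadcast a DRBD_HELPER event reporting the name and exit status of
        * a user-space helper invocation.  The notification_mutex serializes
        * the broadcast with other notifications.
        */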
   4744void notify_helper(enum drbd_notification_type type,
   4745		   struct drbd_device *device, struct drbd_connection *connection,
   4746		   const char *name, int status)
   4747{
   4748	struct drbd_resource *resource = device ? device->resource : connection->resource;
   4749	struct drbd_helper_info helper_info;
   4750	unsigned int seq = atomic_inc_return(&notify_genl_seq);
   4751	struct sk_buff *skb = NULL;
   4752	struct drbd_genlmsghdr *dh;
   4753	int err;
   4754
   4755	strlcpy(helper_info.helper_name, name, sizeof(helper_info.helper_name));
   4756	helper_info.helper_name_len = min(strlen(name), sizeof(helper_info.helper_name));
   4757	helper_info.helper_status = status;
   4758
   4759	skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
   4760	err = -ENOMEM;
   4761	if (!skb)
   4762		goto fail;
   4763
   4764	err = -EMSGSIZE;
   4765	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_HELPER);
   4766	if (!dh)
   4767		goto fail;
   4768	dh->minor = device ? device->minor : -1;
   4769	dh->ret_code = NO_ERROR;
   4770	mutex_lock(&notification_mutex);
   4771	if (nla_put_drbd_cfg_context(skb, resource, connection, device) ||
   4772	    nla_put_notification_header(skb, type) ||
   4773	    drbd_helper_info_to_skb(skb, &helper_info, true))
   4774		goto unlock_fail;
   4775	genlmsg_end(skb, dh);
   4776	err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
   4777	skb = NULL;
   4778	/* skb has been consumed or freed in netlink_broadcast() */
   4779	if (err && err != -ESRCH)
   4780		goto unlock_fail;
   4781	mutex_unlock(&notification_mutex);
   4782	return;
   4783
   4784unlock_fail:
   4785	mutex_unlock(&notification_mutex);
   4786fail:
   4787	nlmsg_free(skb);
   4788	drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n",
   4789		 err, seq);
   4790}
   4791
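       /*
        * Append a DRBD_INITIAL_STATE_DONE message to the dump, telling
        * listeners that the snapshot of existing objects is complete.
        */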
   4792static int notify_initial_state_done(struct sk_buff *skb, unsigned int seq)
   4793{
   4794	struct drbd_genlmsghdr *dh;
   4795	int err;
   4796
   4797	err = -EMSGSIZE;
   4798	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_INITIAL_STATE_DONE);
   4799	if (!dh)
   4800		goto nla_put_failure;
   4801	dh->minor = -1U;
   4802	dh->ret_code = NO_ERROR;
   4803	if (nla_put_notification_header(skb, NOTIFY_EXISTS))
   4804		goto nla_put_failure;
   4805	genlmsg_end(skb, dh);
   4806	return 0;
   4807
   4808nla_put_failure:
   4809	nlmsg_free(skb);
   4810	pr_err("Error %d sending event. Event seq:%u\n", err, seq);
   4811	return err;
   4812}
   4813
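       /* Release the state-change snapshots built by drbd_adm_get_initial_state(). */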
   4814static void free_state_changes(struct list_head *list)
   4815{
   4816	while (!list_empty(list)) {
   4817		struct drbd_state_change *state_change =
   4818			list_first_entry(list, struct drbd_state_change, list);
   4819		list_del(&state_change->list);
   4820		forget_state_change(state_change);
   4821	}
   4822}
   4823
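       /*
        * One notification for the resource itself, plus one per connection,
        * one per device, and one per peer device (device/connection pair).
        */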
   4824static unsigned int notifications_for_state_change(struct drbd_state_change *state_change)
   4825{
   4826	return 1 +
   4827	       state_change->n_connections +
   4828	       state_change->n_devices +
   4829	       state_change->n_devices * state_change->n_connections;
   4830}
   4831
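       /*
        * Emit the next NOTIFY_EXISTS notification of the initial-state dump.
        * The netlink callback arguments are used as follows:
        *   cb->args[0]: current drbd_state_change in the snapshot list
        *   cb->args[2]: sequence number used for all notifications of this dump
        *   cb->args[3]: number of notifications for the current state change
        *   cb->args[4]: index of the next notification within it
        *   cb->args[5]: remaining dump steps, including the final
        *                "initial state done" message
        */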
   4832static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
   4833{
   4834	struct drbd_state_change *state_change = (struct drbd_state_change *)cb->args[0];
   4835	unsigned int seq = cb->args[2];
   4836	unsigned int n;
   4837	enum drbd_notification_type flags = 0;
   4838	int err = 0;
   4839
   4840	/* There is no need for taking notification_mutex here: it doesn't
   4841	   matter if the initial state events mix with later state change
   4842	   events; we can always tell the events apart by the NOTIFY_EXISTS
   4843	   flag. */
   4844
   4845	cb->args[5]--;
   4846	if (cb->args[5] == 1) {
   4847		err = notify_initial_state_done(skb, seq);
   4848		goto out;
   4849	}
   4850	n = cb->args[4]++;
   4851	if (cb->args[4] < cb->args[3])
   4852		flags |= NOTIFY_CONTINUES;
   4853	if (n < 1) {
   4854		err = notify_resource_state_change(skb, seq, state_change->resource,
   4855					     NOTIFY_EXISTS | flags);
   4856		goto next;
   4857	}
   4858	n--;
   4859	if (n < state_change->n_connections) {
   4860		err = notify_connection_state_change(skb, seq, &state_change->connections[n],
   4861					       NOTIFY_EXISTS | flags);
   4862		goto next;
   4863	}
   4864	n -= state_change->n_connections;
   4865	if (n < state_change->n_devices) {
   4866		err = notify_device_state_change(skb, seq, &state_change->devices[n],
   4867					   NOTIFY_EXISTS | flags);
   4868		goto next;
   4869	}
   4870	n -= state_change->n_devices;
   4871	if (n < state_change->n_devices * state_change->n_connections) {
   4872		err = notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n],
   4873						NOTIFY_EXISTS | flags);
   4874		goto next;
   4875	}
   4876
   4877next:
   4878	if (cb->args[4] == cb->args[3]) {
   4879		struct drbd_state_change *next_state_change =
   4880			list_entry(state_change->list.next,
   4881				   struct drbd_state_change, list);
   4882		cb->args[0] = (long)next_state_change;
   4883		cb->args[3] = notifications_for_state_change(next_state_change);
   4884		cb->args[4] = 0;
   4885	}
   4886out:
   4887	if (err)
   4888		return err;
   4889	else
   4890		return skb->len;
   4891}
   4892
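       /*
        * Dump handler for the initial state request: on the first call, take
        * a snapshot of all resources (under resources_mutex) and chain the
        * resulting state changes into a list; subsequent calls replay that
        * list as NOTIFY_EXISTS events via get_initial_state() and finally
        * free it.
        */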
   4893int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
   4894{
   4895	struct drbd_resource *resource;
   4896	LIST_HEAD(head);
   4897
   4898	if (cb->args[5] >= 1) {
   4899		if (cb->args[5] > 1)
   4900			return get_initial_state(skb, cb);
   4901		if (cb->args[0]) {
   4902			struct drbd_state_change *state_change =
   4903				(struct drbd_state_change *)cb->args[0];
   4904
   4905			/* connect list to head */
   4906			list_add(&head, &state_change->list);
   4907			free_state_changes(&head);
   4908		}
   4909		return 0;
   4910	}
   4911
   4912	cb->args[5] = 2;  /* number of iterations */
   4913	mutex_lock(&resources_mutex);
   4914	for_each_resource(resource, &drbd_resources) {
   4915		struct drbd_state_change *state_change;
   4916
   4917		state_change = remember_old_state(resource, GFP_KERNEL);
   4918		if (!state_change) {
   4919			if (!list_empty(&head))
   4920				free_state_changes(&head);
   4921			mutex_unlock(&resources_mutex);
   4922			return -ENOMEM;
   4923		}
   4924		copy_old_to_new_state_change(state_change);
   4925		list_add_tail(&state_change->list, &head);
   4926		cb->args[5] += notifications_for_state_change(state_change);
   4927	}
   4928	mutex_unlock(&resources_mutex);
   4929
   4930	if (!list_empty(&head)) {
   4931		struct drbd_state_change *state_change =
   4932			list_entry(head.next, struct drbd_state_change, list);
   4933		cb->args[0] = (long)state_change;
   4934		cb->args[3] = notifications_for_state_change(state_change);
   4935		list_del(&head);  /* detach list from head */
   4936	}
   4937
   4938	cb->args[2] = cb->nlh->nlmsg_seq;
   4939	return get_initial_state(skb, cb);
   4940}