cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

fc.c (111410B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * Copyright (c) 2016 Avago Technologies.  All rights reserved.
      4 */
      5#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
      6#include <linux/module.h>
      7#include <linux/parser.h>
      8#include <uapi/scsi/fc/fc_fs.h>
      9#include <uapi/scsi/fc/fc_els.h>
     10#include <linux/delay.h>
     11#include <linux/overflow.h>
     12#include <linux/blk-cgroup.h>
     13#include "nvme.h"
     14#include "fabrics.h"
     15#include <linux/nvme-fc-driver.h>
     16#include <linux/nvme-fc.h>
     17#include "fc.h"
     18#include <scsi/scsi_transport_fc.h>
     19#include <linux/blk-mq-pci.h>
     20
     21/* *************************** Data Structures/Defines ****************** */
     22
     23
     24enum nvme_fc_queue_flags {
     25	NVME_FC_Q_CONNECTED = 0,
     26	NVME_FC_Q_LIVE,
     27};
     28
     29#define NVME_FC_DEFAULT_DEV_LOSS_TMO	60	/* seconds */
     30#define NVME_FC_DEFAULT_RECONNECT_TMO	2	/* delay between reconnects
     31						 * when connected and a
     32						 * connection failure.
     33						 */
     34
     35struct nvme_fc_queue {
     36	struct nvme_fc_ctrl	*ctrl;
     37	struct device		*dev;
     38	struct blk_mq_hw_ctx	*hctx;
     39	void			*lldd_handle;
     40	size_t			cmnd_capsule_len;
     41	u32			qnum;
     42	u32			rqcnt;
     43	u32			seqno;
     44
     45	u64			connection_id;
     46	atomic_t		csn;
     47
     48	unsigned long		flags;
     49} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */
     50
     51enum nvme_fcop_flags {
     52	FCOP_FLAGS_TERMIO	= (1 << 0),
     53	FCOP_FLAGS_AEN		= (1 << 1),
     54};
     55
     56struct nvmefc_ls_req_op {
     57	struct nvmefc_ls_req	ls_req;
     58
     59	struct nvme_fc_rport	*rport;
     60	struct nvme_fc_queue	*queue;
     61	struct request		*rq;
     62	u32			flags;
     63
     64	int			ls_error;
     65	struct completion	ls_done;
     66	struct list_head	lsreq_list;	/* rport->ls_req_list */
     67	bool			req_queued;
     68};
     69
     70struct nvmefc_ls_rcv_op {
     71	struct nvme_fc_rport		*rport;
     72	struct nvmefc_ls_rsp		*lsrsp;
     73	union nvmefc_ls_requests	*rqstbuf;
     74	union nvmefc_ls_responses	*rspbuf;
     75	u16				rqstdatalen;
     76	bool				handled;
     77	dma_addr_t			rspdma;
     78	struct list_head		lsrcv_list;	/* rport->ls_rcv_list */
     79} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */
     80
     81enum nvme_fcpop_state {
     82	FCPOP_STATE_UNINIT	= 0,
     83	FCPOP_STATE_IDLE	= 1,
     84	FCPOP_STATE_ACTIVE	= 2,
     85	FCPOP_STATE_ABORTED	= 3,
     86	FCPOP_STATE_COMPLETE	= 4,
     87};
     88
     89struct nvme_fc_fcp_op {
     90	struct nvme_request	nreq;		/*
     91						 * nvme/host/core.c
     92						 * requires this to be
     93						 * the 1st element in the
     94						 * private structure
     95						 * associated with the
     96						 * request.
     97						 */
     98	struct nvmefc_fcp_req	fcp_req;
     99
    100	struct nvme_fc_ctrl	*ctrl;
    101	struct nvme_fc_queue	*queue;
    102	struct request		*rq;
    103
    104	atomic_t		state;
    105	u32			flags;
    106	u32			rqno;
    107	u32			nents;
    108
    109	struct nvme_fc_cmd_iu	cmd_iu;
    110	struct nvme_fc_ersp_iu	rsp_iu;
    111};
    112
    113struct nvme_fcp_op_w_sgl {
    114	struct nvme_fc_fcp_op	op;
    115	struct scatterlist	sgl[NVME_INLINE_SG_CNT];
    116	uint8_t			priv[];
    117};
    118
    119struct nvme_fc_lport {
    120	struct nvme_fc_local_port	localport;
    121
    122	struct ida			endp_cnt;
    123	struct list_head		port_list;	/* nvme_fc_port_list */
    124	struct list_head		endp_list;
    125	struct device			*dev;	/* physical device for dma */
    126	struct nvme_fc_port_template	*ops;
    127	struct kref			ref;
    128	atomic_t                        act_rport_cnt;
    129} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */
    130
    131struct nvme_fc_rport {
    132	struct nvme_fc_remote_port	remoteport;
    133
    134	struct list_head		endp_list; /* for lport->endp_list */
    135	struct list_head		ctrl_list;
    136	struct list_head		ls_req_list;
    137	struct list_head		ls_rcv_list;
    138	struct list_head		disc_list;
    139	struct device			*dev;	/* physical device for dma */
    140	struct nvme_fc_lport		*lport;
    141	spinlock_t			lock;
    142	struct kref			ref;
    143	atomic_t                        act_ctrl_cnt;
    144	unsigned long			dev_loss_end;
    145	struct work_struct		lsrcv_work;
    146} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */
    147
    148/* fc_ctrl flags values - specified as bit positions */
    149#define ASSOC_ACTIVE		0
    150#define ASSOC_FAILED		1
    151#define FCCTRL_TERMIO		2
    152
    153struct nvme_fc_ctrl {
    154	spinlock_t		lock;
    155	struct nvme_fc_queue	*queues;
    156	struct device		*dev;
    157	struct nvme_fc_lport	*lport;
    158	struct nvme_fc_rport	*rport;
    159	u32			cnum;
    160
    161	bool			ioq_live;
    162	u64			association_id;
    163	struct nvmefc_ls_rcv_op	*rcv_disconn;
    164
    165	struct list_head	ctrl_list;	/* rport->ctrl_list */
    166
    167	struct blk_mq_tag_set	admin_tag_set;
    168	struct blk_mq_tag_set	tag_set;
    169
    170	struct work_struct	ioerr_work;
    171	struct delayed_work	connect_work;
    172
    173	struct kref		ref;
    174	unsigned long		flags;
    175	u32			iocnt;
    176	wait_queue_head_t	ioabort_wait;
    177
    178	struct nvme_fc_fcp_op	aen_ops[NVME_NR_AEN_COMMANDS];
    179
    180	struct nvme_ctrl	ctrl;
    181};
    182
    183static inline struct nvme_fc_ctrl *
    184to_fc_ctrl(struct nvme_ctrl *ctrl)
    185{
    186	return container_of(ctrl, struct nvme_fc_ctrl, ctrl);
    187}
    188
    189static inline struct nvme_fc_lport *
    190localport_to_lport(struct nvme_fc_local_port *portptr)
    191{
    192	return container_of(portptr, struct nvme_fc_lport, localport);
    193}
    194
    195static inline struct nvme_fc_rport *
    196remoteport_to_rport(struct nvme_fc_remote_port *portptr)
    197{
    198	return container_of(portptr, struct nvme_fc_rport, remoteport);
    199}
    200
    201static inline struct nvmefc_ls_req_op *
    202ls_req_to_lsop(struct nvmefc_ls_req *lsreq)
    203{
    204	return container_of(lsreq, struct nvmefc_ls_req_op, ls_req);
    205}
    206
    207static inline struct nvme_fc_fcp_op *
    208fcp_req_to_fcp_op(struct nvmefc_fcp_req *fcpreq)
    209{
    210	return container_of(fcpreq, struct nvme_fc_fcp_op, fcp_req);
    211}
    212
    213
    214
    215/* *************************** Globals **************************** */
    216
    217
    218static DEFINE_SPINLOCK(nvme_fc_lock);
    219
    220static LIST_HEAD(nvme_fc_lport_list);
    221static DEFINE_IDA(nvme_fc_local_port_cnt);
    222static DEFINE_IDA(nvme_fc_ctrl_cnt);
    223
    224static struct workqueue_struct *nvme_fc_wq;
    225
    226static bool nvme_fc_waiting_to_unload;
    227static DECLARE_COMPLETION(nvme_fc_unload_proceed);
    228
    229/*
    230 * These items are short-term. They will eventually be moved into
    231 * a generic FC class. See comments in module init.
    232 */
    233static struct device *fc_udev_device;
    234
    235static void nvme_fc_complete_rq(struct request *rq);
    236
    237/* *********************** FC-NVME Port Management ************************ */
    238
    239static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *,
    240			struct nvme_fc_queue *, unsigned int);
    241
    242static void nvme_fc_handle_ls_rqst_work(struct work_struct *work);
    243
    244
    245static void
    246nvme_fc_free_lport(struct kref *ref)
    247{
    248	struct nvme_fc_lport *lport =
    249		container_of(ref, struct nvme_fc_lport, ref);
    250	unsigned long flags;
    251
    252	WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED);
    253	WARN_ON(!list_empty(&lport->endp_list));
    254
    255	/* remove from transport list */
    256	spin_lock_irqsave(&nvme_fc_lock, flags);
    257	list_del(&lport->port_list);
    258	if (nvme_fc_waiting_to_unload && list_empty(&nvme_fc_lport_list))
    259		complete(&nvme_fc_unload_proceed);
    260	spin_unlock_irqrestore(&nvme_fc_lock, flags);
    261
    262	ida_free(&nvme_fc_local_port_cnt, lport->localport.port_num);
    263	ida_destroy(&lport->endp_cnt);
    264
    265	put_device(lport->dev);
    266
    267	kfree(lport);
    268}
    269
    270static void
    271nvme_fc_lport_put(struct nvme_fc_lport *lport)
    272{
    273	kref_put(&lport->ref, nvme_fc_free_lport);
    274}
    275
    276static int
    277nvme_fc_lport_get(struct nvme_fc_lport *lport)
    278{
    279	return kref_get_unless_zero(&lport->ref);
    280}
    281
    282
    283static struct nvme_fc_lport *
    284nvme_fc_attach_to_unreg_lport(struct nvme_fc_port_info *pinfo,
    285			struct nvme_fc_port_template *ops,
    286			struct device *dev)
    287{
    288	struct nvme_fc_lport *lport;
    289	unsigned long flags;
    290
    291	spin_lock_irqsave(&nvme_fc_lock, flags);
    292
    293	list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
    294		if (lport->localport.node_name != pinfo->node_name ||
    295		    lport->localport.port_name != pinfo->port_name)
    296			continue;
    297
    298		if (lport->dev != dev) {
    299			lport = ERR_PTR(-EXDEV);
    300			goto out_done;
    301		}
    302
    303		if (lport->localport.port_state != FC_OBJSTATE_DELETED) {
    304			lport = ERR_PTR(-EEXIST);
    305			goto out_done;
    306		}
    307
    308		if (!nvme_fc_lport_get(lport)) {
    309			/*
    310			 * fails if ref cnt already 0. If so,
    311			 * act as if lport already deleted
    312			 */
    313			lport = NULL;
    314			goto out_done;
    315		}
    316
    317		/* resume the lport */
    318
    319		lport->ops = ops;
    320		lport->localport.port_role = pinfo->port_role;
    321		lport->localport.port_id = pinfo->port_id;
    322		lport->localport.port_state = FC_OBJSTATE_ONLINE;
    323
    324		spin_unlock_irqrestore(&nvme_fc_lock, flags);
    325
    326		return lport;
    327	}
    328
    329	lport = NULL;
    330
    331out_done:
    332	spin_unlock_irqrestore(&nvme_fc_lock, flags);
    333
    334	return lport;
    335}
    336
    337/**
    338 * nvme_fc_register_localport - transport entry point called by an
    339 *                              LLDD to register the existence of a NVME
    340 *                              host FC port.
    341 * @pinfo:     pointer to information about the port to be registered
    342 * @template:  LLDD entrypoints and operational parameters for the port
    343 * @dev:       physical hardware device node port corresponds to. Will be
    344 *             used for DMA mappings
    345 * @portptr:   pointer to a local port pointer. Upon success, the routine
    346 *             will allocate a nvme_fc_local_port structure and place its
    347 *             address in the local port pointer. Upon failure, local port
    348 *             pointer will be set to 0.
    349 *
    350 * Returns:
    351 * a completion status. Must be 0 upon success; a negative errno
    352 * (ex: -ENXIO) upon failure.
    353 */
    354int
    355nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
    356			struct nvme_fc_port_template *template,
    357			struct device *dev,
    358			struct nvme_fc_local_port **portptr)
    359{
    360	struct nvme_fc_lport *newrec;
    361	unsigned long flags;
    362	int ret, idx;
    363
    364	if (!template->localport_delete || !template->remoteport_delete ||
    365	    !template->ls_req || !template->fcp_io ||
    366	    !template->ls_abort || !template->fcp_abort ||
    367	    !template->max_hw_queues || !template->max_sgl_segments ||
    368	    !template->max_dif_sgl_segments || !template->dma_boundary) {
    369		ret = -EINVAL;
    370		goto out_reghost_failed;
    371	}
    372
    373	/*
    374	 * look to see if there is already a localport that had been
    375	 * deregistered and in the process of waiting for all the
    376	 * references to fully be removed.  If the references haven't
    377	 * expired, we can simply re-enable the localport. Remoteports
    378	 * and controller reconnections should resume naturally.
    379	 */
    380	newrec = nvme_fc_attach_to_unreg_lport(pinfo, template, dev);
    381
    382	/* found an lport, but something about its state is bad */
    383	if (IS_ERR(newrec)) {
    384		ret = PTR_ERR(newrec);
    385		goto out_reghost_failed;
    386
    387	/* found existing lport, which was resumed */
    388	} else if (newrec) {
    389		*portptr = &newrec->localport;
    390		return 0;
    391	}
    392
    393	/* nothing found - allocate a new localport struct */
    394
    395	newrec = kmalloc((sizeof(*newrec) + template->local_priv_sz),
    396			 GFP_KERNEL);
    397	if (!newrec) {
    398		ret = -ENOMEM;
    399		goto out_reghost_failed;
    400	}
    401
    402	idx = ida_alloc(&nvme_fc_local_port_cnt, GFP_KERNEL);
    403	if (idx < 0) {
    404		ret = -ENOSPC;
    405		goto out_fail_kfree;
    406	}
    407
    408	if (!get_device(dev) && dev) {
    409		ret = -ENODEV;
    410		goto out_ida_put;
    411	}
    412
    413	INIT_LIST_HEAD(&newrec->port_list);
    414	INIT_LIST_HEAD(&newrec->endp_list);
    415	kref_init(&newrec->ref);
    416	atomic_set(&newrec->act_rport_cnt, 0);
    417	newrec->ops = template;
    418	newrec->dev = dev;
    419	ida_init(&newrec->endp_cnt);
    420	if (template->local_priv_sz)
    421		newrec->localport.private = &newrec[1];
    422	else
    423		newrec->localport.private = NULL;
    424	newrec->localport.node_name = pinfo->node_name;
    425	newrec->localport.port_name = pinfo->port_name;
    426	newrec->localport.port_role = pinfo->port_role;
    427	newrec->localport.port_id = pinfo->port_id;
    428	newrec->localport.port_state = FC_OBJSTATE_ONLINE;
    429	newrec->localport.port_num = idx;
    430
    431	spin_lock_irqsave(&nvme_fc_lock, flags);
    432	list_add_tail(&newrec->port_list, &nvme_fc_lport_list);
    433	spin_unlock_irqrestore(&nvme_fc_lock, flags);
    434
    435	if (dev)
    436		dma_set_seg_boundary(dev, template->dma_boundary);
    437
    438	*portptr = &newrec->localport;
    439	return 0;
    440
    441out_ida_put:
    442	ida_free(&nvme_fc_local_port_cnt, idx);
    443out_fail_kfree:
    444	kfree(newrec);
    445out_reghost_failed:
    446	*portptr = NULL;
    447
    448	return ret;
    449}
    450EXPORT_SYMBOL_GPL(nvme_fc_register_localport);
    451
    452/**
    453 * nvme_fc_unregister_localport - transport entry point called by an
    454 *                              LLDD to deregister/remove a previously
    455 *                              registered a NVME host FC port.
    456 * @portptr: pointer to the (registered) local port that is to be deregistered.
    457 *
    458 * Returns:
    459 * a completion status. Must be 0 upon success; a negative errno
    460 * (ex: -ENXIO) upon failure.
    461 */
    462int
    463nvme_fc_unregister_localport(struct nvme_fc_local_port *portptr)
    464{
    465	struct nvme_fc_lport *lport = localport_to_lport(portptr);
    466	unsigned long flags;
    467
    468	if (!portptr)
    469		return -EINVAL;
    470
    471	spin_lock_irqsave(&nvme_fc_lock, flags);
    472
    473	if (portptr->port_state != FC_OBJSTATE_ONLINE) {
    474		spin_unlock_irqrestore(&nvme_fc_lock, flags);
    475		return -EINVAL;
    476	}
    477	portptr->port_state = FC_OBJSTATE_DELETED;
    478
    479	spin_unlock_irqrestore(&nvme_fc_lock, flags);
    480
    481	if (atomic_read(&lport->act_rport_cnt) == 0)
    482		lport->ops->localport_delete(&lport->localport);
    483
    484	nvme_fc_lport_put(lport);
    485
    486	return 0;
    487}
    488EXPORT_SYMBOL_GPL(nvme_fc_unregister_localport);
    489
    490/*
    491 * TRADDR strings, per FC-NVME are fixed format:
    492 *   "nn-0x<16hexdigits>:pn-0x<16hexdigits>" - 43 characters
    493 * udev event will only differ by prefix of what field is
    494 * being specified:
    495 *    "NVMEFC_HOST_TRADDR=" or "NVMEFC_TRADDR=" - 19 max characters
    496 *  19 + 43 + null_fudge = 64 characters
    497 */
    498#define FCNVME_TRADDR_LENGTH		64
    499
    500static void
    501nvme_fc_signal_discovery_scan(struct nvme_fc_lport *lport,
    502		struct nvme_fc_rport *rport)
    503{
    504	char hostaddr[FCNVME_TRADDR_LENGTH];	/* NVMEFC_HOST_TRADDR=...*/
    505	char tgtaddr[FCNVME_TRADDR_LENGTH];	/* NVMEFC_TRADDR=...*/
    506	char *envp[4] = { "FC_EVENT=nvmediscovery", hostaddr, tgtaddr, NULL };
    507
    508	if (!(rport->remoteport.port_role & FC_PORT_ROLE_NVME_DISCOVERY))
    509		return;
    510
    511	snprintf(hostaddr, sizeof(hostaddr),
    512		"NVMEFC_HOST_TRADDR=nn-0x%016llx:pn-0x%016llx",
    513		lport->localport.node_name, lport->localport.port_name);
    514	snprintf(tgtaddr, sizeof(tgtaddr),
    515		"NVMEFC_TRADDR=nn-0x%016llx:pn-0x%016llx",
    516		rport->remoteport.node_name, rport->remoteport.port_name);
    517	kobject_uevent_env(&fc_udev_device->kobj, KOBJ_CHANGE, envp);
    518}
    519
    520static void
    521nvme_fc_free_rport(struct kref *ref)
    522{
    523	struct nvme_fc_rport *rport =
    524		container_of(ref, struct nvme_fc_rport, ref);
    525	struct nvme_fc_lport *lport =
    526			localport_to_lport(rport->remoteport.localport);
    527	unsigned long flags;
    528
    529	WARN_ON(rport->remoteport.port_state != FC_OBJSTATE_DELETED);
    530	WARN_ON(!list_empty(&rport->ctrl_list));
    531
    532	/* remove from lport list */
    533	spin_lock_irqsave(&nvme_fc_lock, flags);
    534	list_del(&rport->endp_list);
    535	spin_unlock_irqrestore(&nvme_fc_lock, flags);
    536
    537	WARN_ON(!list_empty(&rport->disc_list));
    538	ida_free(&lport->endp_cnt, rport->remoteport.port_num);
    539
    540	kfree(rport);
    541
    542	nvme_fc_lport_put(lport);
    543}
    544
    545static void
    546nvme_fc_rport_put(struct nvme_fc_rport *rport)
    547{
    548	kref_put(&rport->ref, nvme_fc_free_rport);
    549}
    550
    551static int
    552nvme_fc_rport_get(struct nvme_fc_rport *rport)
    553{
    554	return kref_get_unless_zero(&rport->ref);
    555}
    556
    557static void
    558nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl)
    559{
    560	switch (ctrl->ctrl.state) {
    561	case NVME_CTRL_NEW:
    562	case NVME_CTRL_CONNECTING:
    563		/*
    564		 * As all reconnects were suppressed, schedule a
    565		 * connect.
    566		 */
    567		dev_info(ctrl->ctrl.device,
    568			"NVME-FC{%d}: connectivity re-established. "
    569			"Attempting reconnect\n", ctrl->cnum);
    570
    571		queue_delayed_work(nvme_wq, &ctrl->connect_work, 0);
    572		break;
    573
    574	case NVME_CTRL_RESETTING:
    575		/*
    576		 * Controller is already in the process of terminating the
    577		 * association. No need to do anything further. The reconnect
    578		 * step will naturally occur after the reset completes.
    579		 */
    580		break;
    581
    582	default:
    583		/* no action to take - let it delete */
    584		break;
    585	}
    586}
    587
    588static struct nvme_fc_rport *
    589nvme_fc_attach_to_suspended_rport(struct nvme_fc_lport *lport,
    590				struct nvme_fc_port_info *pinfo)
    591{
    592	struct nvme_fc_rport *rport;
    593	struct nvme_fc_ctrl *ctrl;
    594	unsigned long flags;
    595
    596	spin_lock_irqsave(&nvme_fc_lock, flags);
    597
    598	list_for_each_entry(rport, &lport->endp_list, endp_list) {
    599		if (rport->remoteport.node_name != pinfo->node_name ||
    600		    rport->remoteport.port_name != pinfo->port_name)
    601			continue;
    602
    603		if (!nvme_fc_rport_get(rport)) {
    604			rport = ERR_PTR(-ENOLCK);
    605			goto out_done;
    606		}
    607
    608		spin_unlock_irqrestore(&nvme_fc_lock, flags);
    609
    610		spin_lock_irqsave(&rport->lock, flags);
    611
    612		/* has it been unregistered */
    613		if (rport->remoteport.port_state != FC_OBJSTATE_DELETED) {
    614			/* means lldd called us twice */
    615			spin_unlock_irqrestore(&rport->lock, flags);
    616			nvme_fc_rport_put(rport);
    617			return ERR_PTR(-ESTALE);
    618		}
    619
    620		rport->remoteport.port_role = pinfo->port_role;
    621		rport->remoteport.port_id = pinfo->port_id;
    622		rport->remoteport.port_state = FC_OBJSTATE_ONLINE;
    623		rport->dev_loss_end = 0;
    624
    625		/*
    626		 * kick off a reconnect attempt on all associations to the
    627		 * remote port. A successful reconnects will resume i/o.
    628		 */
    629		list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list)
    630			nvme_fc_resume_controller(ctrl);
    631
    632		spin_unlock_irqrestore(&rport->lock, flags);
    633
    634		return rport;
    635	}
    636
    637	rport = NULL;
    638
    639out_done:
    640	spin_unlock_irqrestore(&nvme_fc_lock, flags);
    641
    642	return rport;
    643}
    644
    645static inline void
    646__nvme_fc_set_dev_loss_tmo(struct nvme_fc_rport *rport,
    647			struct nvme_fc_port_info *pinfo)
    648{
    649	if (pinfo->dev_loss_tmo)
    650		rport->remoteport.dev_loss_tmo = pinfo->dev_loss_tmo;
    651	else
    652		rport->remoteport.dev_loss_tmo = NVME_FC_DEFAULT_DEV_LOSS_TMO;
    653}
    654
    655/**
    656 * nvme_fc_register_remoteport - transport entry point called by an
    657 *                              LLDD to register the existence of a NVME
    658 *                              subsystem FC port on its fabric.
    659 * @localport: pointer to the (registered) local port that the remote
    660 *             subsystem port is connected to.
    661 * @pinfo:     pointer to information about the port to be registered
    662 * @portptr:   pointer to a remote port pointer. Upon success, the routine
    663 *             will allocate a nvme_fc_remote_port structure and place its
    664 *             address in the remote port pointer. Upon failure, remote port
    665 *             pointer will be set to 0.
    666 *
    667 * Returns:
    668 * a completion status. Must be 0 upon success; a negative errno
    669 * (ex: -ENXIO) upon failure.
    670 */
    671int
    672nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
    673				struct nvme_fc_port_info *pinfo,
    674				struct nvme_fc_remote_port **portptr)
    675{
    676	struct nvme_fc_lport *lport = localport_to_lport(localport);
    677	struct nvme_fc_rport *newrec;
    678	unsigned long flags;
    679	int ret, idx;
    680
    681	if (!nvme_fc_lport_get(lport)) {
    682		ret = -ESHUTDOWN;
    683		goto out_reghost_failed;
    684	}
    685
    686	/*
    687	 * look to see if there is already a remoteport that is waiting
    688	 * for a reconnect (within dev_loss_tmo) with the same WWN's.
    689	 * If so, transition to it and reconnect.
    690	 */
    691	newrec = nvme_fc_attach_to_suspended_rport(lport, pinfo);
    692
    693	/* found an rport, but something about its state is bad */
    694	if (IS_ERR(newrec)) {
    695		ret = PTR_ERR(newrec);
    696		goto out_lport_put;
    697
    698	/* found existing rport, which was resumed */
    699	} else if (newrec) {
    700		nvme_fc_lport_put(lport);
    701		__nvme_fc_set_dev_loss_tmo(newrec, pinfo);
    702		nvme_fc_signal_discovery_scan(lport, newrec);
    703		*portptr = &newrec->remoteport;
    704		return 0;
    705	}
    706
    707	/* nothing found - allocate a new remoteport struct */
    708
    709	newrec = kmalloc((sizeof(*newrec) + lport->ops->remote_priv_sz),
    710			 GFP_KERNEL);
    711	if (!newrec) {
    712		ret = -ENOMEM;
    713		goto out_lport_put;
    714	}
    715
    716	idx = ida_alloc(&lport->endp_cnt, GFP_KERNEL);
    717	if (idx < 0) {
    718		ret = -ENOSPC;
    719		goto out_kfree_rport;
    720	}
    721
    722	INIT_LIST_HEAD(&newrec->endp_list);
    723	INIT_LIST_HEAD(&newrec->ctrl_list);
    724	INIT_LIST_HEAD(&newrec->ls_req_list);
    725	INIT_LIST_HEAD(&newrec->disc_list);
    726	kref_init(&newrec->ref);
    727	atomic_set(&newrec->act_ctrl_cnt, 0);
    728	spin_lock_init(&newrec->lock);
    729	newrec->remoteport.localport = &lport->localport;
    730	INIT_LIST_HEAD(&newrec->ls_rcv_list);
    731	newrec->dev = lport->dev;
    732	newrec->lport = lport;
    733	if (lport->ops->remote_priv_sz)
    734		newrec->remoteport.private = &newrec[1];
    735	else
    736		newrec->remoteport.private = NULL;
    737	newrec->remoteport.port_role = pinfo->port_role;
    738	newrec->remoteport.node_name = pinfo->node_name;
    739	newrec->remoteport.port_name = pinfo->port_name;
    740	newrec->remoteport.port_id = pinfo->port_id;
    741	newrec->remoteport.port_state = FC_OBJSTATE_ONLINE;
    742	newrec->remoteport.port_num = idx;
    743	__nvme_fc_set_dev_loss_tmo(newrec, pinfo);
    744	INIT_WORK(&newrec->lsrcv_work, nvme_fc_handle_ls_rqst_work);
    745
    746	spin_lock_irqsave(&nvme_fc_lock, flags);
    747	list_add_tail(&newrec->endp_list, &lport->endp_list);
    748	spin_unlock_irqrestore(&nvme_fc_lock, flags);
    749
    750	nvme_fc_signal_discovery_scan(lport, newrec);
    751
    752	*portptr = &newrec->remoteport;
    753	return 0;
    754
    755out_kfree_rport:
    756	kfree(newrec);
    757out_lport_put:
    758	nvme_fc_lport_put(lport);
    759out_reghost_failed:
    760	*portptr = NULL;
    761	return ret;
    762}
    763EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport);
    764
    765static int
    766nvme_fc_abort_lsops(struct nvme_fc_rport *rport)
    767{
    768	struct nvmefc_ls_req_op *lsop;
    769	unsigned long flags;
    770
    771restart:
    772	spin_lock_irqsave(&rport->lock, flags);
    773
    774	list_for_each_entry(lsop, &rport->ls_req_list, lsreq_list) {
    775		if (!(lsop->flags & FCOP_FLAGS_TERMIO)) {
    776			lsop->flags |= FCOP_FLAGS_TERMIO;
    777			spin_unlock_irqrestore(&rport->lock, flags);
    778			rport->lport->ops->ls_abort(&rport->lport->localport,
    779						&rport->remoteport,
    780						&lsop->ls_req);
    781			goto restart;
    782		}
    783	}
    784	spin_unlock_irqrestore(&rport->lock, flags);
    785
    786	return 0;
    787}
    788
    789static void
    790nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
    791{
    792	dev_info(ctrl->ctrl.device,
    793		"NVME-FC{%d}: controller connectivity lost. Awaiting "
    794		"Reconnect", ctrl->cnum);
    795
    796	switch (ctrl->ctrl.state) {
    797	case NVME_CTRL_NEW:
    798	case NVME_CTRL_LIVE:
    799		/*
    800		 * Schedule a controller reset. The reset will terminate the
    801		 * association and schedule the reconnect timer.  Reconnects
    802		 * will be attempted until either the ctlr_loss_tmo
    803		 * (max_retries * connect_delay) expires or the remoteport's
    804		 * dev_loss_tmo expires.
    805		 */
    806		if (nvme_reset_ctrl(&ctrl->ctrl)) {
    807			dev_warn(ctrl->ctrl.device,
    808				"NVME-FC{%d}: Couldn't schedule reset.\n",
    809				ctrl->cnum);
    810			nvme_delete_ctrl(&ctrl->ctrl);
    811		}
    812		break;
    813
    814	case NVME_CTRL_CONNECTING:
    815		/*
    816		 * The association has already been terminated and the
    817		 * controller is attempting reconnects.  No need to do anything
    818		 * futher.  Reconnects will be attempted until either the
    819		 * ctlr_loss_tmo (max_retries * connect_delay) expires or the
    820		 * remoteport's dev_loss_tmo expires.
    821		 */
    822		break;
    823
    824	case NVME_CTRL_RESETTING:
    825		/*
    826		 * Controller is already in the process of terminating the
    827		 * association.  No need to do anything further. The reconnect
    828		 * step will kick in naturally after the association is
    829		 * terminated.
    830		 */
    831		break;
    832
    833	case NVME_CTRL_DELETING:
    834	case NVME_CTRL_DELETING_NOIO:
    835	default:
    836		/* no action to take - let it delete */
    837		break;
    838	}
    839}
    840
    841/**
    842 * nvme_fc_unregister_remoteport - transport entry point called by an
    843 *                              LLDD to deregister/remove a previously
    844 *                              registered a NVME subsystem FC port.
    845 * @portptr: pointer to the (registered) remote port that is to be
    846 *           deregistered.
    847 *
    848 * Returns:
    849 * a completion status. Must be 0 upon success; a negative errno
    850 * (ex: -ENXIO) upon failure.
    851 */
    852int
    853nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr)
    854{
    855	struct nvme_fc_rport *rport = remoteport_to_rport(portptr);
    856	struct nvme_fc_ctrl *ctrl;
    857	unsigned long flags;
    858
    859	if (!portptr)
    860		return -EINVAL;
    861
    862	spin_lock_irqsave(&rport->lock, flags);
    863
    864	if (portptr->port_state != FC_OBJSTATE_ONLINE) {
    865		spin_unlock_irqrestore(&rport->lock, flags);
    866		return -EINVAL;
    867	}
    868	portptr->port_state = FC_OBJSTATE_DELETED;
    869
    870	rport->dev_loss_end = jiffies + (portptr->dev_loss_tmo * HZ);
    871
    872	list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
    873		/* if dev_loss_tmo==0, dev loss is immediate */
    874		if (!portptr->dev_loss_tmo) {
    875			dev_warn(ctrl->ctrl.device,
    876				"NVME-FC{%d}: controller connectivity lost.\n",
    877				ctrl->cnum);
    878			nvme_delete_ctrl(&ctrl->ctrl);
    879		} else
    880			nvme_fc_ctrl_connectivity_loss(ctrl);
    881	}
    882
    883	spin_unlock_irqrestore(&rport->lock, flags);
    884
    885	nvme_fc_abort_lsops(rport);
    886
    887	if (atomic_read(&rport->act_ctrl_cnt) == 0)
    888		rport->lport->ops->remoteport_delete(portptr);
    889
    890	/*
    891	 * release the reference, which will allow, if all controllers
    892	 * go away, which should only occur after dev_loss_tmo occurs,
    893	 * for the rport to be torn down.
    894	 */
    895	nvme_fc_rport_put(rport);
    896
    897	return 0;
    898}
    899EXPORT_SYMBOL_GPL(nvme_fc_unregister_remoteport);
    900
    901/**
    902 * nvme_fc_rescan_remoteport - transport entry point called by an
    903 *                              LLDD to request a nvme device rescan.
    904 * @remoteport: pointer to the (registered) remote port that is to be
    905 *              rescanned.
    906 *
    907 * Returns: N/A
    908 */
    909void
    910nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport)
    911{
    912	struct nvme_fc_rport *rport = remoteport_to_rport(remoteport);
    913
    914	nvme_fc_signal_discovery_scan(rport->lport, rport);
    915}
    916EXPORT_SYMBOL_GPL(nvme_fc_rescan_remoteport);
    917
    918int
    919nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *portptr,
    920			u32 dev_loss_tmo)
    921{
    922	struct nvme_fc_rport *rport = remoteport_to_rport(portptr);
    923	unsigned long flags;
    924
    925	spin_lock_irqsave(&rport->lock, flags);
    926
    927	if (portptr->port_state != FC_OBJSTATE_ONLINE) {
    928		spin_unlock_irqrestore(&rport->lock, flags);
    929		return -EINVAL;
    930	}
    931
    932	/* a dev_loss_tmo of 0 (immediate) is allowed to be set */
    933	rport->remoteport.dev_loss_tmo = dev_loss_tmo;
    934
    935	spin_unlock_irqrestore(&rport->lock, flags);
    936
    937	return 0;
    938}
    939EXPORT_SYMBOL_GPL(nvme_fc_set_remoteport_devloss);
    940
    941
    942/* *********************** FC-NVME DMA Handling **************************** */
    943
    944/*
    945 * The fcloop device passes in a NULL device pointer. Real LLD's will
    946 * pass in a valid device pointer. If NULL is passed to the dma mapping
    947 * routines, depending on the platform, it may or may not succeed, and
    948 * may crash.
    949 *
    950 * As such:
    951 * Wrapper all the dma routines and check the dev pointer.
    952 *
    953 * If simple mappings (return just a dma address, we'll noop them,
    954 * returning a dma address of 0.
    955 *
    956 * On more complex mappings (dma_map_sg), a pseudo routine fills
    957 * in the scatter list, setting all dma addresses to 0.
    958 */
    959
    960static inline dma_addr_t
    961fc_dma_map_single(struct device *dev, void *ptr, size_t size,
    962		enum dma_data_direction dir)
    963{
    964	return dev ? dma_map_single(dev, ptr, size, dir) : (dma_addr_t)0L;
    965}
    966
    967static inline int
    968fc_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
    969{
    970	return dev ? dma_mapping_error(dev, dma_addr) : 0;
    971}
    972
    973static inline void
    974fc_dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size,
    975	enum dma_data_direction dir)
    976{
    977	if (dev)
    978		dma_unmap_single(dev, addr, size, dir);
    979}
    980
    981static inline void
    982fc_dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
    983		enum dma_data_direction dir)
    984{
    985	if (dev)
    986		dma_sync_single_for_cpu(dev, addr, size, dir);
    987}
    988
    989static inline void
    990fc_dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size,
    991		enum dma_data_direction dir)
    992{
    993	if (dev)
    994		dma_sync_single_for_device(dev, addr, size, dir);
    995}
    996
    997/* pseudo dma_map_sg call */
    998static int
    999fc_map_sg(struct scatterlist *sg, int nents)
   1000{
   1001	struct scatterlist *s;
   1002	int i;
   1003
   1004	WARN_ON(nents == 0 || sg[0].length == 0);
   1005
   1006	for_each_sg(sg, s, nents, i) {
   1007		s->dma_address = 0L;
   1008#ifdef CONFIG_NEED_SG_DMA_LENGTH
   1009		s->dma_length = s->length;
   1010#endif
   1011	}
   1012	return nents;
   1013}
   1014
   1015static inline int
   1016fc_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
   1017		enum dma_data_direction dir)
   1018{
   1019	return dev ? dma_map_sg(dev, sg, nents, dir) : fc_map_sg(sg, nents);
   1020}
   1021
   1022static inline void
   1023fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
   1024		enum dma_data_direction dir)
   1025{
   1026	if (dev)
   1027		dma_unmap_sg(dev, sg, nents, dir);
   1028}
   1029
   1030/* *********************** FC-NVME LS Handling **************************** */
   1031
   1032static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *);
   1033static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *);
   1034
   1035static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
   1036
   1037static void
   1038__nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop)
   1039{
   1040	struct nvme_fc_rport *rport = lsop->rport;
   1041	struct nvmefc_ls_req *lsreq = &lsop->ls_req;
   1042	unsigned long flags;
   1043
   1044	spin_lock_irqsave(&rport->lock, flags);
   1045
   1046	if (!lsop->req_queued) {
   1047		spin_unlock_irqrestore(&rport->lock, flags);
   1048		return;
   1049	}
   1050
   1051	list_del(&lsop->lsreq_list);
   1052
   1053	lsop->req_queued = false;
   1054
   1055	spin_unlock_irqrestore(&rport->lock, flags);
   1056
   1057	fc_dma_unmap_single(rport->dev, lsreq->rqstdma,
   1058				  (lsreq->rqstlen + lsreq->rsplen),
   1059				  DMA_BIDIRECTIONAL);
   1060
   1061	nvme_fc_rport_put(rport);
   1062}
   1063
   1064static int
   1065__nvme_fc_send_ls_req(struct nvme_fc_rport *rport,
   1066		struct nvmefc_ls_req_op *lsop,
   1067		void (*done)(struct nvmefc_ls_req *req, int status))
   1068{
   1069	struct nvmefc_ls_req *lsreq = &lsop->ls_req;
   1070	unsigned long flags;
   1071	int ret = 0;
   1072
   1073	if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
   1074		return -ECONNREFUSED;
   1075
   1076	if (!nvme_fc_rport_get(rport))
   1077		return -ESHUTDOWN;
   1078
   1079	lsreq->done = done;
   1080	lsop->rport = rport;
   1081	lsop->req_queued = false;
   1082	INIT_LIST_HEAD(&lsop->lsreq_list);
   1083	init_completion(&lsop->ls_done);
   1084
   1085	lsreq->rqstdma = fc_dma_map_single(rport->dev, lsreq->rqstaddr,
   1086				  lsreq->rqstlen + lsreq->rsplen,
   1087				  DMA_BIDIRECTIONAL);
   1088	if (fc_dma_mapping_error(rport->dev, lsreq->rqstdma)) {
   1089		ret = -EFAULT;
   1090		goto out_putrport;
   1091	}
   1092	lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen;
   1093
   1094	spin_lock_irqsave(&rport->lock, flags);
   1095
   1096	list_add_tail(&lsop->lsreq_list, &rport->ls_req_list);
   1097
   1098	lsop->req_queued = true;
   1099
   1100	spin_unlock_irqrestore(&rport->lock, flags);
   1101
   1102	ret = rport->lport->ops->ls_req(&rport->lport->localport,
   1103					&rport->remoteport, lsreq);
   1104	if (ret)
   1105		goto out_unlink;
   1106
   1107	return 0;
   1108
   1109out_unlink:
   1110	lsop->ls_error = ret;
   1111	spin_lock_irqsave(&rport->lock, flags);
   1112	lsop->req_queued = false;
   1113	list_del(&lsop->lsreq_list);
   1114	spin_unlock_irqrestore(&rport->lock, flags);
   1115	fc_dma_unmap_single(rport->dev, lsreq->rqstdma,
   1116				  (lsreq->rqstlen + lsreq->rsplen),
   1117				  DMA_BIDIRECTIONAL);
   1118out_putrport:
   1119	nvme_fc_rport_put(rport);
   1120
   1121	return ret;
   1122}
   1123
   1124static void
   1125nvme_fc_send_ls_req_done(struct nvmefc_ls_req *lsreq, int status)
   1126{
   1127	struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq);
   1128
   1129	lsop->ls_error = status;
   1130	complete(&lsop->ls_done);
   1131}
   1132
   1133static int
   1134nvme_fc_send_ls_req(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop)
   1135{
   1136	struct nvmefc_ls_req *lsreq = &lsop->ls_req;
   1137	struct fcnvme_ls_rjt *rjt = lsreq->rspaddr;
   1138	int ret;
   1139
   1140	ret = __nvme_fc_send_ls_req(rport, lsop, nvme_fc_send_ls_req_done);
   1141
   1142	if (!ret) {
   1143		/*
   1144		 * No timeout/not interruptible as we need the struct
   1145		 * to exist until the lldd calls us back. Thus mandate
   1146		 * wait until driver calls back. lldd responsible for
   1147		 * the timeout action
   1148		 */
   1149		wait_for_completion(&lsop->ls_done);
   1150
   1151		__nvme_fc_finish_ls_req(lsop);
   1152
   1153		ret = lsop->ls_error;
   1154	}
   1155
   1156	if (ret)
   1157		return ret;
   1158
   1159	/* ACC or RJT payload ? */
   1160	if (rjt->w0.ls_cmd == FCNVME_LS_RJT)
   1161		return -ENXIO;
   1162
   1163	return 0;
   1164}
   1165
   1166static int
   1167nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport,
   1168		struct nvmefc_ls_req_op *lsop,
   1169		void (*done)(struct nvmefc_ls_req *req, int status))
   1170{
   1171	/* don't wait for completion */
   1172
   1173	return __nvme_fc_send_ls_req(rport, lsop, done);
   1174}
   1175
   1176static int
   1177nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
   1178	struct nvme_fc_queue *queue, u16 qsize, u16 ersp_ratio)
   1179{
   1180	struct nvmefc_ls_req_op *lsop;
   1181	struct nvmefc_ls_req *lsreq;
   1182	struct fcnvme_ls_cr_assoc_rqst *assoc_rqst;
   1183	struct fcnvme_ls_cr_assoc_acc *assoc_acc;
   1184	unsigned long flags;
   1185	int ret, fcret = 0;
   1186
   1187	lsop = kzalloc((sizeof(*lsop) +
   1188			 sizeof(*assoc_rqst) + sizeof(*assoc_acc) +
   1189			 ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL);
   1190	if (!lsop) {
   1191		dev_info(ctrl->ctrl.device,
   1192			"NVME-FC{%d}: send Create Association failed: ENOMEM\n",
   1193			ctrl->cnum);
   1194		ret = -ENOMEM;
   1195		goto out_no_memory;
   1196	}
   1197
   1198	assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *)&lsop[1];
   1199	assoc_acc = (struct fcnvme_ls_cr_assoc_acc *)&assoc_rqst[1];
   1200	lsreq = &lsop->ls_req;
   1201	if (ctrl->lport->ops->lsrqst_priv_sz)
   1202		lsreq->private = &assoc_acc[1];
   1203	else
   1204		lsreq->private = NULL;
   1205
   1206	assoc_rqst->w0.ls_cmd = FCNVME_LS_CREATE_ASSOCIATION;
   1207	assoc_rqst->desc_list_len =
   1208			cpu_to_be32(sizeof(struct fcnvme_lsdesc_cr_assoc_cmd));
   1209
   1210	assoc_rqst->assoc_cmd.desc_tag =
   1211			cpu_to_be32(FCNVME_LSDESC_CREATE_ASSOC_CMD);
   1212	assoc_rqst->assoc_cmd.desc_len =
   1213			fcnvme_lsdesc_len(
   1214				sizeof(struct fcnvme_lsdesc_cr_assoc_cmd));
   1215
   1216	assoc_rqst->assoc_cmd.ersp_ratio = cpu_to_be16(ersp_ratio);
   1217	assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize - 1);
   1218	/* Linux supports only Dynamic controllers */
   1219	assoc_rqst->assoc_cmd.cntlid = cpu_to_be16(0xffff);
   1220	uuid_copy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id);
   1221	strncpy(assoc_rqst->assoc_cmd.hostnqn, ctrl->ctrl.opts->host->nqn,
   1222		min(FCNVME_ASSOC_HOSTNQN_LEN, NVMF_NQN_SIZE));
   1223	strncpy(assoc_rqst->assoc_cmd.subnqn, ctrl->ctrl.opts->subsysnqn,
   1224		min(FCNVME_ASSOC_SUBNQN_LEN, NVMF_NQN_SIZE));
   1225
   1226	lsop->queue = queue;
   1227	lsreq->rqstaddr = assoc_rqst;
   1228	lsreq->rqstlen = sizeof(*assoc_rqst);
   1229	lsreq->rspaddr = assoc_acc;
   1230	lsreq->rsplen = sizeof(*assoc_acc);
   1231	lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;
   1232
   1233	ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
   1234	if (ret)
   1235		goto out_free_buffer;
   1236
   1237	/* process connect LS completion */
   1238
   1239	/* validate the ACC response */
   1240	if (assoc_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC)
   1241		fcret = VERR_LSACC;
   1242	else if (assoc_acc->hdr.desc_list_len !=
   1243			fcnvme_lsdesc_len(
   1244				sizeof(struct fcnvme_ls_cr_assoc_acc)))
   1245		fcret = VERR_CR_ASSOC_ACC_LEN;
   1246	else if (assoc_acc->hdr.rqst.desc_tag !=
   1247			cpu_to_be32(FCNVME_LSDESC_RQST))
   1248		fcret = VERR_LSDESC_RQST;
   1249	else if (assoc_acc->hdr.rqst.desc_len !=
   1250			fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst)))
   1251		fcret = VERR_LSDESC_RQST_LEN;
   1252	else if (assoc_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_ASSOCIATION)
   1253		fcret = VERR_CR_ASSOC;
   1254	else if (assoc_acc->associd.desc_tag !=
   1255			cpu_to_be32(FCNVME_LSDESC_ASSOC_ID))
   1256		fcret = VERR_ASSOC_ID;
   1257	else if (assoc_acc->associd.desc_len !=
   1258			fcnvme_lsdesc_len(
   1259				sizeof(struct fcnvme_lsdesc_assoc_id)))
   1260		fcret = VERR_ASSOC_ID_LEN;
   1261	else if (assoc_acc->connectid.desc_tag !=
   1262			cpu_to_be32(FCNVME_LSDESC_CONN_ID))
   1263		fcret = VERR_CONN_ID;
   1264	else if (assoc_acc->connectid.desc_len !=
   1265			fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_conn_id)))
   1266		fcret = VERR_CONN_ID_LEN;
   1267
   1268	if (fcret) {
   1269		ret = -EBADF;
   1270		dev_err(ctrl->dev,
   1271			"q %d Create Association LS failed: %s\n",
   1272			queue->qnum, validation_errors[fcret]);
   1273	} else {
   1274		spin_lock_irqsave(&ctrl->lock, flags);
   1275		ctrl->association_id =
   1276			be64_to_cpu(assoc_acc->associd.association_id);
   1277		queue->connection_id =
   1278			be64_to_cpu(assoc_acc->connectid.connection_id);
   1279		set_bit(NVME_FC_Q_CONNECTED, &queue->flags);
   1280		spin_unlock_irqrestore(&ctrl->lock, flags);
   1281	}
   1282
   1283out_free_buffer:
   1284	kfree(lsop);
   1285out_no_memory:
   1286	if (ret)
   1287		dev_err(ctrl->dev,
   1288			"queue %d connect admin queue failed (%d).\n",
   1289			queue->qnum, ret);
   1290	return ret;
   1291}
   1292
   1293static int
   1294nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
   1295			u16 qsize, u16 ersp_ratio)
   1296{
   1297	struct nvmefc_ls_req_op *lsop;
   1298	struct nvmefc_ls_req *lsreq;
   1299	struct fcnvme_ls_cr_conn_rqst *conn_rqst;
   1300	struct fcnvme_ls_cr_conn_acc *conn_acc;
   1301	int ret, fcret = 0;
   1302
   1303	lsop = kzalloc((sizeof(*lsop) +
   1304			 sizeof(*conn_rqst) + sizeof(*conn_acc) +
   1305			 ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL);
   1306	if (!lsop) {
   1307		dev_info(ctrl->ctrl.device,
   1308			"NVME-FC{%d}: send Create Connection failed: ENOMEM\n",
   1309			ctrl->cnum);
   1310		ret = -ENOMEM;
   1311		goto out_no_memory;
   1312	}
   1313
   1314	conn_rqst = (struct fcnvme_ls_cr_conn_rqst *)&lsop[1];
   1315	conn_acc = (struct fcnvme_ls_cr_conn_acc *)&conn_rqst[1];
   1316	lsreq = &lsop->ls_req;
   1317	if (ctrl->lport->ops->lsrqst_priv_sz)
   1318		lsreq->private = (void *)&conn_acc[1];
   1319	else
   1320		lsreq->private = NULL;
   1321
   1322	conn_rqst->w0.ls_cmd = FCNVME_LS_CREATE_CONNECTION;
   1323	conn_rqst->desc_list_len = cpu_to_be32(
   1324				sizeof(struct fcnvme_lsdesc_assoc_id) +
   1325				sizeof(struct fcnvme_lsdesc_cr_conn_cmd));
   1326
   1327	conn_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID);
   1328	conn_rqst->associd.desc_len =
   1329			fcnvme_lsdesc_len(
   1330				sizeof(struct fcnvme_lsdesc_assoc_id));
   1331	conn_rqst->associd.association_id = cpu_to_be64(ctrl->association_id);
   1332	conn_rqst->connect_cmd.desc_tag =
   1333			cpu_to_be32(FCNVME_LSDESC_CREATE_CONN_CMD);
   1334	conn_rqst->connect_cmd.desc_len =
   1335			fcnvme_lsdesc_len(
   1336				sizeof(struct fcnvme_lsdesc_cr_conn_cmd));
   1337	conn_rqst->connect_cmd.ersp_ratio = cpu_to_be16(ersp_ratio);
   1338	conn_rqst->connect_cmd.qid  = cpu_to_be16(queue->qnum);
   1339	conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize - 1);
   1340
   1341	lsop->queue = queue;
   1342	lsreq->rqstaddr = conn_rqst;
   1343	lsreq->rqstlen = sizeof(*conn_rqst);
   1344	lsreq->rspaddr = conn_acc;
   1345	lsreq->rsplen = sizeof(*conn_acc);
   1346	lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;
   1347
   1348	ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
   1349	if (ret)
   1350		goto out_free_buffer;
   1351
   1352	/* process connect LS completion */
   1353
   1354	/* validate the ACC response */
   1355	if (conn_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC)
   1356		fcret = VERR_LSACC;
   1357	else if (conn_acc->hdr.desc_list_len !=
   1358			fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc)))
   1359		fcret = VERR_CR_CONN_ACC_LEN;
   1360	else if (conn_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST))
   1361		fcret = VERR_LSDESC_RQST;
   1362	else if (conn_acc->hdr.rqst.desc_len !=
   1363			fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst)))
   1364		fcret = VERR_LSDESC_RQST_LEN;
   1365	else if (conn_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_CONNECTION)
   1366		fcret = VERR_CR_CONN;
   1367	else if (conn_acc->connectid.desc_tag !=
   1368			cpu_to_be32(FCNVME_LSDESC_CONN_ID))
   1369		fcret = VERR_CONN_ID;
   1370	else if (conn_acc->connectid.desc_len !=
   1371			fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_conn_id)))
   1372		fcret = VERR_CONN_ID_LEN;
   1373
   1374	if (fcret) {
   1375		ret = -EBADF;
   1376		dev_err(ctrl->dev,
   1377			"q %d Create I/O Connection LS failed: %s\n",
   1378			queue->qnum, validation_errors[fcret]);
   1379	} else {
   1380		queue->connection_id =
   1381			be64_to_cpu(conn_acc->connectid.connection_id);
   1382		set_bit(NVME_FC_Q_CONNECTED, &queue->flags);
   1383	}
   1384
   1385out_free_buffer:
   1386	kfree(lsop);
   1387out_no_memory:
   1388	if (ret)
   1389		dev_err(ctrl->dev,
   1390			"queue %d connect I/O queue failed (%d).\n",
   1391			queue->qnum, ret);
   1392	return ret;
   1393}
   1394
   1395static void
   1396nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status)
   1397{
   1398	struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq);
   1399
   1400	__nvme_fc_finish_ls_req(lsop);
   1401
   1402	/* fc-nvme initiator doesn't care about success or failure of cmd */
   1403
   1404	kfree(lsop);
   1405}
   1406
   1407/*
   1408 * This routine sends a FC-NVME LS to disconnect (aka terminate)
   1409 * the FC-NVME Association.  Terminating the association also
   1410 * terminates the FC-NVME connections (per queue, both admin and io
   1411 * queues) that are part of the association. E.g. things are torn
   1412 * down, and the related FC-NVME Association ID and Connection IDs
   1413 * become invalid.
   1414 *
   1415 * The behavior of the fc-nvme initiator is such that it's
   1416 * understanding of the association and connections will implicitly
   1417 * be torn down. The action is implicit as it may be due to a loss of
   1418 * connectivity with the fc-nvme target, so you may never get a
   1419 * response even if you tried.  As such, the action of this routine
   1420 * is to asynchronously send the LS, ignore any results of the LS, and
   1421 * continue on with terminating the association. If the fc-nvme target
   1422 * is present and receives the LS, it too can tear down.
   1423 */
   1424static void
   1425nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
   1426{
   1427	struct fcnvme_ls_disconnect_assoc_rqst *discon_rqst;
   1428	struct fcnvme_ls_disconnect_assoc_acc *discon_acc;
   1429	struct nvmefc_ls_req_op *lsop;
   1430	struct nvmefc_ls_req *lsreq;
   1431	int ret;
   1432
   1433	lsop = kzalloc((sizeof(*lsop) +
   1434			sizeof(*discon_rqst) + sizeof(*discon_acc) +
   1435			ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL);
   1436	if (!lsop) {
   1437		dev_info(ctrl->ctrl.device,
   1438			"NVME-FC{%d}: send Disconnect Association "
   1439			"failed: ENOMEM\n",
   1440			ctrl->cnum);
   1441		return;
   1442	}
   1443
   1444	discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *)&lsop[1];
   1445	discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1];
   1446	lsreq = &lsop->ls_req;
   1447	if (ctrl->lport->ops->lsrqst_priv_sz)
   1448		lsreq->private = (void *)&discon_acc[1];
   1449	else
   1450		lsreq->private = NULL;
   1451
   1452	nvmefc_fmt_lsreq_discon_assoc(lsreq, discon_rqst, discon_acc,
   1453				ctrl->association_id);
   1454
   1455	ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop,
   1456				nvme_fc_disconnect_assoc_done);
   1457	if (ret)
   1458		kfree(lsop);
   1459}
   1460
   1461static void
   1462nvme_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp)
   1463{
   1464	struct nvmefc_ls_rcv_op *lsop = lsrsp->nvme_fc_private;
   1465	struct nvme_fc_rport *rport = lsop->rport;
   1466	struct nvme_fc_lport *lport = rport->lport;
   1467	unsigned long flags;
   1468
   1469	spin_lock_irqsave(&rport->lock, flags);
   1470	list_del(&lsop->lsrcv_list);
   1471	spin_unlock_irqrestore(&rport->lock, flags);
   1472
   1473	fc_dma_sync_single_for_cpu(lport->dev, lsop->rspdma,
   1474				sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
   1475	fc_dma_unmap_single(lport->dev, lsop->rspdma,
   1476			sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
   1477
   1478	kfree(lsop);
   1479
   1480	nvme_fc_rport_put(rport);
   1481}
   1482
   1483static void
   1484nvme_fc_xmt_ls_rsp(struct nvmefc_ls_rcv_op *lsop)
   1485{
   1486	struct nvme_fc_rport *rport = lsop->rport;
   1487	struct nvme_fc_lport *lport = rport->lport;
   1488	struct fcnvme_ls_rqst_w0 *w0 = &lsop->rqstbuf->w0;
   1489	int ret;
   1490
   1491	fc_dma_sync_single_for_device(lport->dev, lsop->rspdma,
   1492				  sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
   1493
   1494	ret = lport->ops->xmt_ls_rsp(&lport->localport, &rport->remoteport,
   1495				     lsop->lsrsp);
   1496	if (ret) {
   1497		dev_warn(lport->dev,
   1498			"LLDD rejected LS RSP xmt: LS %d status %d\n",
   1499			w0->ls_cmd, ret);
   1500		nvme_fc_xmt_ls_rsp_done(lsop->lsrsp);
   1501		return;
   1502	}
   1503}
   1504
   1505static struct nvme_fc_ctrl *
   1506nvme_fc_match_disconn_ls(struct nvme_fc_rport *rport,
   1507		      struct nvmefc_ls_rcv_op *lsop)
   1508{
   1509	struct fcnvme_ls_disconnect_assoc_rqst *rqst =
   1510					&lsop->rqstbuf->rq_dis_assoc;
   1511	struct nvme_fc_ctrl *ctrl, *ret = NULL;
   1512	struct nvmefc_ls_rcv_op *oldls = NULL;
   1513	u64 association_id = be64_to_cpu(rqst->associd.association_id);
   1514	unsigned long flags;
   1515
   1516	spin_lock_irqsave(&rport->lock, flags);
   1517
   1518	list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
   1519		if (!nvme_fc_ctrl_get(ctrl))
   1520			continue;
   1521		spin_lock(&ctrl->lock);
   1522		if (association_id == ctrl->association_id) {
   1523			oldls = ctrl->rcv_disconn;
   1524			ctrl->rcv_disconn = lsop;
   1525			ret = ctrl;
   1526		}
   1527		spin_unlock(&ctrl->lock);
   1528		if (ret)
   1529			/* leave the ctrl get reference */
   1530			break;
   1531		nvme_fc_ctrl_put(ctrl);
   1532	}
   1533
   1534	spin_unlock_irqrestore(&rport->lock, flags);
   1535
   1536	/* transmit a response for anything that was pending */
   1537	if (oldls) {
   1538		dev_info(rport->lport->dev,
   1539			"NVME-FC{%d}: Multiple Disconnect Association "
   1540			"LS's received\n", ctrl->cnum);
   1541		/* overwrite good response with bogus failure */
   1542		oldls->lsrsp->rsplen = nvme_fc_format_rjt(oldls->rspbuf,
   1543						sizeof(*oldls->rspbuf),
   1544						rqst->w0.ls_cmd,
   1545						FCNVME_RJT_RC_UNAB,
   1546						FCNVME_RJT_EXP_NONE, 0);
   1547		nvme_fc_xmt_ls_rsp(oldls);
   1548	}
   1549
   1550	return ret;
   1551}
   1552
   1553/*
   1554 * returns true to mean LS handled and ls_rsp can be sent
   1555 * returns false to defer ls_rsp xmt (will be done as part of
   1556 *     association termination)
   1557 */
   1558static bool
   1559nvme_fc_ls_disconnect_assoc(struct nvmefc_ls_rcv_op *lsop)
   1560{
   1561	struct nvme_fc_rport *rport = lsop->rport;
   1562	struct fcnvme_ls_disconnect_assoc_rqst *rqst =
   1563					&lsop->rqstbuf->rq_dis_assoc;
   1564	struct fcnvme_ls_disconnect_assoc_acc *acc =
   1565					&lsop->rspbuf->rsp_dis_assoc;
   1566	struct nvme_fc_ctrl *ctrl = NULL;
   1567	int ret = 0;
   1568
   1569	memset(acc, 0, sizeof(*acc));
   1570
   1571	ret = nvmefc_vldt_lsreq_discon_assoc(lsop->rqstdatalen, rqst);
   1572	if (!ret) {
   1573		/* match an active association */
   1574		ctrl = nvme_fc_match_disconn_ls(rport, lsop);
   1575		if (!ctrl)
   1576			ret = VERR_NO_ASSOC;
   1577	}
   1578
   1579	if (ret) {
   1580		dev_info(rport->lport->dev,
   1581			"Disconnect LS failed: %s\n",
   1582			validation_errors[ret]);
   1583		lsop->lsrsp->rsplen = nvme_fc_format_rjt(acc,
   1584					sizeof(*acc), rqst->w0.ls_cmd,
   1585					(ret == VERR_NO_ASSOC) ?
   1586						FCNVME_RJT_RC_INV_ASSOC :
   1587						FCNVME_RJT_RC_LOGIC,
   1588					FCNVME_RJT_EXP_NONE, 0);
   1589		return true;
   1590	}
   1591
   1592	/* format an ACCept response */
   1593
   1594	lsop->lsrsp->rsplen = sizeof(*acc);
   1595
   1596	nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC,
   1597			fcnvme_lsdesc_len(
   1598				sizeof(struct fcnvme_ls_disconnect_assoc_acc)),
   1599			FCNVME_LS_DISCONNECT_ASSOC);
   1600
   1601	/*
   1602	 * the transmit of the response will occur after the exchanges
   1603	 * for the association have been ABTS'd by
   1604	 * nvme_fc_delete_association().
   1605	 */
   1606
   1607	/* fail the association */
   1608	nvme_fc_error_recovery(ctrl, "Disconnect Association LS received");
   1609
   1610	/* release the reference taken by nvme_fc_match_disconn_ls() */
   1611	nvme_fc_ctrl_put(ctrl);
   1612
   1613	return false;
   1614}
   1615
   1616/*
   1617 * Actual Processing routine for received FC-NVME LS Requests from the LLD
   1618 * returns true if a response should be sent afterward, false if rsp will
   1619 * be sent asynchronously.
   1620 */
   1621static bool
   1622nvme_fc_handle_ls_rqst(struct nvmefc_ls_rcv_op *lsop)
   1623{
   1624	struct fcnvme_ls_rqst_w0 *w0 = &lsop->rqstbuf->w0;
   1625	bool ret = true;
   1626
   1627	lsop->lsrsp->nvme_fc_private = lsop;
   1628	lsop->lsrsp->rspbuf = lsop->rspbuf;
   1629	lsop->lsrsp->rspdma = lsop->rspdma;
   1630	lsop->lsrsp->done = nvme_fc_xmt_ls_rsp_done;
   1631	/* Be preventative. handlers will later set to valid length */
   1632	lsop->lsrsp->rsplen = 0;
   1633
   1634	/*
   1635	 * handlers:
   1636	 *   parse request input, execute the request, and format the
   1637	 *   LS response
   1638	 */
   1639	switch (w0->ls_cmd) {
   1640	case FCNVME_LS_DISCONNECT_ASSOC:
   1641		ret = nvme_fc_ls_disconnect_assoc(lsop);
   1642		break;
   1643	case FCNVME_LS_DISCONNECT_CONN:
   1644		lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf,
   1645				sizeof(*lsop->rspbuf), w0->ls_cmd,
   1646				FCNVME_RJT_RC_UNSUP, FCNVME_RJT_EXP_NONE, 0);
   1647		break;
   1648	case FCNVME_LS_CREATE_ASSOCIATION:
   1649	case FCNVME_LS_CREATE_CONNECTION:
   1650		lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf,
   1651				sizeof(*lsop->rspbuf), w0->ls_cmd,
   1652				FCNVME_RJT_RC_LOGIC, FCNVME_RJT_EXP_NONE, 0);
   1653		break;
   1654	default:
   1655		lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf,
   1656				sizeof(*lsop->rspbuf), w0->ls_cmd,
   1657				FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0);
   1658		break;
   1659	}
   1660
   1661	return(ret);
   1662}
   1663
   1664static void
   1665nvme_fc_handle_ls_rqst_work(struct work_struct *work)
   1666{
   1667	struct nvme_fc_rport *rport =
   1668		container_of(work, struct nvme_fc_rport, lsrcv_work);
   1669	struct fcnvme_ls_rqst_w0 *w0;
   1670	struct nvmefc_ls_rcv_op *lsop;
   1671	unsigned long flags;
   1672	bool sendrsp;
   1673
   1674restart:
   1675	sendrsp = true;
   1676	spin_lock_irqsave(&rport->lock, flags);
   1677	list_for_each_entry(lsop, &rport->ls_rcv_list, lsrcv_list) {
   1678		if (lsop->handled)
   1679			continue;
   1680
   1681		lsop->handled = true;
   1682		if (rport->remoteport.port_state == FC_OBJSTATE_ONLINE) {
   1683			spin_unlock_irqrestore(&rport->lock, flags);
   1684			sendrsp = nvme_fc_handle_ls_rqst(lsop);
   1685		} else {
   1686			spin_unlock_irqrestore(&rport->lock, flags);
   1687			w0 = &lsop->rqstbuf->w0;
   1688			lsop->lsrsp->rsplen = nvme_fc_format_rjt(
   1689						lsop->rspbuf,
   1690						sizeof(*lsop->rspbuf),
   1691						w0->ls_cmd,
   1692						FCNVME_RJT_RC_UNAB,
   1693						FCNVME_RJT_EXP_NONE, 0);
   1694		}
   1695		if (sendrsp)
   1696			nvme_fc_xmt_ls_rsp(lsop);
   1697		goto restart;
   1698	}
   1699	spin_unlock_irqrestore(&rport->lock, flags);
   1700}
   1701
   1702/**
   1703 * nvme_fc_rcv_ls_req - transport entry point called by an LLDD
   1704 *                       upon the reception of a NVME LS request.
   1705 *
   1706 * The nvme-fc layer will copy payload to an internal structure for
   1707 * processing.  As such, upon completion of the routine, the LLDD may
   1708 * immediately free/reuse the LS request buffer passed in the call.
   1709 *
   1710 * If this routine returns error, the LLDD should abort the exchange.
   1711 *
   1712 * @portptr:    pointer to the (registered) remote port that the LS
   1713 *              was received from. The remoteport is associated with
   1714 *              a specific localport.
   1715 * @lsrsp:      pointer to a nvmefc_ls_rsp response structure to be
   1716 *              used to reference the exchange corresponding to the LS
   1717 *              when issuing an ls response.
   1718 * @lsreqbuf:   pointer to the buffer containing the LS Request
   1719 * @lsreqbuf_len: length, in bytes, of the received LS request
   1720 */
   1721int
   1722nvme_fc_rcv_ls_req(struct nvme_fc_remote_port *portptr,
   1723			struct nvmefc_ls_rsp *lsrsp,
   1724			void *lsreqbuf, u32 lsreqbuf_len)
   1725{
   1726	struct nvme_fc_rport *rport = remoteport_to_rport(portptr);
   1727	struct nvme_fc_lport *lport = rport->lport;
   1728	struct fcnvme_ls_rqst_w0 *w0 = (struct fcnvme_ls_rqst_w0 *)lsreqbuf;
   1729	struct nvmefc_ls_rcv_op *lsop;
   1730	unsigned long flags;
   1731	int ret;
   1732
   1733	nvme_fc_rport_get(rport);
   1734
   1735	/* validate there's a routine to transmit a response */
   1736	if (!lport->ops->xmt_ls_rsp) {
   1737		dev_info(lport->dev,
   1738			"RCV %s LS failed: no LLDD xmt_ls_rsp\n",
   1739			(w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
   1740				nvmefc_ls_names[w0->ls_cmd] : "");
   1741		ret = -EINVAL;
   1742		goto out_put;
   1743	}
   1744
   1745	if (lsreqbuf_len > sizeof(union nvmefc_ls_requests)) {
   1746		dev_info(lport->dev,
   1747			"RCV %s LS failed: payload too large\n",
   1748			(w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
   1749				nvmefc_ls_names[w0->ls_cmd] : "");
   1750		ret = -E2BIG;
   1751		goto out_put;
   1752	}
   1753
   1754	lsop = kzalloc(sizeof(*lsop) +
   1755			sizeof(union nvmefc_ls_requests) +
   1756			sizeof(union nvmefc_ls_responses),
   1757			GFP_KERNEL);
   1758	if (!lsop) {
   1759		dev_info(lport->dev,
   1760			"RCV %s LS failed: No memory\n",
   1761			(w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
   1762				nvmefc_ls_names[w0->ls_cmd] : "");
   1763		ret = -ENOMEM;
   1764		goto out_put;
   1765	}
   1766	lsop->rqstbuf = (union nvmefc_ls_requests *)&lsop[1];
   1767	lsop->rspbuf = (union nvmefc_ls_responses *)&lsop->rqstbuf[1];
   1768
   1769	lsop->rspdma = fc_dma_map_single(lport->dev, lsop->rspbuf,
   1770					sizeof(*lsop->rspbuf),
   1771					DMA_TO_DEVICE);
   1772	if (fc_dma_mapping_error(lport->dev, lsop->rspdma)) {
   1773		dev_info(lport->dev,
   1774			"RCV %s LS failed: DMA mapping failure\n",
   1775			(w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
   1776				nvmefc_ls_names[w0->ls_cmd] : "");
   1777		ret = -EFAULT;
   1778		goto out_free;
   1779	}
   1780
   1781	lsop->rport = rport;
   1782	lsop->lsrsp = lsrsp;
   1783
   1784	memcpy(lsop->rqstbuf, lsreqbuf, lsreqbuf_len);
   1785	lsop->rqstdatalen = lsreqbuf_len;
   1786
   1787	spin_lock_irqsave(&rport->lock, flags);
   1788	if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE) {
   1789		spin_unlock_irqrestore(&rport->lock, flags);
   1790		ret = -ENOTCONN;
   1791		goto out_unmap;
   1792	}
   1793	list_add_tail(&lsop->lsrcv_list, &rport->ls_rcv_list);
   1794	spin_unlock_irqrestore(&rport->lock, flags);
   1795
   1796	schedule_work(&rport->lsrcv_work);
   1797
   1798	return 0;
   1799
   1800out_unmap:
   1801	fc_dma_unmap_single(lport->dev, lsop->rspdma,
   1802			sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
   1803out_free:
   1804	kfree(lsop);
   1805out_put:
   1806	nvme_fc_rport_put(rport);
   1807	return ret;
   1808}
   1809EXPORT_SYMBOL_GPL(nvme_fc_rcv_ls_req);
   1810
   1811
   1812/* *********************** NVME Ctrl Routines **************************** */
   1813
   1814static void
   1815__nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl,
   1816		struct nvme_fc_fcp_op *op)
   1817{
   1818	fc_dma_unmap_single(ctrl->lport->dev, op->fcp_req.rspdma,
   1819				sizeof(op->rsp_iu), DMA_FROM_DEVICE);
   1820	fc_dma_unmap_single(ctrl->lport->dev, op->fcp_req.cmddma,
   1821				sizeof(op->cmd_iu), DMA_TO_DEVICE);
   1822
   1823	atomic_set(&op->state, FCPOP_STATE_UNINIT);
   1824}
   1825
   1826static void
   1827nvme_fc_exit_request(struct blk_mq_tag_set *set, struct request *rq,
   1828		unsigned int hctx_idx)
   1829{
   1830	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
   1831
   1832	return __nvme_fc_exit_request(set->driver_data, op);
   1833}
   1834
   1835static int
   1836__nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
   1837{
   1838	unsigned long flags;
   1839	int opstate;
   1840
   1841	spin_lock_irqsave(&ctrl->lock, flags);
   1842	opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
   1843	if (opstate != FCPOP_STATE_ACTIVE)
   1844		atomic_set(&op->state, opstate);
   1845	else if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) {
   1846		op->flags |= FCOP_FLAGS_TERMIO;
   1847		ctrl->iocnt++;
   1848	}
   1849	spin_unlock_irqrestore(&ctrl->lock, flags);
   1850
   1851	if (opstate != FCPOP_STATE_ACTIVE)
   1852		return -ECANCELED;
   1853
   1854	ctrl->lport->ops->fcp_abort(&ctrl->lport->localport,
   1855					&ctrl->rport->remoteport,
   1856					op->queue->lldd_handle,
   1857					&op->fcp_req);
   1858
   1859	return 0;
   1860}
   1861
   1862static void
   1863nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl)
   1864{
   1865	struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops;
   1866	int i;
   1867
   1868	/* ensure we've initialized the ops once */
   1869	if (!(aen_op->flags & FCOP_FLAGS_AEN))
   1870		return;
   1871
   1872	for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++)
   1873		__nvme_fc_abort_op(ctrl, aen_op);
   1874}
   1875
   1876static inline void
   1877__nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
   1878		struct nvme_fc_fcp_op *op, int opstate)
   1879{
   1880	unsigned long flags;
   1881
   1882	if (opstate == FCPOP_STATE_ABORTED) {
   1883		spin_lock_irqsave(&ctrl->lock, flags);
   1884		if (test_bit(FCCTRL_TERMIO, &ctrl->flags) &&
   1885		    op->flags & FCOP_FLAGS_TERMIO) {
   1886			if (!--ctrl->iocnt)
   1887				wake_up(&ctrl->ioabort_wait);
   1888		}
   1889		spin_unlock_irqrestore(&ctrl->lock, flags);
   1890	}
   1891}
   1892
   1893static void
   1894nvme_fc_ctrl_ioerr_work(struct work_struct *work)
   1895{
   1896	struct nvme_fc_ctrl *ctrl =
   1897			container_of(work, struct nvme_fc_ctrl, ioerr_work);
   1898
   1899	nvme_fc_error_recovery(ctrl, "transport detected io error");
   1900}
   1901
   1902/*
   1903 * nvme_fc_io_getuuid - Routine called to get the appid field
   1904 * associated with request by the lldd
   1905 * @req:IO request from nvme fc to driver
   1906 * Returns: UUID if there is an appid associated with VM or
   1907 * NULL if the user/libvirt has not set the appid to VM
   1908 */
   1909char *nvme_fc_io_getuuid(struct nvmefc_fcp_req *req)
   1910{
   1911	struct nvme_fc_fcp_op *op = fcp_req_to_fcp_op(req);
   1912	struct request *rq = op->rq;
   1913
   1914	if (!IS_ENABLED(CONFIG_BLK_CGROUP_FC_APPID) || !rq->bio)
   1915		return NULL;
   1916	return blkcg_get_fc_appid(rq->bio);
   1917}
   1918EXPORT_SYMBOL_GPL(nvme_fc_io_getuuid);
   1919
   1920static void
   1921nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
   1922{
   1923	struct nvme_fc_fcp_op *op = fcp_req_to_fcp_op(req);
   1924	struct request *rq = op->rq;
   1925	struct nvmefc_fcp_req *freq = &op->fcp_req;
   1926	struct nvme_fc_ctrl *ctrl = op->ctrl;
   1927	struct nvme_fc_queue *queue = op->queue;
   1928	struct nvme_completion *cqe = &op->rsp_iu.cqe;
   1929	struct nvme_command *sqe = &op->cmd_iu.sqe;
   1930	__le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
   1931	union nvme_result result;
   1932	bool terminate_assoc = true;
   1933	int opstate;
   1934
   1935	/*
   1936	 * WARNING:
   1937	 * The current linux implementation of a nvme controller
   1938	 * allocates a single tag set for all io queues and sizes
   1939	 * the io queues to fully hold all possible tags. Thus, the
   1940	 * implementation does not reference or care about the sqhd
   1941	 * value as it never needs to use the sqhd/sqtail pointers
   1942	 * for submission pacing.
   1943	 *
   1944	 * This affects the FC-NVME implementation in two ways:
   1945	 * 1) As the value doesn't matter, we don't need to waste
   1946	 *    cycles extracting it from ERSPs and stamping it in the
   1947	 *    cases where the transport fabricates CQEs on successful
   1948	 *    completions.
   1949	 * 2) The FC-NVME implementation requires that delivery of
   1950	 *    ERSP completions are to go back to the nvme layer in order
   1951	 *    relative to the rsn, such that the sqhd value will always
   1952	 *    be "in order" for the nvme layer. As the nvme layer in
   1953	 *    linux doesn't care about sqhd, there's no need to return
   1954	 *    them in order.
   1955	 *
   1956	 * Additionally:
   1957	 * As the core nvme layer in linux currently does not look at
   1958	 * every field in the cqe - in cases where the FC transport must
   1959	 * fabricate a CQE, the following fields will not be set as they
   1960	 * are not referenced:
   1961	 *      cqe.sqid,  cqe.sqhd,  cqe.command_id
   1962	 *
   1963	 * Failure or error of an individual i/o, in a transport
   1964	 * detected fashion unrelated to the nvme completion status,
   1965	 * potentially cause the initiator and target sides to get out
   1966	 * of sync on SQ head/tail (aka outstanding io count allowed).
   1967	 * Per FC-NVME spec, failure of an individual command requires
   1968	 * the connection to be terminated, which in turn requires the
   1969	 * association to be terminated.
   1970	 */
   1971
   1972	opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);
   1973
   1974	fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma,
   1975				sizeof(op->rsp_iu), DMA_FROM_DEVICE);
   1976
   1977	if (opstate == FCPOP_STATE_ABORTED)
   1978		status = cpu_to_le16(NVME_SC_HOST_ABORTED_CMD << 1);
   1979	else if (freq->status) {
   1980		status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
   1981		dev_info(ctrl->ctrl.device,
   1982			"NVME-FC{%d}: io failed due to lldd error %d\n",
   1983			ctrl->cnum, freq->status);
   1984	}
   1985
   1986	/*
   1987	 * For the linux implementation, if we have an unsuccesful
   1988	 * status, they blk-mq layer can typically be called with the
   1989	 * non-zero status and the content of the cqe isn't important.
   1990	 */
   1991	if (status)
   1992		goto done;
   1993
   1994	/*
   1995	 * command completed successfully relative to the wire
   1996	 * protocol. However, validate anything received and
   1997	 * extract the status and result from the cqe (create it
   1998	 * where necessary).
   1999	 */
   2000
   2001	switch (freq->rcv_rsplen) {
   2002
   2003	case 0:
   2004	case NVME_FC_SIZEOF_ZEROS_RSP:
   2005		/*
   2006		 * No response payload or 12 bytes of payload (which
   2007		 * should all be zeros) are considered successful and
   2008		 * no payload in the CQE by the transport.
   2009		 */
   2010		if (freq->transferred_length !=
   2011		    be32_to_cpu(op->cmd_iu.data_len)) {
   2012			status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
   2013			dev_info(ctrl->ctrl.device,
   2014				"NVME-FC{%d}: io failed due to bad transfer "
   2015				"length: %d vs expected %d\n",
   2016				ctrl->cnum, freq->transferred_length,
   2017				be32_to_cpu(op->cmd_iu.data_len));
   2018			goto done;
   2019		}
   2020		result.u64 = 0;
   2021		break;
   2022
   2023	case sizeof(struct nvme_fc_ersp_iu):
   2024		/*
   2025		 * The ERSP IU contains a full completion with CQE.
   2026		 * Validate ERSP IU and look at cqe.
   2027		 */
   2028		if (unlikely(be16_to_cpu(op->rsp_iu.iu_len) !=
   2029					(freq->rcv_rsplen / 4) ||
   2030			     be32_to_cpu(op->rsp_iu.xfrd_len) !=
   2031					freq->transferred_length ||
   2032			     op->rsp_iu.ersp_result ||
   2033			     sqe->common.command_id != cqe->command_id)) {
   2034			status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
   2035			dev_info(ctrl->ctrl.device,
   2036				"NVME-FC{%d}: io failed due to bad NVMe_ERSP: "
   2037				"iu len %d, xfr len %d vs %d, status code "
   2038				"%d, cmdid %d vs %d\n",
   2039				ctrl->cnum, be16_to_cpu(op->rsp_iu.iu_len),
   2040				be32_to_cpu(op->rsp_iu.xfrd_len),
   2041				freq->transferred_length,
   2042				op->rsp_iu.ersp_result,
   2043				sqe->common.command_id,
   2044				cqe->command_id);
   2045			goto done;
   2046		}
   2047		result = cqe->result;
   2048		status = cqe->status;
   2049		break;
   2050
   2051	default:
   2052		status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
   2053		dev_info(ctrl->ctrl.device,
   2054			"NVME-FC{%d}: io failed due to odd NVMe_xRSP iu "
   2055			"len %d\n",
   2056			ctrl->cnum, freq->rcv_rsplen);
   2057		goto done;
   2058	}
   2059
   2060	terminate_assoc = false;
   2061
   2062done:
   2063	if (op->flags & FCOP_FLAGS_AEN) {
   2064		nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
   2065		__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
   2066		atomic_set(&op->state, FCPOP_STATE_IDLE);
   2067		op->flags = FCOP_FLAGS_AEN;	/* clear other flags */
   2068		nvme_fc_ctrl_put(ctrl);
   2069		goto check_error;
   2070	}
   2071
   2072	__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
   2073	if (!nvme_try_complete_req(rq, status, result))
   2074		nvme_fc_complete_rq(rq);
   2075
   2076check_error:
   2077	if (terminate_assoc && ctrl->ctrl.state != NVME_CTRL_RESETTING)
   2078		queue_work(nvme_reset_wq, &ctrl->ioerr_work);
   2079}
   2080
   2081static int
   2082__nvme_fc_init_request(struct nvme_fc_ctrl *ctrl,
   2083		struct nvme_fc_queue *queue, struct nvme_fc_fcp_op *op,
   2084		struct request *rq, u32 rqno)
   2085{
   2086	struct nvme_fcp_op_w_sgl *op_w_sgl =
   2087		container_of(op, typeof(*op_w_sgl), op);
   2088	struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
   2089	int ret = 0;
   2090
   2091	memset(op, 0, sizeof(*op));
   2092	op->fcp_req.cmdaddr = &op->cmd_iu;
   2093	op->fcp_req.cmdlen = sizeof(op->cmd_iu);
   2094	op->fcp_req.rspaddr = &op->rsp_iu;
   2095	op->fcp_req.rsplen = sizeof(op->rsp_iu);
   2096	op->fcp_req.done = nvme_fc_fcpio_done;
   2097	op->ctrl = ctrl;
   2098	op->queue = queue;
   2099	op->rq = rq;
   2100	op->rqno = rqno;
   2101
   2102	cmdiu->format_id = NVME_CMD_FORMAT_ID;
   2103	cmdiu->fc_id = NVME_CMD_FC_ID;
   2104	cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32));
   2105	if (queue->qnum)
   2106		cmdiu->rsv_cat = fccmnd_set_cat_css(0,
   2107					(NVME_CC_CSS_NVM >> NVME_CC_CSS_SHIFT));
   2108	else
   2109		cmdiu->rsv_cat = fccmnd_set_cat_admin(0);
   2110
   2111	op->fcp_req.cmddma = fc_dma_map_single(ctrl->lport->dev,
   2112				&op->cmd_iu, sizeof(op->cmd_iu), DMA_TO_DEVICE);
   2113	if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.cmddma)) {
   2114		dev_err(ctrl->dev,
   2115			"FCP Op failed - cmdiu dma mapping failed.\n");
   2116		ret = -EFAULT;
   2117		goto out_on_error;
   2118	}
   2119
   2120	op->fcp_req.rspdma = fc_dma_map_single(ctrl->lport->dev,
   2121				&op->rsp_iu, sizeof(op->rsp_iu),
   2122				DMA_FROM_DEVICE);
   2123	if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.rspdma)) {
   2124		dev_err(ctrl->dev,
   2125			"FCP Op failed - rspiu dma mapping failed.\n");
   2126		ret = -EFAULT;
   2127	}
   2128
   2129	atomic_set(&op->state, FCPOP_STATE_IDLE);
   2130out_on_error:
   2131	return ret;
   2132}
   2133
   2134static int
   2135nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
   2136		unsigned int hctx_idx, unsigned int numa_node)
   2137{
   2138	struct nvme_fc_ctrl *ctrl = set->driver_data;
   2139	struct nvme_fcp_op_w_sgl *op = blk_mq_rq_to_pdu(rq);
   2140	int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
   2141	struct nvme_fc_queue *queue = &ctrl->queues[queue_idx];
   2142	int res;
   2143
   2144	res = __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++);
   2145	if (res)
   2146		return res;
   2147	op->op.fcp_req.first_sgl = op->sgl;
   2148	op->op.fcp_req.private = &op->priv[0];
   2149	nvme_req(rq)->ctrl = &ctrl->ctrl;
   2150	nvme_req(rq)->cmd = &op->op.cmd_iu.sqe;
   2151	return res;
   2152}
   2153
   2154static int
   2155nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl)
   2156{
   2157	struct nvme_fc_fcp_op *aen_op;
   2158	struct nvme_fc_cmd_iu *cmdiu;
   2159	struct nvme_command *sqe;
   2160	void *private = NULL;
   2161	int i, ret;
   2162
   2163	aen_op = ctrl->aen_ops;
   2164	for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
   2165		if (ctrl->lport->ops->fcprqst_priv_sz) {
   2166			private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz,
   2167						GFP_KERNEL);
   2168			if (!private)
   2169				return -ENOMEM;
   2170		}
   2171
   2172		cmdiu = &aen_op->cmd_iu;
   2173		sqe = &cmdiu->sqe;
   2174		ret = __nvme_fc_init_request(ctrl, &ctrl->queues[0],
   2175				aen_op, (struct request *)NULL,
   2176				(NVME_AQ_BLK_MQ_DEPTH + i));
   2177		if (ret) {
   2178			kfree(private);
   2179			return ret;
   2180		}
   2181
   2182		aen_op->flags = FCOP_FLAGS_AEN;
   2183		aen_op->fcp_req.private = private;
   2184
   2185		memset(sqe, 0, sizeof(*sqe));
   2186		sqe->common.opcode = nvme_admin_async_event;
   2187		/* Note: core layer may overwrite the sqe.command_id value */
   2188		sqe->common.command_id = NVME_AQ_BLK_MQ_DEPTH + i;
   2189	}
   2190	return 0;
   2191}
   2192
   2193static void
   2194nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl)
   2195{
   2196	struct nvme_fc_fcp_op *aen_op;
   2197	int i;
   2198
   2199	cancel_work_sync(&ctrl->ctrl.async_event_work);
   2200	aen_op = ctrl->aen_ops;
   2201	for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
   2202		__nvme_fc_exit_request(ctrl, aen_op);
   2203
   2204		kfree(aen_op->fcp_req.private);
   2205		aen_op->fcp_req.private = NULL;
   2206	}
   2207}
   2208
   2209static inline void
   2210__nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, struct nvme_fc_ctrl *ctrl,
   2211		unsigned int qidx)
   2212{
   2213	struct nvme_fc_queue *queue = &ctrl->queues[qidx];
   2214
   2215	hctx->driver_data = queue;
   2216	queue->hctx = hctx;
   2217}
   2218
   2219static int
   2220nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
   2221		unsigned int hctx_idx)
   2222{
   2223	struct nvme_fc_ctrl *ctrl = data;
   2224
   2225	__nvme_fc_init_hctx(hctx, ctrl, hctx_idx + 1);
   2226
   2227	return 0;
   2228}
   2229
   2230static int
   2231nvme_fc_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
   2232		unsigned int hctx_idx)
   2233{
   2234	struct nvme_fc_ctrl *ctrl = data;
   2235
   2236	__nvme_fc_init_hctx(hctx, ctrl, hctx_idx);
   2237
   2238	return 0;
   2239}
   2240
   2241static void
   2242nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx)
   2243{
   2244	struct nvme_fc_queue *queue;
   2245
   2246	queue = &ctrl->queues[idx];
   2247	memset(queue, 0, sizeof(*queue));
   2248	queue->ctrl = ctrl;
   2249	queue->qnum = idx;
   2250	atomic_set(&queue->csn, 0);
   2251	queue->dev = ctrl->dev;
   2252
   2253	if (idx > 0)
   2254		queue->cmnd_capsule_len = ctrl->ctrl.ioccsz * 16;
   2255	else
   2256		queue->cmnd_capsule_len = sizeof(struct nvme_command);
   2257
   2258	/*
   2259	 * Considered whether we should allocate buffers for all SQEs
   2260	 * and CQEs and dma map them - mapping their respective entries
   2261	 * into the request structures (kernel vm addr and dma address)
   2262	 * thus the driver could use the buffers/mappings directly.
   2263	 * It only makes sense if the LLDD would use them for its
   2264	 * messaging api. It's very unlikely most adapter api's would use
   2265	 * a native NVME sqe/cqe. More reasonable if FC-NVME IU payload
   2266	 * structures were used instead.
   2267	 */
   2268}
   2269
   2270/*
   2271 * This routine terminates a queue at the transport level.
   2272 * The transport has already ensured that all outstanding ios on
   2273 * the queue have been terminated.
   2274 * The transport will send a Disconnect LS request to terminate
   2275 * the queue's connection. Termination of the admin queue will also
   2276 * terminate the association at the target.
   2277 */
   2278static void
   2279nvme_fc_free_queue(struct nvme_fc_queue *queue)
   2280{
   2281	if (!test_and_clear_bit(NVME_FC_Q_CONNECTED, &queue->flags))
   2282		return;
   2283
   2284	clear_bit(NVME_FC_Q_LIVE, &queue->flags);
   2285	/*
   2286	 * Current implementation never disconnects a single queue.
   2287	 * It always terminates a whole association. So there is never
   2288	 * a disconnect(queue) LS sent to the target.
   2289	 */
   2290
   2291	queue->connection_id = 0;
   2292	atomic_set(&queue->csn, 0);
   2293}
   2294
   2295static void
   2296__nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *ctrl,
   2297	struct nvme_fc_queue *queue, unsigned int qidx)
   2298{
   2299	if (ctrl->lport->ops->delete_queue)
   2300		ctrl->lport->ops->delete_queue(&ctrl->lport->localport, qidx,
   2301				queue->lldd_handle);
   2302	queue->lldd_handle = NULL;
   2303}
   2304
   2305static void
   2306nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl)
   2307{
   2308	int i;
   2309
   2310	for (i = 1; i < ctrl->ctrl.queue_count; i++)
   2311		nvme_fc_free_queue(&ctrl->queues[i]);
   2312}
   2313
   2314static int
   2315__nvme_fc_create_hw_queue(struct nvme_fc_ctrl *ctrl,
   2316	struct nvme_fc_queue *queue, unsigned int qidx, u16 qsize)
   2317{
   2318	int ret = 0;
   2319
   2320	queue->lldd_handle = NULL;
   2321	if (ctrl->lport->ops->create_queue)
   2322		ret = ctrl->lport->ops->create_queue(&ctrl->lport->localport,
   2323				qidx, qsize, &queue->lldd_handle);
   2324
   2325	return ret;
   2326}
   2327
   2328static void
   2329nvme_fc_delete_hw_io_queues(struct nvme_fc_ctrl *ctrl)
   2330{
   2331	struct nvme_fc_queue *queue = &ctrl->queues[ctrl->ctrl.queue_count - 1];
   2332	int i;
   2333
   2334	for (i = ctrl->ctrl.queue_count - 1; i >= 1; i--, queue--)
   2335		__nvme_fc_delete_hw_queue(ctrl, queue, i);
   2336}
   2337
   2338static int
   2339nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
   2340{
   2341	struct nvme_fc_queue *queue = &ctrl->queues[1];
   2342	int i, ret;
   2343
   2344	for (i = 1; i < ctrl->ctrl.queue_count; i++, queue++) {
   2345		ret = __nvme_fc_create_hw_queue(ctrl, queue, i, qsize);
   2346		if (ret)
   2347			goto delete_queues;
   2348	}
   2349
   2350	return 0;
   2351
   2352delete_queues:
   2353	for (; i > 0; i--)
   2354		__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i);
   2355	return ret;
   2356}
   2357
   2358static int
   2359nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
   2360{
   2361	int i, ret = 0;
   2362
   2363	for (i = 1; i < ctrl->ctrl.queue_count; i++) {
   2364		ret = nvme_fc_connect_queue(ctrl, &ctrl->queues[i], qsize,
   2365					(qsize / 5));
   2366		if (ret)
   2367			break;
   2368		ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
   2369		if (ret)
   2370			break;
   2371
   2372		set_bit(NVME_FC_Q_LIVE, &ctrl->queues[i].flags);
   2373	}
   2374
   2375	return ret;
   2376}
   2377
   2378static void
   2379nvme_fc_init_io_queues(struct nvme_fc_ctrl *ctrl)
   2380{
   2381	int i;
   2382
   2383	for (i = 1; i < ctrl->ctrl.queue_count; i++)
   2384		nvme_fc_init_queue(ctrl, i);
   2385}
   2386
   2387static void
   2388nvme_fc_ctrl_free(struct kref *ref)
   2389{
   2390	struct nvme_fc_ctrl *ctrl =
   2391		container_of(ref, struct nvme_fc_ctrl, ref);
   2392	unsigned long flags;
   2393
   2394	if (ctrl->ctrl.tagset) {
   2395		blk_cleanup_queue(ctrl->ctrl.connect_q);
   2396		blk_mq_free_tag_set(&ctrl->tag_set);
   2397	}
   2398
   2399	/* remove from rport list */
   2400	spin_lock_irqsave(&ctrl->rport->lock, flags);
   2401	list_del(&ctrl->ctrl_list);
   2402	spin_unlock_irqrestore(&ctrl->rport->lock, flags);
   2403
   2404	nvme_start_admin_queue(&ctrl->ctrl);
   2405	blk_cleanup_queue(ctrl->ctrl.admin_q);
   2406	blk_cleanup_queue(ctrl->ctrl.fabrics_q);
   2407	blk_mq_free_tag_set(&ctrl->admin_tag_set);
   2408
   2409	kfree(ctrl->queues);
   2410
   2411	put_device(ctrl->dev);
   2412	nvme_fc_rport_put(ctrl->rport);
   2413
   2414	ida_free(&nvme_fc_ctrl_cnt, ctrl->cnum);
   2415	if (ctrl->ctrl.opts)
   2416		nvmf_free_options(ctrl->ctrl.opts);
   2417	kfree(ctrl);
   2418}
   2419
   2420static void
   2421nvme_fc_ctrl_put(struct nvme_fc_ctrl *ctrl)
   2422{
   2423	kref_put(&ctrl->ref, nvme_fc_ctrl_free);
   2424}
   2425
   2426static int
   2427nvme_fc_ctrl_get(struct nvme_fc_ctrl *ctrl)
   2428{
   2429	return kref_get_unless_zero(&ctrl->ref);
   2430}
   2431
   2432/*
   2433 * All accesses from nvme core layer done - can now free the
   2434 * controller. Called after last nvme_put_ctrl() call
   2435 */
   2436static void
   2437nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl)
   2438{
   2439	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
   2440
   2441	WARN_ON(nctrl != &ctrl->ctrl);
   2442
   2443	nvme_fc_ctrl_put(ctrl);
   2444}
   2445
   2446/*
   2447 * This routine is used by the transport when it needs to find active
   2448 * io on a queue that is to be terminated. The transport uses
   2449 * blk_mq_tagset_busy_itr() to find the busy requests, which then invoke
   2450 * this routine to kill them on a 1 by 1 basis.
   2451 *
   2452 * As FC allocates FC exchange for each io, the transport must contact
   2453 * the LLDD to terminate the exchange, thus releasing the FC exchange.
   2454 * After terminating the exchange the LLDD will call the transport's
   2455 * normal io done path for the request, but it will have an aborted
   2456 * status. The done path will return the io request back to the block
   2457 * layer with an error status.
   2458 */
   2459static bool
   2460nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
   2461{
   2462	struct nvme_ctrl *nctrl = data;
   2463	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
   2464	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
   2465
   2466	op->nreq.flags |= NVME_REQ_CANCELLED;
   2467	__nvme_fc_abort_op(ctrl, op);
   2468	return true;
   2469}
   2470
   2471/*
   2472 * This routine runs through all outstanding commands on the association
   2473 * and aborts them.  This routine is typically be called by the
   2474 * delete_association routine. It is also called due to an error during
   2475 * reconnect. In that scenario, it is most likely a command that initializes
   2476 * the controller, including fabric Connect commands on io queues, that
   2477 * may have timed out or failed thus the io must be killed for the connect
   2478 * thread to see the error.
   2479 */
   2480static void
   2481__nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
   2482{
   2483	int q;
   2484
   2485	/*
   2486	 * if aborting io, the queues are no longer good, mark them
   2487	 * all as not live.
   2488	 */
   2489	if (ctrl->ctrl.queue_count > 1) {
   2490		for (q = 1; q < ctrl->ctrl.queue_count; q++)
   2491			clear_bit(NVME_FC_Q_LIVE, &ctrl->queues[q].flags);
   2492	}
   2493	clear_bit(NVME_FC_Q_LIVE, &ctrl->queues[0].flags);
   2494
   2495	/*
   2496	 * If io queues are present, stop them and terminate all outstanding
   2497	 * ios on them. As FC allocates FC exchange for each io, the
   2498	 * transport must contact the LLDD to terminate the exchange,
   2499	 * thus releasing the FC exchange. We use blk_mq_tagset_busy_itr()
   2500	 * to tell us what io's are busy and invoke a transport routine
   2501	 * to kill them with the LLDD.  After terminating the exchange
   2502	 * the LLDD will call the transport's normal io done path, but it
   2503	 * will have an aborted status. The done path will return the
   2504	 * io requests back to the block layer as part of normal completions
   2505	 * (but with error status).
   2506	 */
   2507	if (ctrl->ctrl.queue_count > 1) {
   2508		nvme_stop_queues(&ctrl->ctrl);
   2509		nvme_sync_io_queues(&ctrl->ctrl);
   2510		blk_mq_tagset_busy_iter(&ctrl->tag_set,
   2511				nvme_fc_terminate_exchange, &ctrl->ctrl);
   2512		blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
   2513		if (start_queues)
   2514			nvme_start_queues(&ctrl->ctrl);
   2515	}
   2516
   2517	/*
   2518	 * Other transports, which don't have link-level contexts bound
   2519	 * to sqe's, would try to gracefully shutdown the controller by
   2520	 * writing the registers for shutdown and polling (call
   2521	 * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially
   2522	 * just aborted and we will wait on those contexts, and given
   2523	 * there was no indication of how live the controlelr is on the
   2524	 * link, don't send more io to create more contexts for the
   2525	 * shutdown. Let the controller fail via keepalive failure if
   2526	 * its still present.
   2527	 */
   2528
   2529	/*
   2530	 * clean up the admin queue. Same thing as above.
   2531	 */
   2532	nvme_stop_admin_queue(&ctrl->ctrl);
   2533	blk_sync_queue(ctrl->ctrl.admin_q);
   2534	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
   2535				nvme_fc_terminate_exchange, &ctrl->ctrl);
   2536	blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
   2537}
   2538
   2539static void
   2540nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
   2541{
   2542	/*
   2543	 * if an error (io timeout, etc) while (re)connecting, the remote
   2544	 * port requested terminating of the association (disconnect_ls)
   2545	 * or an error (timeout or abort) occurred on an io while creating
   2546	 * the controller.  Abort any ios on the association and let the
   2547	 * create_association error path resolve things.
   2548	 */
   2549	if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
   2550		__nvme_fc_abort_outstanding_ios(ctrl, true);
   2551		set_bit(ASSOC_FAILED, &ctrl->flags);
   2552		return;
   2553	}
   2554
   2555	/* Otherwise, only proceed if in LIVE state - e.g. on first error */
   2556	if (ctrl->ctrl.state != NVME_CTRL_LIVE)
   2557		return;
   2558
   2559	dev_warn(ctrl->ctrl.device,
   2560		"NVME-FC{%d}: transport association event: %s\n",
   2561		ctrl->cnum, errmsg);
   2562	dev_warn(ctrl->ctrl.device,
   2563		"NVME-FC{%d}: resetting controller\n", ctrl->cnum);
   2564
   2565	nvme_reset_ctrl(&ctrl->ctrl);
   2566}
   2567
   2568static enum blk_eh_timer_return
   2569nvme_fc_timeout(struct request *rq, bool reserved)
   2570{
   2571	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
   2572	struct nvme_fc_ctrl *ctrl = op->ctrl;
   2573	struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
   2574	struct nvme_command *sqe = &cmdiu->sqe;
   2575
   2576	/*
   2577	 * Attempt to abort the offending command. Command completion
   2578	 * will detect the aborted io and will fail the connection.
   2579	 */
   2580	dev_info(ctrl->ctrl.device,
   2581		"NVME-FC{%d.%d}: io timeout: opcode %d fctype %d w10/11: "
   2582		"x%08x/x%08x\n",
   2583		ctrl->cnum, op->queue->qnum, sqe->common.opcode,
   2584		sqe->connect.fctype, sqe->common.cdw10, sqe->common.cdw11);
   2585	if (__nvme_fc_abort_op(ctrl, op))
   2586		nvme_fc_error_recovery(ctrl, "io timeout abort failed");
   2587
   2588	/*
   2589	 * the io abort has been initiated. Have the reset timer
   2590	 * restarted and the abort completion will complete the io
   2591	 * shortly. Avoids a synchronous wait while the abort finishes.
   2592	 */
   2593	return BLK_EH_RESET_TIMER;
   2594}
   2595
   2596static int
   2597nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
   2598		struct nvme_fc_fcp_op *op)
   2599{
   2600	struct nvmefc_fcp_req *freq = &op->fcp_req;
   2601	int ret;
   2602
   2603	freq->sg_cnt = 0;
   2604
   2605	if (!blk_rq_nr_phys_segments(rq))
   2606		return 0;
   2607
   2608	freq->sg_table.sgl = freq->first_sgl;
   2609	ret = sg_alloc_table_chained(&freq->sg_table,
   2610			blk_rq_nr_phys_segments(rq), freq->sg_table.sgl,
   2611			NVME_INLINE_SG_CNT);
   2612	if (ret)
   2613		return -ENOMEM;
   2614
   2615	op->nents = blk_rq_map_sg(rq->q, rq, freq->sg_table.sgl);
   2616	WARN_ON(op->nents > blk_rq_nr_phys_segments(rq));
   2617	freq->sg_cnt = fc_dma_map_sg(ctrl->lport->dev, freq->sg_table.sgl,
   2618				op->nents, rq_dma_dir(rq));
   2619	if (unlikely(freq->sg_cnt <= 0)) {
   2620		sg_free_table_chained(&freq->sg_table, NVME_INLINE_SG_CNT);
   2621		freq->sg_cnt = 0;
   2622		return -EFAULT;
   2623	}
   2624
   2625	/*
   2626	 * TODO: blk_integrity_rq(rq)  for DIF
   2627	 */
   2628	return 0;
   2629}
   2630
   2631static void
   2632nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
   2633		struct nvme_fc_fcp_op *op)
   2634{
   2635	struct nvmefc_fcp_req *freq = &op->fcp_req;
   2636
   2637	if (!freq->sg_cnt)
   2638		return;
   2639
   2640	fc_dma_unmap_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents,
   2641			rq_dma_dir(rq));
   2642
   2643	sg_free_table_chained(&freq->sg_table, NVME_INLINE_SG_CNT);
   2644
   2645	freq->sg_cnt = 0;
   2646}
   2647
   2648/*
   2649 * In FC, the queue is a logical thing. At transport connect, the target
   2650 * creates its "queue" and returns a handle that is to be given to the
   2651 * target whenever it posts something to the corresponding SQ.  When an
   2652 * SQE is sent on a SQ, FC effectively considers the SQE, or rather the
   2653 * command contained within the SQE, an io, and assigns a FC exchange
   2654 * to it. The SQE and the associated SQ handle are sent in the initial
   2655 * CMD IU sents on the exchange. All transfers relative to the io occur
   2656 * as part of the exchange.  The CQE is the last thing for the io,
   2657 * which is transferred (explicitly or implicitly) with the RSP IU
   2658 * sent on the exchange. After the CQE is received, the FC exchange is
   2659 * terminaed and the Exchange may be used on a different io.
   2660 *
   2661 * The transport to LLDD api has the transport making a request for a
   2662 * new fcp io request to the LLDD. The LLDD then allocates a FC exchange
   2663 * resource and transfers the command. The LLDD will then process all
   2664 * steps to complete the io. Upon completion, the transport done routine
   2665 * is called.
   2666 *
   2667 * So - while the operation is outstanding to the LLDD, there is a link
   2668 * level FC exchange resource that is also outstanding. This must be
   2669 * considered in all cleanup operations.
   2670 */
   2671static blk_status_t
   2672nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
   2673	struct nvme_fc_fcp_op *op, u32 data_len,
   2674	enum nvmefc_fcp_datadir	io_dir)
   2675{
   2676	struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
   2677	struct nvme_command *sqe = &cmdiu->sqe;
   2678	int ret, opstate;
   2679
   2680	/*
   2681	 * before attempting to send the io, check to see if we believe
   2682	 * the target device is present
   2683	 */
   2684	if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
   2685		return BLK_STS_RESOURCE;
   2686
   2687	if (!nvme_fc_ctrl_get(ctrl))
   2688		return BLK_STS_IOERR;
   2689
   2690	/* format the FC-NVME CMD IU and fcp_req */
   2691	cmdiu->connection_id = cpu_to_be64(queue->connection_id);
   2692	cmdiu->data_len = cpu_to_be32(data_len);
   2693	switch (io_dir) {
   2694	case NVMEFC_FCP_WRITE:
   2695		cmdiu->flags = FCNVME_CMD_FLAGS_WRITE;
   2696		break;
   2697	case NVMEFC_FCP_READ:
   2698		cmdiu->flags = FCNVME_CMD_FLAGS_READ;
   2699		break;
   2700	case NVMEFC_FCP_NODATA:
   2701		cmdiu->flags = 0;
   2702		break;
   2703	}
   2704	op->fcp_req.payload_length = data_len;
   2705	op->fcp_req.io_dir = io_dir;
   2706	op->fcp_req.transferred_length = 0;
   2707	op->fcp_req.rcv_rsplen = 0;
   2708	op->fcp_req.status = NVME_SC_SUCCESS;
   2709	op->fcp_req.sqid = cpu_to_le16(queue->qnum);
   2710
   2711	/*
   2712	 * validate per fabric rules, set fields mandated by fabric spec
   2713	 * as well as those by FC-NVME spec.
   2714	 */
   2715	WARN_ON_ONCE(sqe->common.metadata);
   2716	sqe->common.flags |= NVME_CMD_SGL_METABUF;
   2717
   2718	/*
   2719	 * format SQE DPTR field per FC-NVME rules:
   2720	 *    type=0x5     Transport SGL Data Block Descriptor
   2721	 *    subtype=0xA  Transport-specific value
   2722	 *    address=0
   2723	 *    length=length of the data series
   2724	 */
   2725	sqe->rw.dptr.sgl.type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) |
   2726					NVME_SGL_FMT_TRANSPORT_A;
   2727	sqe->rw.dptr.sgl.length = cpu_to_le32(data_len);
   2728	sqe->rw.dptr.sgl.addr = 0;
   2729
   2730	if (!(op->flags & FCOP_FLAGS_AEN)) {
   2731		ret = nvme_fc_map_data(ctrl, op->rq, op);
   2732		if (ret < 0) {
   2733			nvme_cleanup_cmd(op->rq);
   2734			nvme_fc_ctrl_put(ctrl);
   2735			if (ret == -ENOMEM || ret == -EAGAIN)
   2736				return BLK_STS_RESOURCE;
   2737			return BLK_STS_IOERR;
   2738		}
   2739	}
   2740
   2741	fc_dma_sync_single_for_device(ctrl->lport->dev, op->fcp_req.cmddma,
   2742				  sizeof(op->cmd_iu), DMA_TO_DEVICE);
   2743
   2744	atomic_set(&op->state, FCPOP_STATE_ACTIVE);
   2745
   2746	if (!(op->flags & FCOP_FLAGS_AEN))
   2747		blk_mq_start_request(op->rq);
   2748
   2749	cmdiu->csn = cpu_to_be32(atomic_inc_return(&queue->csn));
   2750	ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport,
   2751					&ctrl->rport->remoteport,
   2752					queue->lldd_handle, &op->fcp_req);
   2753
   2754	if (ret) {
   2755		/*
   2756		 * If the lld fails to send the command is there an issue with
   2757		 * the csn value?  If the command that fails is the Connect,
   2758		 * no - as the connection won't be live.  If it is a command
   2759		 * post-connect, it's possible a gap in csn may be created.
   2760		 * Does this matter?  As Linux initiators don't send fused
   2761		 * commands, no.  The gap would exist, but as there's nothing
   2762		 * that depends on csn order to be delivered on the target
   2763		 * side, it shouldn't hurt.  It would be difficult for a
   2764		 * target to even detect the csn gap as it has no idea when the
   2765		 * cmd with the csn was supposed to arrive.
   2766		 */
   2767		opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);
   2768		__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
   2769
   2770		if (!(op->flags & FCOP_FLAGS_AEN)) {
   2771			nvme_fc_unmap_data(ctrl, op->rq, op);
   2772			nvme_cleanup_cmd(op->rq);
   2773		}
   2774
   2775		nvme_fc_ctrl_put(ctrl);
   2776
   2777		if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE &&
   2778				ret != -EBUSY)
   2779			return BLK_STS_IOERR;
   2780
   2781		return BLK_STS_RESOURCE;
   2782	}
   2783
   2784	return BLK_STS_OK;
   2785}
   2786
   2787static blk_status_t
   2788nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
   2789			const struct blk_mq_queue_data *bd)
   2790{
   2791	struct nvme_ns *ns = hctx->queue->queuedata;
   2792	struct nvme_fc_queue *queue = hctx->driver_data;
   2793	struct nvme_fc_ctrl *ctrl = queue->ctrl;
   2794	struct request *rq = bd->rq;
   2795	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
   2796	enum nvmefc_fcp_datadir	io_dir;
   2797	bool queue_ready = test_bit(NVME_FC_Q_LIVE, &queue->flags);
   2798	u32 data_len;
   2799	blk_status_t ret;
   2800
   2801	if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE ||
   2802	    !nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
   2803		return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq);
   2804
   2805	ret = nvme_setup_cmd(ns, rq);
   2806	if (ret)
   2807		return ret;
   2808
   2809	/*
   2810	 * nvme core doesn't quite treat the rq opaquely. Commands such
   2811	 * as WRITE ZEROES will return a non-zero rq payload_bytes yet
   2812	 * there is no actual payload to be transferred.
   2813	 * To get it right, key data transmission on there being 1 or
   2814	 * more physical segments in the sg list. If there is no
   2815	 * physical segments, there is no payload.
   2816	 */
   2817	if (blk_rq_nr_phys_segments(rq)) {
   2818		data_len = blk_rq_payload_bytes(rq);
   2819		io_dir = ((rq_data_dir(rq) == WRITE) ?
   2820					NVMEFC_FCP_WRITE : NVMEFC_FCP_READ);
   2821	} else {
   2822		data_len = 0;
   2823		io_dir = NVMEFC_FCP_NODATA;
   2824	}
   2825
   2826
   2827	return nvme_fc_start_fcp_op(ctrl, queue, op, data_len, io_dir);
   2828}
   2829
   2830static void
   2831nvme_fc_submit_async_event(struct nvme_ctrl *arg)
   2832{
   2833	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg);
   2834	struct nvme_fc_fcp_op *aen_op;
   2835	blk_status_t ret;
   2836
   2837	if (test_bit(FCCTRL_TERMIO, &ctrl->flags))
   2838		return;
   2839
   2840	aen_op = &ctrl->aen_ops[0];
   2841
   2842	ret = nvme_fc_start_fcp_op(ctrl, aen_op->queue, aen_op, 0,
   2843					NVMEFC_FCP_NODATA);
   2844	if (ret)
   2845		dev_err(ctrl->ctrl.device,
   2846			"failed async event work\n");
   2847}
   2848
   2849static void
   2850nvme_fc_complete_rq(struct request *rq)
   2851{
   2852	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
   2853	struct nvme_fc_ctrl *ctrl = op->ctrl;
   2854
   2855	atomic_set(&op->state, FCPOP_STATE_IDLE);
   2856	op->flags &= ~FCOP_FLAGS_TERMIO;
   2857
   2858	nvme_fc_unmap_data(ctrl, rq, op);
   2859	nvme_complete_rq(rq);
   2860	nvme_fc_ctrl_put(ctrl);
   2861}
   2862
   2863static int nvme_fc_map_queues(struct blk_mq_tag_set *set)
   2864{
   2865	struct nvme_fc_ctrl *ctrl = set->driver_data;
   2866	int i;
   2867
   2868	for (i = 0; i < set->nr_maps; i++) {
   2869		struct blk_mq_queue_map *map = &set->map[i];
   2870
   2871		if (!map->nr_queues) {
   2872			WARN_ON(i == HCTX_TYPE_DEFAULT);
   2873			continue;
   2874		}
   2875
   2876		/* Call LLDD map queue functionality if defined */
   2877		if (ctrl->lport->ops->map_queues)
   2878			ctrl->lport->ops->map_queues(&ctrl->lport->localport,
   2879						     map);
   2880		else
   2881			blk_mq_map_queues(map);
   2882	}
   2883	return 0;
   2884}
   2885
   2886static const struct blk_mq_ops nvme_fc_mq_ops = {
   2887	.queue_rq	= nvme_fc_queue_rq,
   2888	.complete	= nvme_fc_complete_rq,
   2889	.init_request	= nvme_fc_init_request,
   2890	.exit_request	= nvme_fc_exit_request,
   2891	.init_hctx	= nvme_fc_init_hctx,
   2892	.timeout	= nvme_fc_timeout,
   2893	.map_queues	= nvme_fc_map_queues,
   2894};
   2895
   2896static int
   2897nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
   2898{
   2899	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
   2900	unsigned int nr_io_queues;
   2901	int ret;
   2902
   2903	nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()),
   2904				ctrl->lport->ops->max_hw_queues);
   2905	ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
   2906	if (ret) {
   2907		dev_info(ctrl->ctrl.device,
   2908			"set_queue_count failed: %d\n", ret);
   2909		return ret;
   2910	}
   2911
   2912	ctrl->ctrl.queue_count = nr_io_queues + 1;
   2913	if (!nr_io_queues)
   2914		return 0;
   2915
   2916	nvme_fc_init_io_queues(ctrl);
   2917
   2918	memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
   2919	ctrl->tag_set.ops = &nvme_fc_mq_ops;
   2920	ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
   2921	ctrl->tag_set.reserved_tags = NVMF_RESERVED_TAGS;
   2922	ctrl->tag_set.numa_node = ctrl->ctrl.numa_node;
   2923	ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
   2924	ctrl->tag_set.cmd_size =
   2925		struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
   2926			    ctrl->lport->ops->fcprqst_priv_sz);
   2927	ctrl->tag_set.driver_data = ctrl;
   2928	ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
   2929	ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
   2930
   2931	ret = blk_mq_alloc_tag_set(&ctrl->tag_set);
   2932	if (ret)
   2933		return ret;
   2934
   2935	ctrl->ctrl.tagset = &ctrl->tag_set;
   2936
   2937	ret = nvme_ctrl_init_connect_q(&(ctrl->ctrl));
   2938	if (ret)
   2939		goto out_free_tag_set;
   2940
   2941	ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
   2942	if (ret)
   2943		goto out_cleanup_blk_queue;
   2944
   2945	ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
   2946	if (ret)
   2947		goto out_delete_hw_queues;
   2948
   2949	ctrl->ioq_live = true;
   2950
   2951	return 0;
   2952
   2953out_delete_hw_queues:
   2954	nvme_fc_delete_hw_io_queues(ctrl);
   2955out_cleanup_blk_queue:
   2956	blk_cleanup_queue(ctrl->ctrl.connect_q);
   2957out_free_tag_set:
   2958	blk_mq_free_tag_set(&ctrl->tag_set);
   2959	nvme_fc_free_io_queues(ctrl);
   2960
   2961	/* force put free routine to ignore io queues */
   2962	ctrl->ctrl.tagset = NULL;
   2963
   2964	return ret;
   2965}
   2966
   2967static int
   2968nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
   2969{
   2970	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
   2971	u32 prior_ioq_cnt = ctrl->ctrl.queue_count - 1;
   2972	unsigned int nr_io_queues;
   2973	int ret;
   2974
   2975	nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()),
   2976				ctrl->lport->ops->max_hw_queues);
   2977	ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
   2978	if (ret) {
   2979		dev_info(ctrl->ctrl.device,
   2980			"set_queue_count failed: %d\n", ret);
   2981		return ret;
   2982	}
   2983
   2984	if (!nr_io_queues && prior_ioq_cnt) {
   2985		dev_info(ctrl->ctrl.device,
   2986			"Fail Reconnect: At least 1 io queue "
   2987			"required (was %d)\n", prior_ioq_cnt);
   2988		return -ENOSPC;
   2989	}
   2990
   2991	ctrl->ctrl.queue_count = nr_io_queues + 1;
   2992	/* check for io queues existing */
   2993	if (ctrl->ctrl.queue_count == 1)
   2994		return 0;
   2995
   2996	if (prior_ioq_cnt != nr_io_queues) {
   2997		dev_info(ctrl->ctrl.device,
   2998			"reconnect: revising io queue count from %d to %d\n",
   2999			prior_ioq_cnt, nr_io_queues);
   3000		blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
   3001	}
   3002
   3003	ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
   3004	if (ret)
   3005		goto out_free_io_queues;
   3006
   3007	ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
   3008	if (ret)
   3009		goto out_delete_hw_queues;
   3010
   3011	return 0;
   3012
   3013out_delete_hw_queues:
   3014	nvme_fc_delete_hw_io_queues(ctrl);
   3015out_free_io_queues:
   3016	nvme_fc_free_io_queues(ctrl);
   3017	return ret;
   3018}
   3019
   3020static void
   3021nvme_fc_rport_active_on_lport(struct nvme_fc_rport *rport)
   3022{
   3023	struct nvme_fc_lport *lport = rport->lport;
   3024
   3025	atomic_inc(&lport->act_rport_cnt);
   3026}
   3027
   3028static void
   3029nvme_fc_rport_inactive_on_lport(struct nvme_fc_rport *rport)
   3030{
   3031	struct nvme_fc_lport *lport = rport->lport;
   3032	u32 cnt;
   3033
   3034	cnt = atomic_dec_return(&lport->act_rport_cnt);
   3035	if (cnt == 0 && lport->localport.port_state == FC_OBJSTATE_DELETED)
   3036		lport->ops->localport_delete(&lport->localport);
   3037}
   3038
   3039static int
   3040nvme_fc_ctlr_active_on_rport(struct nvme_fc_ctrl *ctrl)
   3041{
   3042	struct nvme_fc_rport *rport = ctrl->rport;
   3043	u32 cnt;
   3044
   3045	if (test_and_set_bit(ASSOC_ACTIVE, &ctrl->flags))
   3046		return 1;
   3047
   3048	cnt = atomic_inc_return(&rport->act_ctrl_cnt);
   3049	if (cnt == 1)
   3050		nvme_fc_rport_active_on_lport(rport);
   3051
   3052	return 0;
   3053}
   3054
   3055static int
   3056nvme_fc_ctlr_inactive_on_rport(struct nvme_fc_ctrl *ctrl)
   3057{
   3058	struct nvme_fc_rport *rport = ctrl->rport;
   3059	struct nvme_fc_lport *lport = rport->lport;
   3060	u32 cnt;
   3061
   3062	/* clearing of ctrl->flags ASSOC_ACTIVE bit is in association delete */
   3063
   3064	cnt = atomic_dec_return(&rport->act_ctrl_cnt);
   3065	if (cnt == 0) {
   3066		if (rport->remoteport.port_state == FC_OBJSTATE_DELETED)
   3067			lport->ops->remoteport_delete(&rport->remoteport);
   3068		nvme_fc_rport_inactive_on_lport(rport);
   3069	}
   3070
   3071	return 0;
   3072}
   3073
   3074/*
   3075 * This routine restarts the controller on the host side, and
   3076 * on the link side, recreates the controller association.
   3077 */
   3078static int
   3079nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
   3080{
   3081	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
   3082	struct nvmefc_ls_rcv_op *disls = NULL;
   3083	unsigned long flags;
   3084	int ret;
   3085	bool changed;
   3086
   3087	++ctrl->ctrl.nr_reconnects;
   3088
   3089	if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
   3090		return -ENODEV;
   3091
   3092	if (nvme_fc_ctlr_active_on_rport(ctrl))
   3093		return -ENOTUNIQ;
   3094
   3095	dev_info(ctrl->ctrl.device,
   3096		"NVME-FC{%d}: create association : host wwpn 0x%016llx "
   3097		" rport wwpn 0x%016llx: NQN \"%s\"\n",
   3098		ctrl->cnum, ctrl->lport->localport.port_name,
   3099		ctrl->rport->remoteport.port_name, ctrl->ctrl.opts->subsysnqn);
   3100
   3101	clear_bit(ASSOC_FAILED, &ctrl->flags);
   3102
   3103	/*
   3104	 * Create the admin queue
   3105	 */
   3106
   3107	ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0,
   3108				NVME_AQ_DEPTH);
   3109	if (ret)
   3110		goto out_free_queue;
   3111
   3112	ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0],
   3113				NVME_AQ_DEPTH, (NVME_AQ_DEPTH / 4));
   3114	if (ret)
   3115		goto out_delete_hw_queue;
   3116
   3117	ret = nvmf_connect_admin_queue(&ctrl->ctrl);
   3118	if (ret)
   3119		goto out_disconnect_admin_queue;
   3120
   3121	set_bit(NVME_FC_Q_LIVE, &ctrl->queues[0].flags);
   3122
   3123	/*
   3124	 * Check controller capabilities
   3125	 *
   3126	 * todo:- add code to check if ctrl attributes changed from
   3127	 * prior connection values
   3128	 */
   3129
   3130	ret = nvme_enable_ctrl(&ctrl->ctrl);
   3131	if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
   3132		goto out_disconnect_admin_queue;
   3133
   3134	ctrl->ctrl.max_segments = ctrl->lport->ops->max_sgl_segments;
   3135	ctrl->ctrl.max_hw_sectors = ctrl->ctrl.max_segments <<
   3136						(ilog2(SZ_4K) - 9);
   3137
   3138	nvme_start_admin_queue(&ctrl->ctrl);
   3139
   3140	ret = nvme_init_ctrl_finish(&ctrl->ctrl);
   3141	if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
   3142		goto out_disconnect_admin_queue;
   3143
   3144	/* sanity checks */
   3145
   3146	/* FC-NVME does not have other data in the capsule */
   3147	if (ctrl->ctrl.icdoff) {
   3148		dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n",
   3149				ctrl->ctrl.icdoff);
   3150		ret = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
   3151		goto out_disconnect_admin_queue;
   3152	}
   3153
   3154	/* FC-NVME supports normal SGL Data Block Descriptors */
   3155	if (!nvme_ctrl_sgl_supported(&ctrl->ctrl)) {
   3156		dev_err(ctrl->ctrl.device,
   3157			"Mandatory sgls are not supported!\n");
   3158		ret = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
   3159		goto out_disconnect_admin_queue;
   3160	}
   3161
   3162	if (opts->queue_size > ctrl->ctrl.maxcmd) {
   3163		/* warn if maxcmd is lower than queue_size */
   3164		dev_warn(ctrl->ctrl.device,
   3165			"queue_size %zu > ctrl maxcmd %u, reducing "
   3166			"to maxcmd\n",
   3167			opts->queue_size, ctrl->ctrl.maxcmd);
   3168		opts->queue_size = ctrl->ctrl.maxcmd;
   3169	}
   3170
   3171	if (opts->queue_size > ctrl->ctrl.sqsize + 1) {
   3172		/* warn if sqsize is lower than queue_size */
   3173		dev_warn(ctrl->ctrl.device,
   3174			"queue_size %zu > ctrl sqsize %u, reducing "
   3175			"to sqsize\n",
   3176			opts->queue_size, ctrl->ctrl.sqsize + 1);
   3177		opts->queue_size = ctrl->ctrl.sqsize + 1;
   3178	}
   3179
   3180	ret = nvme_fc_init_aen_ops(ctrl);
   3181	if (ret)
   3182		goto out_term_aen_ops;
   3183
   3184	/*
   3185	 * Create the io queues
   3186	 */
   3187
   3188	if (ctrl->ctrl.queue_count > 1) {
   3189		if (!ctrl->ioq_live)
   3190			ret = nvme_fc_create_io_queues(ctrl);
   3191		else
   3192			ret = nvme_fc_recreate_io_queues(ctrl);
   3193	}
   3194	if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
   3195		goto out_term_aen_ops;
   3196
   3197	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
   3198
   3199	ctrl->ctrl.nr_reconnects = 0;
   3200
   3201	if (changed)
   3202		nvme_start_ctrl(&ctrl->ctrl);
   3203
   3204	return 0;	/* Success */
   3205
   3206out_term_aen_ops:
   3207	nvme_fc_term_aen_ops(ctrl);
   3208out_disconnect_admin_queue:
   3209	/* send a Disconnect(association) LS to fc-nvme target */
   3210	nvme_fc_xmt_disconnect_assoc(ctrl);
   3211	spin_lock_irqsave(&ctrl->lock, flags);
   3212	ctrl->association_id = 0;
   3213	disls = ctrl->rcv_disconn;
   3214	ctrl->rcv_disconn = NULL;
   3215	spin_unlock_irqrestore(&ctrl->lock, flags);
   3216	if (disls)
   3217		nvme_fc_xmt_ls_rsp(disls);
   3218out_delete_hw_queue:
   3219	__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
   3220out_free_queue:
   3221	nvme_fc_free_queue(&ctrl->queues[0]);
   3222	clear_bit(ASSOC_ACTIVE, &ctrl->flags);
   3223	nvme_fc_ctlr_inactive_on_rport(ctrl);
   3224
   3225	return ret;
   3226}
   3227
   3228
   3229/*
   3230 * This routine stops operation of the controller on the host side.
   3231 * On the host os stack side: Admin and IO queues are stopped,
   3232 *   outstanding ios on them terminated via FC ABTS.
   3233 * On the link side: the association is terminated.
   3234 */
   3235static void
   3236nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
   3237{
   3238	struct nvmefc_ls_rcv_op *disls = NULL;
   3239	unsigned long flags;
   3240
   3241	if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags))
   3242		return;
   3243
   3244	spin_lock_irqsave(&ctrl->lock, flags);
   3245	set_bit(FCCTRL_TERMIO, &ctrl->flags);
   3246	ctrl->iocnt = 0;
   3247	spin_unlock_irqrestore(&ctrl->lock, flags);
   3248
   3249	__nvme_fc_abort_outstanding_ios(ctrl, false);
   3250
   3251	/* kill the aens as they are a separate path */
   3252	nvme_fc_abort_aen_ops(ctrl);
   3253
   3254	/* wait for all io that had to be aborted */
   3255	spin_lock_irq(&ctrl->lock);
   3256	wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock);
   3257	clear_bit(FCCTRL_TERMIO, &ctrl->flags);
   3258	spin_unlock_irq(&ctrl->lock);
   3259
   3260	nvme_fc_term_aen_ops(ctrl);
   3261
   3262	/*
   3263	 * send a Disconnect(association) LS to fc-nvme target
   3264	 * Note: could have been sent at top of process, but
   3265	 * cleaner on link traffic if after the aborts complete.
   3266	 * Note: if association doesn't exist, association_id will be 0
   3267	 */
   3268	if (ctrl->association_id)
   3269		nvme_fc_xmt_disconnect_assoc(ctrl);
   3270
   3271	spin_lock_irqsave(&ctrl->lock, flags);
   3272	ctrl->association_id = 0;
   3273	disls = ctrl->rcv_disconn;
   3274	ctrl->rcv_disconn = NULL;
   3275	spin_unlock_irqrestore(&ctrl->lock, flags);
   3276	if (disls)
   3277		/*
   3278		 * if a Disconnect Request was waiting for a response, send
   3279		 * now that all ABTS's have been issued (and are complete).
   3280		 */
   3281		nvme_fc_xmt_ls_rsp(disls);
   3282
   3283	if (ctrl->ctrl.tagset) {
   3284		nvme_fc_delete_hw_io_queues(ctrl);
   3285		nvme_fc_free_io_queues(ctrl);
   3286	}
   3287
   3288	__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
   3289	nvme_fc_free_queue(&ctrl->queues[0]);
   3290
   3291	/* re-enable the admin_q so anything new can fast fail */
   3292	nvme_start_admin_queue(&ctrl->ctrl);
   3293
   3294	/* resume the io queues so that things will fast fail */
   3295	nvme_start_queues(&ctrl->ctrl);
   3296
   3297	nvme_fc_ctlr_inactive_on_rport(ctrl);
   3298}
   3299
   3300static void
   3301nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl)
   3302{
   3303	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
   3304
   3305	cancel_work_sync(&ctrl->ioerr_work);
   3306	cancel_delayed_work_sync(&ctrl->connect_work);
   3307	/*
   3308	 * kill the association on the link side.  this will block
   3309	 * waiting for io to terminate
   3310	 */
   3311	nvme_fc_delete_association(ctrl);
   3312}
   3313
   3314static void
   3315nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
   3316{
   3317	struct nvme_fc_rport *rport = ctrl->rport;
   3318	struct nvme_fc_remote_port *portptr = &rport->remoteport;
   3319	unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ;
   3320	bool recon = true;
   3321
   3322	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING)
   3323		return;
   3324
   3325	if (portptr->port_state == FC_OBJSTATE_ONLINE) {
   3326		dev_info(ctrl->ctrl.device,
   3327			"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
   3328			ctrl->cnum, status);
   3329		if (status > 0 && (status & NVME_SC_DNR))
   3330			recon = false;
   3331	} else if (time_after_eq(jiffies, rport->dev_loss_end))
   3332		recon = false;
   3333
   3334	if (recon && nvmf_should_reconnect(&ctrl->ctrl)) {
   3335		if (portptr->port_state == FC_OBJSTATE_ONLINE)
   3336			dev_info(ctrl->ctrl.device,
   3337				"NVME-FC{%d}: Reconnect attempt in %ld "
   3338				"seconds\n",
   3339				ctrl->cnum, recon_delay / HZ);
   3340		else if (time_after(jiffies + recon_delay, rport->dev_loss_end))
   3341			recon_delay = rport->dev_loss_end - jiffies;
   3342
   3343		queue_delayed_work(nvme_wq, &ctrl->connect_work, recon_delay);
   3344	} else {
   3345		if (portptr->port_state == FC_OBJSTATE_ONLINE) {
   3346			if (status > 0 && (status & NVME_SC_DNR))
   3347				dev_warn(ctrl->ctrl.device,
   3348					 "NVME-FC{%d}: reconnect failure\n",
   3349					 ctrl->cnum);
   3350			else
   3351				dev_warn(ctrl->ctrl.device,
   3352					 "NVME-FC{%d}: Max reconnect attempts "
   3353					 "(%d) reached.\n",
   3354					 ctrl->cnum, ctrl->ctrl.nr_reconnects);
   3355		} else
   3356			dev_warn(ctrl->ctrl.device,
   3357				"NVME-FC{%d}: dev_loss_tmo (%d) expired "
   3358				"while waiting for remoteport connectivity.\n",
   3359				ctrl->cnum, min_t(int, portptr->dev_loss_tmo,
   3360					(ctrl->ctrl.opts->max_reconnects *
   3361					 ctrl->ctrl.opts->reconnect_delay)));
   3362		WARN_ON(nvme_delete_ctrl(&ctrl->ctrl));
   3363	}
   3364}
   3365
   3366static void
   3367nvme_fc_reset_ctrl_work(struct work_struct *work)
   3368{
   3369	struct nvme_fc_ctrl *ctrl =
   3370		container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
   3371
   3372	nvme_stop_ctrl(&ctrl->ctrl);
   3373
   3374	/* will block will waiting for io to terminate */
   3375	nvme_fc_delete_association(ctrl);
   3376
   3377	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
   3378		dev_err(ctrl->ctrl.device,
   3379			"NVME-FC{%d}: error_recovery: Couldn't change state "
   3380			"to CONNECTING\n", ctrl->cnum);
   3381
   3382	if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE) {
   3383		if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) {
   3384			dev_err(ctrl->ctrl.device,
   3385				"NVME-FC{%d}: failed to schedule connect "
   3386				"after reset\n", ctrl->cnum);
   3387		} else {
   3388			flush_delayed_work(&ctrl->connect_work);
   3389		}
   3390	} else {
   3391		nvme_fc_reconnect_or_delete(ctrl, -ENOTCONN);
   3392	}
   3393}
   3394
   3395
   3396static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
   3397	.name			= "fc",
   3398	.module			= THIS_MODULE,
   3399	.flags			= NVME_F_FABRICS,
   3400	.reg_read32		= nvmf_reg_read32,
   3401	.reg_read64		= nvmf_reg_read64,
   3402	.reg_write32		= nvmf_reg_write32,
   3403	.free_ctrl		= nvme_fc_nvme_ctrl_freed,
   3404	.submit_async_event	= nvme_fc_submit_async_event,
   3405	.delete_ctrl		= nvme_fc_delete_ctrl,
   3406	.get_address		= nvmf_get_address,
   3407};
   3408
   3409static void
   3410nvme_fc_connect_ctrl_work(struct work_struct *work)
   3411{
   3412	int ret;
   3413
   3414	struct nvme_fc_ctrl *ctrl =
   3415			container_of(to_delayed_work(work),
   3416				struct nvme_fc_ctrl, connect_work);
   3417
   3418	ret = nvme_fc_create_association(ctrl);
   3419	if (ret)
   3420		nvme_fc_reconnect_or_delete(ctrl, ret);
   3421	else
   3422		dev_info(ctrl->ctrl.device,
   3423			"NVME-FC{%d}: controller connect complete\n",
   3424			ctrl->cnum);
   3425}
   3426
   3427
   3428static const struct blk_mq_ops nvme_fc_admin_mq_ops = {
   3429	.queue_rq	= nvme_fc_queue_rq,
   3430	.complete	= nvme_fc_complete_rq,
   3431	.init_request	= nvme_fc_init_request,
   3432	.exit_request	= nvme_fc_exit_request,
   3433	.init_hctx	= nvme_fc_init_admin_hctx,
   3434	.timeout	= nvme_fc_timeout,
   3435};
   3436
   3437
   3438/*
   3439 * Fails a controller request if it matches an existing controller
   3440 * (association) with the same tuple:
   3441 * <Host NQN, Host ID, local FC port, remote FC port, SUBSYS NQN>
   3442 *
   3443 * The ports don't need to be compared as they are intrinsically
   3444 * already matched by the port pointers supplied.
   3445 */
   3446static bool
   3447nvme_fc_existing_controller(struct nvme_fc_rport *rport,
   3448		struct nvmf_ctrl_options *opts)
   3449{
   3450	struct nvme_fc_ctrl *ctrl;
   3451	unsigned long flags;
   3452	bool found = false;
   3453
   3454	spin_lock_irqsave(&rport->lock, flags);
   3455	list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
   3456		found = nvmf_ctlr_matches_baseopts(&ctrl->ctrl, opts);
   3457		if (found)
   3458			break;
   3459	}
   3460	spin_unlock_irqrestore(&rport->lock, flags);
   3461
   3462	return found;
   3463}
   3464
   3465static struct nvme_ctrl *
   3466nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
   3467	struct nvme_fc_lport *lport, struct nvme_fc_rport *rport)
   3468{
   3469	struct nvme_fc_ctrl *ctrl;
   3470	unsigned long flags;
   3471	int ret, idx, ctrl_loss_tmo;
   3472
   3473	if (!(rport->remoteport.port_role &
   3474	    (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) {
   3475		ret = -EBADR;
   3476		goto out_fail;
   3477	}
   3478
   3479	if (!opts->duplicate_connect &&
   3480	    nvme_fc_existing_controller(rport, opts)) {
   3481		ret = -EALREADY;
   3482		goto out_fail;
   3483	}
   3484
   3485	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
   3486	if (!ctrl) {
   3487		ret = -ENOMEM;
   3488		goto out_fail;
   3489	}
   3490
   3491	idx = ida_alloc(&nvme_fc_ctrl_cnt, GFP_KERNEL);
   3492	if (idx < 0) {
   3493		ret = -ENOSPC;
   3494		goto out_free_ctrl;
   3495	}
   3496
   3497	/*
   3498	 * if ctrl_loss_tmo is being enforced and the default reconnect delay
   3499	 * is being used, change to a shorter reconnect delay for FC.
   3500	 */
   3501	if (opts->max_reconnects != -1 &&
   3502	    opts->reconnect_delay == NVMF_DEF_RECONNECT_DELAY &&
   3503	    opts->reconnect_delay > NVME_FC_DEFAULT_RECONNECT_TMO) {
   3504		ctrl_loss_tmo = opts->max_reconnects * opts->reconnect_delay;
   3505		opts->reconnect_delay = NVME_FC_DEFAULT_RECONNECT_TMO;
   3506		opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
   3507						opts->reconnect_delay);
   3508	}
   3509
   3510	ctrl->ctrl.opts = opts;
   3511	ctrl->ctrl.nr_reconnects = 0;
   3512	if (lport->dev)
   3513		ctrl->ctrl.numa_node = dev_to_node(lport->dev);
   3514	else
   3515		ctrl->ctrl.numa_node = NUMA_NO_NODE;
   3516	INIT_LIST_HEAD(&ctrl->ctrl_list);
   3517	ctrl->lport = lport;
   3518	ctrl->rport = rport;
   3519	ctrl->dev = lport->dev;
   3520	ctrl->cnum = idx;
   3521	ctrl->ioq_live = false;
   3522	init_waitqueue_head(&ctrl->ioabort_wait);
   3523
   3524	get_device(ctrl->dev);
   3525	kref_init(&ctrl->ref);
   3526
   3527	INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
   3528	INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
   3529	INIT_WORK(&ctrl->ioerr_work, nvme_fc_ctrl_ioerr_work);
   3530	spin_lock_init(&ctrl->lock);
   3531
   3532	/* io queue count */
   3533	ctrl->ctrl.queue_count = min_t(unsigned int,
   3534				opts->nr_io_queues,
   3535				lport->ops->max_hw_queues);
   3536	ctrl->ctrl.queue_count++;	/* +1 for admin queue */
   3537
   3538	ctrl->ctrl.sqsize = opts->queue_size - 1;
   3539	ctrl->ctrl.kato = opts->kato;
   3540	ctrl->ctrl.cntlid = 0xffff;
   3541
   3542	ret = -ENOMEM;
   3543	ctrl->queues = kcalloc(ctrl->ctrl.queue_count,
   3544				sizeof(struct nvme_fc_queue), GFP_KERNEL);
   3545	if (!ctrl->queues)
   3546		goto out_free_ida;
   3547
   3548	nvme_fc_init_queue(ctrl, 0);
   3549
   3550	memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
   3551	ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
   3552	ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
   3553	ctrl->admin_tag_set.reserved_tags = NVMF_RESERVED_TAGS;
   3554	ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node;
   3555	ctrl->admin_tag_set.cmd_size =
   3556		struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
   3557			    ctrl->lport->ops->fcprqst_priv_sz);
   3558	ctrl->admin_tag_set.driver_data = ctrl;
   3559	ctrl->admin_tag_set.nr_hw_queues = 1;
   3560	ctrl->admin_tag_set.timeout = NVME_ADMIN_TIMEOUT;
   3561	ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED;
   3562
   3563	ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
   3564	if (ret)
   3565		goto out_free_queues;
   3566	ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set;
   3567
   3568	ctrl->ctrl.fabrics_q = blk_mq_init_queue(&ctrl->admin_tag_set);
   3569	if (IS_ERR(ctrl->ctrl.fabrics_q)) {
   3570		ret = PTR_ERR(ctrl->ctrl.fabrics_q);
   3571		goto out_free_admin_tag_set;
   3572	}
   3573
   3574	ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
   3575	if (IS_ERR(ctrl->ctrl.admin_q)) {
   3576		ret = PTR_ERR(ctrl->ctrl.admin_q);
   3577		goto out_cleanup_fabrics_q;
   3578	}
   3579
   3580	/*
   3581	 * Would have been nice to init io queues tag set as well.
   3582	 * However, we require interaction from the controller
   3583	 * for max io queue count before we can do so.
   3584	 * Defer this to the connect path.
   3585	 */
   3586
   3587	ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0);
   3588	if (ret)
   3589		goto out_cleanup_admin_q;
   3590
   3591	/* at this point, teardown path changes to ref counting on nvme ctrl */
   3592
   3593	spin_lock_irqsave(&rport->lock, flags);
   3594	list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list);
   3595	spin_unlock_irqrestore(&rport->lock, flags);
   3596
   3597	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING) ||
   3598	    !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
   3599		dev_err(ctrl->ctrl.device,
   3600			"NVME-FC{%d}: failed to init ctrl state\n", ctrl->cnum);
   3601		goto fail_ctrl;
   3602	}
   3603
   3604	if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) {
   3605		dev_err(ctrl->ctrl.device,
   3606			"NVME-FC{%d}: failed to schedule initial connect\n",
   3607			ctrl->cnum);
   3608		goto fail_ctrl;
   3609	}
   3610
   3611	flush_delayed_work(&ctrl->connect_work);
   3612
   3613	dev_info(ctrl->ctrl.device,
   3614		"NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
   3615		ctrl->cnum, nvmf_ctrl_subsysnqn(&ctrl->ctrl));
   3616
   3617	return &ctrl->ctrl;
   3618
   3619fail_ctrl:
   3620	nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
   3621	cancel_work_sync(&ctrl->ioerr_work);
   3622	cancel_work_sync(&ctrl->ctrl.reset_work);
   3623	cancel_delayed_work_sync(&ctrl->connect_work);
   3624
   3625	ctrl->ctrl.opts = NULL;
   3626
   3627	/* initiate nvme ctrl ref counting teardown */
   3628	nvme_uninit_ctrl(&ctrl->ctrl);
   3629
   3630	/* Remove core ctrl ref. */
   3631	nvme_put_ctrl(&ctrl->ctrl);
   3632
   3633	/* as we're past the point where we transition to the ref
   3634	 * counting teardown path, if we return a bad pointer here,
   3635	 * the calling routine, thinking it's prior to the
   3636	 * transition, will do an rport put. Since the teardown
   3637	 * path also does a rport put, we do an extra get here to
   3638	 * so proper order/teardown happens.
   3639	 */
   3640	nvme_fc_rport_get(rport);
   3641
   3642	return ERR_PTR(-EIO);
   3643
   3644out_cleanup_admin_q:
   3645	blk_cleanup_queue(ctrl->ctrl.admin_q);
   3646out_cleanup_fabrics_q:
   3647	blk_cleanup_queue(ctrl->ctrl.fabrics_q);
   3648out_free_admin_tag_set:
   3649	blk_mq_free_tag_set(&ctrl->admin_tag_set);
   3650out_free_queues:
   3651	kfree(ctrl->queues);
   3652out_free_ida:
   3653	put_device(ctrl->dev);
   3654	ida_free(&nvme_fc_ctrl_cnt, ctrl->cnum);
   3655out_free_ctrl:
   3656	kfree(ctrl);
   3657out_fail:
   3658	/* exit via here doesn't follow ctlr ref points */
   3659	return ERR_PTR(ret);
   3660}
   3661
   3662
   3663struct nvmet_fc_traddr {
   3664	u64	nn;
   3665	u64	pn;
   3666};
   3667
   3668static int
   3669__nvme_fc_parse_u64(substring_t *sstr, u64 *val)
   3670{
   3671	u64 token64;
   3672
   3673	if (match_u64(sstr, &token64))
   3674		return -EINVAL;
   3675	*val = token64;
   3676
   3677	return 0;
   3678}
   3679
   3680/*
   3681 * This routine validates and extracts the WWN's from the TRADDR string.
   3682 * As kernel parsers need the 0x to determine number base, universally
   3683 * build string to parse with 0x prefix before parsing name strings.
   3684 */
   3685static int
   3686nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen)
   3687{
   3688	char name[2 + NVME_FC_TRADDR_HEXNAMELEN + 1];
   3689	substring_t wwn = { name, &name[sizeof(name)-1] };
   3690	int nnoffset, pnoffset;
   3691
   3692	/* validate if string is one of the 2 allowed formats */
   3693	if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH &&
   3694			!strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) &&
   3695			!strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET],
   3696				"pn-0x", NVME_FC_TRADDR_OXNNLEN)) {
   3697		nnoffset = NVME_FC_TRADDR_OXNNLEN;
   3698		pnoffset = NVME_FC_TRADDR_MAX_PN_OFFSET +
   3699						NVME_FC_TRADDR_OXNNLEN;
   3700	} else if ((strnlen(buf, blen) == NVME_FC_TRADDR_MINLENGTH &&
   3701			!strncmp(buf, "nn-", NVME_FC_TRADDR_NNLEN) &&
   3702			!strncmp(&buf[NVME_FC_TRADDR_MIN_PN_OFFSET],
   3703				"pn-", NVME_FC_TRADDR_NNLEN))) {
   3704		nnoffset = NVME_FC_TRADDR_NNLEN;
   3705		pnoffset = NVME_FC_TRADDR_MIN_PN_OFFSET + NVME_FC_TRADDR_NNLEN;
   3706	} else
   3707		goto out_einval;
   3708
   3709	name[0] = '0';
   3710	name[1] = 'x';
   3711	name[2 + NVME_FC_TRADDR_HEXNAMELEN] = 0;
   3712
   3713	memcpy(&name[2], &buf[nnoffset], NVME_FC_TRADDR_HEXNAMELEN);
   3714	if (__nvme_fc_parse_u64(&wwn, &traddr->nn))
   3715		goto out_einval;
   3716
   3717	memcpy(&name[2], &buf[pnoffset], NVME_FC_TRADDR_HEXNAMELEN);
   3718	if (__nvme_fc_parse_u64(&wwn, &traddr->pn))
   3719		goto out_einval;
   3720
   3721	return 0;
   3722
   3723out_einval:
   3724	pr_warn("%s: bad traddr string\n", __func__);
   3725	return -EINVAL;
   3726}
   3727
   3728static struct nvme_ctrl *
   3729nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts)
   3730{
   3731	struct nvme_fc_lport *lport;
   3732	struct nvme_fc_rport *rport;
   3733	struct nvme_ctrl *ctrl;
   3734	struct nvmet_fc_traddr laddr = { 0L, 0L };
   3735	struct nvmet_fc_traddr raddr = { 0L, 0L };
   3736	unsigned long flags;
   3737	int ret;
   3738
   3739	ret = nvme_fc_parse_traddr(&raddr, opts->traddr, NVMF_TRADDR_SIZE);
   3740	if (ret || !raddr.nn || !raddr.pn)
   3741		return ERR_PTR(-EINVAL);
   3742
   3743	ret = nvme_fc_parse_traddr(&laddr, opts->host_traddr, NVMF_TRADDR_SIZE);
   3744	if (ret || !laddr.nn || !laddr.pn)
   3745		return ERR_PTR(-EINVAL);
   3746
   3747	/* find the host and remote ports to connect together */
   3748	spin_lock_irqsave(&nvme_fc_lock, flags);
   3749	list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
   3750		if (lport->localport.node_name != laddr.nn ||
   3751		    lport->localport.port_name != laddr.pn ||
   3752		    lport->localport.port_state != FC_OBJSTATE_ONLINE)
   3753			continue;
   3754
   3755		list_for_each_entry(rport, &lport->endp_list, endp_list) {
   3756			if (rport->remoteport.node_name != raddr.nn ||
   3757			    rport->remoteport.port_name != raddr.pn ||
   3758			    rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
   3759				continue;
   3760
   3761			/* if fail to get reference fall through. Will error */
   3762			if (!nvme_fc_rport_get(rport))
   3763				break;
   3764
   3765			spin_unlock_irqrestore(&nvme_fc_lock, flags);
   3766
   3767			ctrl = nvme_fc_init_ctrl(dev, opts, lport, rport);
   3768			if (IS_ERR(ctrl))
   3769				nvme_fc_rport_put(rport);
   3770			return ctrl;
   3771		}
   3772	}
   3773	spin_unlock_irqrestore(&nvme_fc_lock, flags);
   3774
   3775	pr_warn("%s: %s - %s combination not found\n",
   3776		__func__, opts->traddr, opts->host_traddr);
   3777	return ERR_PTR(-ENOENT);
   3778}
   3779
   3780
   3781static struct nvmf_transport_ops nvme_fc_transport = {
   3782	.name		= "fc",
   3783	.module		= THIS_MODULE,
   3784	.required_opts	= NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR,
   3785	.allowed_opts	= NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_CTRL_LOSS_TMO,
   3786	.create_ctrl	= nvme_fc_create_ctrl,
   3787};
   3788
   3789/* Arbitrary successive failures max. With lots of subsystems could be high */
   3790#define DISCOVERY_MAX_FAIL	20
   3791
   3792static ssize_t nvme_fc_nvme_discovery_store(struct device *dev,
   3793		struct device_attribute *attr, const char *buf, size_t count)
   3794{
   3795	unsigned long flags;
   3796	LIST_HEAD(local_disc_list);
   3797	struct nvme_fc_lport *lport;
   3798	struct nvme_fc_rport *rport;
   3799	int failcnt = 0;
   3800
   3801	spin_lock_irqsave(&nvme_fc_lock, flags);
   3802restart:
   3803	list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
   3804		list_for_each_entry(rport, &lport->endp_list, endp_list) {
   3805			if (!nvme_fc_lport_get(lport))
   3806				continue;
   3807			if (!nvme_fc_rport_get(rport)) {
   3808				/*
   3809				 * This is a temporary condition. Upon restart
   3810				 * this rport will be gone from the list.
   3811				 *
   3812				 * Revert the lport put and retry.  Anything
   3813				 * added to the list already will be skipped (as
   3814				 * they are no longer list_empty).  Loops should
   3815				 * resume at rports that were not yet seen.
   3816				 */
   3817				nvme_fc_lport_put(lport);
   3818
   3819				if (failcnt++ < DISCOVERY_MAX_FAIL)
   3820					goto restart;
   3821
   3822				pr_err("nvme_discovery: too many reference "
   3823				       "failures\n");
   3824				goto process_local_list;
   3825			}
   3826			if (list_empty(&rport->disc_list))
   3827				list_add_tail(&rport->disc_list,
   3828					      &local_disc_list);
   3829		}
   3830	}
   3831
   3832process_local_list:
   3833	while (!list_empty(&local_disc_list)) {
   3834		rport = list_first_entry(&local_disc_list,
   3835					 struct nvme_fc_rport, disc_list);
   3836		list_del_init(&rport->disc_list);
   3837		spin_unlock_irqrestore(&nvme_fc_lock, flags);
   3838
   3839		lport = rport->lport;
   3840		/* signal discovery. Won't hurt if it repeats */
   3841		nvme_fc_signal_discovery_scan(lport, rport);
   3842		nvme_fc_rport_put(rport);
   3843		nvme_fc_lport_put(lport);
   3844
   3845		spin_lock_irqsave(&nvme_fc_lock, flags);
   3846	}
   3847	spin_unlock_irqrestore(&nvme_fc_lock, flags);
   3848
   3849	return count;
   3850}
   3851
   3852static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store);
   3853
   3854#ifdef CONFIG_BLK_CGROUP_FC_APPID
   3855/* Parse the cgroup id from a buf and return the length of cgrpid */
   3856static int fc_parse_cgrpid(const char *buf, u64 *id)
   3857{
   3858	char cgrp_id[16+1];
   3859	int cgrpid_len, j;
   3860
   3861	memset(cgrp_id, 0x0, sizeof(cgrp_id));
   3862	for (cgrpid_len = 0, j = 0; cgrpid_len < 17; cgrpid_len++) {
   3863		if (buf[cgrpid_len] != ':')
   3864			cgrp_id[cgrpid_len] = buf[cgrpid_len];
   3865		else {
   3866			j = 1;
   3867			break;
   3868		}
   3869	}
   3870	if (!j)
   3871		return -EINVAL;
   3872	if (kstrtou64(cgrp_id, 16, id) < 0)
   3873		return -EINVAL;
   3874	return cgrpid_len;
   3875}
   3876
   3877/*
   3878 * Parse and update the appid in the blkcg associated with the cgroupid.
   3879 */
   3880static ssize_t fc_appid_store(struct device *dev,
   3881		struct device_attribute *attr, const char *buf, size_t count)
   3882{
   3883	u64 cgrp_id;
   3884	int appid_len = 0;
   3885	int cgrpid_len = 0;
   3886	char app_id[FC_APPID_LEN];
   3887	int ret = 0;
   3888
   3889	if (buf[count-1] == '\n')
   3890		count--;
   3891
   3892	if ((count > (16+1+FC_APPID_LEN)) || (!strchr(buf, ':')))
   3893		return -EINVAL;
   3894
   3895	cgrpid_len = fc_parse_cgrpid(buf, &cgrp_id);
   3896	if (cgrpid_len < 0)
   3897		return -EINVAL;
   3898	appid_len = count - cgrpid_len - 1;
   3899	if (appid_len > FC_APPID_LEN)
   3900		return -EINVAL;
   3901
   3902	memset(app_id, 0x0, sizeof(app_id));
   3903	memcpy(app_id, &buf[cgrpid_len+1], appid_len);
   3904	ret = blkcg_set_fc_appid(app_id, cgrp_id, sizeof(app_id));
   3905	if (ret < 0)
   3906		return ret;
   3907	return count;
   3908}
   3909static DEVICE_ATTR(appid_store, 0200, NULL, fc_appid_store);
   3910#endif /* CONFIG_BLK_CGROUP_FC_APPID */
   3911
   3912static struct attribute *nvme_fc_attrs[] = {
   3913	&dev_attr_nvme_discovery.attr,
   3914#ifdef CONFIG_BLK_CGROUP_FC_APPID
   3915	&dev_attr_appid_store.attr,
   3916#endif
   3917	NULL
   3918};
   3919
   3920static const struct attribute_group nvme_fc_attr_group = {
   3921	.attrs = nvme_fc_attrs,
   3922};
   3923
   3924static const struct attribute_group *nvme_fc_attr_groups[] = {
   3925	&nvme_fc_attr_group,
   3926	NULL
   3927};
   3928
   3929static struct class fc_class = {
   3930	.name = "fc",
   3931	.dev_groups = nvme_fc_attr_groups,
   3932	.owner = THIS_MODULE,
   3933};
   3934
   3935static int __init nvme_fc_init_module(void)
   3936{
   3937	int ret;
   3938
   3939	nvme_fc_wq = alloc_workqueue("nvme_fc_wq", WQ_MEM_RECLAIM, 0);
   3940	if (!nvme_fc_wq)
   3941		return -ENOMEM;
   3942
   3943	/*
   3944	 * NOTE:
   3945	 * It is expected that in the future the kernel will combine
   3946	 * the FC-isms that are currently under scsi and now being
   3947	 * added to by NVME into a new standalone FC class. The SCSI
   3948	 * and NVME protocols and their devices would be under this
   3949	 * new FC class.
   3950	 *
   3951	 * As we need something to post FC-specific udev events to,
   3952	 * specifically for nvme probe events, start by creating the
   3953	 * new device class.  When the new standalone FC class is
   3954	 * put in place, this code will move to a more generic
   3955	 * location for the class.
   3956	 */
   3957	ret = class_register(&fc_class);
   3958	if (ret) {
   3959		pr_err("couldn't register class fc\n");
   3960		goto out_destroy_wq;
   3961	}
   3962
   3963	/*
   3964	 * Create a device for the FC-centric udev events
   3965	 */
   3966	fc_udev_device = device_create(&fc_class, NULL, MKDEV(0, 0), NULL,
   3967				"fc_udev_device");
   3968	if (IS_ERR(fc_udev_device)) {
   3969		pr_err("couldn't create fc_udev device!\n");
   3970		ret = PTR_ERR(fc_udev_device);
   3971		goto out_destroy_class;
   3972	}
   3973
   3974	ret = nvmf_register_transport(&nvme_fc_transport);
   3975	if (ret)
   3976		goto out_destroy_device;
   3977
   3978	return 0;
   3979
   3980out_destroy_device:
   3981	device_destroy(&fc_class, MKDEV(0, 0));
   3982out_destroy_class:
   3983	class_unregister(&fc_class);
   3984out_destroy_wq:
   3985	destroy_workqueue(nvme_fc_wq);
   3986
   3987	return ret;
   3988}
   3989
   3990static void
   3991nvme_fc_delete_controllers(struct nvme_fc_rport *rport)
   3992{
   3993	struct nvme_fc_ctrl *ctrl;
   3994
   3995	spin_lock(&rport->lock);
   3996	list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
   3997		dev_warn(ctrl->ctrl.device,
   3998			"NVME-FC{%d}: transport unloading: deleting ctrl\n",
   3999			ctrl->cnum);
   4000		nvme_delete_ctrl(&ctrl->ctrl);
   4001	}
   4002	spin_unlock(&rport->lock);
   4003}
   4004
   4005static void
   4006nvme_fc_cleanup_for_unload(void)
   4007{
   4008	struct nvme_fc_lport *lport;
   4009	struct nvme_fc_rport *rport;
   4010
   4011	list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
   4012		list_for_each_entry(rport, &lport->endp_list, endp_list) {
   4013			nvme_fc_delete_controllers(rport);
   4014		}
   4015	}
   4016}
   4017
   4018static void __exit nvme_fc_exit_module(void)
   4019{
   4020	unsigned long flags;
   4021	bool need_cleanup = false;
   4022
   4023	spin_lock_irqsave(&nvme_fc_lock, flags);
   4024	nvme_fc_waiting_to_unload = true;
   4025	if (!list_empty(&nvme_fc_lport_list)) {
   4026		need_cleanup = true;
   4027		nvme_fc_cleanup_for_unload();
   4028	}
   4029	spin_unlock_irqrestore(&nvme_fc_lock, flags);
   4030	if (need_cleanup) {
   4031		pr_info("%s: waiting for ctlr deletes\n", __func__);
   4032		wait_for_completion(&nvme_fc_unload_proceed);
   4033		pr_info("%s: ctrl deletes complete\n", __func__);
   4034	}
   4035
   4036	nvmf_unregister_transport(&nvme_fc_transport);
   4037
   4038	ida_destroy(&nvme_fc_local_port_cnt);
   4039	ida_destroy(&nvme_fc_ctrl_cnt);
   4040
   4041	device_destroy(&fc_class, MKDEV(0, 0));
   4042	class_unregister(&fc_class);
   4043	destroy_workqueue(nvme_fc_wq);
   4044}
   4045
   4046module_init(nvme_fc_init_module);
   4047module_exit(nvme_fc_exit_module);
   4048
   4049MODULE_LICENSE("GPL v2");