cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

iscsi_iser.c (31473B)


      1/*
      2 * iSCSI Initiator over iSER Data-Path
      3 *
      4 * Copyright (C) 2004 Dmitry Yusupov
      5 * Copyright (C) 2004 Alex Aizman
      6 * Copyright (C) 2005 Mike Christie
      7 * Copyright (c) 2005, 2006 Voltaire, Inc. All rights reserved.
      8 * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
      9 * maintained by openib-general@openib.org
     10 *
     11 * This software is available to you under a choice of one of two
     12 * licenses.  You may choose to be licensed under the terms of the GNU
     13 * General Public License (GPL) Version 2, available from the file
     14 * COPYING in the main directory of this source tree, or the
     15 * OpenIB.org BSD license below:
     16 *
     17 *     Redistribution and use in source and binary forms, with or
     18 *     without modification, are permitted provided that the following
     19 *     conditions are met:
     20 *
     21 *	- Redistributions of source code must retain the above
     22 *	  copyright notice, this list of conditions and the following
     23 *	  disclaimer.
     24 *
     25 *	- Redistributions in binary form must reproduce the above
     26 *	  copyright notice, this list of conditions and the following
     27 *	  disclaimer in the documentation and/or other materials
     28 *	  provided with the distribution.
     29 *
     30 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     31 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     32 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
     33 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
     34 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
     35 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     36 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     37 * SOFTWARE.
     38 *
     39 * Credits:
     40 *	Christoph Hellwig
     41 *	FUJITA Tomonori
     42 *	Arne Redlich
     43 *	Zhenyu Wang
     44 * Modified by:
     45 *      Erez Zilber
     46 */
     47
     48#include <linux/types.h>
     49#include <linux/list.h>
     50#include <linux/hardirq.h>
     51#include <linux/kfifo.h>
     52#include <linux/blkdev.h>
     53#include <linux/init.h>
     54#include <linux/ioctl.h>
     55#include <linux/cdev.h>
     56#include <linux/in.h>
     57#include <linux/net.h>
     58#include <linux/scatterlist.h>
     59#include <linux/delay.h>
     60#include <linux/slab.h>
     61#include <linux/module.h>
     62
     63#include <net/sock.h>
     64
     65#include <linux/uaccess.h>
     66
     67#include <scsi/scsi_cmnd.h>
     68#include <scsi/scsi_device.h>
     69#include <scsi/scsi_eh.h>
     70#include <scsi/scsi_tcq.h>
     71#include <scsi/scsi_host.h>
     72#include <scsi/scsi.h>
     73#include <scsi/scsi_transport_iscsi.h>
     74
     75#include "iscsi_iser.h"
     76
     77MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover");
     78MODULE_LICENSE("Dual BSD/GPL");
     79MODULE_AUTHOR("Alex Nezhinsky, Dan Bar Dov, Or Gerlitz");
     80
     81static struct scsi_host_template iscsi_iser_sht;
     82static struct iscsi_transport iscsi_iser_transport;
     83static struct scsi_transport_template *iscsi_iser_scsi_transport;
     84static struct workqueue_struct *release_wq;
     85static DEFINE_MUTEX(unbind_iser_conn_mutex);
     86struct iser_global ig;
     87
     88int iser_debug_level = 0;
     89module_param_named(debug_level, iser_debug_level, int, S_IRUGO | S_IWUSR);
     90MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:disabled)");
     91
     92static int iscsi_iser_set(const char *val, const struct kernel_param *kp);
     93static const struct kernel_param_ops iscsi_iser_size_ops = {
     94	.set = iscsi_iser_set,
     95	.get = param_get_uint,
     96};
     97
     98static unsigned int iscsi_max_lun = 512;
     99module_param_cb(max_lun, &iscsi_iser_size_ops, &iscsi_max_lun, S_IRUGO);
    100MODULE_PARM_DESC(max_lun, "Max LUNs to allow per session, should > 0 (default:512)");
    101
    102unsigned int iser_max_sectors = ISER_DEF_MAX_SECTORS;
    103module_param_cb(max_sectors, &iscsi_iser_size_ops, &iser_max_sectors,
    104		S_IRUGO | S_IWUSR);
    105MODULE_PARM_DESC(max_sectors, "Max number of sectors in a single scsi command, should > 0 (default:1024)");
    106
    107bool iser_always_reg = true;
    108module_param_named(always_register, iser_always_reg, bool, S_IRUGO);
    109MODULE_PARM_DESC(always_register,
    110		 "Always register memory, even for continuous memory regions (default:true)");
    111
    112bool iser_pi_enable = false;
    113module_param_named(pi_enable, iser_pi_enable, bool, S_IRUGO);
    114MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)");
    115
    116static int iscsi_iser_set(const char *val, const struct kernel_param *kp)
    117{
    118	int ret;
    119	unsigned int n = 0;
    120
    121	ret = kstrtouint(val, 10, &n);
    122	if (ret != 0 || n == 0)
    123		return -EINVAL;
    124
    125	return param_set_uint(val, kp);
    126}
    127
    128/*
    129 * iscsi_iser_recv() - Process a successful recv completion
    130 * @conn:         iscsi connection
    131 * @hdr:          iscsi header
    132 * @rx_data:      buffer containing receive data payload
    133 * @rx_data_len:  length of rx_data
    134 *
    135 * Notes: In case of data length errors or iscsi PDU completion failures
    136 *        this routine will signal iscsi layer of connection failure.
    137 */
    138void iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
    139		     char *rx_data, int rx_data_len)
    140{
    141	int rc = 0;
    142	int datalen;
    143
    144	/* verify PDU length */
    145	datalen = ntoh24(hdr->dlength);
    146	if (datalen > rx_data_len || (datalen + 4) < rx_data_len) {
    147		iser_err("wrong datalen %d (hdr), %d (IB)\n",
    148			datalen, rx_data_len);
    149		rc = ISCSI_ERR_DATALEN;
    150		goto error;
    151	}
    152
    153	if (datalen != rx_data_len)
    154		iser_dbg("aligned datalen (%d) hdr, %d (IB)\n",
    155			datalen, rx_data_len);
    156
    157	rc = iscsi_complete_pdu(conn, hdr, rx_data, rx_data_len);
    158	if (rc && rc != ISCSI_ERR_NO_SCSI_CMD)
    159		goto error;
    160
    161	return;
    162error:
    163	iscsi_conn_failure(conn, rc);
    164}
    165
    166/**
    167 * iscsi_iser_pdu_alloc() - allocate an iscsi-iser PDU
    168 * @task:     iscsi task
    169 * @opcode:   iscsi command opcode
    170 *
    171 * Netes: This routine can't fail, just assign iscsi task
    172 *        hdr and max hdr size.
    173 */
    174static int iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
    175{
    176	struct iscsi_iser_task *iser_task = task->dd_data;
    177
    178	task->hdr = (struct iscsi_hdr *)&iser_task->desc.iscsi_header;
    179	task->hdr_max = sizeof(iser_task->desc.iscsi_header);
    180
    181	return 0;
    182}
    183
    184/**
    185 * iser_initialize_task_headers() - Initialize task headers
    186 * @task:       iscsi task
    187 * @tx_desc:    iser tx descriptor
    188 *
    189 * Notes:
    190 * This routine may race with iser teardown flow for scsi
    191 * error handling TMFs. So for TMF we should acquire the
    192 * state mutex to avoid dereferencing the IB device which
    193 * may have already been terminated.
    194 */
    195int iser_initialize_task_headers(struct iscsi_task *task,
    196				 struct iser_tx_desc *tx_desc)
    197{
    198	struct iser_conn *iser_conn = task->conn->dd_data;
    199	struct iser_device *device = iser_conn->ib_conn.device;
    200	struct iscsi_iser_task *iser_task = task->dd_data;
    201	u64 dma_addr;
    202
    203	if (unlikely(iser_conn->state != ISER_CONN_UP))
    204		return -ENODEV;
    205
    206	dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
    207				ISER_HEADERS_LEN, DMA_TO_DEVICE);
    208	if (ib_dma_mapping_error(device->ib_device, dma_addr))
    209		return -ENOMEM;
    210
    211	tx_desc->inv_wr.next = NULL;
    212	tx_desc->reg_wr.wr.next = NULL;
    213	tx_desc->mapped = true;
    214	tx_desc->dma_addr = dma_addr;
    215	tx_desc->tx_sg[0].addr   = tx_desc->dma_addr;
    216	tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
    217	tx_desc->tx_sg[0].lkey   = device->pd->local_dma_lkey;
    218
    219	iser_task->iser_conn = iser_conn;
    220
    221	return 0;
    222}
    223
    224/**
    225 * iscsi_iser_task_init() - Initialize iscsi-iser task
    226 * @task: iscsi task
    227 *
    228 * Initialize the task for the scsi command or mgmt command.
    229 *
    230 * Return: Returns zero on success or -ENOMEM when failing
    231 *         to init task headers (dma mapping error).
    232 */
    233static int iscsi_iser_task_init(struct iscsi_task *task)
    234{
    235	struct iscsi_iser_task *iser_task = task->dd_data;
    236	int ret;
    237
    238	ret = iser_initialize_task_headers(task, &iser_task->desc);
    239	if (ret) {
    240		iser_err("Failed to init task %p, err = %d\n",
    241			 iser_task, ret);
    242		return ret;
    243	}
    244
    245	/* mgmt task */
    246	if (!task->sc)
    247		return 0;
    248
    249	iser_task->command_sent = 0;
    250	iser_task_rdma_init(iser_task);
    251	iser_task->sc = task->sc;
    252
    253	return 0;
    254}
    255
    256/**
    257 * iscsi_iser_mtask_xmit() - xmit management (immediate) task
    258 * @conn: iscsi connection
    259 * @task: task management task
    260 *
    261 * Notes:
    262 *	The function can return -EAGAIN in which case caller must
    263 *	call it again later, or recover. '0' return code means successful
    264 *	xmit.
    265 *
    266 **/
    267static int iscsi_iser_mtask_xmit(struct iscsi_conn *conn,
    268				 struct iscsi_task *task)
    269{
    270	int error = 0;
    271
    272	iser_dbg("mtask xmit [cid %d itt 0x%x]\n", conn->id, task->itt);
    273
    274	error = iser_send_control(conn, task);
    275
    276	/* since iser xmits control with zero copy, tasks can not be recycled
    277	 * right after sending them.
    278	 * The recycling scheme is based on whether a response is expected
    279	 * - if yes, the task is recycled at iscsi_complete_pdu
    280	 * - if no,  the task is recycled at iser_snd_completion
    281	 */
    282	return error;
    283}
    284
    285static int iscsi_iser_task_xmit_unsol_data(struct iscsi_conn *conn,
    286					   struct iscsi_task *task)
    287{
    288	struct iscsi_r2t_info *r2t = &task->unsol_r2t;
    289	struct iscsi_data hdr;
    290	int error = 0;
    291
    292	/* Send data-out PDUs while there's still unsolicited data to send */
    293	while (iscsi_task_has_unsol_data(task)) {
    294		iscsi_prep_data_out_pdu(task, r2t, &hdr);
    295		iser_dbg("Sending data-out: itt 0x%x, data count %d\n",
    296			   hdr.itt, r2t->data_count);
    297
    298		/* the buffer description has been passed with the command */
    299		/* Send the command */
    300		error = iser_send_data_out(conn, task, &hdr);
    301		if (error) {
    302			r2t->datasn--;
    303			goto iscsi_iser_task_xmit_unsol_data_exit;
    304		}
    305		r2t->sent += r2t->data_count;
    306		iser_dbg("Need to send %d more as data-out PDUs\n",
    307			   r2t->data_length - r2t->sent);
    308	}
    309
    310iscsi_iser_task_xmit_unsol_data_exit:
    311	return error;
    312}
    313
    314/**
    315 * iscsi_iser_task_xmit() - xmit iscsi-iser task
    316 * @task: iscsi task
    317 *
    318 * Return: zero on success or escalates $error on failure.
    319 */
    320static int iscsi_iser_task_xmit(struct iscsi_task *task)
    321{
    322	struct iscsi_conn *conn = task->conn;
    323	struct iscsi_iser_task *iser_task = task->dd_data;
    324	int error = 0;
    325
    326	if (!task->sc)
    327		return iscsi_iser_mtask_xmit(conn, task);
    328
    329	if (task->sc->sc_data_direction == DMA_TO_DEVICE) {
    330		BUG_ON(scsi_bufflen(task->sc) == 0);
    331
    332		iser_dbg("cmd [itt %x total %d imm %d unsol_data %d\n",
    333			   task->itt, scsi_bufflen(task->sc),
    334			   task->imm_count, task->unsol_r2t.data_length);
    335	}
    336
    337	iser_dbg("ctask xmit [cid %d itt 0x%x]\n",
    338		   conn->id, task->itt);
    339
    340	/* Send the cmd PDU */
    341	if (!iser_task->command_sent) {
    342		error = iser_send_command(conn, task);
    343		if (error)
    344			goto iscsi_iser_task_xmit_exit;
    345		iser_task->command_sent = 1;
    346	}
    347
    348	/* Send unsolicited data-out PDU(s) if necessary */
    349	if (iscsi_task_has_unsol_data(task))
    350		error = iscsi_iser_task_xmit_unsol_data(conn, task);
    351
    352 iscsi_iser_task_xmit_exit:
    353	return error;
    354}
    355
    356/**
    357 * iscsi_iser_cleanup_task() - cleanup an iscsi-iser task
    358 * @task: iscsi task
    359 *
    360 * Notes: In case the RDMA device is already NULL (might have
    361 *        been removed in DEVICE_REMOVAL CM event it will bail-out
    362 *        without doing dma unmapping.
    363 */
    364static void iscsi_iser_cleanup_task(struct iscsi_task *task)
    365{
    366	struct iscsi_iser_task *iser_task = task->dd_data;
    367	struct iser_tx_desc *tx_desc = &iser_task->desc;
    368	struct iser_conn *iser_conn = task->conn->dd_data;
    369	struct iser_device *device = iser_conn->ib_conn.device;
    370
    371	/* DEVICE_REMOVAL event might have already released the device */
    372	if (!device)
    373		return;
    374
    375	if (likely(tx_desc->mapped)) {
    376		ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
    377				    ISER_HEADERS_LEN, DMA_TO_DEVICE);
    378		tx_desc->mapped = false;
    379	}
    380
    381	/* mgmt tasks do not need special cleanup */
    382	if (!task->sc)
    383		return;
    384
    385	if (iser_task->status == ISER_TASK_STATUS_STARTED) {
    386		iser_task->status = ISER_TASK_STATUS_COMPLETED;
    387		iser_task_rdma_finalize(iser_task);
    388	}
    389}
    390
    391/**
    392 * iscsi_iser_check_protection() - check protection information status of task.
    393 * @task:     iscsi task
    394 * @sector:   error sector if exsists (output)
    395 *
    396 * Return: zero if no data-integrity errors have occured
    397 *         0x1: data-integrity error occured in the guard-block
    398 *         0x2: data-integrity error occured in the reference tag
    399 *         0x3: data-integrity error occured in the application tag
    400 *
    401 *         In addition the error sector is marked.
    402 */
    403static u8 iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector)
    404{
    405	struct iscsi_iser_task *iser_task = task->dd_data;
    406	enum iser_data_dir dir = iser_task->dir[ISER_DIR_IN] ?
    407					ISER_DIR_IN : ISER_DIR_OUT;
    408
    409	return iser_check_task_pi_status(iser_task, dir, sector);
    410}
    411
    412/**
    413 * iscsi_iser_conn_create() - create a new iscsi-iser connection
    414 * @cls_session: iscsi class connection
    415 * @conn_idx:    connection index within the session (for MCS)
    416 *
    417 * Return: iscsi_cls_conn when iscsi_conn_setup succeeds or NULL
    418 *         otherwise.
    419 */
    420static struct iscsi_cls_conn *
    421iscsi_iser_conn_create(struct iscsi_cls_session *cls_session,
    422		       uint32_t conn_idx)
    423{
    424	struct iscsi_conn *conn;
    425	struct iscsi_cls_conn *cls_conn;
    426
    427	cls_conn = iscsi_conn_setup(cls_session, 0, conn_idx);
    428	if (!cls_conn)
    429		return NULL;
    430	conn = cls_conn->dd_data;
    431
    432	/*
    433	 * due to issues with the login code re iser sematics
    434	 * this not set in iscsi_conn_setup - FIXME
    435	 */
    436	conn->max_recv_dlength = ISER_RECV_DATA_SEG_LEN;
    437
    438	return cls_conn;
    439}
    440
    441/**
    442 * iscsi_iser_conn_bind() - bind iscsi and iser connection structures
    443 * @cls_session:     iscsi class session
    444 * @cls_conn:        iscsi class connection
    445 * @transport_eph:   transport end-point handle
    446 * @is_leading:      indicate if this is the session leading connection (MCS)
    447 *
    448 * Return: zero on success, $error if iscsi_conn_bind fails and
    449 *         -EINVAL in case end-point doesn't exsits anymore or iser connection
    450 *         state is not UP (teardown already started).
    451 */
    452static int iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
    453				struct iscsi_cls_conn *cls_conn,
    454				uint64_t transport_eph, int is_leading)
    455{
    456	struct iscsi_conn *conn = cls_conn->dd_data;
    457	struct iser_conn *iser_conn;
    458	struct iscsi_endpoint *ep;
    459	int error;
    460
    461	error = iscsi_conn_bind(cls_session, cls_conn, is_leading);
    462	if (error)
    463		return error;
    464
    465	/* the transport ep handle comes from user space so it must be
    466	 * verified against the global ib connections list */
    467	ep = iscsi_lookup_endpoint(transport_eph);
    468	if (!ep) {
    469		iser_err("can't bind eph %llx\n",
    470			 (unsigned long long)transport_eph);
    471		return -EINVAL;
    472	}
    473	iser_conn = ep->dd_data;
    474
    475	mutex_lock(&iser_conn->state_mutex);
    476	if (iser_conn->state != ISER_CONN_UP) {
    477		error = -EINVAL;
    478		iser_err("iser_conn %p state is %d, teardown started\n",
    479			 iser_conn, iser_conn->state);
    480		goto out;
    481	}
    482
    483	error = iser_alloc_rx_descriptors(iser_conn, conn->session);
    484	if (error)
    485		goto out;
    486
    487	/* binds the iSER connection retrieved from the previously
    488	 * connected ep_handle to the iSCSI layer connection. exchanges
    489	 * connection pointers */
    490	iser_info("binding iscsi conn %p to iser_conn %p\n", conn, iser_conn);
    491
    492	conn->dd_data = iser_conn;
    493	iser_conn->iscsi_conn = conn;
    494
    495out:
    496	iscsi_put_endpoint(ep);
    497	mutex_unlock(&iser_conn->state_mutex);
    498	return error;
    499}
    500
    501/**
    502 * iscsi_iser_conn_start() - start iscsi-iser connection
    503 * @cls_conn: iscsi class connection
    504 *
    505 * Notes: Here iser intialize (or re-initialize) stop_completion as
    506 *        from this point iscsi must call conn_stop in session/connection
    507 *        teardown so iser transport must wait for it.
    508 */
    509static int iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn)
    510{
    511	struct iscsi_conn *iscsi_conn;
    512	struct iser_conn *iser_conn;
    513
    514	iscsi_conn = cls_conn->dd_data;
    515	iser_conn = iscsi_conn->dd_data;
    516	reinit_completion(&iser_conn->stop_completion);
    517
    518	return iscsi_conn_start(cls_conn);
    519}
    520
    521/**
    522 * iscsi_iser_conn_stop() - stop iscsi-iser connection
    523 * @cls_conn:  iscsi class connection
    524 * @flag:      indicate if recover or terminate (passed as is)
    525 *
    526 * Notes: Calling iscsi_conn_stop might theoretically race with
    527 *        DEVICE_REMOVAL event and dereference a previously freed RDMA device
    528 *        handle, so we call it under iser the state lock to protect against
    529 *        this kind of race.
    530 */
    531static void iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
    532{
    533	struct iscsi_conn *conn = cls_conn->dd_data;
    534	struct iser_conn *iser_conn = conn->dd_data;
    535
    536	iser_info("stopping iscsi_conn: %p, iser_conn: %p\n", conn, iser_conn);
    537
    538	/*
    539	 * Userspace may have goofed up and not bound the connection or
    540	 * might have only partially setup the connection.
    541	 */
    542	if (iser_conn) {
    543		mutex_lock(&iser_conn->state_mutex);
    544		mutex_lock(&unbind_iser_conn_mutex);
    545		iser_conn_terminate(iser_conn);
    546		iscsi_conn_stop(cls_conn, flag);
    547
    548		/* unbind */
    549		iser_conn->iscsi_conn = NULL;
    550		conn->dd_data = NULL;
    551		mutex_unlock(&unbind_iser_conn_mutex);
    552
    553		complete(&iser_conn->stop_completion);
    554		mutex_unlock(&iser_conn->state_mutex);
    555	} else {
    556		iscsi_conn_stop(cls_conn, flag);
    557	}
    558}
    559
    560/**
    561 * iscsi_iser_session_destroy() - destroy iscsi-iser session
    562 * @cls_session: iscsi class session
    563 *
    564 * Removes and free iscsi host.
    565 */
    566static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
    567{
    568	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
    569
    570	iscsi_session_teardown(cls_session);
    571	iscsi_host_remove(shost);
    572	iscsi_host_free(shost);
    573}
    574
    575static inline unsigned int iser_dif_prot_caps(int prot_caps)
    576{
    577	int ret = 0;
    578
    579	if (prot_caps & IB_PROT_T10DIF_TYPE_1)
    580		ret |= SHOST_DIF_TYPE1_PROTECTION |
    581		       SHOST_DIX_TYPE0_PROTECTION |
    582		       SHOST_DIX_TYPE1_PROTECTION;
    583	if (prot_caps & IB_PROT_T10DIF_TYPE_2)
    584		ret |= SHOST_DIF_TYPE2_PROTECTION |
    585		       SHOST_DIX_TYPE2_PROTECTION;
    586	if (prot_caps & IB_PROT_T10DIF_TYPE_3)
    587		ret |= SHOST_DIF_TYPE3_PROTECTION |
    588		       SHOST_DIX_TYPE3_PROTECTION;
    589
    590	return ret;
    591}
    592
    593/**
    594 * iscsi_iser_session_create() - create an iscsi-iser session
    595 * @ep:             iscsi end-point handle
    596 * @cmds_max:       maximum commands in this session
    597 * @qdepth:         session command queue depth
    598 * @initial_cmdsn:  initiator command sequnce number
    599 *
    600 * Allocates and adds a scsi host, expose DIF supprot if
    601 * exists, and sets up an iscsi session.
    602 */
    603static struct iscsi_cls_session *
    604iscsi_iser_session_create(struct iscsi_endpoint *ep,
    605			  uint16_t cmds_max, uint16_t qdepth,
    606			  uint32_t initial_cmdsn)
    607{
    608	struct iscsi_cls_session *cls_session;
    609	struct Scsi_Host *shost;
    610	struct iser_conn *iser_conn = NULL;
    611	struct ib_conn *ib_conn;
    612	struct ib_device *ib_dev;
    613	u32 max_fr_sectors;
    614
    615	shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0);
    616	if (!shost)
    617		return NULL;
    618	shost->transportt = iscsi_iser_scsi_transport;
    619	shost->cmd_per_lun = qdepth;
    620	shost->max_lun = iscsi_max_lun;
    621	shost->max_id = 0;
    622	shost->max_channel = 0;
    623	shost->max_cmd_len = 16;
    624
    625	/*
    626	 * older userspace tools (before 2.0-870) did not pass us
    627	 * the leading conn's ep so this will be NULL;
    628	 */
    629	if (ep) {
    630		iser_conn = ep->dd_data;
    631		shost->sg_tablesize = iser_conn->scsi_sg_tablesize;
    632		shost->can_queue = min_t(u16, cmds_max, iser_conn->max_cmds);
    633
    634		mutex_lock(&iser_conn->state_mutex);
    635		if (iser_conn->state != ISER_CONN_UP) {
    636			iser_err("iser conn %p already started teardown\n",
    637				 iser_conn);
    638			mutex_unlock(&iser_conn->state_mutex);
    639			goto free_host;
    640		}
    641
    642		ib_conn = &iser_conn->ib_conn;
    643		ib_dev = ib_conn->device->ib_device;
    644		if (ib_conn->pi_support) {
    645			u32 sig_caps = ib_dev->attrs.sig_prot_cap;
    646
    647			shost->sg_prot_tablesize = shost->sg_tablesize;
    648			scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps));
    649			scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP |
    650						   SHOST_DIX_GUARD_CRC);
    651		}
    652
    653		if (!(ib_dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG))
    654			shost->virt_boundary_mask = SZ_4K - 1;
    655
    656		if (iscsi_host_add(shost, ib_dev->dev.parent)) {
    657			mutex_unlock(&iser_conn->state_mutex);
    658			goto free_host;
    659		}
    660		mutex_unlock(&iser_conn->state_mutex);
    661	} else {
    662		shost->can_queue = min_t(u16, cmds_max, ISER_DEF_XMIT_CMDS_MAX);
    663		if (iscsi_host_add(shost, NULL))
    664			goto free_host;
    665	}
    666
    667	max_fr_sectors = (shost->sg_tablesize * PAGE_SIZE) >> 9;
    668	shost->max_sectors = min(iser_max_sectors, max_fr_sectors);
    669
    670	iser_dbg("iser_conn %p, sg_tablesize %u, max_sectors %u\n",
    671		 iser_conn, shost->sg_tablesize,
    672		 shost->max_sectors);
    673
    674	if (shost->max_sectors < iser_max_sectors)
    675		iser_warn("max_sectors was reduced from %u to %u\n",
    676			  iser_max_sectors, shost->max_sectors);
    677
    678	cls_session = iscsi_session_setup(&iscsi_iser_transport, shost,
    679					  shost->can_queue, 0,
    680					  sizeof(struct iscsi_iser_task),
    681					  initial_cmdsn, 0);
    682	if (!cls_session)
    683		goto remove_host;
    684
    685	return cls_session;
    686
    687remove_host:
    688	iscsi_host_remove(shost);
    689free_host:
    690	iscsi_host_free(shost);
    691	return NULL;
    692}
    693
    694static int iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn,
    695				enum iscsi_param param, char *buf, int buflen)
    696{
    697	int value;
    698
    699	switch (param) {
    700	case ISCSI_PARAM_MAX_RECV_DLENGTH:
    701		/* TBD */
    702		break;
    703	case ISCSI_PARAM_HDRDGST_EN:
    704		sscanf(buf, "%d", &value);
    705		if (value) {
    706			iser_err("DataDigest wasn't negotiated to None\n");
    707			return -EPROTO;
    708		}
    709		break;
    710	case ISCSI_PARAM_DATADGST_EN:
    711		sscanf(buf, "%d", &value);
    712		if (value) {
    713			iser_err("DataDigest wasn't negotiated to None\n");
    714			return -EPROTO;
    715		}
    716		break;
    717	case ISCSI_PARAM_IFMARKER_EN:
    718		sscanf(buf, "%d", &value);
    719		if (value) {
    720			iser_err("IFMarker wasn't negotiated to No\n");
    721			return -EPROTO;
    722		}
    723		break;
    724	case ISCSI_PARAM_OFMARKER_EN:
    725		sscanf(buf, "%d", &value);
    726		if (value) {
    727			iser_err("OFMarker wasn't negotiated to No\n");
    728			return -EPROTO;
    729		}
    730		break;
    731	default:
    732		return iscsi_set_param(cls_conn, param, buf, buflen);
    733	}
    734
    735	return 0;
    736}
    737
    738/**
    739 * iscsi_iser_conn_get_stats() - get iscsi connection statistics
    740 * @cls_conn:    iscsi class connection
    741 * @stats:       iscsi stats to output
    742 *
    743 * Output connection statistics.
    744 */
    745static void iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn,
    746				      struct iscsi_stats *stats)
    747{
    748	struct iscsi_conn *conn = cls_conn->dd_data;
    749
    750	stats->txdata_octets = conn->txdata_octets;
    751	stats->rxdata_octets = conn->rxdata_octets;
    752	stats->scsicmd_pdus = conn->scsicmd_pdus_cnt;
    753	stats->dataout_pdus = conn->dataout_pdus_cnt;
    754	stats->scsirsp_pdus = conn->scsirsp_pdus_cnt;
    755	stats->datain_pdus = conn->datain_pdus_cnt; /* always 0 */
    756	stats->r2t_pdus = conn->r2t_pdus_cnt; /* always 0 */
    757	stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt;
    758	stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
    759	stats->custom_length = 0;
    760}
    761
    762static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep,
    763				   enum iscsi_param param, char *buf)
    764{
    765	struct iser_conn *iser_conn = ep->dd_data;
    766
    767	switch (param) {
    768	case ISCSI_PARAM_CONN_PORT:
    769	case ISCSI_PARAM_CONN_ADDRESS:
    770		if (!iser_conn || !iser_conn->ib_conn.cma_id)
    771			return -ENOTCONN;
    772
    773		return iscsi_conn_get_addr_param((struct sockaddr_storage *)
    774				&iser_conn->ib_conn.cma_id->route.addr.dst_addr,
    775				param, buf);
    776	default:
    777		break;
    778	}
    779	return -ENOSYS;
    780}
    781
    782/**
    783 * iscsi_iser_ep_connect() - Initiate iSER connection establishment
    784 * @shost:          scsi_host
    785 * @dst_addr:       destination address
    786 * @non_blocking:   indicate if routine can block
    787 *
    788 * Allocate an iscsi endpoint, an iser_conn structure and bind them.
    789 * After that start RDMA connection establishment via rdma_cm. We
    790 * don't allocate iser_conn embedded in iscsi_endpoint since in teardown
    791 * the endpoint will be destroyed at ep_disconnect while iser_conn will
    792 * cleanup its resources asynchronuously.
    793 *
    794 * Return: iscsi_endpoint created by iscsi layer or ERR_PTR(error)
    795 *         if fails.
    796 */
    797static struct iscsi_endpoint *iscsi_iser_ep_connect(struct Scsi_Host *shost,
    798						    struct sockaddr *dst_addr,
    799						    int non_blocking)
    800{
    801	int err;
    802	struct iser_conn *iser_conn;
    803	struct iscsi_endpoint *ep;
    804
    805	ep = iscsi_create_endpoint(0);
    806	if (!ep)
    807		return ERR_PTR(-ENOMEM);
    808
    809	iser_conn = kzalloc(sizeof(*iser_conn), GFP_KERNEL);
    810	if (!iser_conn) {
    811		err = -ENOMEM;
    812		goto failure;
    813	}
    814
    815	ep->dd_data = iser_conn;
    816	iser_conn->ep = ep;
    817	iser_conn_init(iser_conn);
    818
    819	err = iser_connect(iser_conn, NULL, dst_addr, non_blocking);
    820	if (err)
    821		goto failure;
    822
    823	return ep;
    824failure:
    825	iscsi_destroy_endpoint(ep);
    826	return ERR_PTR(err);
    827}
    828
    829/**
    830 * iscsi_iser_ep_poll() - poll for iser connection establishment to complete
    831 * @ep:            iscsi endpoint (created at ep_connect)
    832 * @timeout_ms:    polling timeout allowed in ms.
    833 *
    834 * This routine boils down to waiting for up_completion signaling
    835 * that cma_id got CONNECTED event.
    836 *
    837 * Return: 1 if succeeded in connection establishment, 0 if timeout expired
    838 *         (libiscsi will retry will kick in) or -1 if interrupted by signal
    839 *         or more likely iser connection state transitioned to TEMINATING or
    840 *         DOWN during the wait period.
    841 */
    842static int iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
    843{
    844	struct iser_conn *iser_conn = ep->dd_data;
    845	int rc;
    846
    847	rc = wait_for_completion_interruptible_timeout(&iser_conn->up_completion,
    848						       msecs_to_jiffies(timeout_ms));
    849	/* if conn establishment failed, return error code to iscsi */
    850	if (rc == 0) {
    851		mutex_lock(&iser_conn->state_mutex);
    852		if (iser_conn->state == ISER_CONN_TERMINATING ||
    853		    iser_conn->state == ISER_CONN_DOWN)
    854			rc = -1;
    855		mutex_unlock(&iser_conn->state_mutex);
    856	}
    857
    858	iser_info("iser conn %p rc = %d\n", iser_conn, rc);
    859
    860	if (rc > 0)
    861		return 1; /* success, this is the equivalent of EPOLLOUT */
    862	else if (!rc)
    863		return 0; /* timeout */
    864	else
    865		return rc; /* signal */
    866}
    867
    868/**
    869 * iscsi_iser_ep_disconnect() - Initiate connection teardown process
    870 * @ep:    iscsi endpoint handle
    871 *
    872 * This routine is not blocked by iser and RDMA termination process
    873 * completion as we queue a deffered work for iser/RDMA destruction
    874 * and cleanup or actually call it immediately in case we didn't pass
    875 * iscsi conn bind/start stage, thus it is safe.
    876 */
    877static void iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep)
    878{
    879	struct iser_conn *iser_conn = ep->dd_data;
    880
    881	iser_info("ep %p iser conn %p\n", ep, iser_conn);
    882
    883	mutex_lock(&iser_conn->state_mutex);
    884	iser_conn_terminate(iser_conn);
    885
    886	/*
    887	 * if iser_conn and iscsi_conn are bound, we must wait for
    888	 * iscsi_conn_stop and flush errors completion before freeing
    889	 * the iser resources. Otherwise we are safe to free resources
    890	 * immediately.
    891	 */
    892	if (iser_conn->iscsi_conn) {
    893		INIT_WORK(&iser_conn->release_work, iser_release_work);
    894		queue_work(release_wq, &iser_conn->release_work);
    895		mutex_unlock(&iser_conn->state_mutex);
    896	} else {
    897		iser_conn->state = ISER_CONN_DOWN;
    898		mutex_unlock(&iser_conn->state_mutex);
    899		iser_conn_release(iser_conn);
    900	}
    901
    902	iscsi_destroy_endpoint(ep);
    903}
    904
    905static umode_t iser_attr_is_visible(int param_type, int param)
    906{
    907	switch (param_type) {
    908	case ISCSI_HOST_PARAM:
    909		switch (param) {
    910		case ISCSI_HOST_PARAM_NETDEV_NAME:
    911		case ISCSI_HOST_PARAM_HWADDRESS:
    912		case ISCSI_HOST_PARAM_INITIATOR_NAME:
    913			return S_IRUGO;
    914		default:
    915			return 0;
    916		}
    917	case ISCSI_PARAM:
    918		switch (param) {
    919		case ISCSI_PARAM_MAX_RECV_DLENGTH:
    920		case ISCSI_PARAM_MAX_XMIT_DLENGTH:
    921		case ISCSI_PARAM_HDRDGST_EN:
    922		case ISCSI_PARAM_DATADGST_EN:
    923		case ISCSI_PARAM_CONN_ADDRESS:
    924		case ISCSI_PARAM_CONN_PORT:
    925		case ISCSI_PARAM_EXP_STATSN:
    926		case ISCSI_PARAM_PERSISTENT_ADDRESS:
    927		case ISCSI_PARAM_PERSISTENT_PORT:
    928		case ISCSI_PARAM_PING_TMO:
    929		case ISCSI_PARAM_RECV_TMO:
    930		case ISCSI_PARAM_INITIAL_R2T_EN:
    931		case ISCSI_PARAM_MAX_R2T:
    932		case ISCSI_PARAM_IMM_DATA_EN:
    933		case ISCSI_PARAM_FIRST_BURST:
    934		case ISCSI_PARAM_MAX_BURST:
    935		case ISCSI_PARAM_PDU_INORDER_EN:
    936		case ISCSI_PARAM_DATASEQ_INORDER_EN:
    937		case ISCSI_PARAM_TARGET_NAME:
    938		case ISCSI_PARAM_TPGT:
    939		case ISCSI_PARAM_USERNAME:
    940		case ISCSI_PARAM_PASSWORD:
    941		case ISCSI_PARAM_USERNAME_IN:
    942		case ISCSI_PARAM_PASSWORD_IN:
    943		case ISCSI_PARAM_FAST_ABORT:
    944		case ISCSI_PARAM_ABORT_TMO:
    945		case ISCSI_PARAM_LU_RESET_TMO:
    946		case ISCSI_PARAM_TGT_RESET_TMO:
    947		case ISCSI_PARAM_IFACE_NAME:
    948		case ISCSI_PARAM_INITIATOR_NAME:
    949		case ISCSI_PARAM_DISCOVERY_SESS:
    950			return S_IRUGO;
    951		default:
    952			return 0;
    953		}
    954	}
    955
    956	return 0;
    957}
    958
    959static struct scsi_host_template iscsi_iser_sht = {
    960	.module                 = THIS_MODULE,
    961	.name                   = "iSCSI Initiator over iSER",
    962	.queuecommand           = iscsi_queuecommand,
    963	.change_queue_depth	= scsi_change_queue_depth,
    964	.sg_tablesize           = ISCSI_ISER_DEF_SG_TABLESIZE,
    965	.cmd_per_lun            = ISER_DEF_CMD_PER_LUN,
    966	.eh_timed_out		= iscsi_eh_cmd_timed_out,
    967	.eh_abort_handler       = iscsi_eh_abort,
    968	.eh_device_reset_handler= iscsi_eh_device_reset,
    969	.eh_target_reset_handler = iscsi_eh_recover_target,
    970	.target_alloc		= iscsi_target_alloc,
    971	.proc_name              = "iscsi_iser",
    972	.this_id                = -1,
    973	.track_queue_depth	= 1,
    974	.cmd_size		= sizeof(struct iscsi_cmd),
    975};
    976
    977static struct iscsi_transport iscsi_iser_transport = {
    978	.owner                  = THIS_MODULE,
    979	.name                   = "iser",
    980	.caps                   = CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_TEXT_NEGO,
    981	/* session management */
    982	.create_session         = iscsi_iser_session_create,
    983	.destroy_session        = iscsi_iser_session_destroy,
    984	/* connection management */
    985	.create_conn            = iscsi_iser_conn_create,
    986	.bind_conn              = iscsi_iser_conn_bind,
    987	.unbind_conn		= iscsi_conn_unbind,
    988	.destroy_conn           = iscsi_conn_teardown,
    989	.attr_is_visible	= iser_attr_is_visible,
    990	.set_param              = iscsi_iser_set_param,
    991	.get_conn_param		= iscsi_conn_get_param,
    992	.get_ep_param		= iscsi_iser_get_ep_param,
    993	.get_session_param	= iscsi_session_get_param,
    994	.start_conn             = iscsi_iser_conn_start,
    995	.stop_conn              = iscsi_iser_conn_stop,
    996	/* iscsi host params */
    997	.get_host_param		= iscsi_host_get_param,
    998	.set_host_param		= iscsi_host_set_param,
    999	/* IO */
   1000	.send_pdu		= iscsi_conn_send_pdu,
   1001	.get_stats		= iscsi_iser_conn_get_stats,
   1002	.init_task		= iscsi_iser_task_init,
   1003	.xmit_task		= iscsi_iser_task_xmit,
   1004	.cleanup_task		= iscsi_iser_cleanup_task,
   1005	.alloc_pdu		= iscsi_iser_pdu_alloc,
   1006	.check_protection	= iscsi_iser_check_protection,
   1007	/* recovery */
   1008	.session_recovery_timedout = iscsi_session_recovery_timedout,
   1009
   1010	.ep_connect             = iscsi_iser_ep_connect,
   1011	.ep_poll                = iscsi_iser_ep_poll,
   1012	.ep_disconnect          = iscsi_iser_ep_disconnect
   1013};
   1014
   1015static int __init iser_init(void)
   1016{
   1017	int err;
   1018
   1019	iser_dbg("Starting iSER datamover...\n");
   1020
   1021	memset(&ig, 0, sizeof(struct iser_global));
   1022
   1023	ig.desc_cache = kmem_cache_create("iser_descriptors",
   1024					  sizeof(struct iser_tx_desc),
   1025					  0, SLAB_HWCACHE_ALIGN,
   1026					  NULL);
   1027	if (ig.desc_cache == NULL)
   1028		return -ENOMEM;
   1029
   1030	/* device init is called only after the first addr resolution */
   1031	mutex_init(&ig.device_list_mutex);
   1032	INIT_LIST_HEAD(&ig.device_list);
   1033	mutex_init(&ig.connlist_mutex);
   1034	INIT_LIST_HEAD(&ig.connlist);
   1035
   1036	release_wq = alloc_workqueue("release workqueue", 0, 0);
   1037	if (!release_wq) {
   1038		iser_err("failed to allocate release workqueue\n");
   1039		err = -ENOMEM;
   1040		goto err_alloc_wq;
   1041	}
   1042
   1043	iscsi_iser_scsi_transport = iscsi_register_transport(
   1044							&iscsi_iser_transport);
   1045	if (!iscsi_iser_scsi_transport) {
   1046		iser_err("iscsi_register_transport failed\n");
   1047		err = -EINVAL;
   1048		goto err_reg;
   1049	}
   1050
   1051	return 0;
   1052
   1053err_reg:
   1054	destroy_workqueue(release_wq);
   1055err_alloc_wq:
   1056	kmem_cache_destroy(ig.desc_cache);
   1057
   1058	return err;
   1059}
   1060
   1061static void __exit iser_exit(void)
   1062{
   1063	struct iser_conn *iser_conn, *n;
   1064	int connlist_empty;
   1065
   1066	iser_dbg("Removing iSER datamover...\n");
   1067	destroy_workqueue(release_wq);
   1068
   1069	mutex_lock(&ig.connlist_mutex);
   1070	connlist_empty = list_empty(&ig.connlist);
   1071	mutex_unlock(&ig.connlist_mutex);
   1072
   1073	if (!connlist_empty) {
   1074		iser_err("Error cleanup stage completed but we still have iser "
   1075			 "connections, destroying them anyway\n");
   1076		list_for_each_entry_safe(iser_conn, n, &ig.connlist,
   1077					 conn_list) {
   1078			iser_conn_release(iser_conn);
   1079		}
   1080	}
   1081
   1082	iscsi_unregister_transport(&iscsi_iser_transport);
   1083	kmem_cache_destroy(ig.desc_cache);
   1084}
   1085
   1086module_init(iser_init);
   1087module_exit(iser_exit);