cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

transport_rdma.c (60711B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 *   Copyright (C) 2017, Microsoft Corporation.
      4 *   Copyright (C) 2018, LG Electronics.
      5 *
      6 *   Author(s): Long Li <longli@microsoft.com>,
      7 *		Hyunchul Lee <hyc.lee@gmail.com>
      8 */
      9
     10#define SUBMOD_NAME	"smb_direct"
     11
     12#include <linux/kthread.h>
     13#include <linux/list.h>
     14#include <linux/mempool.h>
     15#include <linux/highmem.h>
     16#include <linux/scatterlist.h>
     17#include <rdma/ib_verbs.h>
     18#include <rdma/rdma_cm.h>
     19#include <rdma/rw.h>
     20
     21#include "glob.h"
     22#include "connection.h"
     23#include "smb_common.h"
     24#include "smbstatus.h"
     25#include "transport_rdma.h"
     26
     27#define SMB_DIRECT_PORT_IWARP		5445
     28#define SMB_DIRECT_PORT_INFINIBAND	445
     29
     30#define SMB_DIRECT_VERSION_LE		cpu_to_le16(0x0100)
     31
     32/* SMB_DIRECT negotiation timeout in seconds */
     33#define SMB_DIRECT_NEGOTIATE_TIMEOUT		120
     34
     35#define SMB_DIRECT_MAX_SEND_SGES		8
     36#define SMB_DIRECT_MAX_RECV_SGES		1
     37
     38/*
      39 * Default maximum number of outstanding RDMA read/write operations on this connection.
      40 * This value may be decreased during QP creation, depending on hardware limits.
     41 */
     42#define SMB_DIRECT_CM_INITIATOR_DEPTH		8
     43
     44/* Maximum number of retries on data transfer operations */
     45#define SMB_DIRECT_CM_RETRY			6
     46/* No need to retry on Receiver Not Ready since SMB_DIRECT manages credits */
     47#define SMB_DIRECT_CM_RNR_RETRY		0
     48
     49/*
     50 * User configurable initial values per SMB_DIRECT transport connection
     51 * as defined in [MS-SMBD] 3.1.1.1
      52 * These may change after SMB_DIRECT negotiation.
     53 */
     54
      55/* Use port 445 as the SMB Direct port by default */
     56static int smb_direct_port = SMB_DIRECT_PORT_INFINIBAND;
     57
      58/* The maximum number of credits the local peer will grant to the remote peer */
     59static int smb_direct_receive_credit_max = 255;
     60
      61/* The number of credits the remote peer requests from the local peer */
     62static int smb_direct_send_credit_target = 255;
     63
      64/*  The maximum single-message size that can be sent to the remote peer */
     65static int smb_direct_max_send_size = 8192;
     66
     67/*  The maximum fragmented upper-layer payload receive size supported */
     68static int smb_direct_max_fragmented_recv_size = 1024 * 1024;
     69
     70/*  The maximum single-message size which can be received */
     71static int smb_direct_max_receive_size = 8192;
     72
     73static int smb_direct_max_read_write_size = SMBD_DEFAULT_IOSIZE;
     74
     75static LIST_HEAD(smb_direct_device_list);
     76static DEFINE_RWLOCK(smb_direct_device_lock);
     77
     78struct smb_direct_device {
     79	struct ib_device	*ib_dev;
     80	struct list_head	list;
     81};
     82
     83static struct smb_direct_listener {
     84	struct rdma_cm_id	*cm_id;
     85} smb_direct_listener;
     86
     87static struct workqueue_struct *smb_direct_wq;
     88
     89enum smb_direct_status {
     90	SMB_DIRECT_CS_NEW = 0,
     91	SMB_DIRECT_CS_CONNECTED,
     92	SMB_DIRECT_CS_DISCONNECTING,
     93	SMB_DIRECT_CS_DISCONNECTED,
     94};
     95
     96struct smb_direct_transport {
     97	struct ksmbd_transport	transport;
     98
     99	enum smb_direct_status	status;
    100	bool			full_packet_received;
    101	wait_queue_head_t	wait_status;
    102
    103	struct rdma_cm_id	*cm_id;
    104	struct ib_cq		*send_cq;
    105	struct ib_cq		*recv_cq;
    106	struct ib_pd		*pd;
    107	struct ib_qp		*qp;
    108
    109	int			max_send_size;
    110	int			max_recv_size;
    111	int			max_fragmented_send_size;
    112	int			max_fragmented_recv_size;
    113	int			max_rdma_rw_size;
    114
    115	spinlock_t		reassembly_queue_lock;
    116	struct list_head	reassembly_queue;
    117	int			reassembly_data_length;
    118	int			reassembly_queue_length;
    119	int			first_entry_offset;
    120	wait_queue_head_t	wait_reassembly_queue;
    121
    122	spinlock_t		receive_credit_lock;
    123	int			recv_credits;
    124	int			count_avail_recvmsg;
    125	int			recv_credit_max;
    126	int			recv_credit_target;
    127
    128	spinlock_t		recvmsg_queue_lock;
    129	struct list_head	recvmsg_queue;
    130
    131	spinlock_t		empty_recvmsg_queue_lock;
    132	struct list_head	empty_recvmsg_queue;
    133
    134	int			send_credit_target;
    135	atomic_t		send_credits;
    136	spinlock_t		lock_new_recv_credits;
    137	int			new_recv_credits;
    138	int			max_rw_credits;
    139	int			pages_per_rw_credit;
    140	atomic_t		rw_credits;
    141
    142	wait_queue_head_t	wait_send_credits;
    143	wait_queue_head_t	wait_rw_credits;
    144
    145	mempool_t		*sendmsg_mempool;
    146	struct kmem_cache	*sendmsg_cache;
    147	mempool_t		*recvmsg_mempool;
    148	struct kmem_cache	*recvmsg_cache;
    149
    150	wait_queue_head_t	wait_send_pending;
    151	atomic_t		send_pending;
    152
    153	struct delayed_work	post_recv_credits_work;
    154	struct work_struct	send_immediate_work;
    155	struct work_struct	disconnect_work;
    156
    157	bool			negotiation_requested;
    158};
    159
    160#define KSMBD_TRANS(t) ((struct ksmbd_transport *)&((t)->transport))
    161
    162enum {
    163	SMB_DIRECT_MSG_NEGOTIATE_REQ = 0,
    164	SMB_DIRECT_MSG_DATA_TRANSFER
    165};
    166
    167static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops;
    168
    169struct smb_direct_send_ctx {
    170	struct list_head	msg_list;
    171	int			wr_cnt;
    172	bool			need_invalidate_rkey;
    173	unsigned int		remote_key;
    174};
    175
    176struct smb_direct_sendmsg {
    177	struct smb_direct_transport	*transport;
    178	struct ib_send_wr	wr;
    179	struct list_head	list;
    180	int			num_sge;
    181	struct ib_sge		sge[SMB_DIRECT_MAX_SEND_SGES];
    182	struct ib_cqe		cqe;
    183	u8			packet[];
    184};
    185
    186struct smb_direct_recvmsg {
    187	struct smb_direct_transport	*transport;
    188	struct list_head	list;
    189	int			type;
    190	struct ib_sge		sge;
    191	struct ib_cqe		cqe;
    192	bool			first_segment;
    193	u8			packet[];
    194};
    195
    196struct smb_direct_rdma_rw_msg {
    197	struct smb_direct_transport	*t;
    198	struct ib_cqe		cqe;
    199	int			status;
    200	struct completion	*completion;
    201	struct list_head	list;
    202	struct rdma_rw_ctx	rw_ctx;
    203	struct sg_table		sgt;
    204	struct scatterlist	sg_list[];
    205};
    206
    207void init_smbd_max_io_size(unsigned int sz)
    208{
    209	sz = clamp_val(sz, SMBD_MIN_IOSIZE, SMBD_MAX_IOSIZE);
    210	smb_direct_max_read_write_size = sz;
    211}
    212
    213unsigned int get_smbd_max_read_write_size(void)
    214{
    215	return smb_direct_max_read_write_size;
    216}
    217
    218static inline int get_buf_page_count(void *buf, int size)
    219{
    220	return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) -
    221		(uintptr_t)buf / PAGE_SIZE;
    222}
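
get_buf_page_count() counts every page the buffer touches, including partial pages at either end. A minimal user-space sketch of the same arithmetic, assuming 4 KiB pages; the helper name and the sample addresses below are made up for illustration:

#include <assert.h>
#include <stdint.h>

#define SKETCH_PAGE_SIZE 4096UL

static int sketch_buf_page_count(uintptr_t buf, int size)
{
	/* Same formula: one past the last touched page, minus the first touched page. */
	return (buf + size + SKETCH_PAGE_SIZE - 1) / SKETCH_PAGE_SIZE -
		buf / SKETCH_PAGE_SIZE;
}

int main(void)
{
	/* 6000 bytes starting 1000 bytes into a page straddle two pages. */
	assert(sketch_buf_page_count(0x10000 + 1000, 6000) == 2);
	/* A page-aligned 4096-byte buffer fits in exactly one page. */
	assert(sketch_buf_page_count(0x10000, 4096) == 1);
	return 0;
}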
    223
    224static void smb_direct_destroy_pools(struct smb_direct_transport *transport);
    225static void smb_direct_post_recv_credits(struct work_struct *work);
    226static int smb_direct_post_send_data(struct smb_direct_transport *t,
    227				     struct smb_direct_send_ctx *send_ctx,
    228				     struct kvec *iov, int niov,
    229				     int remaining_data_length);
    230
    231static inline struct smb_direct_transport *
    232smb_trans_direct_transfort(struct ksmbd_transport *t)
    233{
    234	return container_of(t, struct smb_direct_transport, transport);
    235}
    236
    237static inline void
    238*smb_direct_recvmsg_payload(struct smb_direct_recvmsg *recvmsg)
    239{
    240	return (void *)recvmsg->packet;
    241}
    242
    243static inline bool is_receive_credit_post_required(int receive_credits,
    244						   int avail_recvmsg_count)
    245{
    246	return receive_credits <= (smb_direct_receive_credit_max >> 3) &&
    247		avail_recvmsg_count >= (receive_credits >> 2);
    248}
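
is_receive_credit_post_required() only schedules a receive refill once the peer is down to an eighth of the maximum receive credits and at least a quarter as many receive buffers are back on the free list. A small stand-alone model using the module default of 255 credits; the sample inputs are hypothetical:

#include <stdio.h>

static const int receive_credit_max = 255;	/* default from this file */

static int post_required(int receive_credits, int avail_recvmsg_count)
{
	return receive_credits <= (receive_credit_max >> 3) &&
		avail_recvmsg_count >= (receive_credits >> 2);
}

int main(void)
{
	/* 31 credits left (255 >> 3) and 8 free buffers: repost receives. */
	printf("%d\n", post_required(31, 8));	/* prints 1 */
	/* Plenty of credits still outstanding: nothing to do yet. */
	printf("%d\n", post_required(200, 50));	/* prints 0 */
	return 0;
}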
    249
    250static struct
    251smb_direct_recvmsg *get_free_recvmsg(struct smb_direct_transport *t)
    252{
    253	struct smb_direct_recvmsg *recvmsg = NULL;
    254
    255	spin_lock(&t->recvmsg_queue_lock);
    256	if (!list_empty(&t->recvmsg_queue)) {
    257		recvmsg = list_first_entry(&t->recvmsg_queue,
    258					   struct smb_direct_recvmsg,
    259					   list);
    260		list_del(&recvmsg->list);
    261	}
    262	spin_unlock(&t->recvmsg_queue_lock);
    263	return recvmsg;
    264}
    265
    266static void put_recvmsg(struct smb_direct_transport *t,
    267			struct smb_direct_recvmsg *recvmsg)
    268{
    269	ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr,
    270			    recvmsg->sge.length, DMA_FROM_DEVICE);
    271
    272	spin_lock(&t->recvmsg_queue_lock);
    273	list_add(&recvmsg->list, &t->recvmsg_queue);
    274	spin_unlock(&t->recvmsg_queue_lock);
    275}
    276
    277static struct
    278smb_direct_recvmsg *get_empty_recvmsg(struct smb_direct_transport *t)
    279{
    280	struct smb_direct_recvmsg *recvmsg = NULL;
    281
    282	spin_lock(&t->empty_recvmsg_queue_lock);
    283	if (!list_empty(&t->empty_recvmsg_queue)) {
    284		recvmsg = list_first_entry(&t->empty_recvmsg_queue,
    285					   struct smb_direct_recvmsg, list);
    286		list_del(&recvmsg->list);
    287	}
    288	spin_unlock(&t->empty_recvmsg_queue_lock);
    289	return recvmsg;
    290}
    291
    292static void put_empty_recvmsg(struct smb_direct_transport *t,
    293			      struct smb_direct_recvmsg *recvmsg)
    294{
    295	ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr,
    296			    recvmsg->sge.length, DMA_FROM_DEVICE);
    297
    298	spin_lock(&t->empty_recvmsg_queue_lock);
    299	list_add_tail(&recvmsg->list, &t->empty_recvmsg_queue);
    300	spin_unlock(&t->empty_recvmsg_queue_lock);
    301}
    302
    303static void enqueue_reassembly(struct smb_direct_transport *t,
    304			       struct smb_direct_recvmsg *recvmsg,
    305			       int data_length)
    306{
    307	spin_lock(&t->reassembly_queue_lock);
    308	list_add_tail(&recvmsg->list, &t->reassembly_queue);
    309	t->reassembly_queue_length++;
    310	/*
    311	 * Make sure reassembly_data_length is updated after list and
    312	 * reassembly_queue_length are updated. On the dequeue side
    313	 * reassembly_data_length is checked without a lock to determine
     314	 * whether reassembly_queue_length and the list are up to date.
    315	 */
    316	virt_wmb();
    317	t->reassembly_data_length += data_length;
    318	spin_unlock(&t->reassembly_queue_lock);
    319}
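
The virt_wmb() above pairs with the virt_rmb() in smb_direct_read(): the reader polls reassembly_data_length without taking the lock, so the list insertion and queue-length update must become visible before the data length does. A minimal user-space model of the same publish/consume ordering with C11 fences; the names are illustrative and not part of the driver:

#include <stdatomic.h>

static int queue_length;		/* stands in for reassembly_queue_length */
static _Atomic int data_length;		/* stands in for reassembly_data_length */

static void publish(int len)
{
	queue_length++;					/* update the queue first */
	atomic_thread_fence(memory_order_release);	/* role of virt_wmb() */
	atomic_fetch_add_explicit(&data_length, len, memory_order_relaxed);
}

static int peek(void)
{
	int len = atomic_load_explicit(&data_length, memory_order_relaxed);

	atomic_thread_fence(memory_order_acquire);	/* role of virt_rmb() */
	return len ? queue_length : 0;	/* queue_length is now safe to read */
}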
    320
    321static struct smb_direct_recvmsg *get_first_reassembly(struct smb_direct_transport *t)
    322{
    323	if (!list_empty(&t->reassembly_queue))
    324		return list_first_entry(&t->reassembly_queue,
    325				struct smb_direct_recvmsg, list);
    326	else
    327		return NULL;
    328}
    329
    330static void smb_direct_disconnect_rdma_work(struct work_struct *work)
    331{
    332	struct smb_direct_transport *t =
    333		container_of(work, struct smb_direct_transport,
    334			     disconnect_work);
    335
    336	if (t->status == SMB_DIRECT_CS_CONNECTED) {
    337		t->status = SMB_DIRECT_CS_DISCONNECTING;
    338		rdma_disconnect(t->cm_id);
    339	}
    340}
    341
    342static void
    343smb_direct_disconnect_rdma_connection(struct smb_direct_transport *t)
    344{
    345	if (t->status == SMB_DIRECT_CS_CONNECTED)
    346		queue_work(smb_direct_wq, &t->disconnect_work);
    347}
    348
    349static void smb_direct_send_immediate_work(struct work_struct *work)
    350{
    351	struct smb_direct_transport *t = container_of(work,
    352			struct smb_direct_transport, send_immediate_work);
    353
    354	if (t->status != SMB_DIRECT_CS_CONNECTED)
    355		return;
    356
    357	smb_direct_post_send_data(t, NULL, NULL, 0, 0);
    358}
    359
    360static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id)
    361{
    362	struct smb_direct_transport *t;
    363	struct ksmbd_conn *conn;
    364
    365	t = kzalloc(sizeof(*t), GFP_KERNEL);
    366	if (!t)
    367		return NULL;
    368
    369	t->cm_id = cm_id;
    370	cm_id->context = t;
    371
    372	t->status = SMB_DIRECT_CS_NEW;
    373	init_waitqueue_head(&t->wait_status);
    374
    375	spin_lock_init(&t->reassembly_queue_lock);
    376	INIT_LIST_HEAD(&t->reassembly_queue);
    377	t->reassembly_data_length = 0;
    378	t->reassembly_queue_length = 0;
    379	init_waitqueue_head(&t->wait_reassembly_queue);
    380	init_waitqueue_head(&t->wait_send_credits);
    381	init_waitqueue_head(&t->wait_rw_credits);
    382
    383	spin_lock_init(&t->receive_credit_lock);
    384	spin_lock_init(&t->recvmsg_queue_lock);
    385	INIT_LIST_HEAD(&t->recvmsg_queue);
    386
    387	spin_lock_init(&t->empty_recvmsg_queue_lock);
    388	INIT_LIST_HEAD(&t->empty_recvmsg_queue);
    389
    390	init_waitqueue_head(&t->wait_send_pending);
    391	atomic_set(&t->send_pending, 0);
    392
    393	spin_lock_init(&t->lock_new_recv_credits);
    394
    395	INIT_DELAYED_WORK(&t->post_recv_credits_work,
    396			  smb_direct_post_recv_credits);
    397	INIT_WORK(&t->send_immediate_work, smb_direct_send_immediate_work);
    398	INIT_WORK(&t->disconnect_work, smb_direct_disconnect_rdma_work);
    399
    400	conn = ksmbd_conn_alloc();
    401	if (!conn)
    402		goto err;
    403	conn->transport = KSMBD_TRANS(t);
    404	KSMBD_TRANS(t)->conn = conn;
    405	KSMBD_TRANS(t)->ops = &ksmbd_smb_direct_transport_ops;
    406	return t;
    407err:
    408	kfree(t);
    409	return NULL;
    410}
    411
    412static void free_transport(struct smb_direct_transport *t)
    413{
    414	struct smb_direct_recvmsg *recvmsg;
    415
    416	wake_up_interruptible(&t->wait_send_credits);
    417
    418	ksmbd_debug(RDMA, "wait for all send posted to IB to finish\n");
    419	wait_event(t->wait_send_pending,
    420		   atomic_read(&t->send_pending) == 0);
    421
    422	cancel_work_sync(&t->disconnect_work);
    423	cancel_delayed_work_sync(&t->post_recv_credits_work);
    424	cancel_work_sync(&t->send_immediate_work);
    425
    426	if (t->qp) {
    427		ib_drain_qp(t->qp);
    428		ib_mr_pool_destroy(t->qp, &t->qp->rdma_mrs);
    429		ib_destroy_qp(t->qp);
    430	}
    431
    432	ksmbd_debug(RDMA, "drain the reassembly queue\n");
    433	do {
    434		spin_lock(&t->reassembly_queue_lock);
    435		recvmsg = get_first_reassembly(t);
    436		if (recvmsg) {
    437			list_del(&recvmsg->list);
    438			spin_unlock(&t->reassembly_queue_lock);
    439			put_recvmsg(t, recvmsg);
    440		} else {
    441			spin_unlock(&t->reassembly_queue_lock);
    442		}
    443	} while (recvmsg);
    444	t->reassembly_data_length = 0;
    445
    446	if (t->send_cq)
    447		ib_free_cq(t->send_cq);
    448	if (t->recv_cq)
    449		ib_free_cq(t->recv_cq);
    450	if (t->pd)
    451		ib_dealloc_pd(t->pd);
    452	if (t->cm_id)
    453		rdma_destroy_id(t->cm_id);
    454
    455	smb_direct_destroy_pools(t);
    456	ksmbd_conn_free(KSMBD_TRANS(t)->conn);
    457	kfree(t);
    458}
    459
    460static struct smb_direct_sendmsg
    461*smb_direct_alloc_sendmsg(struct smb_direct_transport *t)
    462{
    463	struct smb_direct_sendmsg *msg;
    464
    465	msg = mempool_alloc(t->sendmsg_mempool, GFP_KERNEL);
    466	if (!msg)
    467		return ERR_PTR(-ENOMEM);
    468	msg->transport = t;
    469	INIT_LIST_HEAD(&msg->list);
    470	msg->num_sge = 0;
    471	return msg;
    472}
    473
    474static void smb_direct_free_sendmsg(struct smb_direct_transport *t,
    475				    struct smb_direct_sendmsg *msg)
    476{
    477	int i;
    478
    479	if (msg->num_sge > 0) {
    480		ib_dma_unmap_single(t->cm_id->device,
    481				    msg->sge[0].addr, msg->sge[0].length,
    482				    DMA_TO_DEVICE);
    483		for (i = 1; i < msg->num_sge; i++)
    484			ib_dma_unmap_page(t->cm_id->device,
    485					  msg->sge[i].addr, msg->sge[i].length,
    486					  DMA_TO_DEVICE);
    487	}
    488	mempool_free(msg, t->sendmsg_mempool);
    489}
    490
    491static int smb_direct_check_recvmsg(struct smb_direct_recvmsg *recvmsg)
    492{
    493	switch (recvmsg->type) {
    494	case SMB_DIRECT_MSG_DATA_TRANSFER: {
    495		struct smb_direct_data_transfer *req =
    496			(struct smb_direct_data_transfer *)recvmsg->packet;
    497		struct smb2_hdr *hdr = (struct smb2_hdr *)(recvmsg->packet
    498				+ le32_to_cpu(req->data_offset));
    499		ksmbd_debug(RDMA,
    500			    "CreditGranted: %u, CreditRequested: %u, DataLength: %u, RemainingDataLength: %u, SMB: %x, Command: %u\n",
    501			    le16_to_cpu(req->credits_granted),
    502			    le16_to_cpu(req->credits_requested),
    503			    req->data_length, req->remaining_data_length,
    504			    hdr->ProtocolId, hdr->Command);
    505		break;
    506	}
    507	case SMB_DIRECT_MSG_NEGOTIATE_REQ: {
    508		struct smb_direct_negotiate_req *req =
    509			(struct smb_direct_negotiate_req *)recvmsg->packet;
    510		ksmbd_debug(RDMA,
    511			    "MinVersion: %u, MaxVersion: %u, CreditRequested: %u, MaxSendSize: %u, MaxRecvSize: %u, MaxFragmentedSize: %u\n",
    512			    le16_to_cpu(req->min_version),
    513			    le16_to_cpu(req->max_version),
    514			    le16_to_cpu(req->credits_requested),
    515			    le32_to_cpu(req->preferred_send_size),
    516			    le32_to_cpu(req->max_receive_size),
    517			    le32_to_cpu(req->max_fragmented_size));
    518		if (le16_to_cpu(req->min_version) > 0x0100 ||
    519		    le16_to_cpu(req->max_version) < 0x0100)
    520			return -EOPNOTSUPP;
    521		if (le16_to_cpu(req->credits_requested) <= 0 ||
    522		    le32_to_cpu(req->max_receive_size) <= 128 ||
    523		    le32_to_cpu(req->max_fragmented_size) <=
    524					128 * 1024)
    525			return -ECONNABORTED;
    526
    527		break;
    528	}
    529	default:
    530		return -EINVAL;
    531	}
    532	return 0;
    533}
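
smb_direct_check_recvmsg() validates the only two message types the server expects. For reference, a sketch of the negotiate request layout those bounds checks operate on; the field order follows [MS-SMBD] 2.2.1, the kernel's authoritative definition lives in transport_rdma.h, and the __le16/__le32 fixed-width types come from <linux/types.h>:

#include <linux/types.h>

/* All fields are little-endian on the wire; 20 bytes in total. */
struct smbd_negotiate_req_sketch {
	__le16 min_version;		/* dialect 0x0100 must fall within      */
	__le16 max_version;		/*   [min_version, max_version]         */
	__le16 reserved;
	__le16 credits_requested;	/* connection aborted if 0              */
	__le32 preferred_send_size;
	__le32 max_receive_size;	/* connection aborted if <= 128         */
	__le32 max_fragmented_size;	/* connection aborted if <= 128 KiB     */
};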
    534
    535static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
    536{
    537	struct smb_direct_recvmsg *recvmsg;
    538	struct smb_direct_transport *t;
    539
    540	recvmsg = container_of(wc->wr_cqe, struct smb_direct_recvmsg, cqe);
    541	t = recvmsg->transport;
    542
    543	if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) {
    544		if (wc->status != IB_WC_WR_FLUSH_ERR) {
    545			pr_err("Recv error. status='%s (%d)' opcode=%d\n",
    546			       ib_wc_status_msg(wc->status), wc->status,
    547			       wc->opcode);
    548			smb_direct_disconnect_rdma_connection(t);
    549		}
    550		put_empty_recvmsg(t, recvmsg);
    551		return;
    552	}
    553
    554	ksmbd_debug(RDMA, "Recv completed. status='%s (%d)', opcode=%d\n",
    555		    ib_wc_status_msg(wc->status), wc->status,
    556		    wc->opcode);
    557
    558	ib_dma_sync_single_for_cpu(wc->qp->device, recvmsg->sge.addr,
    559				   recvmsg->sge.length, DMA_FROM_DEVICE);
    560
    561	switch (recvmsg->type) {
    562	case SMB_DIRECT_MSG_NEGOTIATE_REQ:
    563		if (wc->byte_len < sizeof(struct smb_direct_negotiate_req)) {
    564			put_empty_recvmsg(t, recvmsg);
    565			return;
    566		}
    567		t->negotiation_requested = true;
    568		t->full_packet_received = true;
    569		t->status = SMB_DIRECT_CS_CONNECTED;
    570		enqueue_reassembly(t, recvmsg, 0);
    571		wake_up_interruptible(&t->wait_status);
    572		break;
    573	case SMB_DIRECT_MSG_DATA_TRANSFER: {
    574		struct smb_direct_data_transfer *data_transfer =
    575			(struct smb_direct_data_transfer *)recvmsg->packet;
    576		unsigned int data_length;
    577		int avail_recvmsg_count, receive_credits;
    578
    579		if (wc->byte_len <
    580		    offsetof(struct smb_direct_data_transfer, padding)) {
    581			put_empty_recvmsg(t, recvmsg);
    582			return;
    583		}
    584
    585		data_length = le32_to_cpu(data_transfer->data_length);
    586		if (data_length) {
    587			if (wc->byte_len < sizeof(struct smb_direct_data_transfer) +
    588			    (u64)data_length) {
    589				put_empty_recvmsg(t, recvmsg);
    590				return;
    591			}
    592
    593			if (t->full_packet_received)
    594				recvmsg->first_segment = true;
    595
    596			if (le32_to_cpu(data_transfer->remaining_data_length))
    597				t->full_packet_received = false;
    598			else
    599				t->full_packet_received = true;
    600
    601			enqueue_reassembly(t, recvmsg, (int)data_length);
    602			wake_up_interruptible(&t->wait_reassembly_queue);
    603
    604			spin_lock(&t->receive_credit_lock);
    605			receive_credits = --(t->recv_credits);
    606			avail_recvmsg_count = t->count_avail_recvmsg;
    607			spin_unlock(&t->receive_credit_lock);
    608		} else {
    609			put_empty_recvmsg(t, recvmsg);
    610
    611			spin_lock(&t->receive_credit_lock);
    612			receive_credits = --(t->recv_credits);
    613			avail_recvmsg_count = ++(t->count_avail_recvmsg);
    614			spin_unlock(&t->receive_credit_lock);
    615		}
    616
    617		t->recv_credit_target =
    618				le16_to_cpu(data_transfer->credits_requested);
    619		atomic_add(le16_to_cpu(data_transfer->credits_granted),
    620			   &t->send_credits);
    621
    622		if (le16_to_cpu(data_transfer->flags) &
    623		    SMB_DIRECT_RESPONSE_REQUESTED)
    624			queue_work(smb_direct_wq, &t->send_immediate_work);
    625
    626		if (atomic_read(&t->send_credits) > 0)
    627			wake_up_interruptible(&t->wait_send_credits);
    628
    629		if (is_receive_credit_post_required(receive_credits, avail_recvmsg_count))
    630			mod_delayed_work(smb_direct_wq,
    631					 &t->post_recv_credits_work, 0);
    632		break;
    633	}
    634	default:
    635		break;
    636	}
    637}
    638
    639static int smb_direct_post_recv(struct smb_direct_transport *t,
    640				struct smb_direct_recvmsg *recvmsg)
    641{
    642	struct ib_recv_wr wr;
    643	int ret;
    644
    645	recvmsg->sge.addr = ib_dma_map_single(t->cm_id->device,
    646					      recvmsg->packet, t->max_recv_size,
    647					      DMA_FROM_DEVICE);
    648	ret = ib_dma_mapping_error(t->cm_id->device, recvmsg->sge.addr);
    649	if (ret)
    650		return ret;
    651	recvmsg->sge.length = t->max_recv_size;
    652	recvmsg->sge.lkey = t->pd->local_dma_lkey;
    653	recvmsg->cqe.done = recv_done;
    654
    655	wr.wr_cqe = &recvmsg->cqe;
    656	wr.next = NULL;
    657	wr.sg_list = &recvmsg->sge;
    658	wr.num_sge = 1;
    659
    660	ret = ib_post_recv(t->qp, &wr, NULL);
    661	if (ret) {
    662		pr_err("Can't post recv: %d\n", ret);
    663		ib_dma_unmap_single(t->cm_id->device,
    664				    recvmsg->sge.addr, recvmsg->sge.length,
    665				    DMA_FROM_DEVICE);
    666		smb_direct_disconnect_rdma_connection(t);
    667		return ret;
    668	}
    669	return ret;
    670}
    671
    672static int smb_direct_read(struct ksmbd_transport *t, char *buf,
    673			   unsigned int size)
    674{
    675	struct smb_direct_recvmsg *recvmsg;
    676	struct smb_direct_data_transfer *data_transfer;
    677	int to_copy, to_read, data_read, offset;
    678	u32 data_length, remaining_data_length, data_offset;
    679	int rc;
    680	struct smb_direct_transport *st = smb_trans_direct_transfort(t);
    681
    682again:
    683	if (st->status != SMB_DIRECT_CS_CONNECTED) {
    684		pr_err("disconnected\n");
    685		return -ENOTCONN;
    686	}
    687
    688	/*
    689	 * No need to hold the reassembly queue lock all the time as we are
    690	 * the only one reading from the front of the queue. The transport
    691	 * may add more entries to the back of the queue at the same time
    692	 */
    693	if (st->reassembly_data_length >= size) {
    694		int queue_length;
    695		int queue_removed = 0;
    696
    697		/*
    698		 * Need to make sure reassembly_data_length is read before
    699		 * reading reassembly_queue_length and calling
    700		 * get_first_reassembly. This call is lock free
     701		 * as we never read the end of the queue, which is being
     702		 * updated in softirq context as more data is received.
    703		 */
    704		virt_rmb();
    705		queue_length = st->reassembly_queue_length;
    706		data_read = 0;
    707		to_read = size;
    708		offset = st->first_entry_offset;
    709		while (data_read < size) {
    710			recvmsg = get_first_reassembly(st);
    711			data_transfer = smb_direct_recvmsg_payload(recvmsg);
    712			data_length = le32_to_cpu(data_transfer->data_length);
    713			remaining_data_length =
    714				le32_to_cpu(data_transfer->remaining_data_length);
    715			data_offset = le32_to_cpu(data_transfer->data_offset);
    716
    717			/*
    718			 * The upper layer expects RFC1002 length at the
    719			 * beginning of the payload. Return it to indicate
     720			 * the total length of the packet. This minimizes the
     721			 * changes to the upper-layer packet processing logic. This
     722			 * will eventually be removed when an intermediate
     723			 * transport layer is added.
    724			 */
    725			if (recvmsg->first_segment && size == 4) {
    726				unsigned int rfc1002_len =
    727					data_length + remaining_data_length;
    728				*((__be32 *)buf) = cpu_to_be32(rfc1002_len);
    729				data_read = 4;
    730				recvmsg->first_segment = false;
    731				ksmbd_debug(RDMA,
    732					    "returning rfc1002 length %d\n",
    733					    rfc1002_len);
    734				goto read_rfc1002_done;
    735			}
    736
    737			to_copy = min_t(int, data_length - offset, to_read);
    738			memcpy(buf + data_read, (char *)data_transfer + data_offset + offset,
    739			       to_copy);
    740
    741			/* move on to the next buffer? */
    742			if (to_copy == data_length - offset) {
    743				queue_length--;
    744				/*
    745				 * No need to lock if we are not at the
    746				 * end of the queue
    747				 */
    748				if (queue_length) {
    749					list_del(&recvmsg->list);
    750				} else {
    751					spin_lock_irq(&st->reassembly_queue_lock);
    752					list_del(&recvmsg->list);
    753					spin_unlock_irq(&st->reassembly_queue_lock);
    754				}
    755				queue_removed++;
    756				put_recvmsg(st, recvmsg);
    757				offset = 0;
    758			} else {
    759				offset += to_copy;
    760			}
    761
    762			to_read -= to_copy;
    763			data_read += to_copy;
    764		}
    765
    766		spin_lock_irq(&st->reassembly_queue_lock);
    767		st->reassembly_data_length -= data_read;
    768		st->reassembly_queue_length -= queue_removed;
    769		spin_unlock_irq(&st->reassembly_queue_lock);
    770
    771		spin_lock(&st->receive_credit_lock);
    772		st->count_avail_recvmsg += queue_removed;
    773		if (is_receive_credit_post_required(st->recv_credits, st->count_avail_recvmsg)) {
    774			spin_unlock(&st->receive_credit_lock);
    775			mod_delayed_work(smb_direct_wq,
    776					 &st->post_recv_credits_work, 0);
    777		} else {
    778			spin_unlock(&st->receive_credit_lock);
    779		}
    780
    781		st->first_entry_offset = offset;
    782		ksmbd_debug(RDMA,
    783			    "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n",
    784			    data_read, st->reassembly_data_length,
    785			    st->first_entry_offset);
    786read_rfc1002_done:
    787		return data_read;
    788	}
    789
    790	ksmbd_debug(RDMA, "wait_event on more data\n");
    791	rc = wait_event_interruptible(st->wait_reassembly_queue,
    792				      st->reassembly_data_length >= size ||
    793				       st->status != SMB_DIRECT_CS_CONNECTED);
    794	if (rc)
    795		return -EINTR;
    796
    797	goto again;
    798}
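
When the upper layer asks for exactly 4 bytes on a first segment, smb_direct_read() fabricates the RFC 1002 length a TCP transport would have delivered: data_length + remaining_data_length, encoded big-endian. A tiny user-space illustration with made-up segment sizes:

#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* Hypothetical first segment: 4 KiB carried now, 12 KiB still to come. */
	unsigned int data_length = 4096, remaining_data_length = 12288;
	unsigned int be = htonl(data_length + remaining_data_length);
	unsigned char buf[4];

	memcpy(buf, &be, sizeof(be));	/* what cpu_to_be32() stores into *buf above */
	printf("%02x %02x %02x %02x\n", buf[0], buf[1], buf[2], buf[3]);	/* 00 00 40 00 */
	return 0;
}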
    799
    800static void smb_direct_post_recv_credits(struct work_struct *work)
    801{
    802	struct smb_direct_transport *t = container_of(work,
    803		struct smb_direct_transport, post_recv_credits_work.work);
    804	struct smb_direct_recvmsg *recvmsg;
    805	int receive_credits, credits = 0;
    806	int ret;
    807	int use_free = 1;
    808
    809	spin_lock(&t->receive_credit_lock);
    810	receive_credits = t->recv_credits;
    811	spin_unlock(&t->receive_credit_lock);
    812
    813	if (receive_credits < t->recv_credit_target) {
    814		while (true) {
    815			if (use_free)
    816				recvmsg = get_free_recvmsg(t);
    817			else
    818				recvmsg = get_empty_recvmsg(t);
    819			if (!recvmsg) {
    820				if (use_free) {
    821					use_free = 0;
    822					continue;
    823				} else {
    824					break;
    825				}
    826			}
    827
    828			recvmsg->type = SMB_DIRECT_MSG_DATA_TRANSFER;
    829			recvmsg->first_segment = false;
    830
    831			ret = smb_direct_post_recv(t, recvmsg);
    832			if (ret) {
    833				pr_err("Can't post recv: %d\n", ret);
    834				put_recvmsg(t, recvmsg);
    835				break;
    836			}
    837			credits++;
    838		}
    839	}
    840
    841	spin_lock(&t->receive_credit_lock);
    842	t->recv_credits += credits;
    843	t->count_avail_recvmsg -= credits;
    844	spin_unlock(&t->receive_credit_lock);
    845
    846	spin_lock(&t->lock_new_recv_credits);
    847	t->new_recv_credits += credits;
    848	spin_unlock(&t->lock_new_recv_credits);
    849
    850	if (credits)
    851		queue_work(smb_direct_wq, &t->send_immediate_work);
    852}
    853
    854static void send_done(struct ib_cq *cq, struct ib_wc *wc)
    855{
    856	struct smb_direct_sendmsg *sendmsg, *sibling;
    857	struct smb_direct_transport *t;
    858	struct list_head *pos, *prev, *end;
    859
    860	sendmsg = container_of(wc->wr_cqe, struct smb_direct_sendmsg, cqe);
    861	t = sendmsg->transport;
    862
    863	ksmbd_debug(RDMA, "Send completed. status='%s (%d)', opcode=%d\n",
    864		    ib_wc_status_msg(wc->status), wc->status,
    865		    wc->opcode);
    866
    867	if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) {
    868		pr_err("Send error. status='%s (%d)', opcode=%d\n",
    869		       ib_wc_status_msg(wc->status), wc->status,
    870		       wc->opcode);
    871		smb_direct_disconnect_rdma_connection(t);
    872	}
    873
    874	if (atomic_dec_and_test(&t->send_pending))
    875		wake_up(&t->wait_send_pending);
    876
     877	/* Iterate over the list of messages in reverse order and free them;
     878	 * the list's head is no longer valid.
    879	 */
    880	for (pos = &sendmsg->list, prev = pos->prev, end = sendmsg->list.next;
    881	     prev != end; pos = prev, prev = prev->prev) {
    882		sibling = container_of(pos, struct smb_direct_sendmsg, list);
    883		smb_direct_free_sendmsg(t, sibling);
    884	}
    885
    886	sibling = container_of(pos, struct smb_direct_sendmsg, list);
    887	smb_direct_free_sendmsg(t, sibling);
    888}
    889
    890static int manage_credits_prior_sending(struct smb_direct_transport *t)
    891{
    892	int new_credits;
    893
    894	spin_lock(&t->lock_new_recv_credits);
    895	new_credits = t->new_recv_credits;
    896	t->new_recv_credits = 0;
    897	spin_unlock(&t->lock_new_recv_credits);
    898
    899	return new_credits;
    900}
    901
    902static int smb_direct_post_send(struct smb_direct_transport *t,
    903				struct ib_send_wr *wr)
    904{
    905	int ret;
    906
    907	atomic_inc(&t->send_pending);
    908	ret = ib_post_send(t->qp, wr, NULL);
    909	if (ret) {
    910		pr_err("failed to post send: %d\n", ret);
    911		if (atomic_dec_and_test(&t->send_pending))
    912			wake_up(&t->wait_send_pending);
    913		smb_direct_disconnect_rdma_connection(t);
    914	}
    915	return ret;
    916}
    917
    918static void smb_direct_send_ctx_init(struct smb_direct_transport *t,
    919				     struct smb_direct_send_ctx *send_ctx,
    920				     bool need_invalidate_rkey,
    921				     unsigned int remote_key)
    922{
    923	INIT_LIST_HEAD(&send_ctx->msg_list);
    924	send_ctx->wr_cnt = 0;
    925	send_ctx->need_invalidate_rkey = need_invalidate_rkey;
    926	send_ctx->remote_key = remote_key;
    927}
    928
    929static int smb_direct_flush_send_list(struct smb_direct_transport *t,
    930				      struct smb_direct_send_ctx *send_ctx,
    931				      bool is_last)
    932{
    933	struct smb_direct_sendmsg *first, *last;
    934	int ret;
    935
    936	if (list_empty(&send_ctx->msg_list))
    937		return 0;
    938
    939	first = list_first_entry(&send_ctx->msg_list,
    940				 struct smb_direct_sendmsg,
    941				 list);
    942	last = list_last_entry(&send_ctx->msg_list,
    943			       struct smb_direct_sendmsg,
    944			       list);
    945
    946	last->wr.send_flags = IB_SEND_SIGNALED;
    947	last->wr.wr_cqe = &last->cqe;
    948	if (is_last && send_ctx->need_invalidate_rkey) {
    949		last->wr.opcode = IB_WR_SEND_WITH_INV;
    950		last->wr.ex.invalidate_rkey = send_ctx->remote_key;
    951	}
    952
    953	ret = smb_direct_post_send(t, &first->wr);
    954	if (!ret) {
    955		smb_direct_send_ctx_init(t, send_ctx,
    956					 send_ctx->need_invalidate_rkey,
    957					 send_ctx->remote_key);
    958	} else {
    959		atomic_add(send_ctx->wr_cnt, &t->send_credits);
    960		wake_up(&t->wait_send_credits);
    961		list_for_each_entry_safe(first, last, &send_ctx->msg_list,
    962					 list) {
    963			smb_direct_free_sendmsg(t, first);
    964		}
    965	}
    966	return ret;
    967}
    968
    969static int wait_for_credits(struct smb_direct_transport *t,
    970			    wait_queue_head_t *waitq, atomic_t *total_credits,
    971			    int needed)
    972{
    973	int ret;
    974
    975	do {
    976		if (atomic_sub_return(needed, total_credits) >= 0)
    977			return 0;
    978
    979		atomic_add(needed, total_credits);
    980		ret = wait_event_interruptible(*waitq,
    981					       atomic_read(total_credits) >= needed ||
    982					       t->status != SMB_DIRECT_CS_CONNECTED);
    983
    984		if (t->status != SMB_DIRECT_CS_CONNECTED)
    985			return -ENOTCONN;
    986		else if (ret < 0)
    987			return ret;
    988	} while (true);
    989}
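
wait_for_credits() reserves optimistically: it subtracts the needed credits first and only backs the reservation out and sleeps when the counter goes negative, keeping the common case to one atomic operation. A hedged trace with hypothetical counter values:

/*
 * total_credits = 3, needed = 1:
 *	atomic_sub_return(1, total_credits) -> 2	reservation holds, return 0
 *
 * total_credits = 0, needed = 1:
 *	atomic_sub_return(1, total_credits) -> -1	undo with atomic_add(),
 *	then sleep until some other path returns credits to the counter or
 *	the transport leaves SMB_DIRECT_CS_CONNECTED
 */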
    990
    991static int wait_for_send_credits(struct smb_direct_transport *t,
    992				 struct smb_direct_send_ctx *send_ctx)
    993{
    994	int ret;
    995
    996	if (send_ctx &&
    997	    (send_ctx->wr_cnt >= 16 || atomic_read(&t->send_credits) <= 1)) {
    998		ret = smb_direct_flush_send_list(t, send_ctx, false);
    999		if (ret)
   1000			return ret;
   1001	}
   1002
   1003	return wait_for_credits(t, &t->wait_send_credits, &t->send_credits, 1);
   1004}
   1005
   1006static int wait_for_rw_credits(struct smb_direct_transport *t, int credits)
   1007{
   1008	return wait_for_credits(t, &t->wait_rw_credits, &t->rw_credits, credits);
   1009}
   1010
   1011static int calc_rw_credits(struct smb_direct_transport *t,
   1012			   char *buf, unsigned int len)
   1013{
   1014	return DIV_ROUND_UP(get_buf_page_count(buf, len),
   1015			    t->pages_per_rw_credit);
   1016}
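
calc_rw_credits() converts a flat buffer into the number of R/W credits it will consume: pages touched, divided by pages_per_rw_credit, rounded up. A worked example assuming 4 KiB pages and the 256-page cap from smb_direct_get_max_fr_pages(); the real figures depend on the device:

/*
 * A page-aligned 1 MiB descriptor covers 1 MiB / 4 KiB = 256 pages and
 * costs DIV_ROUND_UP(256, 256) = 1 credit.  The same 1 MiB buffer
 * starting mid-page touches 257 pages and therefore costs 2 credits.
 */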
   1017
   1018static int smb_direct_create_header(struct smb_direct_transport *t,
   1019				    int size, int remaining_data_length,
   1020				    struct smb_direct_sendmsg **sendmsg_out)
   1021{
   1022	struct smb_direct_sendmsg *sendmsg;
   1023	struct smb_direct_data_transfer *packet;
   1024	int header_length;
   1025	int ret;
   1026
   1027	sendmsg = smb_direct_alloc_sendmsg(t);
   1028	if (IS_ERR(sendmsg))
   1029		return PTR_ERR(sendmsg);
   1030
   1031	/* Fill in the packet header */
   1032	packet = (struct smb_direct_data_transfer *)sendmsg->packet;
   1033	packet->credits_requested = cpu_to_le16(t->send_credit_target);
   1034	packet->credits_granted = cpu_to_le16(manage_credits_prior_sending(t));
   1035
   1036	packet->flags = 0;
   1037	packet->reserved = 0;
   1038	if (!size)
   1039		packet->data_offset = 0;
   1040	else
   1041		packet->data_offset = cpu_to_le32(24);
   1042	packet->data_length = cpu_to_le32(size);
   1043	packet->remaining_data_length = cpu_to_le32(remaining_data_length);
   1044	packet->padding = 0;
   1045
   1046	ksmbd_debug(RDMA,
   1047		    "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n",
   1048		    le16_to_cpu(packet->credits_requested),
   1049		    le16_to_cpu(packet->credits_granted),
   1050		    le32_to_cpu(packet->data_offset),
   1051		    le32_to_cpu(packet->data_length),
   1052		    le32_to_cpu(packet->remaining_data_length));
   1053
   1054	/* Map the packet to DMA */
   1055	header_length = sizeof(struct smb_direct_data_transfer);
   1056	/* If this is a packet without payload, don't send padding */
   1057	if (!size)
   1058		header_length =
   1059			offsetof(struct smb_direct_data_transfer, padding);
   1060
   1061	sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device,
   1062						 (void *)packet,
   1063						 header_length,
   1064						 DMA_TO_DEVICE);
   1065	ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr);
   1066	if (ret) {
   1067		smb_direct_free_sendmsg(t, sendmsg);
   1068		return ret;
   1069	}
   1070
   1071	sendmsg->num_sge = 1;
   1072	sendmsg->sge[0].length = header_length;
   1073	sendmsg->sge[0].lkey = t->pd->local_dma_lkey;
   1074
   1075	*sendmsg_out = sendmsg;
   1076	return 0;
   1077}
   1078
   1079static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nentries)
   1080{
   1081	bool high = is_vmalloc_addr(buf);
   1082	struct page *page;
   1083	int offset, len;
   1084	int i = 0;
   1085
   1086	if (size <= 0 || nentries < get_buf_page_count(buf, size))
   1087		return -EINVAL;
   1088
   1089	offset = offset_in_page(buf);
   1090	buf -= offset;
   1091	while (size > 0) {
   1092		len = min_t(int, PAGE_SIZE - offset, size);
   1093		if (high)
   1094			page = vmalloc_to_page(buf);
   1095		else
   1096			page = kmap_to_page(buf);
   1097
   1098		if (!sg_list)
   1099			return -EINVAL;
   1100		sg_set_page(sg_list, page, len, offset);
   1101		sg_list = sg_next(sg_list);
   1102
   1103		buf += PAGE_SIZE;
   1104		size -= len;
   1105		offset = 0;
   1106		i++;
   1107	}
   1108	return i;
   1109}
   1110
   1111static int get_mapped_sg_list(struct ib_device *device, void *buf, int size,
   1112			      struct scatterlist *sg_list, int nentries,
   1113			      enum dma_data_direction dir)
   1114{
   1115	int npages;
   1116
   1117	npages = get_sg_list(buf, size, sg_list, nentries);
   1118	if (npages < 0)
   1119		return -EINVAL;
   1120	return ib_dma_map_sg(device, sg_list, npages, dir);
   1121}
   1122
   1123static int post_sendmsg(struct smb_direct_transport *t,
   1124			struct smb_direct_send_ctx *send_ctx,
   1125			struct smb_direct_sendmsg *msg)
   1126{
   1127	int i;
   1128
   1129	for (i = 0; i < msg->num_sge; i++)
   1130		ib_dma_sync_single_for_device(t->cm_id->device,
   1131					      msg->sge[i].addr, msg->sge[i].length,
   1132					      DMA_TO_DEVICE);
   1133
   1134	msg->cqe.done = send_done;
   1135	msg->wr.opcode = IB_WR_SEND;
   1136	msg->wr.sg_list = &msg->sge[0];
   1137	msg->wr.num_sge = msg->num_sge;
   1138	msg->wr.next = NULL;
   1139
   1140	if (send_ctx) {
   1141		msg->wr.wr_cqe = NULL;
   1142		msg->wr.send_flags = 0;
   1143		if (!list_empty(&send_ctx->msg_list)) {
   1144			struct smb_direct_sendmsg *last;
   1145
   1146			last = list_last_entry(&send_ctx->msg_list,
   1147					       struct smb_direct_sendmsg,
   1148					       list);
   1149			last->wr.next = &msg->wr;
   1150		}
   1151		list_add_tail(&msg->list, &send_ctx->msg_list);
   1152		send_ctx->wr_cnt++;
   1153		return 0;
   1154	}
   1155
   1156	msg->wr.wr_cqe = &msg->cqe;
   1157	msg->wr.send_flags = IB_SEND_SIGNALED;
   1158	return smb_direct_post_send(t, &msg->wr);
   1159}
   1160
   1161static int smb_direct_post_send_data(struct smb_direct_transport *t,
   1162				     struct smb_direct_send_ctx *send_ctx,
   1163				     struct kvec *iov, int niov,
   1164				     int remaining_data_length)
   1165{
   1166	int i, j, ret;
   1167	struct smb_direct_sendmsg *msg;
   1168	int data_length;
   1169	struct scatterlist sg[SMB_DIRECT_MAX_SEND_SGES - 1];
   1170
   1171	ret = wait_for_send_credits(t, send_ctx);
   1172	if (ret)
   1173		return ret;
   1174
   1175	data_length = 0;
   1176	for (i = 0; i < niov; i++)
   1177		data_length += iov[i].iov_len;
   1178
   1179	ret = smb_direct_create_header(t, data_length, remaining_data_length,
   1180				       &msg);
   1181	if (ret) {
   1182		atomic_inc(&t->send_credits);
   1183		return ret;
   1184	}
   1185
   1186	for (i = 0; i < niov; i++) {
   1187		struct ib_sge *sge;
   1188		int sg_cnt;
   1189
   1190		sg_init_table(sg, SMB_DIRECT_MAX_SEND_SGES - 1);
   1191		sg_cnt = get_mapped_sg_list(t->cm_id->device,
   1192					    iov[i].iov_base, iov[i].iov_len,
   1193					    sg, SMB_DIRECT_MAX_SEND_SGES - 1,
   1194					    DMA_TO_DEVICE);
   1195		if (sg_cnt <= 0) {
   1196			pr_err("failed to map buffer\n");
   1197			ret = -ENOMEM;
   1198			goto err;
   1199		} else if (sg_cnt + msg->num_sge > SMB_DIRECT_MAX_SEND_SGES) {
   1200			pr_err("buffer not fitted into sges\n");
   1201			ret = -E2BIG;
   1202			ib_dma_unmap_sg(t->cm_id->device, sg, sg_cnt,
   1203					DMA_TO_DEVICE);
   1204			goto err;
   1205		}
   1206
   1207		for (j = 0; j < sg_cnt; j++) {
   1208			sge = &msg->sge[msg->num_sge];
   1209			sge->addr = sg_dma_address(&sg[j]);
   1210			sge->length = sg_dma_len(&sg[j]);
   1211			sge->lkey  = t->pd->local_dma_lkey;
   1212			msg->num_sge++;
   1213		}
   1214	}
   1215
   1216	ret = post_sendmsg(t, send_ctx, msg);
   1217	if (ret)
   1218		goto err;
   1219	return 0;
   1220err:
   1221	smb_direct_free_sendmsg(t, msg);
   1222	atomic_inc(&t->send_credits);
   1223	return ret;
   1224}
   1225
   1226static int smb_direct_writev(struct ksmbd_transport *t,
   1227			     struct kvec *iov, int niovs, int buflen,
   1228			     bool need_invalidate, unsigned int remote_key)
   1229{
   1230	struct smb_direct_transport *st = smb_trans_direct_transfort(t);
   1231	int remaining_data_length;
   1232	int start, i, j;
   1233	int max_iov_size = st->max_send_size -
   1234			sizeof(struct smb_direct_data_transfer);
   1235	int ret;
   1236	struct kvec vec;
   1237	struct smb_direct_send_ctx send_ctx;
   1238
   1239	if (st->status != SMB_DIRECT_CS_CONNECTED)
   1240		return -ENOTCONN;
   1241
   1242	//FIXME: skip RFC1002 header..
   1243	buflen -= 4;
   1244	iov[0].iov_base += 4;
   1245	iov[0].iov_len -= 4;
   1246
   1247	remaining_data_length = buflen;
   1248	ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen);
   1249
   1250	smb_direct_send_ctx_init(st, &send_ctx, need_invalidate, remote_key);
   1251	start = i = 0;
   1252	buflen = 0;
   1253	while (true) {
   1254		buflen += iov[i].iov_len;
   1255		if (buflen > max_iov_size) {
   1256			if (i > start) {
   1257				remaining_data_length -=
   1258					(buflen - iov[i].iov_len);
   1259				ret = smb_direct_post_send_data(st, &send_ctx,
   1260								&iov[start], i - start,
   1261								remaining_data_length);
   1262				if (ret)
   1263					goto done;
   1264			} else {
   1265				/* iov[start] is too big, break it */
   1266				int nvec  = (buflen + max_iov_size - 1) /
   1267						max_iov_size;
   1268
   1269				for (j = 0; j < nvec; j++) {
   1270					vec.iov_base =
   1271						(char *)iov[start].iov_base +
   1272						j * max_iov_size;
   1273					vec.iov_len =
   1274						min_t(int, max_iov_size,
   1275						      buflen - max_iov_size * j);
   1276					remaining_data_length -= vec.iov_len;
   1277					ret = smb_direct_post_send_data(st, &send_ctx, &vec, 1,
   1278									remaining_data_length);
   1279					if (ret)
   1280						goto done;
   1281				}
   1282				i++;
   1283				if (i == niovs)
   1284					break;
   1285			}
   1286			start = i;
   1287			buflen = 0;
   1288		} else {
   1289			i++;
   1290			if (i == niovs) {
   1291				/* send out all remaining vecs */
   1292				remaining_data_length -= buflen;
   1293				ret = smb_direct_post_send_data(st, &send_ctx,
   1294								&iov[start], i - start,
   1295								remaining_data_length);
   1296				if (ret)
   1297					goto done;
   1298				break;
   1299			}
   1300		}
   1301	}
   1302
   1303done:
   1304	ret = smb_direct_flush_send_list(st, &send_ctx, true);
   1305
   1306	/*
   1307	 * As an optimization, we don't wait for individual I/O to finish
   1308	 * before sending the next one.
    1309	 * Send them all and wait for the pending send count to reach 0,
    1310	 * which means all the I/Os have gone out and we are good to return.
   1311	 */
   1312
   1313	wait_event(st->wait_send_pending,
   1314		   atomic_read(&st->send_pending) == 0);
   1315	return ret;
   1316}
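
smb_direct_writev() packs as many iovecs as fit under max_iov_size into one SMB Direct send and splits any single iovec that is larger, decrementing remaining_data_length before every post so each header advertises exactly what is still to come. A worked example for one hypothetical 20000-byte iovec (after the 4-byte RFC 1002 header is stripped), with the default 8192-byte max_send_size and the 24-byte data-transfer header, so max_iov_size = 8168:

/*
 * send 1: data_length = 8168, remaining_data_length = 11832
 * send 2: data_length = 8168, remaining_data_length =  3664
 * send 3: data_length = 3664, remaining_data_length =     0
 * The peer reassembles 8168 + 8168 + 3664 = 20000 bytes.
 */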
   1317
   1318static void smb_direct_free_rdma_rw_msg(struct smb_direct_transport *t,
   1319					struct smb_direct_rdma_rw_msg *msg,
   1320					enum dma_data_direction dir)
   1321{
   1322	rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port,
   1323			    msg->sgt.sgl, msg->sgt.nents, dir);
   1324	sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
   1325	kfree(msg);
   1326}
   1327
   1328static void read_write_done(struct ib_cq *cq, struct ib_wc *wc,
   1329			    enum dma_data_direction dir)
   1330{
   1331	struct smb_direct_rdma_rw_msg *msg = container_of(wc->wr_cqe,
   1332							  struct smb_direct_rdma_rw_msg, cqe);
   1333	struct smb_direct_transport *t = msg->t;
   1334
   1335	if (wc->status != IB_WC_SUCCESS) {
   1336		msg->status = -EIO;
   1337		pr_err("read/write error. opcode = %d, status = %s(%d)\n",
   1338		       wc->opcode, ib_wc_status_msg(wc->status), wc->status);
   1339		if (wc->status != IB_WC_WR_FLUSH_ERR)
   1340			smb_direct_disconnect_rdma_connection(t);
   1341	}
   1342
   1343	complete(msg->completion);
   1344}
   1345
   1346static void read_done(struct ib_cq *cq, struct ib_wc *wc)
   1347{
   1348	read_write_done(cq, wc, DMA_FROM_DEVICE);
   1349}
   1350
   1351static void write_done(struct ib_cq *cq, struct ib_wc *wc)
   1352{
   1353	read_write_done(cq, wc, DMA_TO_DEVICE);
   1354}
   1355
   1356static int smb_direct_rdma_xmit(struct smb_direct_transport *t,
   1357				void *buf, int buf_len,
   1358				struct smb2_buffer_desc_v1 *desc,
   1359				unsigned int desc_len,
   1360				bool is_read)
   1361{
   1362	struct smb_direct_rdma_rw_msg *msg, *next_msg;
   1363	int i, ret;
   1364	DECLARE_COMPLETION_ONSTACK(completion);
   1365	struct ib_send_wr *first_wr;
   1366	LIST_HEAD(msg_list);
   1367	char *desc_buf;
   1368	int credits_needed;
   1369	unsigned int desc_buf_len;
   1370	size_t total_length = 0;
   1371
   1372	if (t->status != SMB_DIRECT_CS_CONNECTED)
   1373		return -ENOTCONN;
   1374
   1375	/* calculate needed credits */
   1376	credits_needed = 0;
   1377	desc_buf = buf;
   1378	for (i = 0; i < desc_len / sizeof(*desc); i++) {
   1379		desc_buf_len = le32_to_cpu(desc[i].length);
   1380
   1381		credits_needed += calc_rw_credits(t, desc_buf, desc_buf_len);
   1382		desc_buf += desc_buf_len;
   1383		total_length += desc_buf_len;
   1384		if (desc_buf_len == 0 || total_length > buf_len ||
   1385		    total_length > t->max_rdma_rw_size)
   1386			return -EINVAL;
   1387	}
   1388
   1389	ksmbd_debug(RDMA, "RDMA %s, len %#x, needed credits %#x\n",
   1390		    is_read ? "read" : "write", buf_len, credits_needed);
   1391
   1392	ret = wait_for_rw_credits(t, credits_needed);
   1393	if (ret < 0)
   1394		return ret;
   1395
   1396	/* build rdma_rw_ctx for each descriptor */
   1397	desc_buf = buf;
   1398	for (i = 0; i < desc_len / sizeof(*desc); i++) {
   1399		msg = kzalloc(offsetof(struct smb_direct_rdma_rw_msg, sg_list) +
   1400			      sizeof(struct scatterlist) * SG_CHUNK_SIZE, GFP_KERNEL);
   1401		if (!msg) {
   1402			ret = -ENOMEM;
   1403			goto out;
   1404		}
   1405
   1406		desc_buf_len = le32_to_cpu(desc[i].length);
   1407
   1408		msg->t = t;
   1409		msg->cqe.done = is_read ? read_done : write_done;
   1410		msg->completion = &completion;
   1411
   1412		msg->sgt.sgl = &msg->sg_list[0];
   1413		ret = sg_alloc_table_chained(&msg->sgt,
   1414					     get_buf_page_count(desc_buf, desc_buf_len),
   1415					     msg->sg_list, SG_CHUNK_SIZE);
   1416		if (ret) {
   1417			kfree(msg);
   1418			ret = -ENOMEM;
   1419			goto out;
   1420		}
   1421
   1422		ret = get_sg_list(desc_buf, desc_buf_len,
   1423				  msg->sgt.sgl, msg->sgt.orig_nents);
   1424		if (ret < 0) {
   1425			sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
   1426			kfree(msg);
   1427			goto out;
   1428		}
   1429
   1430		ret = rdma_rw_ctx_init(&msg->rw_ctx, t->qp, t->qp->port,
   1431				       msg->sgt.sgl,
   1432				       get_buf_page_count(desc_buf, desc_buf_len),
   1433				       0,
   1434				       le64_to_cpu(desc[i].offset),
   1435				       le32_to_cpu(desc[i].token),
   1436				       is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
   1437		if (ret < 0) {
   1438			pr_err("failed to init rdma_rw_ctx: %d\n", ret);
   1439			sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
   1440			kfree(msg);
   1441			goto out;
   1442		}
   1443
   1444		list_add_tail(&msg->list, &msg_list);
   1445		desc_buf += desc_buf_len;
   1446	}
   1447
   1448	/* concatenate work requests of rdma_rw_ctxs */
   1449	first_wr = NULL;
   1450	list_for_each_entry_reverse(msg, &msg_list, list) {
   1451		first_wr = rdma_rw_ctx_wrs(&msg->rw_ctx, t->qp, t->qp->port,
   1452					   &msg->cqe, first_wr);
   1453	}
   1454
   1455	ret = ib_post_send(t->qp, first_wr, NULL);
   1456	if (ret) {
   1457		pr_err("failed to post send wr for RDMA R/W: %d\n", ret);
   1458		goto out;
   1459	}
   1460
   1461	msg = list_last_entry(&msg_list, struct smb_direct_rdma_rw_msg, list);
   1462	wait_for_completion(&completion);
   1463	ret = msg->status;
   1464out:
   1465	list_for_each_entry_safe(msg, next_msg, &msg_list, list) {
   1466		list_del(&msg->list);
   1467		smb_direct_free_rdma_rw_msg(t, msg,
   1468					    is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
   1469	}
   1470	atomic_add(credits_needed, &t->rw_credits);
   1471	wake_up(&t->wait_rw_credits);
   1472	return ret;
   1473}
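
Each smb2_buffer_desc_v1 handed to smb_direct_rdma_xmit() names a remotely registered region; the function wraps the matching local pages in an rdma_rw_ctx per descriptor, chains all the work requests into one list, posts them once, waits for completion, and then returns the R/W credits. A sketch of a single descriptor with made-up values:

/*
 * desc[i].offset = 0x00007f32a1c00000	remote address passed to rdma_rw_ctx_init()
 * desc[i].token  = 0x00001a2b		remote key (rkey) for that registration
 * desc[i].length = 0x00100000		bytes to RDMA read or write (1 MiB)
 *
 * calc_rw_credits() is charged against the *local* buffer backing the
 * transfer; the remote side is described entirely by the triple above.
 */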
   1474
   1475static int smb_direct_rdma_write(struct ksmbd_transport *t,
   1476				 void *buf, unsigned int buflen,
   1477				 struct smb2_buffer_desc_v1 *desc,
   1478				 unsigned int desc_len)
   1479{
   1480	return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen,
   1481				    desc, desc_len, false);
   1482}
   1483
   1484static int smb_direct_rdma_read(struct ksmbd_transport *t,
   1485				void *buf, unsigned int buflen,
   1486				struct smb2_buffer_desc_v1 *desc,
   1487				unsigned int desc_len)
   1488{
   1489	return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen,
   1490				    desc, desc_len, true);
   1491}
   1492
   1493static void smb_direct_disconnect(struct ksmbd_transport *t)
   1494{
   1495	struct smb_direct_transport *st = smb_trans_direct_transfort(t);
   1496
   1497	ksmbd_debug(RDMA, "Disconnecting cm_id=%p\n", st->cm_id);
   1498
   1499	smb_direct_disconnect_rdma_work(&st->disconnect_work);
   1500	wait_event_interruptible(st->wait_status,
   1501				 st->status == SMB_DIRECT_CS_DISCONNECTED);
   1502	free_transport(st);
   1503}
   1504
   1505static void smb_direct_shutdown(struct ksmbd_transport *t)
   1506{
   1507	struct smb_direct_transport *st = smb_trans_direct_transfort(t);
   1508
   1509	ksmbd_debug(RDMA, "smb-direct shutdown cm_id=%p\n", st->cm_id);
   1510
   1511	smb_direct_disconnect_rdma_work(&st->disconnect_work);
   1512}
   1513
   1514static int smb_direct_cm_handler(struct rdma_cm_id *cm_id,
   1515				 struct rdma_cm_event *event)
   1516{
   1517	struct smb_direct_transport *t = cm_id->context;
   1518
   1519	ksmbd_debug(RDMA, "RDMA CM event. cm_id=%p event=%s (%d)\n",
   1520		    cm_id, rdma_event_msg(event->event), event->event);
   1521
   1522	switch (event->event) {
   1523	case RDMA_CM_EVENT_ESTABLISHED: {
   1524		t->status = SMB_DIRECT_CS_CONNECTED;
   1525		wake_up_interruptible(&t->wait_status);
   1526		break;
   1527	}
   1528	case RDMA_CM_EVENT_DEVICE_REMOVAL:
   1529	case RDMA_CM_EVENT_DISCONNECTED: {
   1530		t->status = SMB_DIRECT_CS_DISCONNECTED;
   1531		wake_up_interruptible(&t->wait_status);
   1532		wake_up_interruptible(&t->wait_reassembly_queue);
   1533		wake_up(&t->wait_send_credits);
   1534		break;
   1535	}
   1536	case RDMA_CM_EVENT_CONNECT_ERROR: {
   1537		t->status = SMB_DIRECT_CS_DISCONNECTED;
   1538		wake_up_interruptible(&t->wait_status);
   1539		break;
   1540	}
   1541	default:
   1542		pr_err("Unexpected RDMA CM event. cm_id=%p, event=%s (%d)\n",
   1543		       cm_id, rdma_event_msg(event->event),
   1544		       event->event);
   1545		break;
   1546	}
   1547	return 0;
   1548}
   1549
   1550static void smb_direct_qpair_handler(struct ib_event *event, void *context)
   1551{
   1552	struct smb_direct_transport *t = context;
   1553
   1554	ksmbd_debug(RDMA, "Received QP event. cm_id=%p, event=%s (%d)\n",
   1555		    t->cm_id, ib_event_msg(event->event), event->event);
   1556
   1557	switch (event->event) {
   1558	case IB_EVENT_CQ_ERR:
   1559	case IB_EVENT_QP_FATAL:
   1560		smb_direct_disconnect_rdma_connection(t);
   1561		break;
   1562	default:
   1563		break;
   1564	}
   1565}
   1566
   1567static int smb_direct_send_negotiate_response(struct smb_direct_transport *t,
   1568					      int failed)
   1569{
   1570	struct smb_direct_sendmsg *sendmsg;
   1571	struct smb_direct_negotiate_resp *resp;
   1572	int ret;
   1573
   1574	sendmsg = smb_direct_alloc_sendmsg(t);
   1575	if (IS_ERR(sendmsg))
   1576		return -ENOMEM;
   1577
   1578	resp = (struct smb_direct_negotiate_resp *)sendmsg->packet;
   1579	if (failed) {
   1580		memset(resp, 0, sizeof(*resp));
   1581		resp->min_version = cpu_to_le16(0x0100);
   1582		resp->max_version = cpu_to_le16(0x0100);
   1583		resp->status = STATUS_NOT_SUPPORTED;
   1584	} else {
   1585		resp->status = STATUS_SUCCESS;
   1586		resp->min_version = SMB_DIRECT_VERSION_LE;
   1587		resp->max_version = SMB_DIRECT_VERSION_LE;
   1588		resp->negotiated_version = SMB_DIRECT_VERSION_LE;
   1589		resp->reserved = 0;
   1590		resp->credits_requested =
   1591				cpu_to_le16(t->send_credit_target);
   1592		resp->credits_granted = cpu_to_le16(manage_credits_prior_sending(t));
   1593		resp->max_readwrite_size = cpu_to_le32(t->max_rdma_rw_size);
   1594		resp->preferred_send_size = cpu_to_le32(t->max_send_size);
   1595		resp->max_receive_size = cpu_to_le32(t->max_recv_size);
   1596		resp->max_fragmented_size =
   1597				cpu_to_le32(t->max_fragmented_recv_size);
   1598	}
   1599
   1600	sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device,
   1601						 (void *)resp, sizeof(*resp),
   1602						 DMA_TO_DEVICE);
   1603	ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr);
   1604	if (ret) {
   1605		smb_direct_free_sendmsg(t, sendmsg);
   1606		return ret;
   1607	}
   1608
   1609	sendmsg->num_sge = 1;
   1610	sendmsg->sge[0].length = sizeof(*resp);
   1611	sendmsg->sge[0].lkey = t->pd->local_dma_lkey;
   1612
   1613	ret = post_sendmsg(t, NULL, sendmsg);
   1614	if (ret) {
   1615		smb_direct_free_sendmsg(t, sendmsg);
   1616		return ret;
   1617	}
   1618
   1619	wait_event(t->wait_send_pending,
   1620		   atomic_read(&t->send_pending) == 0);
   1621	return 0;
   1622}
   1623
   1624static int smb_direct_accept_client(struct smb_direct_transport *t)
   1625{
   1626	struct rdma_conn_param conn_param;
   1627	struct ib_port_immutable port_immutable;
   1628	u32 ird_ord_hdr[2];
   1629	int ret;
   1630
   1631	memset(&conn_param, 0, sizeof(conn_param));
   1632	conn_param.initiator_depth = min_t(u8, t->cm_id->device->attrs.max_qp_rd_atom,
   1633					   SMB_DIRECT_CM_INITIATOR_DEPTH);
   1634	conn_param.responder_resources = 0;
   1635
   1636	t->cm_id->device->ops.get_port_immutable(t->cm_id->device,
   1637						 t->cm_id->port_num,
   1638						 &port_immutable);
   1639	if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) {
   1640		ird_ord_hdr[0] = conn_param.responder_resources;
   1641		ird_ord_hdr[1] = 1;
   1642		conn_param.private_data = ird_ord_hdr;
   1643		conn_param.private_data_len = sizeof(ird_ord_hdr);
   1644	} else {
   1645		conn_param.private_data = NULL;
   1646		conn_param.private_data_len = 0;
   1647	}
   1648	conn_param.retry_count = SMB_DIRECT_CM_RETRY;
   1649	conn_param.rnr_retry_count = SMB_DIRECT_CM_RNR_RETRY;
   1650	conn_param.flow_control = 0;
   1651
   1652	ret = rdma_accept(t->cm_id, &conn_param);
   1653	if (ret) {
   1654		pr_err("error at rdma_accept: %d\n", ret);
   1655		return ret;
   1656	}
   1657	return 0;
   1658}
   1659
   1660static int smb_direct_prepare_negotiation(struct smb_direct_transport *t)
   1661{
   1662	int ret;
   1663	struct smb_direct_recvmsg *recvmsg;
   1664
   1665	recvmsg = get_free_recvmsg(t);
   1666	if (!recvmsg)
   1667		return -ENOMEM;
   1668	recvmsg->type = SMB_DIRECT_MSG_NEGOTIATE_REQ;
   1669
   1670	ret = smb_direct_post_recv(t, recvmsg);
   1671	if (ret) {
   1672		pr_err("Can't post recv: %d\n", ret);
   1673		goto out_err;
   1674	}
   1675
   1676	t->negotiation_requested = false;
   1677	ret = smb_direct_accept_client(t);
   1678	if (ret) {
   1679		pr_err("Can't accept client\n");
   1680		goto out_err;
   1681	}
   1682
   1683	smb_direct_post_recv_credits(&t->post_recv_credits_work.work);
   1684	return 0;
   1685out_err:
   1686	put_recvmsg(t, recvmsg);
   1687	return ret;
   1688}
   1689
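        /* Number of pages one fast-registration MR may cover, capped at 256. */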
   1690static unsigned int smb_direct_get_max_fr_pages(struct smb_direct_transport *t)
   1691{
   1692	return min_t(unsigned int,
   1693		     t->cm_id->device->attrs.max_fast_reg_page_list_len,
   1694		     256);
   1695}
   1696
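        /*
         * Derive the per-connection send/receive sizes, credit limits and
         * RDMA R/W credits from the module parameters and the device
         * attributes, and fill in the QP capabilities accordingly.
         */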
   1697static int smb_direct_init_params(struct smb_direct_transport *t,
   1698				  struct ib_qp_cap *cap)
   1699{
   1700	struct ib_device *device = t->cm_id->device;
   1701	int max_send_sges, max_rw_wrs, max_send_wrs;
   1702	unsigned int max_sge_per_wr, wrs_per_credit;
   1703
    1704	/* We need 3 extra SGEs because the SMB_DIRECT header, the SMB2
    1705	 * header and the SMB2 response may each be mapped separately.
    1706	 */
   1707	t->max_send_size = smb_direct_max_send_size;
   1708	max_send_sges = DIV_ROUND_UP(t->max_send_size, PAGE_SIZE) + 3;
   1709	if (max_send_sges > SMB_DIRECT_MAX_SEND_SGES) {
   1710		pr_err("max_send_size %d is too large\n", t->max_send_size);
   1711		return -EINVAL;
   1712	}
   1713
    1714	/* Calculate the number of work requests needed for RDMA R/W.
    1715	 * One R/W credit covers at most the number of pages that can
    1716	 * be registered with a single memory region, so that is how
    1717	 * much data a single credit can transfer. Each credit needs
    1718	 * at least 4 work requests: MR registration, the RDMA R/W
    1719	 * itself, and local & remote MR invalidation.
    1720	 */
   1721	t->max_rdma_rw_size = smb_direct_max_read_write_size;
   1722	t->pages_per_rw_credit = smb_direct_get_max_fr_pages(t);
   1723	t->max_rw_credits = DIV_ROUND_UP(t->max_rdma_rw_size,
   1724					 (t->pages_per_rw_credit - 1) *
   1725					 PAGE_SIZE);
   1726
   1727	max_sge_per_wr = min_t(unsigned int, device->attrs.max_send_sge,
   1728			       device->attrs.max_sge_rd);
   1729	max_sge_per_wr = max_t(unsigned int, max_sge_per_wr,
   1730			       max_send_sges);
   1731	wrs_per_credit = max_t(unsigned int, 4,
   1732			       DIV_ROUND_UP(t->pages_per_rw_credit,
   1733					    max_sge_per_wr) + 1);
   1734	max_rw_wrs = t->max_rw_credits * wrs_per_credit;
   1735
   1736	max_send_wrs = smb_direct_send_credit_target + max_rw_wrs;
   1737	if (max_send_wrs > device->attrs.max_cqe ||
   1738	    max_send_wrs > device->attrs.max_qp_wr) {
   1739		pr_err("consider lowering send_credit_target = %d\n",
   1740		       smb_direct_send_credit_target);
   1741		pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
   1742		       device->attrs.max_cqe, device->attrs.max_qp_wr);
   1743		return -EINVAL;
   1744	}
   1745
   1746	if (smb_direct_receive_credit_max > device->attrs.max_cqe ||
   1747	    smb_direct_receive_credit_max > device->attrs.max_qp_wr) {
   1748		pr_err("consider lowering receive_credit_max = %d\n",
   1749		       smb_direct_receive_credit_max);
    1750		pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
   1751		       device->attrs.max_cqe, device->attrs.max_qp_wr);
   1752		return -EINVAL;
   1753	}
   1754
   1755	if (device->attrs.max_recv_sge < SMB_DIRECT_MAX_RECV_SGES) {
    1756		pr_err("device max_recv_sge = %d is too small\n",
   1757		       device->attrs.max_recv_sge);
   1758		return -EINVAL;
   1759	}
   1760
   1761	t->recv_credits = 0;
   1762	t->count_avail_recvmsg = 0;
   1763
   1764	t->recv_credit_max = smb_direct_receive_credit_max;
   1765	t->recv_credit_target = 10;
   1766	t->new_recv_credits = 0;
   1767
   1768	t->send_credit_target = smb_direct_send_credit_target;
   1769	atomic_set(&t->send_credits, 0);
   1770	atomic_set(&t->rw_credits, t->max_rw_credits);
   1771
   1772	t->max_send_size = smb_direct_max_send_size;
   1773	t->max_recv_size = smb_direct_max_receive_size;
   1774	t->max_fragmented_recv_size = smb_direct_max_fragmented_recv_size;
   1775
   1776	cap->max_send_wr = max_send_wrs;
   1777	cap->max_recv_wr = t->recv_credit_max;
   1778	cap->max_send_sge = max_sge_per_wr;
   1779	cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES;
   1780	cap->max_inline_data = 0;
   1781	cap->max_rdma_ctxs = t->max_rw_credits;
   1782	return 0;
   1783}
   1784
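        /* Release all queued receive buffers and tear down the message pools. */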
   1785static void smb_direct_destroy_pools(struct smb_direct_transport *t)
   1786{
   1787	struct smb_direct_recvmsg *recvmsg;
   1788
   1789	while ((recvmsg = get_free_recvmsg(t)))
   1790		mempool_free(recvmsg, t->recvmsg_mempool);
   1791	while ((recvmsg = get_empty_recvmsg(t)))
   1792		mempool_free(recvmsg, t->recvmsg_mempool);
   1793
   1794	mempool_destroy(t->recvmsg_mempool);
   1795	t->recvmsg_mempool = NULL;
   1796
   1797	kmem_cache_destroy(t->recvmsg_cache);
   1798	t->recvmsg_cache = NULL;
   1799
   1800	mempool_destroy(t->sendmsg_mempool);
   1801	t->sendmsg_mempool = NULL;
   1802
   1803	kmem_cache_destroy(t->sendmsg_cache);
   1804	t->sendmsg_cache = NULL;
   1805}
   1806
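        /*
         * Create the slab caches and mempools for send/receive messages and
         * pre-allocate recv_credit_max receive buffers onto the free queue.
         */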
   1807static int smb_direct_create_pools(struct smb_direct_transport *t)
   1808{
   1809	char name[80];
   1810	int i;
   1811	struct smb_direct_recvmsg *recvmsg;
   1812
   1813	snprintf(name, sizeof(name), "smb_direct_rqst_pool_%p", t);
   1814	t->sendmsg_cache = kmem_cache_create(name,
   1815					     sizeof(struct smb_direct_sendmsg) +
   1816					      sizeof(struct smb_direct_negotiate_resp),
   1817					     0, SLAB_HWCACHE_ALIGN, NULL);
   1818	if (!t->sendmsg_cache)
   1819		return -ENOMEM;
   1820
   1821	t->sendmsg_mempool = mempool_create(t->send_credit_target,
   1822					    mempool_alloc_slab, mempool_free_slab,
   1823					    t->sendmsg_cache);
   1824	if (!t->sendmsg_mempool)
   1825		goto err;
   1826
   1827	snprintf(name, sizeof(name), "smb_direct_resp_%p", t);
   1828	t->recvmsg_cache = kmem_cache_create(name,
   1829					     sizeof(struct smb_direct_recvmsg) +
   1830					      t->max_recv_size,
   1831					     0, SLAB_HWCACHE_ALIGN, NULL);
   1832	if (!t->recvmsg_cache)
   1833		goto err;
   1834
   1835	t->recvmsg_mempool =
   1836		mempool_create(t->recv_credit_max, mempool_alloc_slab,
   1837			       mempool_free_slab, t->recvmsg_cache);
   1838	if (!t->recvmsg_mempool)
   1839		goto err;
   1840
   1841	INIT_LIST_HEAD(&t->recvmsg_queue);
   1842
   1843	for (i = 0; i < t->recv_credit_max; i++) {
   1844		recvmsg = mempool_alloc(t->recvmsg_mempool, GFP_KERNEL);
   1845		if (!recvmsg)
   1846			goto err;
   1847		recvmsg->transport = t;
   1848		list_add(&recvmsg->list, &t->recvmsg_queue);
   1849	}
   1850	t->count_avail_recvmsg = t->recv_credit_max;
   1851
   1852	return 0;
   1853err:
   1854	smb_direct_destroy_pools(t);
   1855	return -ENOMEM;
   1856}
   1857
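        /*
         * Allocate the protection domain, the send/receive completion queues
         * and the RC queue pair. If a maximum-sized RDMA R/W needs more pages
         * than the device's max_sgl_rd allows, an MR pool for fast
         * registration is created as well.
         */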
   1858static int smb_direct_create_qpair(struct smb_direct_transport *t,
   1859				   struct ib_qp_cap *cap)
   1860{
   1861	int ret;
   1862	struct ib_qp_init_attr qp_attr;
   1863	int pages_per_rw;
   1864
   1865	t->pd = ib_alloc_pd(t->cm_id->device, 0);
   1866	if (IS_ERR(t->pd)) {
   1867		pr_err("Can't create RDMA PD\n");
   1868		ret = PTR_ERR(t->pd);
   1869		t->pd = NULL;
   1870		return ret;
   1871	}
   1872
   1873	t->send_cq = ib_alloc_cq(t->cm_id->device, t,
   1874				 smb_direct_send_credit_target + cap->max_rdma_ctxs,
   1875				 0, IB_POLL_WORKQUEUE);
   1876	if (IS_ERR(t->send_cq)) {
   1877		pr_err("Can't create RDMA send CQ\n");
   1878		ret = PTR_ERR(t->send_cq);
   1879		t->send_cq = NULL;
   1880		goto err;
   1881	}
   1882
   1883	t->recv_cq = ib_alloc_cq(t->cm_id->device, t,
   1884				 t->recv_credit_max, 0, IB_POLL_WORKQUEUE);
   1885	if (IS_ERR(t->recv_cq)) {
   1886		pr_err("Can't create RDMA recv CQ\n");
   1887		ret = PTR_ERR(t->recv_cq);
   1888		t->recv_cq = NULL;
   1889		goto err;
   1890	}
   1891
   1892	memset(&qp_attr, 0, sizeof(qp_attr));
   1893	qp_attr.event_handler = smb_direct_qpair_handler;
   1894	qp_attr.qp_context = t;
   1895	qp_attr.cap = *cap;
   1896	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
   1897	qp_attr.qp_type = IB_QPT_RC;
   1898	qp_attr.send_cq = t->send_cq;
   1899	qp_attr.recv_cq = t->recv_cq;
   1900	qp_attr.port_num = ~0;
   1901
   1902	ret = rdma_create_qp(t->cm_id, t->pd, &qp_attr);
   1903	if (ret) {
   1904		pr_err("Can't create RDMA QP: %d\n", ret);
   1905		goto err;
   1906	}
   1907
   1908	t->qp = t->cm_id->qp;
   1909	t->cm_id->event_handler = smb_direct_cm_handler;
   1910
   1911	pages_per_rw = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1;
   1912	if (pages_per_rw > t->cm_id->device->attrs.max_sgl_rd) {
   1913		ret = ib_mr_pool_init(t->qp, &t->qp->rdma_mrs,
   1914				      t->max_rw_credits, IB_MR_TYPE_MEM_REG,
   1915				      t->pages_per_rw_credit, 0);
   1916		if (ret) {
   1917			pr_err("failed to init mr pool count %d pages %d\n",
   1918			       t->max_rw_credits, t->pages_per_rw_credit);
   1919			goto err;
   1920		}
   1921	}
   1922
   1923	return 0;
   1924err:
   1925	if (t->qp) {
   1926		ib_destroy_qp(t->qp);
   1927		t->qp = NULL;
   1928	}
   1929	if (t->recv_cq) {
   1930		ib_destroy_cq(t->recv_cq);
   1931		t->recv_cq = NULL;
   1932	}
   1933	if (t->send_cq) {
   1934		ib_destroy_cq(t->send_cq);
   1935		t->send_cq = NULL;
   1936	}
   1937	if (t->pd) {
   1938		ib_dealloc_pd(t->pd);
   1939		t->pd = NULL;
   1940	}
   1941	return ret;
   1942}
   1943
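        /*
         * The transport's ->prepare callback: wait for the peer's negotiate
         * request, adopt the negotiated sizes and send the negotiate
         * response.
         */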
   1944static int smb_direct_prepare(struct ksmbd_transport *t)
   1945{
   1946	struct smb_direct_transport *st = smb_trans_direct_transfort(t);
   1947	struct smb_direct_recvmsg *recvmsg;
   1948	struct smb_direct_negotiate_req *req;
   1949	int ret;
   1950
   1951	ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
   1952	ret = wait_event_interruptible_timeout(st->wait_status,
   1953					       st->negotiation_requested ||
   1954					       st->status == SMB_DIRECT_CS_DISCONNECTED,
   1955					       SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ);
   1956	if (ret <= 0 || st->status == SMB_DIRECT_CS_DISCONNECTED)
   1957		return ret < 0 ? ret : -ETIMEDOUT;
   1958
   1959	recvmsg = get_first_reassembly(st);
   1960	if (!recvmsg)
   1961		return -ECONNABORTED;
   1962
   1963	ret = smb_direct_check_recvmsg(recvmsg);
   1964	if (ret == -ECONNABORTED)
   1965		goto out;
   1966
   1967	req = (struct smb_direct_negotiate_req *)recvmsg->packet;
   1968	st->max_recv_size = min_t(int, st->max_recv_size,
   1969				  le32_to_cpu(req->preferred_send_size));
   1970	st->max_send_size = min_t(int, st->max_send_size,
   1971				  le32_to_cpu(req->max_receive_size));
   1972	st->max_fragmented_send_size =
   1973		le32_to_cpu(req->max_fragmented_size);
   1974	st->max_fragmented_recv_size =
   1975		(st->recv_credit_max * st->max_recv_size) / 2;
   1976
   1977	ret = smb_direct_send_negotiate_response(st, ret);
   1978out:
   1979	spin_lock_irq(&st->reassembly_queue_lock);
   1980	st->reassembly_queue_length--;
   1981	list_del(&recvmsg->list);
   1982	spin_unlock_irq(&st->reassembly_queue_lock);
   1983	put_recvmsg(st, recvmsg);
   1984
   1985	return ret;
   1986}
   1987
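        /*
         * Bring a new connection up: initialize the transport parameters,
         * create the message pools and the queue pair, then start the
         * SMB_DIRECT negotiation.
         */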
   1988static int smb_direct_connect(struct smb_direct_transport *st)
   1989{
   1990	int ret;
   1991	struct ib_qp_cap qp_cap;
   1992
   1993	ret = smb_direct_init_params(st, &qp_cap);
   1994	if (ret) {
   1995		pr_err("Can't configure RDMA parameters\n");
   1996		return ret;
   1997	}
   1998
   1999	ret = smb_direct_create_pools(st);
   2000	if (ret) {
   2001		pr_err("Can't init RDMA pool: %d\n", ret);
   2002		return ret;
   2003	}
   2004
   2005	ret = smb_direct_create_qpair(st, &qp_cap);
   2006	if (ret) {
   2007		pr_err("Can't accept RDMA client: %d\n", ret);
   2008		return ret;
   2009	}
   2010
   2011	ret = smb_direct_prepare_negotiation(st);
   2012	if (ret) {
   2013		pr_err("Can't negotiate: %d\n", ret);
   2014		return ret;
   2015	}
   2016	return 0;
   2017}
   2018
   2019static bool rdma_frwr_is_supported(struct ib_device_attr *attrs)
   2020{
   2021	if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
   2022		return false;
   2023	if (attrs->max_fast_reg_page_list_len == 0)
   2024		return false;
   2025	return true;
   2026}
   2027
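        /*
         * Handle RDMA_CM_EVENT_CONNECT_REQUEST: allocate a transport for the
         * new cm_id, connect it and spawn the per-connection handler thread.
         */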
   2028static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
   2029{
   2030	struct smb_direct_transport *t;
   2031	int ret;
   2032
   2033	if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) {
   2034		ksmbd_debug(RDMA,
    2035			    "Fast Registration Work Requests are not supported. device capabilities=%llx\n",
   2036			    new_cm_id->device->attrs.device_cap_flags);
   2037		return -EPROTONOSUPPORT;
   2038	}
   2039
   2040	t = alloc_transport(new_cm_id);
   2041	if (!t)
   2042		return -ENOMEM;
   2043
   2044	ret = smb_direct_connect(t);
   2045	if (ret)
   2046		goto out_err;
   2047
   2048	KSMBD_TRANS(t)->handler = kthread_run(ksmbd_conn_handler_loop,
   2049					      KSMBD_TRANS(t)->conn, "ksmbd:r%u",
   2050					      smb_direct_port);
   2051	if (IS_ERR(KSMBD_TRANS(t)->handler)) {
   2052		ret = PTR_ERR(KSMBD_TRANS(t)->handler);
   2053		pr_err("Can't start thread\n");
   2054		goto out_err;
   2055	}
   2056
   2057	return 0;
   2058out_err:
   2059	free_transport(t);
   2060	return ret;
   2061}
   2062
   2063static int smb_direct_listen_handler(struct rdma_cm_id *cm_id,
   2064				     struct rdma_cm_event *event)
   2065{
   2066	switch (event->event) {
   2067	case RDMA_CM_EVENT_CONNECT_REQUEST: {
   2068		int ret = smb_direct_handle_connect_request(cm_id);
   2069
   2070		if (ret) {
   2071			pr_err("Can't create transport: %d\n", ret);
   2072			return ret;
   2073		}
   2074
   2075		ksmbd_debug(RDMA, "Received connection request. cm_id=%p\n",
   2076			    cm_id);
   2077		break;
   2078	}
   2079	default:
   2080		pr_err("Unexpected listen event. cm_id=%p, event=%s (%d)\n",
   2081		       cm_id, rdma_event_msg(event->event), event->event);
   2082		break;
   2083	}
   2084	return 0;
   2085}
   2086
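        /*
         * Create a listening cm_id bound to INADDR_ANY on @port and start
         * listening for SMB_DIRECT connection requests.
         */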
   2087static int smb_direct_listen(int port)
   2088{
   2089	int ret;
   2090	struct rdma_cm_id *cm_id;
   2091	struct sockaddr_in sin = {
   2092		.sin_family		= AF_INET,
   2093		.sin_addr.s_addr	= htonl(INADDR_ANY),
   2094		.sin_port		= htons(port),
   2095	};
   2096
   2097	cm_id = rdma_create_id(&init_net, smb_direct_listen_handler,
   2098			       &smb_direct_listener, RDMA_PS_TCP, IB_QPT_RC);
   2099	if (IS_ERR(cm_id)) {
   2100		pr_err("Can't create cm id: %ld\n", PTR_ERR(cm_id));
   2101		return PTR_ERR(cm_id);
   2102	}
   2103
   2104	ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
   2105	if (ret) {
   2106		pr_err("Can't bind: %d\n", ret);
   2107		goto err;
   2108	}
   2109
   2110	smb_direct_listener.cm_id = cm_id;
   2111
   2112	ret = rdma_listen(cm_id, 10);
   2113	if (ret) {
   2114		pr_err("Can't listen: %d\n", ret);
   2115		goto err;
   2116	}
   2117	return 0;
   2118err:
   2119	smb_direct_listener.cm_id = NULL;
   2120	rdma_destroy_id(cm_id);
   2121	return ret;
   2122}
   2123
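        /*
         * ib_client add callback: switch to the iWARP port for non-IB devices
         * and track every FRWR-capable device that exposes a netdev so that
         * ksmbd_rdma_capable_netdev() can match interfaces against it later.
         */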
   2124static int smb_direct_ib_client_add(struct ib_device *ib_dev)
   2125{
   2126	struct smb_direct_device *smb_dev;
   2127
    2128	/* Use port 5445 if the device type is iWARP (not InfiniBand) */
   2129	if (ib_dev->node_type != RDMA_NODE_IB_CA)
   2130		smb_direct_port = SMB_DIRECT_PORT_IWARP;
   2131
   2132	if (!ib_dev->ops.get_netdev ||
   2133	    !rdma_frwr_is_supported(&ib_dev->attrs))
   2134		return 0;
   2135
   2136	smb_dev = kzalloc(sizeof(*smb_dev), GFP_KERNEL);
   2137	if (!smb_dev)
   2138		return -ENOMEM;
   2139	smb_dev->ib_dev = ib_dev;
   2140
   2141	write_lock(&smb_direct_device_lock);
   2142	list_add(&smb_dev->list, &smb_direct_device_list);
   2143	write_unlock(&smb_direct_device_lock);
   2144
   2145	ksmbd_debug(RDMA, "ib device added: name %s\n", ib_dev->name);
   2146	return 0;
   2147}
   2148
   2149static void smb_direct_ib_client_remove(struct ib_device *ib_dev,
   2150					void *client_data)
   2151{
   2152	struct smb_direct_device *smb_dev, *tmp;
   2153
   2154	write_lock(&smb_direct_device_lock);
   2155	list_for_each_entry_safe(smb_dev, tmp, &smb_direct_device_list, list) {
   2156		if (smb_dev->ib_dev == ib_dev) {
   2157			list_del(&smb_dev->list);
   2158			kfree(smb_dev);
   2159			break;
   2160		}
   2161	}
   2162	write_unlock(&smb_direct_device_lock);
   2163}
   2164
   2165static struct ib_client smb_direct_ib_client = {
   2166	.name	= "ksmbd_smb_direct_ib",
   2167	.add	= smb_direct_ib_client_add,
   2168	.remove	= smb_direct_ib_client_remove,
   2169};
   2170
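        /*
         * Register the ib_client, create the send workqueue and start
         * listening on the SMB_DIRECT port.
         */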
   2171int ksmbd_rdma_init(void)
   2172{
   2173	int ret;
   2174
   2175	smb_direct_listener.cm_id = NULL;
   2176
   2177	ret = ib_register_client(&smb_direct_ib_client);
   2178	if (ret) {
    2179		pr_err("ib_register_client failed: %d\n", ret);
   2180		return ret;
   2181	}
   2182
    2183	/* When a client is running out of send credits, the server
    2184	 * grants credits by sending a packet through this workqueue.
    2185	 * This avoids the situation where a client cannot send packets
    2186	 * for lack of credits.
    2187	 */
   2188	smb_direct_wq = alloc_workqueue("ksmbd-smb_direct-wq",
   2189					WQ_HIGHPRI | WQ_MEM_RECLAIM, 0);
   2190	if (!smb_direct_wq)
   2191		return -ENOMEM;
   2192
   2193	ret = smb_direct_listen(smb_direct_port);
   2194	if (ret) {
   2195		destroy_workqueue(smb_direct_wq);
   2196		smb_direct_wq = NULL;
   2197		pr_err("Can't listen: %d\n", ret);
   2198		return ret;
   2199	}
   2200
   2201	ksmbd_debug(RDMA, "init RDMA listener. cm_id=%p\n",
   2202		    smb_direct_listener.cm_id);
   2203	return 0;
   2204}
   2205
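        /* Tear down the RDMA listener, the ib_client and the send workqueue. */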
   2206void ksmbd_rdma_destroy(void)
   2207{
   2208	if (!smb_direct_listener.cm_id)
   2209		return;
   2210
   2211	ib_unregister_client(&smb_direct_ib_client);
   2212	rdma_destroy_id(smb_direct_listener.cm_id);
   2213
   2214	smb_direct_listener.cm_id = NULL;
   2215
   2216	if (smb_direct_wq) {
   2217		destroy_workqueue(smb_direct_wq);
   2218		smb_direct_wq = NULL;
   2219	}
   2220}
   2221
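        /*
         * Return true if @netdev belongs to an RDMA device that supports
         * FRWR, either via the tracked device list or via
         * ib_device_get_by_netdev().
         */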
   2222bool ksmbd_rdma_capable_netdev(struct net_device *netdev)
   2223{
   2224	struct smb_direct_device *smb_dev;
   2225	int i;
   2226	bool rdma_capable = false;
   2227
   2228	read_lock(&smb_direct_device_lock);
   2229	list_for_each_entry(smb_dev, &smb_direct_device_list, list) {
   2230		for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
   2231			struct net_device *ndev;
   2232
   2233			ndev = smb_dev->ib_dev->ops.get_netdev(smb_dev->ib_dev,
   2234							       i + 1);
   2235			if (!ndev)
   2236				continue;
   2237
   2238			if (ndev == netdev) {
   2239				dev_put(ndev);
   2240				rdma_capable = true;
   2241				goto out;
   2242			}
   2243			dev_put(ndev);
   2244		}
   2245	}
   2246out:
   2247	read_unlock(&smb_direct_device_lock);
   2248
    2249	if (!rdma_capable) {
   2250		struct ib_device *ibdev;
   2251
   2252		ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
   2253		if (ibdev) {
   2254			if (rdma_frwr_is_supported(&ibdev->attrs))
   2255				rdma_capable = true;
   2256			ib_device_put(ibdev);
   2257		}
   2258	}
   2259
   2260	return rdma_capable;
   2261}
   2262
   2263static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
   2264	.prepare	= smb_direct_prepare,
   2265	.disconnect	= smb_direct_disconnect,
   2266	.shutdown	= smb_direct_shutdown,
   2267	.writev		= smb_direct_writev,
   2268	.read		= smb_direct_read,
   2269	.rdma_read	= smb_direct_rdma_read,
   2270	.rdma_write	= smb_direct_rdma_write,
   2271};