cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

trans_rdma.c (19060B)


// SPDX-License-Identifier: GPL-2.0-only
/*
 * RDMA transport layer based on the trans_fd.c implementation.
 *
 *  Copyright (C) 2008 by Tom Tucker <tom@opengridcomputing.com>
 *  Copyright (C) 2006 by Russ Cox <rsc@swtch.com>
 *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
 *  Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com>
 *  Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/in.h>
#include <linux/module.h>
#include <linux/net.h>
#include <linux/ipv6.h>
#include <linux/kthread.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/un.h>
#include <linux/uaccess.h>
#include <linux/inet.h>
#include <linux/idr.h>
#include <linux/file.h>
#include <linux/parser.h>
#include <linux/semaphore.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
#include <net/9p/transport.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>

#define P9_PORT			5640
#define P9_RDMA_SQ_DEPTH	32
#define P9_RDMA_RQ_DEPTH	32
#define P9_RDMA_SEND_SGE	4
#define P9_RDMA_RECV_SGE	4
#define P9_RDMA_IRD		0
#define P9_RDMA_ORD		0
#define P9_RDMA_TIMEOUT		30000		/* 30 seconds */
#define P9_RDMA_MAXSIZE		(1024*1024)	/* 1MB */

/**
 * struct p9_trans_rdma - RDMA transport instance
 *
 * @state: tracks the transport state machine for connection setup and tear down
 * @cm_id: The RDMA CM ID
 * @pd: Protection Domain pointer
 * @qp: Queue Pair pointer
 * @cq: Completion Queue pointer
 * @timeout: Number of msecs to wait for connection management events
 * @privport: Whether a privileged port may be used
 * @port: The port to use
 * @sq_depth: The depth of the Send Queue
 * @sq_sem: Semaphore for the SQ
 * @rq_depth: The depth of the Receive Queue.
 * @rq_sem: Semaphore for the RQ
 * @excess_rc: Amount of posted Receive Contexts without a pending request.
 *		See rdma_request()
 * @addr: The remote peer's address
 * @req_lock: Protects the active request list
 * @cm_done: Completion event for connection management tracking
 */
struct p9_trans_rdma {
	enum {
		P9_RDMA_INIT,
		P9_RDMA_ADDR_RESOLVED,
		P9_RDMA_ROUTE_RESOLVED,
		P9_RDMA_CONNECTED,
		P9_RDMA_FLUSHING,
		P9_RDMA_CLOSING,
		P9_RDMA_CLOSED,
	} state;
	struct rdma_cm_id *cm_id;
	struct ib_pd *pd;
	struct ib_qp *qp;
	struct ib_cq *cq;
	long timeout;
	bool privport;
	u16 port;
	int sq_depth;
	struct semaphore sq_sem;
	int rq_depth;
	struct semaphore rq_sem;
	atomic_t excess_rc;
	struct sockaddr_in addr;
	spinlock_t req_lock;

	struct completion cm_done;
};
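
/*
 * Normal connection setup walks the state machine above in order, driven by
 * p9_cm_event_handler(): P9_RDMA_INIT -> P9_RDMA_ADDR_RESOLVED ->
 * P9_RDMA_ROUTE_RESOLVED -> P9_RDMA_CONNECTED. Completion errors move the
 * transport to P9_RDMA_FLUSHING or P9_RDMA_CLOSING, and a disconnect event
 * leaves it in P9_RDMA_CLOSED.
 */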

struct p9_rdma_req;

/**
 * struct p9_rdma_context - Keeps track of in-process WR
 *
 * @cqe: completion queue entry
 * @busa: Bus address to unmap when the WR completes
 * @req: Keeps track of requests (send)
 * @rc: Keeps track of replies (receive)
 */
struct p9_rdma_context {
	struct ib_cqe cqe;
	dma_addr_t busa;
	union {
		struct p9_req_t *req;
		struct p9_fcall rc;
	};
};
    113
    114/**
    115 * struct p9_rdma_opts - Collection of mount options
    116 * @port: port of connection
    117 * @privport: Whether a privileged port may be used
    118 * @sq_depth: The requested depth of the SQ. This really doesn't need
    119 * to be any deeper than the number of threads used in the client
    120 * @rq_depth: The depth of the RQ. Should be greater than or equal to SQ depth
    121 * @timeout: Time to wait in msecs for CM events
    122 */
    123struct p9_rdma_opts {
    124	short port;
    125	bool privport;
    126	int sq_depth;
    127	int rq_depth;
    128	long timeout;
    129};
    130
    131/*
    132 * Option Parsing (code inspired by NFS code)
    133 */
    134enum {
    135	/* Options that take integer arguments */
    136	Opt_port, Opt_rq_depth, Opt_sq_depth, Opt_timeout,
    137	/* Options that take no argument */
    138	Opt_privport,
    139	Opt_err,
    140};
    141
    142static match_table_t tokens = {
    143	{Opt_port, "port=%u"},
    144	{Opt_sq_depth, "sq=%u"},
    145	{Opt_rq_depth, "rq=%u"},
    146	{Opt_timeout, "timeout=%u"},
    147	{Opt_privport, "privport"},
    148	{Opt_err, NULL},
    149};
    150
    151static int p9_rdma_show_options(struct seq_file *m, struct p9_client *clnt)
    152{
    153	struct p9_trans_rdma *rdma = clnt->trans;
    154
    155	if (rdma->port != P9_PORT)
    156		seq_printf(m, ",port=%u", rdma->port);
    157	if (rdma->sq_depth != P9_RDMA_SQ_DEPTH)
    158		seq_printf(m, ",sq=%u", rdma->sq_depth);
    159	if (rdma->rq_depth != P9_RDMA_RQ_DEPTH)
    160		seq_printf(m, ",rq=%u", rdma->rq_depth);
    161	if (rdma->timeout != P9_RDMA_TIMEOUT)
    162		seq_printf(m, ",timeout=%lu", rdma->timeout);
    163	if (rdma->privport)
    164		seq_puts(m, ",privport");
    165	return 0;
    166}
    167
    168/**
    169 * parse_opts - parse mount options into rdma options structure
    170 * @params: options string passed from mount
    171 * @opts: rdma transport-specific structure to parse options into
    172 *
    173 * Returns 0 upon success, -ERRNO upon failure
    174 */
    175static int parse_opts(char *params, struct p9_rdma_opts *opts)
    176{
    177	char *p;
    178	substring_t args[MAX_OPT_ARGS];
    179	int option;
    180	char *options, *tmp_options;
    181
    182	opts->port = P9_PORT;
    183	opts->sq_depth = P9_RDMA_SQ_DEPTH;
    184	opts->rq_depth = P9_RDMA_RQ_DEPTH;
    185	opts->timeout = P9_RDMA_TIMEOUT;
    186	opts->privport = false;
    187
    188	if (!params)
    189		return 0;
    190
    191	tmp_options = kstrdup(params, GFP_KERNEL);
    192	if (!tmp_options) {
    193		p9_debug(P9_DEBUG_ERROR,
    194			 "failed to allocate copy of option string\n");
    195		return -ENOMEM;
    196	}
    197	options = tmp_options;
    198
    199	while ((p = strsep(&options, ",")) != NULL) {
    200		int token;
    201		int r;
    202		if (!*p)
    203			continue;
    204		token = match_token(p, tokens, args);
    205		if ((token != Opt_err) && (token != Opt_privport)) {
    206			r = match_int(&args[0], &option);
    207			if (r < 0) {
    208				p9_debug(P9_DEBUG_ERROR,
    209					 "integer field, but no integer?\n");
    210				continue;
    211			}
    212		}
    213		switch (token) {
    214		case Opt_port:
    215			opts->port = option;
    216			break;
    217		case Opt_sq_depth:
    218			opts->sq_depth = option;
    219			break;
    220		case Opt_rq_depth:
    221			opts->rq_depth = option;
    222			break;
    223		case Opt_timeout:
    224			opts->timeout = option;
    225			break;
    226		case Opt_privport:
    227			opts->privport = true;
    228			break;
    229		default:
    230			continue;
    231		}
    232	}
    233	/* RQ must be at least as large as the SQ */
    234	opts->rq_depth = max(opts->rq_depth, opts->sq_depth);
    235	kfree(tmp_options);
    236	return 0;
    237}
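
/*
 * For example, the mount option string "port=5641,sq=64,privport" parses to
 * opts->port == 5641, opts->sq_depth == 64 and opts->privport == true; the
 * final clamp then raises opts->rq_depth from its default of 32 to
 * max(32, 64) == 64, so the RQ can hold a reply for every outstanding send.
 */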

static int
p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct p9_client *c = id->context;
	struct p9_trans_rdma *rdma = c->trans;
	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		BUG_ON(rdma->state != P9_RDMA_INIT);
		rdma->state = P9_RDMA_ADDR_RESOLVED;
		break;

	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		BUG_ON(rdma->state != P9_RDMA_ADDR_RESOLVED);
		rdma->state = P9_RDMA_ROUTE_RESOLVED;
		break;

	case RDMA_CM_EVENT_ESTABLISHED:
		BUG_ON(rdma->state != P9_RDMA_ROUTE_RESOLVED);
		rdma->state = P9_RDMA_CONNECTED;
		break;

	case RDMA_CM_EVENT_DISCONNECTED:
		if (rdma)
			rdma->state = P9_RDMA_CLOSED;
		c->status = Disconnected;
		break;

	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
		break;

	case RDMA_CM_EVENT_ADDR_CHANGE:
	case RDMA_CM_EVENT_ROUTE_ERROR:
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
	case RDMA_CM_EVENT_MULTICAST_JOIN:
	case RDMA_CM_EVENT_MULTICAST_ERROR:
	case RDMA_CM_EVENT_REJECTED:
	case RDMA_CM_EVENT_CONNECT_REQUEST:
	case RDMA_CM_EVENT_CONNECT_RESPONSE:
	case RDMA_CM_EVENT_CONNECT_ERROR:
	case RDMA_CM_EVENT_ADDR_ERROR:
	case RDMA_CM_EVENT_UNREACHABLE:
		c->status = Disconnected;
		rdma_disconnect(rdma->cm_id);
		break;
	default:
		BUG();
	}
	complete(&rdma->cm_done);
	return 0;
}

static void
recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct p9_client *client = cq->cq_context;
	struct p9_trans_rdma *rdma = client->trans;
	struct p9_rdma_context *c =
		container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
	struct p9_req_t *req;
	int err = 0;
	int16_t tag;

	req = NULL;
	ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize,
							 DMA_FROM_DEVICE);

	if (wc->status != IB_WC_SUCCESS)
		goto err_out;

	c->rc.size = wc->byte_len;
	err = p9_parse_header(&c->rc, NULL, NULL, &tag, 1);
	if (err)
		goto err_out;

	req = p9_tag_lookup(client, tag);
	if (!req)
		goto err_out;

	/* Check that we have not yet received a reply for this request.
	 */
	if (unlikely(req->rc.sdata)) {
		pr_err("Duplicate reply for request %d\n", tag);
		goto err_out;
	}

	req->rc.size = c->rc.size;
	req->rc.sdata = c->rc.sdata;
	p9_client_cb(client, req, REQ_STATUS_RCVD);

 out:
	up(&rdma->rq_sem);
	kfree(c);
	return;

 err_out:
	p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n",
			req, err, wc->status);
	rdma->state = P9_RDMA_FLUSHING;
	client->status = Disconnected;
	goto out;
}

static void
send_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct p9_client *client = cq->cq_context;
	struct p9_trans_rdma *rdma = client->trans;
	struct p9_rdma_context *c =
		container_of(wc->wr_cqe, struct p9_rdma_context, cqe);

	ib_dma_unmap_single(rdma->cm_id->device,
			    c->busa, c->req->tc.size,
			    DMA_TO_DEVICE);
	up(&rdma->sq_sem);
	p9_req_put(c->req);
	kfree(c);
}
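
/*
 * Flow control: sq_sem and rq_sem are initialized to the SQ/RQ depths in
 * alloc_rdma(). rdma_request() takes one count before each ib_post_recv()
 * (via post_recv()) and ib_post_send(), and the completion handlers above
 * release it, so the number of outstanding work requests can never exceed
 * what the queues were sized for.
 */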

static void qp_event_handler(struct ib_event *event, void *context)
{
	p9_debug(P9_DEBUG_ERROR, "QP event %d context %p\n",
		 event->event, context);
}

static void rdma_destroy_trans(struct p9_trans_rdma *rdma)
{
	if (!rdma)
		return;

	if (rdma->qp && !IS_ERR(rdma->qp))
		ib_destroy_qp(rdma->qp);

	if (rdma->pd && !IS_ERR(rdma->pd))
		ib_dealloc_pd(rdma->pd);

	if (rdma->cq && !IS_ERR(rdma->cq))
		ib_free_cq(rdma->cq);

	if (rdma->cm_id && !IS_ERR(rdma->cm_id))
		rdma_destroy_id(rdma->cm_id);

	kfree(rdma);
}

static int
post_recv(struct p9_client *client, struct p9_rdma_context *c)
{
	struct p9_trans_rdma *rdma = client->trans;
	struct ib_recv_wr wr;
	struct ib_sge sge;

	c->busa = ib_dma_map_single(rdma->cm_id->device,
				    c->rc.sdata, client->msize,
				    DMA_FROM_DEVICE);
	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
		goto error;

	c->cqe.done = recv_done;

	sge.addr = c->busa;
	sge.length = client->msize;
	sge.lkey = rdma->pd->local_dma_lkey;

	wr.next = NULL;
	wr.wr_cqe = &c->cqe;
	wr.sg_list = &sge;
	wr.num_sge = 1;
	return ib_post_recv(rdma->qp, &wr, NULL);

 error:
	p9_debug(P9_DEBUG_ERROR, "EIO\n");
	return -EIO;
}

static int rdma_request(struct p9_client *client, struct p9_req_t *req)
{
	struct p9_trans_rdma *rdma = client->trans;
	struct ib_send_wr wr;
	struct ib_sge sge;
	int err = 0;
	unsigned long flags;
	struct p9_rdma_context *c = NULL;
	struct p9_rdma_context *rpl_context = NULL;

	/* When an error occurs between posting the recv and the send,
	 * there will be a receive context posted without a pending request.
	 * Since there is no way to "un-post" it, we remember it and skip
	 * post_recv() for the next request.
	 * So here,
	 * see if we are this `next request' and need to absorb an excess rc.
	 * If yes, then drop and free our own, and do not post_recv().
	 */
	if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
		if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
			/* Got one! */
			p9_fcall_fini(&req->rc);
			req->rc.sdata = NULL;
			goto dont_need_post_recv;
		} else {
			/* We raced and lost. */
			atomic_inc(&rdma->excess_rc);
		}
	}

	/* Allocate an fcall for the reply */
	rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
	if (!rpl_context) {
		err = -ENOMEM;
		goto recv_error;
	}
	rpl_context->rc.sdata = req->rc.sdata;

	/*
	 * Post a receive buffer for this request. We need to ensure
	 * there is a reply buffer available for every outstanding
	 * request. A flushed request can result in no reply for an
	 * outstanding request, so we must keep a count to avoid
	 * overflowing the RQ.
	 */
	if (down_interruptible(&rdma->rq_sem)) {
		err = -EINTR;
		goto recv_error;
	}

	err = post_recv(client, rpl_context);
	if (err) {
		p9_debug(P9_DEBUG_ERROR, "POST RECV failed: %d\n", err);
		goto recv_error;
	}
	/* remove posted receive buffer from request structure */
	req->rc.sdata = NULL;

dont_need_post_recv:
	/* Post the request */
	c = kmalloc(sizeof *c, GFP_NOFS);
	if (!c) {
		err = -ENOMEM;
		goto send_error;
	}
	c->req = req;

	c->busa = ib_dma_map_single(rdma->cm_id->device,
				    c->req->tc.sdata, c->req->tc.size,
				    DMA_TO_DEVICE);
	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
		err = -EIO;
		goto send_error;
	}

	c->cqe.done = send_done;

	sge.addr = c->busa;
	sge.length = c->req->tc.size;
	sge.lkey = rdma->pd->local_dma_lkey;

	wr.next = NULL;
	wr.wr_cqe = &c->cqe;
	wr.opcode = IB_WR_SEND;
	wr.send_flags = IB_SEND_SIGNALED;
	wr.sg_list = &sge;
	wr.num_sge = 1;

	if (down_interruptible(&rdma->sq_sem)) {
		err = -EINTR;
		goto send_error;
	}

	/* Mark request as `sent' *before* we actually send it,
	 * because doing it after could erase the REQ_STATUS_RCVD
	 * status in case of a very fast reply.
	 */
	req->status = REQ_STATUS_SENT;
	err = ib_post_send(rdma->qp, &wr, NULL);
	if (err)
		goto send_error;

	/* Success */
	return 0;

 /* Handle errors that happened during or while preparing the send: */
 send_error:
	req->status = REQ_STATUS_ERROR;
	kfree(c);
	p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err);

	/* Ach.
	 *  We did post_recv(), but not the send. We have one post_recv() in
	 *  excess.
	 */
	atomic_inc(&rdma->excess_rc);
	return err;

 /* Handle errors that happened during or while preparing post_recv(): */
 recv_error:
	kfree(rpl_context);
	spin_lock_irqsave(&rdma->req_lock, flags);
	if (err != -EINTR && rdma->state < P9_RDMA_CLOSING) {
		rdma->state = P9_RDMA_CLOSING;
		spin_unlock_irqrestore(&rdma->req_lock, flags);
		rdma_disconnect(rdma->cm_id);
	} else
		spin_unlock_irqrestore(&rdma->req_lock, flags);
	return err;
}
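
/*
 * Worked example of the excess_rc accounting: if post_recv() above succeeds
 * but the send path then fails, the reply buffer stays posted on the RQ with
 * no request to consume it, so send_error bumps excess_rc. The next
 * rdma_request() sees excess_rc > 0, frees its own reply buffer and jumps to
 * dont_need_post_recv, reusing the buffer that is already posted.
 */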

static void rdma_close(struct p9_client *client)
{
	struct p9_trans_rdma *rdma;

	if (!client)
		return;

	rdma = client->trans;
	if (!rdma)
		return;

	client->status = Disconnected;
	rdma_disconnect(rdma->cm_id);
	rdma_destroy_trans(rdma);
}

/**
 * alloc_rdma - Allocate and initialize the rdma transport structure
 * @opts: Mount options structure
 */
static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts)
{
	struct p9_trans_rdma *rdma;

	rdma = kzalloc(sizeof(struct p9_trans_rdma), GFP_KERNEL);
	if (!rdma)
		return NULL;

	rdma->port = opts->port;
	rdma->privport = opts->privport;
	rdma->sq_depth = opts->sq_depth;
	rdma->rq_depth = opts->rq_depth;
	rdma->timeout = opts->timeout;
	spin_lock_init(&rdma->req_lock);
	init_completion(&rdma->cm_done);
	sema_init(&rdma->sq_sem, rdma->sq_depth);
	sema_init(&rdma->rq_sem, rdma->rq_depth);
	atomic_set(&rdma->excess_rc, 0);

	return rdma;
}

static int rdma_cancel(struct p9_client *client, struct p9_req_t *req)
{
	/* Nothing to do here.
	 * We will take care of it (if we have to) in rdma_cancelled()
	 */
	return 1;
}

/* A request has been fully flushed without a reply.
 * That means we have posted one buffer in excess.
 */
static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req)
{
	struct p9_trans_rdma *rdma = client->trans;
	atomic_inc(&rdma->excess_rc);
	return 0;
}
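
/*
 * Like the send_error path in rdma_request(), a flushed request leaves its
 * posted receive buffer unused, so excess_rc is bumped here and absorbed by
 * a later rdma_request().
 */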

static int p9_rdma_bind_privport(struct p9_trans_rdma *rdma)
{
	struct sockaddr_in cl = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_ANY),
	};
	int port, err = -EINVAL;

	for (port = P9_DEF_MAX_RESVPORT; port >= P9_DEF_MIN_RESVPORT; port--) {
		cl.sin_port = htons((ushort)port);
		err = rdma_bind_addr(rdma->cm_id, (struct sockaddr *)&cl);
		if (err != -EADDRINUSE)
			break;
	}
	return err;
}

/**
 * rdma_create_trans - Transport method for creating a transport instance
 * @client: client instance
 * @addr: IP address string
 * @args: Mount options string
 */
static int
rdma_create_trans(struct p9_client *client, const char *addr, char *args)
{
	int err;
	struct p9_rdma_opts opts;
	struct p9_trans_rdma *rdma;
	struct rdma_conn_param conn_param;
	struct ib_qp_init_attr qp_attr;

	if (addr == NULL)
		return -EINVAL;

	/* Parse the transport specific mount options */
	err = parse_opts(args, &opts);
	if (err < 0)
		return err;

	/* Create and initialize the RDMA transport structure */
	rdma = alloc_rdma(&opts);
	if (!rdma)
		return -ENOMEM;

	/* Create the RDMA CM ID */
	rdma->cm_id = rdma_create_id(&init_net, p9_cm_event_handler, client,
				     RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(rdma->cm_id))
		goto error;

	/* Associate the client with the transport */
	client->trans = rdma;

	/* Bind to a privileged port if we need to */
	if (opts.privport) {
		err = p9_rdma_bind_privport(rdma);
		if (err < 0) {
			pr_err("%s (%d): problem binding to privport: %d\n",
			       __func__, task_pid_nr(current), -err);
			goto error;
		}
	}

	/* Resolve the server's address */
	rdma->addr.sin_family = AF_INET;
	rdma->addr.sin_addr.s_addr = in_aton(addr);
	rdma->addr.sin_port = htons(opts.port);
	err = rdma_resolve_addr(rdma->cm_id, NULL,
				(struct sockaddr *)&rdma->addr,
				rdma->timeout);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED))
		goto error;

	/* Resolve the route to the server */
	err = rdma_resolve_route(rdma->cm_id, rdma->timeout);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED))
		goto error;

	/* Create the Completion Queue */
	rdma->cq = ib_alloc_cq_any(rdma->cm_id->device, client,
				   opts.sq_depth + opts.rq_depth + 1,
				   IB_POLL_SOFTIRQ);
	if (IS_ERR(rdma->cq))
		goto error;

	/* Create the Protection Domain */
	rdma->pd = ib_alloc_pd(rdma->cm_id->device, 0);
	if (IS_ERR(rdma->pd))
		goto error;

	/* Create the Queue Pair */
	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.event_handler = qp_event_handler;
	qp_attr.qp_context = client;
	qp_attr.cap.max_send_wr = opts.sq_depth;
	qp_attr.cap.max_recv_wr = opts.rq_depth;
	qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE;
	qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE;
	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	qp_attr.qp_type = IB_QPT_RC;
	qp_attr.send_cq = rdma->cq;
	qp_attr.recv_cq = rdma->cq;
	err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr);
	if (err)
		goto error;
	rdma->qp = rdma->cm_id->qp;

	/* Request a connection */
	memset(&conn_param, 0, sizeof(conn_param));
	conn_param.private_data = NULL;
	conn_param.private_data_len = 0;
	conn_param.responder_resources = P9_RDMA_IRD;
	conn_param.initiator_depth = P9_RDMA_ORD;
	err = rdma_connect(rdma->cm_id, &conn_param);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_CONNECTED))
		goto error;

	client->status = Connected;

	return 0;

error:
	rdma_destroy_trans(rdma);
	return -ENOTCONN;
}

static struct p9_trans_module p9_rdma_trans = {
	.name = "rdma",
	.maxsize = P9_RDMA_MAXSIZE,
	.def = 0,
	.owner = THIS_MODULE,
	.create = rdma_create_trans,
	.close = rdma_close,
	.request = rdma_request,
	.cancel = rdma_cancel,
	.cancelled = rdma_cancelled,
	.show_options = p9_rdma_show_options,
};
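
/*
 * Usage sketch (assumes a 9P server exporting over RDMA on the default
 * port): the transport is selected at mount time with something like
 *
 *	mount -t 9p -o trans=rdma,port=5640 <server-ip> /mnt
 *
 * maxsize above caps the negotiated msize at 1MB, and .def = 0 means this
 * transport is never chosen by default.
 */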

/**
 * p9_trans_rdma_init - Register the 9P RDMA transport driver
 */
static int __init p9_trans_rdma_init(void)
{
	v9fs_register_trans(&p9_rdma_trans);
	return 0;
}

static void __exit p9_trans_rdma_exit(void)
{
	v9fs_unregister_trans(&p9_rdma_trans);
}

module_init(p9_trans_rdma_init);
module_exit(p9_trans_rdma_exit);
MODULE_ALIAS_9P("rdma");

MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
MODULE_DESCRIPTION("RDMA Transport for 9P");
MODULE_LICENSE("Dual BSD/GPL");