cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

ib.c (17506B)


/*
 * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/in.h>
#include <linux/if.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/if_arp.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <net/addrconf.h>

#include "rds_single_path.h"
#include "rds.h"
#include "ib.h"
#include "ib_mr.h"

static unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE;
static unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE;
unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT;
static atomic_t rds_ib_unloading;

module_param(rds_ib_mr_1m_pool_size, int, 0444);
MODULE_PARM_DESC(rds_ib_mr_1m_pool_size, " Max number of 1M mr per HCA");
module_param(rds_ib_mr_8k_pool_size, int, 0444);
MODULE_PARM_DESC(rds_ib_mr_8k_pool_size, " Max number of 8K mr per HCA");
module_param(rds_ib_retry_count, int, 0444);
MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error");
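
/*
 * These parameters are read-only once loaded (permissions 0444), so they
 * can only be set at module load time.  A sketch, assuming this file is
 * built into the rds_rdma module as in mainline:
 *
 *	modprobe rds_rdma rds_ib_retry_count=7
 */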

/*
 * We have a clumsy combination of RCU and an rwsem protecting this list
 * because it is used both in the get_mr fast path and while blocking in
 * the FMR flushing path.
 */
DECLARE_RWSEM(rds_ib_devices_lock);
struct list_head rds_ib_devices;

/* NOTE: if also grabbing ibdev lock, grab this first */
DEFINE_SPINLOCK(ib_nodev_conns_lock);
LIST_HEAD(ib_nodev_conns);
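
/*
 * A sketch of the two access patterns (the reader side lives in ib_rdma.c):
 * fast-path lookups walk the list under RCU,
 *
 *	rcu_read_lock();
 *	list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list)
 *		...
 *	rcu_read_unlock();
 *
 * while device add/remove takes rds_ib_devices_lock for writing and the
 * blocking flush path takes it for reading.
 */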

static void rds_ib_nodev_connect(void)
{
	struct rds_ib_connection *ic;

	spin_lock(&ib_nodev_conns_lock);
	list_for_each_entry(ic, &ib_nodev_conns, ib_node)
		rds_conn_connect_if_down(ic->conn);
	spin_unlock(&ib_nodev_conns_lock);
}

static void rds_ib_dev_shutdown(struct rds_ib_device *rds_ibdev)
{
	struct rds_ib_connection *ic;
	unsigned long flags;

	spin_lock_irqsave(&rds_ibdev->spinlock, flags);
	list_for_each_entry(ic, &rds_ibdev->conn_list, ib_node)
		rds_conn_path_drop(&ic->conn->c_path[0], true);
	spin_unlock_irqrestore(&rds_ibdev->spinlock, flags);
}

/*
 * rds_ib_destroy_mr_pool() blocks on a few things and MRs drop references
 * from interrupt context, so we push freeing off into a work struct in krdsd.
 */
static void rds_ib_dev_free(struct work_struct *work)
{
	struct rds_ib_ipaddr *i_ipaddr, *i_next;
	struct rds_ib_device *rds_ibdev = container_of(work,
					struct rds_ib_device, free_work);

	if (rds_ibdev->mr_8k_pool)
		rds_ib_destroy_mr_pool(rds_ibdev->mr_8k_pool);
	if (rds_ibdev->mr_1m_pool)
		rds_ib_destroy_mr_pool(rds_ibdev->mr_1m_pool);
	if (rds_ibdev->pd)
		ib_dealloc_pd(rds_ibdev->pd);

	list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) {
		list_del(&i_ipaddr->list);
		kfree(i_ipaddr);
	}

	kfree(rds_ibdev->vector_load);

	kfree(rds_ibdev);
}

void rds_ib_dev_put(struct rds_ib_device *rds_ibdev)
{
	BUG_ON(refcount_read(&rds_ibdev->refcount) == 0);
	if (refcount_dec_and_test(&rds_ibdev->refcount))
		queue_work(rds_wq, &rds_ibdev->free_work);
}
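
/*
 * Note that the final put only queues rds_ib_dev_free() on rds_wq (the
 * krdsd workqueue), so the actual teardown always runs in process context,
 * never in the possibly-atomic context of the last put.
 */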

static int rds_ib_add_one(struct ib_device *device)
{
	struct rds_ib_device *rds_ibdev;
	int ret;

	/* Only handle IB (no iWARP) devices */
	if (device->node_type != RDMA_NODE_IB_CA)
		return -EOPNOTSUPP;

	/* Device must support FRWR */
	if (!(device->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
		return -EOPNOTSUPP;

	rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL,
				 ibdev_to_node(device));
	if (!rds_ibdev)
		return -ENOMEM;

	spin_lock_init(&rds_ibdev->spinlock);
	refcount_set(&rds_ibdev->refcount, 1);
	INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free);

	INIT_LIST_HEAD(&rds_ibdev->ipaddr_list);
	INIT_LIST_HEAD(&rds_ibdev->conn_list);

	rds_ibdev->max_wrs = device->attrs.max_qp_wr;
	rds_ibdev->max_sge = min(device->attrs.max_send_sge, RDS_IB_MAX_SGE);

	rds_ibdev->odp_capable =
		!!(device->attrs.kernel_cap_flags &
		   IBK_ON_DEMAND_PAGING) &&
		!!(device->attrs.odp_caps.per_transport_caps.rc_odp_caps &
		   IB_ODP_SUPPORT_WRITE) &&
		!!(device->attrs.odp_caps.per_transport_caps.rc_odp_caps &
		   IB_ODP_SUPPORT_READ);

	rds_ibdev->max_1m_mrs = device->attrs.max_mr ?
		min_t(unsigned int, (device->attrs.max_mr / 2),
		      rds_ib_mr_1m_pool_size) : rds_ib_mr_1m_pool_size;

	rds_ibdev->max_8k_mrs = device->attrs.max_mr ?
		min_t(unsigned int, ((device->attrs.max_mr / 2) * RDS_MR_8K_SCALE),
		      rds_ib_mr_8k_pool_size) : rds_ib_mr_8k_pool_size;

	rds_ibdev->max_initiator_depth = device->attrs.max_qp_init_rd_atom;
	rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom;

	rds_ibdev->vector_load = kcalloc(device->num_comp_vectors,
					 sizeof(int),
					 GFP_KERNEL);
	if (!rds_ibdev->vector_load) {
		pr_err("RDS/IB: %s failed to allocate vector memory\n",
			__func__);
		ret = -ENOMEM;
		goto put_dev;
	}

	rds_ibdev->dev = device;
	rds_ibdev->pd = ib_alloc_pd(device, 0);
	if (IS_ERR(rds_ibdev->pd)) {
		ret = PTR_ERR(rds_ibdev->pd);
		rds_ibdev->pd = NULL;
		goto put_dev;
	}

	rds_ibdev->mr_1m_pool =
		rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL);
	if (IS_ERR(rds_ibdev->mr_1m_pool)) {
		ret = PTR_ERR(rds_ibdev->mr_1m_pool);
		rds_ibdev->mr_1m_pool = NULL;
		goto put_dev;
	}

	rds_ibdev->mr_8k_pool =
		rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_8K_POOL);
	if (IS_ERR(rds_ibdev->mr_8k_pool)) {
		ret = PTR_ERR(rds_ibdev->mr_8k_pool);
		rds_ibdev->mr_8k_pool = NULL;
		goto put_dev;
	}

	rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, max_1m_mrs = %d, max_8k_mrs = %d\n",
		 device->attrs.max_mr, rds_ibdev->max_wrs, rds_ibdev->max_sge,
		 rds_ibdev->max_1m_mrs, rds_ibdev->max_8k_mrs);

	pr_info("RDS/IB: %s: added\n", device->name);

	down_write(&rds_ib_devices_lock);
	list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices);
	up_write(&rds_ib_devices_lock);
	refcount_inc(&rds_ibdev->refcount);

	ib_set_client_data(device, &rds_ib_client, rds_ibdev);

	rds_ib_nodev_connect();
	return 0;

put_dev:
	rds_ib_dev_put(rds_ibdev);
	return ret;
}

/*
 * New connections use this to find the device to associate with the
 * connection.  It's not in the fast path so we're not concerned about the
 * performance of the IB call.  (As of this writing, it uses an interrupt
 * blocking spinlock to serialize walking a per-device list of all registered
 * clients.)
 *
 * RCU is used to handle incoming connections racing with device teardown.
 * Rather than use a lock to serialize removal from the client_data and
 * getting a new reference, we use an RCU grace period.  The destruction
 * path removes the device from client_data and then waits for all RCU
 * readers to finish.
 *
 * A new connection can get NULL from this if it's arriving on a
 * device that is in the process of being removed.
 */
struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device)
{
	struct rds_ib_device *rds_ibdev;

	rcu_read_lock();
	rds_ibdev = ib_get_client_data(device, &rds_ib_client);
	if (rds_ibdev)
		refcount_inc(&rds_ibdev->refcount);
	rcu_read_unlock();
	return rds_ibdev;
}
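
/*
 * Typical caller pattern (a sketch; the real callers live in the
 * connection setup code): take a reference for the duration of the use,
 * then drop it:
 *
 *	rds_ibdev = rds_ib_get_client_data(device);
 *	if (!rds_ibdev)
 *		return -EOPNOTSUPP;  (device is being removed)
 *	...use rds_ibdev...
 *	rds_ib_dev_put(rds_ibdev);
 */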

/*
 * The IB stack is letting us know that a device is going away.  This can
 * happen if the underlying HCA driver is removed or if PCI hotplug is removing
 * the PCI function, for example.
 *
 * This can be called at any time and can be racing with any other RDS path.
 */
static void rds_ib_remove_one(struct ib_device *device, void *client_data)
{
	struct rds_ib_device *rds_ibdev = client_data;

	rds_ib_dev_shutdown(rds_ibdev);

	/* stop connection attempts from getting a reference to this device. */
	ib_set_client_data(device, &rds_ib_client, NULL);

	down_write(&rds_ib_devices_lock);
	list_del_rcu(&rds_ibdev->list);
	up_write(&rds_ib_devices_lock);

	/*
	 * This synchronize_rcu() waits for readers of both the ib
	 * client data and the devices list to finish before we drop
	 * both of those references.
	 */
	synchronize_rcu();
	rds_ib_dev_put(rds_ibdev);
	rds_ib_dev_put(rds_ibdev);
}

struct ib_client rds_ib_client = {
	.name   = "rds_ib",
	.add    = rds_ib_add_one,
	.remove = rds_ib_remove_one
};

static int rds_ib_conn_info_visitor(struct rds_connection *conn,
				    void *buffer)
{
	struct rds_info_rdma_connection *iinfo = buffer;
	struct rds_ib_connection *ic = conn->c_transport_data;

	/* We will only ever look at IB transports */
	if (conn->c_trans != &rds_ib_transport)
		return 0;
	if (conn->c_isv6)
		return 0;

	iinfo->src_addr = conn->c_laddr.s6_addr32[3];
	iinfo->dst_addr = conn->c_faddr.s6_addr32[3];
	if (ic) {
		iinfo->tos = conn->c_tos;
		iinfo->sl = ic->i_sl;
	}

	memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid));
	memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
	if (rds_conn_state(conn) == RDS_CONN_UP) {
		struct rds_ib_device *rds_ibdev;

		rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo->src_gid,
			       (union ib_gid *)&iinfo->dst_gid);

		rds_ibdev = ic->rds_ibdev;
		iinfo->max_send_wr = ic->i_send_ring.w_nr;
		iinfo->max_recv_wr = ic->i_recv_ring.w_nr;
		iinfo->max_send_sge = rds_ibdev->max_sge;
		rds_ib_get_mr_info(rds_ibdev, iinfo);
		iinfo->cache_allocs = atomic_read(&ic->i_cache_allocs);
	}
	return 1;
}

#if IS_ENABLED(CONFIG_IPV6)
/* IPv6 version of rds_ib_conn_info_visitor(). */
static int rds6_ib_conn_info_visitor(struct rds_connection *conn,
				     void *buffer)
{
	struct rds6_info_rdma_connection *iinfo6 = buffer;
	struct rds_ib_connection *ic = conn->c_transport_data;

	/* We will only ever look at IB transports */
	if (conn->c_trans != &rds_ib_transport)
		return 0;

	iinfo6->src_addr = conn->c_laddr;
	iinfo6->dst_addr = conn->c_faddr;
	if (ic) {
		iinfo6->tos = conn->c_tos;
		iinfo6->sl = ic->i_sl;
	}

	memset(&iinfo6->src_gid, 0, sizeof(iinfo6->src_gid));
	memset(&iinfo6->dst_gid, 0, sizeof(iinfo6->dst_gid));

	if (rds_conn_state(conn) == RDS_CONN_UP) {
		struct rds_ib_device *rds_ibdev;

		rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo6->src_gid,
			       (union ib_gid *)&iinfo6->dst_gid);
		rds_ibdev = ic->rds_ibdev;
		iinfo6->max_send_wr = ic->i_send_ring.w_nr;
		iinfo6->max_recv_wr = ic->i_recv_ring.w_nr;
		iinfo6->max_send_sge = rds_ibdev->max_sge;
		rds6_ib_get_mr_info(rds_ibdev, iinfo6);
		iinfo6->cache_allocs = atomic_read(&ic->i_cache_allocs);
	}
	return 1;
}
#endif

static void rds_ib_ic_info(struct socket *sock, unsigned int len,
			   struct rds_info_iterator *iter,
			   struct rds_info_lengths *lens)
{
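	/* Size the on-stack buffer in u64 units, rounding the struct size
	 * up so the buffer is both large enough and 8-byte aligned.
	 */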
	u64 buffer[(sizeof(struct rds_info_rdma_connection) + 7) / 8];

	rds_for_each_conn_info(sock, len, iter, lens,
				rds_ib_conn_info_visitor,
				buffer,
				sizeof(struct rds_info_rdma_connection));
}

#if IS_ENABLED(CONFIG_IPV6)
/* IPv6 version of rds_ib_ic_info(). */
static void rds6_ib_ic_info(struct socket *sock, unsigned int len,
			    struct rds_info_iterator *iter,
			    struct rds_info_lengths *lens)
{
	u64 buffer[(sizeof(struct rds6_info_rdma_connection) + 7) / 8];

	rds_for_each_conn_info(sock, len, iter, lens,
			       rds6_ib_conn_info_visitor,
			       buffer,
			       sizeof(struct rds6_info_rdma_connection));
}
#endif

/*
 * Early RDS/IB was built to only bind to an address if there is an IPoIB
 * device with that address set.
 *
 * If it were me, I'd advocate for something more flexible.  Sending and
 * receiving should be device-agnostic.  Transports would try and maintain
 * connections between peers who have messages queued.  Userspace would be
 * allowed to influence which paths have priority.  We could call userspace
 * asserting this policy "routing".
 */
static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr,
			      __u32 scope_id)
{
	int ret;
	struct rdma_cm_id *cm_id;
#if IS_ENABLED(CONFIG_IPV6)
	struct sockaddr_in6 sin6;
#endif
	struct sockaddr_in sin;
	struct sockaddr *sa;
	bool isv4;

	isv4 = ipv6_addr_v4mapped(addr);
	/* Create a CMA ID and try to bind it. This catches both
	 * IB and iWARP capable NICs.
	 */
	cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler,
			       NULL, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(cm_id))
		return PTR_ERR(cm_id);

	if (isv4) {
		memset(&sin, 0, sizeof(sin));
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = addr->s6_addr32[3];
		sa = (struct sockaddr *)&sin;
	} else {
#if IS_ENABLED(CONFIG_IPV6)
		memset(&sin6, 0, sizeof(sin6));
		sin6.sin6_family = AF_INET6;
		sin6.sin6_addr = *addr;
		sin6.sin6_scope_id = scope_id;
		sa = (struct sockaddr *)&sin6;

		/* XXX Do a special IPv6 link-local address check here.  The
		 * reason is that rdma_bind_addr() always succeeds with an
		 * IPv6 link-local address regardless of whether it is
		 * actually configured on the system.
		 */
		if (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL) {
			struct net_device *dev;

			if (scope_id == 0) {
				ret = -EADDRNOTAVAIL;
				goto out;
			}

			/* Use init_net for now as RDS is not network
			 * namespace aware.
			 */
			dev = dev_get_by_index(&init_net, scope_id);
			if (!dev) {
				ret = -EADDRNOTAVAIL;
				goto out;
			}
			if (!ipv6_chk_addr(&init_net, addr, dev, 1)) {
				dev_put(dev);
				ret = -EADDRNOTAVAIL;
				goto out;
			}
			dev_put(dev);
		}
#else
		ret = -EADDRNOTAVAIL;
		goto out;
#endif
	}

	/* rdma_bind_addr will only succeed for IB & iWARP devices */
	ret = rdma_bind_addr(cm_id, sa);
	/* due to this, we would claim to support iWARP devices unless we
	 * check node_type.
	 */
	if (ret || !cm_id->device ||
	    cm_id->device->node_type != RDMA_NODE_IB_CA)
		ret = -EADDRNOTAVAIL;

	rdsdebug("addr %pI6c%%%u ret %d node type %d\n",
		 addr, scope_id, ret,
		 cm_id->device ? cm_id->device->node_type : -1);

out:
	rdma_destroy_id(cm_id);

	return ret;
}
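
/*
 * As in mainline RDS, laddr_check is reached through the core's
 * rds_trans_get_preferred() when a socket binds, so -EADDRNOTAVAIL here
 * just means the core keeps looking for another transport (e.g. TCP)
 * that can carry this address.
 */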

static void rds_ib_unregister_client(void)
{
	ib_unregister_client(&rds_ib_client);
	/* wait for rds_ib_dev_free() to complete */
	flush_workqueue(rds_wq);
}

static void rds_ib_set_unloading(void)
{
	atomic_set(&rds_ib_unloading, 1);
}

static bool rds_ib_is_unloading(struct rds_connection *conn)
{
	struct rds_conn_path *cp = &conn->c_path[0];

	return (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags) ||
		atomic_read(&rds_ib_unloading) != 0);
}

void rds_ib_exit(void)
{
	rds_ib_set_unloading();
	synchronize_rcu();
	rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
#if IS_ENABLED(CONFIG_IPV6)
	rds_info_deregister_func(RDS6_INFO_IB_CONNECTIONS, rds6_ib_ic_info);
#endif
	rds_ib_unregister_client();
	rds_ib_destroy_nodev_conns();
	rds_ib_sysctl_exit();
	rds_ib_recv_exit();
	rds_trans_unregister(&rds_ib_transport);
	rds_ib_mr_exit();
}

static u8 rds_ib_get_tos_map(u8 tos)
{
	/* 1:1 user-to-transport map for the RDMA transport.
	 * In the future, if a custom map is desired, this hook can
	 * export a user-configurable map.
	 */
	return tos;
}

struct rds_transport rds_ib_transport = {
	.laddr_check		= rds_ib_laddr_check,
	.xmit_path_complete	= rds_ib_xmit_path_complete,
	.xmit			= rds_ib_xmit,
	.xmit_rdma		= rds_ib_xmit_rdma,
	.xmit_atomic		= rds_ib_xmit_atomic,
	.recv_path		= rds_ib_recv_path,
	.conn_alloc		= rds_ib_conn_alloc,
	.conn_free		= rds_ib_conn_free,
	.conn_path_connect	= rds_ib_conn_path_connect,
	.conn_path_shutdown	= rds_ib_conn_path_shutdown,
	.inc_copy_to_user	= rds_ib_inc_copy_to_user,
	.inc_free		= rds_ib_inc_free,
	.cm_initiate_connect	= rds_ib_cm_initiate_connect,
	.cm_handle_connect	= rds_ib_cm_handle_connect,
	.cm_connect_complete	= rds_ib_cm_connect_complete,
	.stats_info_copy	= rds_ib_stats_info_copy,
	.exit			= rds_ib_exit,
	.get_mr			= rds_ib_get_mr,
	.sync_mr		= rds_ib_sync_mr,
	.free_mr		= rds_ib_free_mr,
	.flush_mrs		= rds_ib_flush_mrs,
	.get_tos_map		= rds_ib_get_tos_map,
	.t_owner		= THIS_MODULE,
	.t_name			= "infiniband",
	.t_unloading		= rds_ib_is_unloading,
	.t_type			= RDS_TRANS_IB
};

int rds_ib_init(void)
{
	int ret;

	INIT_LIST_HEAD(&rds_ib_devices);

	ret = rds_ib_mr_init();
	if (ret)
		goto out;

	ret = ib_register_client(&rds_ib_client);
	if (ret)
		goto out_mr_exit;

	ret = rds_ib_sysctl_init();
	if (ret)
		goto out_ibreg;

	ret = rds_ib_recv_init();
	if (ret)
		goto out_sysctl;

	rds_trans_register(&rds_ib_transport);

	rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
#if IS_ENABLED(CONFIG_IPV6)
	rds_info_register_func(RDS6_INFO_IB_CONNECTIONS, rds6_ib_ic_info);
#endif

	goto out;

out_sysctl:
	rds_ib_sysctl_exit();
out_ibreg:
	rds_ib_unregister_client();
out_mr_exit:
	rds_ib_mr_exit();
out:
	return ret;
}
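
/*
 * rds_ib_init() is not a module_init() entry point itself; as in mainline,
 * it is expected to be called from the rds_rdma module init path
 * (rds_rdma_init() in rdma_transport.c), which unwinds via rds_ib_exit()
 * on failure.
 */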

MODULE_LICENSE("GPL");