cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

cm.c (132301B)


      1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
      2/*
      3 * Copyright (c) 2004-2007 Intel Corporation.  All rights reserved.
      4 * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
      5 * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
      6 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
      7 * Copyright (c) 2019, Mellanox Technologies inc.  All rights reserved.
      8 */
      9
     10#include <linux/completion.h>
     11#include <linux/dma-mapping.h>
     12#include <linux/device.h>
     13#include <linux/module.h>
     14#include <linux/err.h>
     15#include <linux/idr.h>
     16#include <linux/interrupt.h>
     17#include <linux/random.h>
     18#include <linux/rbtree.h>
     19#include <linux/spinlock.h>
     20#include <linux/slab.h>
     21#include <linux/sysfs.h>
     22#include <linux/workqueue.h>
     23#include <linux/kdev_t.h>
     24#include <linux/etherdevice.h>
     25
     26#include <rdma/ib_cache.h>
     27#include <rdma/ib_cm.h>
     28#include <rdma/ib_sysfs.h>
     29#include "cm_msgs.h"
     30#include "core_priv.h"
     31#include "cm_trace.h"
     32
     33MODULE_AUTHOR("Sean Hefty");
     34MODULE_DESCRIPTION("InfiniBand CM");
     35MODULE_LICENSE("Dual BSD/GPL");
     36
     37static const char * const ibcm_rej_reason_strs[] = {
     38	[IB_CM_REJ_NO_QP]			= "no QP",
     39	[IB_CM_REJ_NO_EEC]			= "no EEC",
     40	[IB_CM_REJ_NO_RESOURCES]		= "no resources",
     41	[IB_CM_REJ_TIMEOUT]			= "timeout",
     42	[IB_CM_REJ_UNSUPPORTED]			= "unsupported",
     43	[IB_CM_REJ_INVALID_COMM_ID]		= "invalid comm ID",
     44	[IB_CM_REJ_INVALID_COMM_INSTANCE]	= "invalid comm instance",
     45	[IB_CM_REJ_INVALID_SERVICE_ID]		= "invalid service ID",
     46	[IB_CM_REJ_INVALID_TRANSPORT_TYPE]	= "invalid transport type",
     47	[IB_CM_REJ_STALE_CONN]			= "stale conn",
     48	[IB_CM_REJ_RDC_NOT_EXIST]		= "RDC not exist",
     49	[IB_CM_REJ_INVALID_GID]			= "invalid GID",
     50	[IB_CM_REJ_INVALID_LID]			= "invalid LID",
     51	[IB_CM_REJ_INVALID_SL]			= "invalid SL",
     52	[IB_CM_REJ_INVALID_TRAFFIC_CLASS]	= "invalid traffic class",
     53	[IB_CM_REJ_INVALID_HOP_LIMIT]		= "invalid hop limit",
     54	[IB_CM_REJ_INVALID_PACKET_RATE]		= "invalid packet rate",
     55	[IB_CM_REJ_INVALID_ALT_GID]		= "invalid alt GID",
     56	[IB_CM_REJ_INVALID_ALT_LID]		= "invalid alt LID",
     57	[IB_CM_REJ_INVALID_ALT_SL]		= "invalid alt SL",
     58	[IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS]	= "invalid alt traffic class",
     59	[IB_CM_REJ_INVALID_ALT_HOP_LIMIT]	= "invalid alt hop limit",
     60	[IB_CM_REJ_INVALID_ALT_PACKET_RATE]	= "invalid alt packet rate",
     61	[IB_CM_REJ_PORT_CM_REDIRECT]		= "port CM redirect",
     62	[IB_CM_REJ_PORT_REDIRECT]		= "port redirect",
     63	[IB_CM_REJ_INVALID_MTU]			= "invalid MTU",
     64	[IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES]	= "insufficient resp resources",
     65	[IB_CM_REJ_CONSUMER_DEFINED]		= "consumer defined",
     66	[IB_CM_REJ_INVALID_RNR_RETRY]		= "invalid RNR retry",
     67	[IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID]	= "duplicate local comm ID",
     68	[IB_CM_REJ_INVALID_CLASS_VERSION]	= "invalid class version",
     69	[IB_CM_REJ_INVALID_FLOW_LABEL]		= "invalid flow label",
     70	[IB_CM_REJ_INVALID_ALT_FLOW_LABEL]	= "invalid alt flow label",
     71	[IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED] =
     72		"vendor option is not supported",
     73};
     74
     75const char *__attribute_const__ ibcm_reject_msg(int reason)
     76{
     77	size_t index = reason;
     78
     79	if (index < ARRAY_SIZE(ibcm_rej_reason_strs) &&
     80	    ibcm_rej_reason_strs[index])
     81		return ibcm_rej_reason_strs[index];
     82	else
     83		return "unrecognized reason";
     84}
     85EXPORT_SYMBOL(ibcm_reject_msg);
     86
     87struct cm_id_private;
     88struct cm_work;
     89static int cm_add_one(struct ib_device *device);
     90static void cm_remove_one(struct ib_device *device, void *client_data);
     91static void cm_process_work(struct cm_id_private *cm_id_priv,
     92			    struct cm_work *work);
     93static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
     94				   struct ib_cm_sidr_rep_param *param);
     95static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
     96			       const void *private_data, u8 private_data_len);
     97static int cm_send_drep_locked(struct cm_id_private *cm_id_priv,
     98			       void *private_data, u8 private_data_len);
     99static int cm_send_rej_locked(struct cm_id_private *cm_id_priv,
    100			      enum ib_cm_rej_reason reason, void *ari,
    101			      u8 ari_length, const void *private_data,
    102			      u8 private_data_len);
    103
    104static struct ib_client cm_client = {
    105	.name   = "cm",
    106	.add    = cm_add_one,
    107	.remove = cm_remove_one
    108};
    109
    110static struct ib_cm {
    111	spinlock_t lock;
    112	struct list_head device_list;
    113	rwlock_t device_lock;
    114	struct rb_root listen_service_table;
    115	u64 listen_service_id;
    116	/* struct rb_root peer_service_table; todo: fix peer to peer */
    117	struct rb_root remote_qp_table;
    118	struct rb_root remote_id_table;
    119	struct rb_root remote_sidr_table;
    120	struct xarray local_id_table;
    121	u32 local_id_next;
    122	__be32 random_id_operand;
    123	struct list_head timewait_list;
    124	struct workqueue_struct *wq;
    125} cm;
    126
    127/* Counter indexes ordered by attribute ID */
    128enum {
    129	CM_REQ_COUNTER,
    130	CM_MRA_COUNTER,
    131	CM_REJ_COUNTER,
    132	CM_REP_COUNTER,
    133	CM_RTU_COUNTER,
    134	CM_DREQ_COUNTER,
    135	CM_DREP_COUNTER,
    136	CM_SIDR_REQ_COUNTER,
    137	CM_SIDR_REP_COUNTER,
    138	CM_LAP_COUNTER,
    139	CM_APR_COUNTER,
    140	CM_ATTR_COUNT,
    141	CM_ATTR_ID_OFFSET = 0x0010,
    142};
    143
    144enum {
    145	CM_XMIT,
    146	CM_XMIT_RETRIES,
    147	CM_RECV,
    148	CM_RECV_DUPLICATES,
    149	CM_COUNTER_GROUPS
    150};
    151
    152struct cm_counter_attribute {
    153	struct ib_port_attribute attr;
    154	unsigned short group;
    155	unsigned short index;
    156};
    157
    158struct cm_port {
    159	struct cm_device *cm_dev;
    160	struct ib_mad_agent *mad_agent;
    161	u32 port_num;
    162	atomic_long_t counters[CM_COUNTER_GROUPS][CM_ATTR_COUNT];
    163};
    164
    165struct cm_device {
    166	struct kref kref;
    167	struct list_head list;
    168	spinlock_t mad_agent_lock;
    169	struct ib_device *ib_device;
    170	u8 ack_delay;
    171	int going_down;
    172	struct cm_port *port[];
    173};
    174
    175struct cm_av {
    176	struct cm_port *port;
    177	struct rdma_ah_attr ah_attr;
    178	u16 pkey_index;
    179	u8 timeout;
    180};
    181
    182struct cm_work {
    183	struct delayed_work work;
    184	struct list_head list;
    185	struct cm_port *port;
    186	struct ib_mad_recv_wc *mad_recv_wc;	/* Received MADs */
    187	__be32 local_id;			/* Established / timewait */
    188	__be32 remote_id;
    189	struct ib_cm_event cm_event;
    190	struct sa_path_rec path[];
    191};
    192
    193struct cm_timewait_info {
    194	struct cm_work work;
    195	struct list_head list;
    196	struct rb_node remote_qp_node;
    197	struct rb_node remote_id_node;
    198	__be64 remote_ca_guid;
    199	__be32 remote_qpn;
    200	u8 inserted_remote_qp;
    201	u8 inserted_remote_id;
    202};
    203
    204struct cm_id_private {
    205	struct ib_cm_id	id;
    206
    207	struct rb_node service_node;
    208	struct rb_node sidr_id_node;
    209	u32 sidr_slid;
    210	spinlock_t lock;	/* Do not acquire inside cm.lock */
    211	struct completion comp;
    212	refcount_t refcount;
    213	/* Number of clients sharing this ib_cm_id. Only valid for listeners.
    214	 * Protected by the cm.lock spinlock.
    215	 */
    216	int listen_sharecount;
    217	struct rcu_head rcu;
    218
    219	struct ib_mad_send_buf *msg;
    220	struct cm_timewait_info *timewait_info;
    221	/* todo: use alternate port on send failure */
    222	struct cm_av av;
    223	struct cm_av alt_av;
    224
    225	void *private_data;
    226	__be64 tid;
    227	__be32 local_qpn;
    228	__be32 remote_qpn;
    229	enum ib_qp_type qp_type;
    230	__be32 sq_psn;
    231	__be32 rq_psn;
    232	int timeout_ms;
    233	enum ib_mtu path_mtu;
    234	__be16 pkey;
    235	u8 private_data_len;
    236	u8 max_cm_retries;
    237	u8 responder_resources;
    238	u8 initiator_depth;
    239	u8 retry_count;
    240	u8 rnr_retry_count;
    241	u8 service_timeout;
    242	u8 target_ack_delay;
    243
    244	struct list_head work_list;
    245	atomic_t work_count;
    246
    247	struct rdma_ucm_ece ece;
    248};
    249
    250static void cm_dev_release(struct kref *kref)
    251{
    252	struct cm_device *cm_dev = container_of(kref, struct cm_device, kref);
    253	u32 i;
    254
    255	rdma_for_each_port(cm_dev->ib_device, i)
    256		kfree(cm_dev->port[i - 1]);
    257
    258	kfree(cm_dev);
    259}
    260
    261static void cm_device_put(struct cm_device *cm_dev)
    262{
    263	kref_put(&cm_dev->kref, cm_dev_release);
    264}
    265
    266static void cm_work_handler(struct work_struct *work);
    267
    268static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
    269{
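       	/* Dropping the last reference completes ->comp, which unblocks the
       	 * wait_for_completion() in cm_destroy_id().
       	 */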
    270	if (refcount_dec_and_test(&cm_id_priv->refcount))
    271		complete(&cm_id_priv->comp);
    272}
    273
    274static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
    275{
    276	struct ib_mad_agent *mad_agent;
    277	struct ib_mad_send_buf *m;
    278	struct ib_ah *ah;
    279
    280	lockdep_assert_held(&cm_id_priv->lock);
    281
    282	if (!cm_id_priv->av.port)
    283		return ERR_PTR(-EINVAL);
    284
    285	spin_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
    286	mad_agent = cm_id_priv->av.port->mad_agent;
    287	if (!mad_agent) {
    288		m = ERR_PTR(-EINVAL);
    289		goto out;
    290	}
    291
    292	ah = rdma_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr, 0);
    293	if (IS_ERR(ah)) {
    294		m = ERR_CAST(ah);
    295		goto out;
    296	}
    297
    298	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
    299			       cm_id_priv->av.pkey_index,
    300			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
    301			       GFP_ATOMIC,
    302			       IB_MGMT_BASE_VERSION);
    303	if (IS_ERR(m)) {
    304		rdma_destroy_ah(ah, 0);
    305		goto out;
    306	}
    307
    308	/* Timeout set by caller if response is expected. */
    309	m->ah = ah;
    310	m->retries = cm_id_priv->max_cm_retries;
    311
    312	refcount_inc(&cm_id_priv->refcount);
    313	m->context[0] = cm_id_priv;
    314
    315out:
    316	spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
    317	return m;
    318}
    319
    320static void cm_free_msg(struct ib_mad_send_buf *msg)
    321{
    322	struct cm_id_private *cm_id_priv = msg->context[0];
    323
    324	if (msg->ah)
    325		rdma_destroy_ah(msg->ah, 0);
    326	cm_deref_id(cm_id_priv);
    327	ib_free_send_mad(msg);
    328}
    329
    330static struct ib_mad_send_buf *
    331cm_alloc_priv_msg(struct cm_id_private *cm_id_priv)
    332{
    333	struct ib_mad_send_buf *msg;
    334
    335	lockdep_assert_held(&cm_id_priv->lock);
    336
    337	msg = cm_alloc_msg(cm_id_priv);
    338	if (IS_ERR(msg))
    339		return msg;
    340	cm_id_priv->msg = msg;
    341	return msg;
    342}
    343
    344static void cm_free_priv_msg(struct ib_mad_send_buf *msg)
    345{
    346	struct cm_id_private *cm_id_priv = msg->context[0];
    347
    348	lockdep_assert_held(&cm_id_priv->lock);
    349
    350	if (!WARN_ON(cm_id_priv->msg != msg))
    351		cm_id_priv->msg = NULL;
    352
    353	if (msg->ah)
    354		rdma_destroy_ah(msg->ah, 0);
    355	cm_deref_id(cm_id_priv);
    356	ib_free_send_mad(msg);
    357}
    358
    359static struct ib_mad_send_buf *cm_alloc_response_msg_no_ah(struct cm_port *port,
    360							   struct ib_mad_recv_wc *mad_recv_wc)
    361{
    362	return ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
    363				  0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
    364				  GFP_ATOMIC,
    365				  IB_MGMT_BASE_VERSION);
    366}
    367
    368static int cm_create_response_msg_ah(struct cm_port *port,
    369				     struct ib_mad_recv_wc *mad_recv_wc,
    370				     struct ib_mad_send_buf *msg)
    371{
    372	struct ib_ah *ah;
    373
    374	ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc,
    375				  mad_recv_wc->recv_buf.grh, port->port_num);
    376	if (IS_ERR(ah))
    377		return PTR_ERR(ah);
    378
    379	msg->ah = ah;
    380	return 0;
    381}
    382
    383static int cm_alloc_response_msg(struct cm_port *port,
    384				 struct ib_mad_recv_wc *mad_recv_wc,
    385				 struct ib_mad_send_buf **msg)
    386{
    387	struct ib_mad_send_buf *m;
    388	int ret;
    389
    390	m = cm_alloc_response_msg_no_ah(port, mad_recv_wc);
    391	if (IS_ERR(m))
    392		return PTR_ERR(m);
    393
    394	ret = cm_create_response_msg_ah(port, mad_recv_wc, m);
    395	if (ret) {
    396		ib_free_send_mad(m);
    397		return ret;
    398	}
    399
    400	*msg = m;
    401	return 0;
    402}
    403
    404static void cm_free_response_msg(struct ib_mad_send_buf *msg)
    405{
    406	if (msg->ah)
    407		rdma_destroy_ah(msg->ah, 0);
    408	ib_free_send_mad(msg);
    409}
    410
    411static void *cm_copy_private_data(const void *private_data, u8 private_data_len)
    412{
    413	void *data;
    414
    415	if (!private_data || !private_data_len)
    416		return NULL;
    417
    418	data = kmemdup(private_data, private_data_len, GFP_KERNEL);
    419	if (!data)
    420		return ERR_PTR(-ENOMEM);
    421
    422	return data;
    423}
    424
    425static void cm_set_private_data(struct cm_id_private *cm_id_priv,
    426				 void *private_data, u8 private_data_len)
    427{
    428	if (cm_id_priv->private_data && cm_id_priv->private_data_len)
    429		kfree(cm_id_priv->private_data);
    430
    431	cm_id_priv->private_data = private_data;
    432	cm_id_priv->private_data_len = private_data_len;
    433}
    434
    435static void cm_set_av_port(struct cm_av *av, struct cm_port *port)
    436{
    437	struct cm_port *old_port = av->port;
    438
    439	if (old_port == port)
    440		return;
    441
    442	av->port = port;
    443	if (old_port)
    444		cm_device_put(old_port->cm_dev);
    445	if (port)
    446		kref_get(&port->cm_dev->kref);
    447}
    448
    449static void cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
    450			       struct rdma_ah_attr *ah_attr, struct cm_av *av)
    451{
    452	cm_set_av_port(av, port);
    453	av->pkey_index = wc->pkey_index;
    454	rdma_move_ah_attr(&av->ah_attr, ah_attr);
    455}
    456
    457static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
    458				   struct ib_grh *grh, struct cm_av *av)
    459{
    460	cm_set_av_port(av, port);
    461	av->pkey_index = wc->pkey_index;
    462	return ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
    463				       port->port_num, wc,
    464				       grh, &av->ah_attr);
    465}
    466
    467static struct cm_port *
    468get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr)
    469{
    470	struct cm_device *cm_dev;
    471	struct cm_port *port = NULL;
    472	unsigned long flags;
    473
    474	if (attr) {
    475		read_lock_irqsave(&cm.device_lock, flags);
    476		list_for_each_entry(cm_dev, &cm.device_list, list) {
    477			if (cm_dev->ib_device == attr->device) {
    478				port = cm_dev->port[attr->port_num - 1];
    479				break;
    480			}
    481		}
    482		read_unlock_irqrestore(&cm.device_lock, flags);
    483	} else {
    484		/* SGID attribute can be NULL in the following
    485		 * conditions.
    486		 * (a) Alternative path
    487		 * (b) IB link layer without GRH
    488		 * (c) LAP send messages
    489		 */
    490		read_lock_irqsave(&cm.device_lock, flags);
    491		list_for_each_entry(cm_dev, &cm.device_list, list) {
    492			attr = rdma_find_gid(cm_dev->ib_device,
    493					     &path->sgid,
    494					     sa_conv_pathrec_to_gid_type(path),
    495					     NULL);
    496			if (!IS_ERR(attr)) {
    497				port = cm_dev->port[attr->port_num - 1];
    498				break;
    499			}
    500		}
    501		read_unlock_irqrestore(&cm.device_lock, flags);
    502		if (port)
    503			rdma_put_gid_attr(attr);
    504	}
    505	return port;
    506}
    507
    508static int cm_init_av_by_path(struct sa_path_rec *path,
    509			      const struct ib_gid_attr *sgid_attr,
    510			      struct cm_av *av)
    511{
    512	struct rdma_ah_attr new_ah_attr;
    513	struct cm_device *cm_dev;
    514	struct cm_port *port;
    515	int ret;
    516
    517	port = get_cm_port_from_path(path, sgid_attr);
    518	if (!port)
    519		return -EINVAL;
    520	cm_dev = port->cm_dev;
    521
    522	ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
    523				  be16_to_cpu(path->pkey), &av->pkey_index);
    524	if (ret)
    525		return ret;
    526
    527	cm_set_av_port(av, port);
    528
    529	/*
    530	 * av->ah_attr might be initialized based on wc or during
    531	 * request processing time which might have reference to sgid_attr.
    532	 * So initialize a new ah_attr on stack.
    533	 * If initialization fails, old ah_attr is used for sending any
    534	 * responses. If initialization is successful, then the new ah_attr
    535	 * is used by overwriting the old one, so that the right ah_attr
    536	 * can be used to return an error response.
    537	 */
    538	ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path,
    539					&new_ah_attr, sgid_attr);
    540	if (ret)
    541		return ret;
    542
    543	av->timeout = path->packet_life_time + 1;
    544	rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
    545	return 0;
    546}
    547
    548/* Move av created by cm_init_av_by_path(), so av.dgid is not moved */
    549static void cm_move_av_from_path(struct cm_av *dest, struct cm_av *src)
    550{
    551	cm_set_av_port(dest, src->port);
    552	cm_set_av_port(src, NULL);
    553	dest->pkey_index = src->pkey_index;
    554	rdma_move_ah_attr(&dest->ah_attr, &src->ah_attr);
    555	dest->timeout = src->timeout;
    556}
    557
    558static void cm_destroy_av(struct cm_av *av)
    559{
    560	rdma_destroy_ah_attr(&av->ah_attr);
    561	cm_set_av_port(av, NULL);
    562}
    563
    564static u32 cm_local_id(__be32 local_id)
    565{
    566	return (__force u32) (local_id ^ cm.random_id_operand);
    567}
    568
    569static struct cm_id_private *cm_acquire_id(__be32 local_id, __be32 remote_id)
    570{
    571	struct cm_id_private *cm_id_priv;
    572
    573	rcu_read_lock();
    574	cm_id_priv = xa_load(&cm.local_id_table, cm_local_id(local_id));
    575	if (!cm_id_priv || cm_id_priv->id.remote_id != remote_id ||
    576	    !refcount_inc_not_zero(&cm_id_priv->refcount))
    577		cm_id_priv = NULL;
    578	rcu_read_unlock();
    579
    580	return cm_id_priv;
    581}
    582
    583/*
    584 * Trivial helpers to strip endian annotation and compare; the
    585 * endianness doesn't actually matter since we just need a stable
    586 * order for the RB tree.
    587 */
    588static int be32_lt(__be32 a, __be32 b)
    589{
    590	return (__force u32) a < (__force u32) b;
    591}
    592
    593static int be32_gt(__be32 a, __be32 b)
    594{
    595	return (__force u32) a > (__force u32) b;
    596}
    597
    598static int be64_lt(__be64 a, __be64 b)
    599{
    600	return (__force u64) a < (__force u64) b;
    601}
    602
    603static int be64_gt(__be64 a, __be64 b)
    604{
    605	return (__force u64) a > (__force u64) b;
    606}
    607
    608/*
    609 * Inserts a new cm_id_priv into the listen_service_table. Returns cm_id_priv
    610 * if the new ID was inserted, NULL if it could not be inserted due to a
    611 * collision, or the existing cm_id_priv ready for shared usage.
    612 */
    613static struct cm_id_private *cm_insert_listen(struct cm_id_private *cm_id_priv,
    614					      ib_cm_handler shared_handler)
    615{
    616	struct rb_node **link = &cm.listen_service_table.rb_node;
    617	struct rb_node *parent = NULL;
    618	struct cm_id_private *cur_cm_id_priv;
    619	__be64 service_id = cm_id_priv->id.service_id;
    620	__be64 service_mask = cm_id_priv->id.service_mask;
    621	unsigned long flags;
    622
    623	spin_lock_irqsave(&cm.lock, flags);
    624	while (*link) {
    625		parent = *link;
    626		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
    627					  service_node);
    628		if ((cur_cm_id_priv->id.service_mask & service_id) ==
    629		    (service_mask & cur_cm_id_priv->id.service_id) &&
    630		    (cm_id_priv->id.device == cur_cm_id_priv->id.device)) {
    631			/*
    632			 * Sharing an ib_cm_id with different handlers is not
    633			 * supported
    634			 */
    635			if (cur_cm_id_priv->id.cm_handler != shared_handler ||
    636			    cur_cm_id_priv->id.context ||
    637			    WARN_ON(!cur_cm_id_priv->id.cm_handler)) {
    638				spin_unlock_irqrestore(&cm.lock, flags);
    639				return NULL;
    640			}
    641			refcount_inc(&cur_cm_id_priv->refcount);
    642			cur_cm_id_priv->listen_sharecount++;
    643			spin_unlock_irqrestore(&cm.lock, flags);
    644			return cur_cm_id_priv;
    645		}
    646
    647		if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
    648			link = &(*link)->rb_left;
    649		else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
    650			link = &(*link)->rb_right;
    651		else if (be64_lt(service_id, cur_cm_id_priv->id.service_id))
    652			link = &(*link)->rb_left;
    653		else if (be64_gt(service_id, cur_cm_id_priv->id.service_id))
    654			link = &(*link)->rb_right;
    655		else
    656			link = &(*link)->rb_right;
    657	}
    658	cm_id_priv->listen_sharecount++;
    659	rb_link_node(&cm_id_priv->service_node, parent, link);
    660	rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);
    661	spin_unlock_irqrestore(&cm.lock, flags);
    662	return cm_id_priv;
    663}
    664
    665static struct cm_id_private *cm_find_listen(struct ib_device *device,
    666					    __be64 service_id)
    667{
    668	struct rb_node *node = cm.listen_service_table.rb_node;
    669	struct cm_id_private *cm_id_priv;
    670
    671	while (node) {
    672		cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
    673		if ((cm_id_priv->id.service_mask & service_id) ==
    674		     cm_id_priv->id.service_id &&
    675		    (cm_id_priv->id.device == device)) {
    676			refcount_inc(&cm_id_priv->refcount);
    677			return cm_id_priv;
    678		}
    679		if (device < cm_id_priv->id.device)
    680			node = node->rb_left;
    681		else if (device > cm_id_priv->id.device)
    682			node = node->rb_right;
    683		else if (be64_lt(service_id, cm_id_priv->id.service_id))
    684			node = node->rb_left;
    685		else if (be64_gt(service_id, cm_id_priv->id.service_id))
    686			node = node->rb_right;
    687		else
    688			node = node->rb_right;
    689	}
    690	return NULL;
    691}
    692
    693static struct cm_timewait_info *
    694cm_insert_remote_id(struct cm_timewait_info *timewait_info)
    695{
    696	struct rb_node **link = &cm.remote_id_table.rb_node;
    697	struct rb_node *parent = NULL;
    698	struct cm_timewait_info *cur_timewait_info;
    699	__be64 remote_ca_guid = timewait_info->remote_ca_guid;
    700	__be32 remote_id = timewait_info->work.remote_id;
    701
    702	while (*link) {
    703		parent = *link;
    704		cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
    705					     remote_id_node);
    706		if (be32_lt(remote_id, cur_timewait_info->work.remote_id))
    707			link = &(*link)->rb_left;
    708		else if (be32_gt(remote_id, cur_timewait_info->work.remote_id))
    709			link = &(*link)->rb_right;
    710		else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
    711			link = &(*link)->rb_left;
    712		else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
    713			link = &(*link)->rb_right;
    714		else
    715			return cur_timewait_info;
    716	}
    717	timewait_info->inserted_remote_id = 1;
    718	rb_link_node(&timewait_info->remote_id_node, parent, link);
    719	rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table);
    720	return NULL;
    721}
    722
    723static struct cm_id_private *cm_find_remote_id(__be64 remote_ca_guid,
    724					       __be32 remote_id)
    725{
    726	struct rb_node *node = cm.remote_id_table.rb_node;
    727	struct cm_timewait_info *timewait_info;
    728	struct cm_id_private *res = NULL;
    729
    730	spin_lock_irq(&cm.lock);
    731	while (node) {
    732		timewait_info = rb_entry(node, struct cm_timewait_info,
    733					 remote_id_node);
    734		if (be32_lt(remote_id, timewait_info->work.remote_id))
    735			node = node->rb_left;
    736		else if (be32_gt(remote_id, timewait_info->work.remote_id))
    737			node = node->rb_right;
    738		else if (be64_lt(remote_ca_guid, timewait_info->remote_ca_guid))
    739			node = node->rb_left;
    740		else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid))
    741			node = node->rb_right;
    742		else {
    743			res = cm_acquire_id(timewait_info->work.local_id,
    744					     timewait_info->work.remote_id);
    745			break;
    746		}
    747	}
    748	spin_unlock_irq(&cm.lock);
    749	return res;
    750}
    751
    752static struct cm_timewait_info *
    753cm_insert_remote_qpn(struct cm_timewait_info *timewait_info)
    754{
    755	struct rb_node **link = &cm.remote_qp_table.rb_node;
    756	struct rb_node *parent = NULL;
    757	struct cm_timewait_info *cur_timewait_info;
    758	__be64 remote_ca_guid = timewait_info->remote_ca_guid;
    759	__be32 remote_qpn = timewait_info->remote_qpn;
    760
    761	while (*link) {
    762		parent = *link;
    763		cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
    764					     remote_qp_node);
    765		if (be32_lt(remote_qpn, cur_timewait_info->remote_qpn))
    766			link = &(*link)->rb_left;
    767		else if (be32_gt(remote_qpn, cur_timewait_info->remote_qpn))
    768			link = &(*link)->rb_right;
    769		else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
    770			link = &(*link)->rb_left;
    771		else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
    772			link = &(*link)->rb_right;
    773		else
    774			return cur_timewait_info;
    775	}
    776	timewait_info->inserted_remote_qp = 1;
    777	rb_link_node(&timewait_info->remote_qp_node, parent, link);
    778	rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table);
    779	return NULL;
    780}
    781
    782static struct cm_id_private *
    783cm_insert_remote_sidr(struct cm_id_private *cm_id_priv)
    784{
    785	struct rb_node **link = &cm.remote_sidr_table.rb_node;
    786	struct rb_node *parent = NULL;
    787	struct cm_id_private *cur_cm_id_priv;
    788	__be32 remote_id = cm_id_priv->id.remote_id;
    789
    790	while (*link) {
    791		parent = *link;
    792		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
    793					  sidr_id_node);
    794		if (be32_lt(remote_id, cur_cm_id_priv->id.remote_id))
    795			link = &(*link)->rb_left;
    796		else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id))
    797			link = &(*link)->rb_right;
    798		else {
    799			if (cur_cm_id_priv->sidr_slid < cm_id_priv->sidr_slid)
    800				link = &(*link)->rb_left;
    801			else if (cur_cm_id_priv->sidr_slid > cm_id_priv->sidr_slid)
    802				link = &(*link)->rb_right;
    803			else
    804				return cur_cm_id_priv;
    805		}
    806	}
    807	rb_link_node(&cm_id_priv->sidr_id_node, parent, link);
    808	rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
    809	return NULL;
    810}
    811
    812static struct cm_id_private *cm_alloc_id_priv(struct ib_device *device,
    813					      ib_cm_handler cm_handler,
    814					      void *context)
    815{
    816	struct cm_id_private *cm_id_priv;
    817	u32 id;
    818	int ret;
    819
    820	cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
    821	if (!cm_id_priv)
    822		return ERR_PTR(-ENOMEM);
    823
    824	cm_id_priv->id.state = IB_CM_IDLE;
    825	cm_id_priv->id.device = device;
    826	cm_id_priv->id.cm_handler = cm_handler;
    827	cm_id_priv->id.context = context;
    828	cm_id_priv->id.remote_cm_qpn = 1;
    829
    830	RB_CLEAR_NODE(&cm_id_priv->service_node);
    831	RB_CLEAR_NODE(&cm_id_priv->sidr_id_node);
    832	spin_lock_init(&cm_id_priv->lock);
    833	init_completion(&cm_id_priv->comp);
    834	INIT_LIST_HEAD(&cm_id_priv->work_list);
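       	/*
       	 * Start at -1 so the first atomic_inc_and_test() in
       	 * cm_queue_work_unlock() reaches zero and the event is delivered
       	 * directly instead of being queued.
       	 */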
    835	atomic_set(&cm_id_priv->work_count, -1);
    836	refcount_set(&cm_id_priv->refcount, 1);
    837
    838	ret = xa_alloc_cyclic(&cm.local_id_table, &id, NULL, xa_limit_32b,
    839			      &cm.local_id_next, GFP_KERNEL);
    840	if (ret < 0)
    841		goto error;
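       	/* XOR with random_id_operand so the local ID used on the wire is not
       	 * simply the sequential xarray index.
       	 */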
    842	cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
    843
    844	return cm_id_priv;
    845
    846error:
    847	kfree(cm_id_priv);
    848	return ERR_PTR(ret);
    849}
    850
    851/*
    852 * Make the ID visible to the MAD handlers and other threads that use the
    853 * xarray.
    854 */
    855static void cm_finalize_id(struct cm_id_private *cm_id_priv)
    856{
    857	xa_store(&cm.local_id_table, cm_local_id(cm_id_priv->id.local_id),
    858		 cm_id_priv, GFP_ATOMIC);
    859}
    860
    861struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
    862				 ib_cm_handler cm_handler,
    863				 void *context)
    864{
    865	struct cm_id_private *cm_id_priv;
    866
    867	cm_id_priv = cm_alloc_id_priv(device, cm_handler, context);
    868	if (IS_ERR(cm_id_priv))
    869		return ERR_CAST(cm_id_priv);
    870
    871	cm_finalize_id(cm_id_priv);
    872	return &cm_id_priv->id;
    873}
    874EXPORT_SYMBOL(ib_create_cm_id);
    875
    876static struct cm_work *cm_dequeue_work(struct cm_id_private *cm_id_priv)
    877{
    878	struct cm_work *work;
    879
    880	if (list_empty(&cm_id_priv->work_list))
    881		return NULL;
    882
    883	work = list_entry(cm_id_priv->work_list.next, struct cm_work, list);
    884	list_del(&work->list);
    885	return work;
    886}
    887
    888static void cm_free_work(struct cm_work *work)
    889{
    890	if (work->mad_recv_wc)
    891		ib_free_recv_mad(work->mad_recv_wc);
    892	kfree(work);
    893}
    894
    895static void cm_queue_work_unlock(struct cm_id_private *cm_id_priv,
    896				 struct cm_work *work)
    897	__releases(&cm_id_priv->lock)
    898{
    899	bool immediate;
    900
    901	/*
    902	 * To deliver the event to the user callback we have to drop the
    903	 * spinlock; however, we need to ensure that the user callback is single
    904	 * threaded and receives events in temporal order. If there are
    905	 * already events being processed then thread new events onto a list,
    906	 * the thread currently processing will pick them up.
    907	 */
    908	immediate = atomic_inc_and_test(&cm_id_priv->work_count);
    909	if (!immediate) {
    910		list_add_tail(&work->list, &cm_id_priv->work_list);
    911		/*
    912		 * This routine always consumes the incoming reference. Once the work
    913		 * is queued on the work_list, a reference is already held by the
    914		 * thread currently running cm_process_work(), so this reference is
    915		 * not needed.
    916		 */
    917		cm_deref_id(cm_id_priv);
    918	}
    919	spin_unlock_irq(&cm_id_priv->lock);
    920
    921	if (immediate)
    922		cm_process_work(cm_id_priv, work);
    923}
    924
    925static inline int cm_convert_to_ms(int iba_time)
    926{
    927	/* approximate conversion to ms from 4.096us x 2^iba_time */
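       	/* 4.096us * 2^8 is roughly 1ms, hence the shift by (iba_time - 8) */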
    928	return 1 << max(iba_time - 8, 0);
    929}
    930
    931/*
    932 * calculate: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time
    933 * Because of how ack_timeout is stored, adding one doubles the timeout.
    934 * To avoid large timeouts, select the max(ack_delay, life_time + 1), and
    935 * increment it (round up) only if the other is within 50%.
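        *
        * For example, ca_ack_delay = 15 and packet_life_time = 14 give an
        * initial ack_timeout of 15, which is rounded up to 16 (about
        * 4.096us * 2^16, i.e. roughly 268ms).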
    936 */
    937static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
    938{
    939	int ack_timeout = packet_life_time + 1;
    940
    941	if (ack_timeout >= ca_ack_delay)
    942		ack_timeout += (ca_ack_delay >= (ack_timeout - 1));
    943	else
    944		ack_timeout = ca_ack_delay +
    945			      (ack_timeout >= (ca_ack_delay - 1));
    946
    947	return min(31, ack_timeout);
    948}
    949
    950static void cm_remove_remote(struct cm_id_private *cm_id_priv)
    951{
    952	struct cm_timewait_info *timewait_info = cm_id_priv->timewait_info;
    953
    954	if (timewait_info->inserted_remote_id) {
    955		rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
    956		timewait_info->inserted_remote_id = 0;
    957	}
    958
    959	if (timewait_info->inserted_remote_qp) {
    960		rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table);
    961		timewait_info->inserted_remote_qp = 0;
    962	}
    963}
    964
    965static struct cm_timewait_info *cm_create_timewait_info(__be32 local_id)
    966{
    967	struct cm_timewait_info *timewait_info;
    968
    969	timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL);
    970	if (!timewait_info)
    971		return ERR_PTR(-ENOMEM);
    972
    973	timewait_info->work.local_id = local_id;
    974	INIT_DELAYED_WORK(&timewait_info->work.work, cm_work_handler);
    975	timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT;
    976	return timewait_info;
    977}
    978
    979static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
    980{
    981	int wait_time;
    982	unsigned long flags;
    983	struct cm_device *cm_dev;
    984
    985	lockdep_assert_held(&cm_id_priv->lock);
    986
    987	cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client);
    988	if (!cm_dev)
    989		return;
    990
    991	spin_lock_irqsave(&cm.lock, flags);
    992	cm_remove_remote(cm_id_priv);
    993	list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list);
    994	spin_unlock_irqrestore(&cm.lock, flags);
    995
    996	/*
    997	 * The cm_id could be destroyed by the user before we exit timewait.
    998	 * To protect against this, we search for the cm_id after exiting
    999	 * timewait before notifying the user that we've exited timewait.
   1000	 */
   1001	cm_id_priv->id.state = IB_CM_TIMEWAIT;
   1002	wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);
   1003
   1004	/* Check if the device started its remove_one */
   1005	spin_lock_irqsave(&cm.lock, flags);
   1006	if (!cm_dev->going_down)
   1007		queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
   1008				   msecs_to_jiffies(wait_time));
   1009	spin_unlock_irqrestore(&cm.lock, flags);
   1010
   1011	/*
   1012	 * The timewait_info is converted into a work and gets freed during
   1013	 * cm_free_work() in cm_timewait_handler().
   1014	 */
   1015	BUILD_BUG_ON(offsetof(struct cm_timewait_info, work) != 0);
   1016	cm_id_priv->timewait_info = NULL;
   1017}
   1018
   1019static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
   1020{
   1021	unsigned long flags;
   1022
   1023	lockdep_assert_held(&cm_id_priv->lock);
   1024
   1025	cm_id_priv->id.state = IB_CM_IDLE;
   1026	if (cm_id_priv->timewait_info) {
   1027		spin_lock_irqsave(&cm.lock, flags);
   1028		cm_remove_remote(cm_id_priv);
   1029		spin_unlock_irqrestore(&cm.lock, flags);
   1030		kfree(cm_id_priv->timewait_info);
   1031		cm_id_priv->timewait_info = NULL;
   1032	}
   1033}
   1034
   1035static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
   1036{
   1037	struct cm_id_private *cm_id_priv;
   1038	struct cm_work *work;
   1039
   1040	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
   1041	spin_lock_irq(&cm_id_priv->lock);
   1042retest:
   1043	switch (cm_id->state) {
   1044	case IB_CM_LISTEN:
   1045		spin_lock(&cm.lock);
   1046		if (--cm_id_priv->listen_sharecount > 0) {
   1047			/* The id is still shared. */
   1048			WARN_ON(refcount_read(&cm_id_priv->refcount) == 1);
   1049			spin_unlock(&cm.lock);
   1050			spin_unlock_irq(&cm_id_priv->lock);
   1051			cm_deref_id(cm_id_priv);
   1052			return;
   1053		}
   1054		cm_id->state = IB_CM_IDLE;
   1055		rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
   1056		RB_CLEAR_NODE(&cm_id_priv->service_node);
   1057		spin_unlock(&cm.lock);
   1058		break;
   1059	case IB_CM_SIDR_REQ_SENT:
   1060		cm_id->state = IB_CM_IDLE;
   1061		ib_cancel_mad(cm_id_priv->msg);
   1062		break;
   1063	case IB_CM_SIDR_REQ_RCVD:
   1064		cm_send_sidr_rep_locked(cm_id_priv,
   1065					&(struct ib_cm_sidr_rep_param){
   1066						.status = IB_SIDR_REJECT });
   1067		/* cm_send_sidr_rep_locked will not move to IDLE if it fails */
   1068		cm_id->state = IB_CM_IDLE;
   1069		break;
   1070	case IB_CM_REQ_SENT:
   1071	case IB_CM_MRA_REQ_RCVD:
   1072		ib_cancel_mad(cm_id_priv->msg);
   1073		cm_send_rej_locked(cm_id_priv, IB_CM_REJ_TIMEOUT,
   1074				   &cm_id_priv->id.device->node_guid,
   1075				   sizeof(cm_id_priv->id.device->node_guid),
   1076				   NULL, 0);
   1077		break;
   1078	case IB_CM_REQ_RCVD:
   1079		if (err == -ENOMEM) {
   1080			/* Do not reject to allow future retries. */
   1081			cm_reset_to_idle(cm_id_priv);
   1082		} else {
   1083			cm_send_rej_locked(cm_id_priv,
   1084					   IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
   1085					   NULL, 0);
   1086		}
   1087		break;
   1088	case IB_CM_REP_SENT:
   1089	case IB_CM_MRA_REP_RCVD:
   1090		ib_cancel_mad(cm_id_priv->msg);
   1091		cm_send_rej_locked(cm_id_priv, IB_CM_REJ_CONSUMER_DEFINED, NULL,
   1092				   0, NULL, 0);
   1093		goto retest;
   1094	case IB_CM_MRA_REQ_SENT:
   1095	case IB_CM_REP_RCVD:
   1096	case IB_CM_MRA_REP_SENT:
   1097		cm_send_rej_locked(cm_id_priv, IB_CM_REJ_CONSUMER_DEFINED, NULL,
   1098				   0, NULL, 0);
   1099		break;
   1100	case IB_CM_ESTABLISHED:
   1101		if (cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
   1102			cm_id->state = IB_CM_IDLE;
   1103			break;
   1104		}
   1105		cm_send_dreq_locked(cm_id_priv, NULL, 0);
   1106		goto retest;
   1107	case IB_CM_DREQ_SENT:
   1108		ib_cancel_mad(cm_id_priv->msg);
   1109		cm_enter_timewait(cm_id_priv);
   1110		goto retest;
   1111	case IB_CM_DREQ_RCVD:
   1112		cm_send_drep_locked(cm_id_priv, NULL, 0);
   1113		WARN_ON(cm_id->state != IB_CM_TIMEWAIT);
   1114		goto retest;
   1115	case IB_CM_TIMEWAIT:
   1116		/*
   1117		 * The cm_acquire_id in cm_timewait_handler will stop working
   1118		 * once we do xa_erase below, so just move to idle here for
   1119		 * consistency.
   1120		 */
   1121		cm_id->state = IB_CM_IDLE;
   1122		break;
   1123	case IB_CM_IDLE:
   1124		break;
   1125	}
   1126	WARN_ON(cm_id->state != IB_CM_IDLE);
   1127
   1128	spin_lock(&cm.lock);
   1129	/* Required for cleanup paths related to cm_req_handler() */
   1130	if (cm_id_priv->timewait_info) {
   1131		cm_remove_remote(cm_id_priv);
   1132		kfree(cm_id_priv->timewait_info);
   1133		cm_id_priv->timewait_info = NULL;
   1134	}
   1135
   1136	WARN_ON(cm_id_priv->listen_sharecount);
   1137	WARN_ON(!RB_EMPTY_NODE(&cm_id_priv->service_node));
   1138	if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node))
   1139		rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
   1140	spin_unlock(&cm.lock);
   1141	spin_unlock_irq(&cm_id_priv->lock);
   1142
   1143	xa_erase(&cm.local_id_table, cm_local_id(cm_id->local_id));
   1144	cm_deref_id(cm_id_priv);
   1145	wait_for_completion(&cm_id_priv->comp);
   1146	while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
   1147		cm_free_work(work);
   1148
   1149	cm_destroy_av(&cm_id_priv->av);
   1150	cm_destroy_av(&cm_id_priv->alt_av);
   1151	kfree(cm_id_priv->private_data);
   1152	kfree_rcu(cm_id_priv, rcu);
   1153}
   1154
   1155void ib_destroy_cm_id(struct ib_cm_id *cm_id)
   1156{
   1157	cm_destroy_id(cm_id, 0);
   1158}
   1159EXPORT_SYMBOL(ib_destroy_cm_id);
   1160
   1161static int cm_init_listen(struct cm_id_private *cm_id_priv, __be64 service_id,
   1162			  __be64 service_mask)
   1163{
   1164	service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
   1165	service_id &= service_mask;
   1166	if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
   1167	    (service_id != IB_CM_ASSIGN_SERVICE_ID))
   1168		return -EINVAL;
   1169
   1170	if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
   1171		cm_id_priv->id.service_id = cpu_to_be64(cm.listen_service_id++);
   1172		cm_id_priv->id.service_mask = ~cpu_to_be64(0);
   1173	} else {
   1174		cm_id_priv->id.service_id = service_id;
   1175		cm_id_priv->id.service_mask = service_mask;
   1176	}
   1177	return 0;
   1178}
   1179
   1180/**
   1181 * ib_cm_listen - Initiates listening on the specified service ID for
   1182 *   connection and service ID resolution requests.
   1183 * @cm_id: Connection identifier associated with the listen request.
   1184 * @service_id: Service identifier matched against incoming connection
   1185 *   and service ID resolution requests.  The service ID should be specified
   1186 *   in network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
   1187 *   assign a service ID to the caller.
   1188 * @service_mask: Mask applied to service ID used to listen across a
   1189 *   range of service IDs.  If set to 0, the service ID is matched
   1190 *   exactly.  This parameter is ignored if %service_id is set to
   1191 *   IB_CM_ASSIGN_SERVICE_ID.
   1192 */
   1193int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask)
   1194{
   1195	struct cm_id_private *cm_id_priv =
   1196		container_of(cm_id, struct cm_id_private, id);
   1197	unsigned long flags;
   1198	int ret;
   1199
   1200	spin_lock_irqsave(&cm_id_priv->lock, flags);
   1201	if (cm_id_priv->id.state != IB_CM_IDLE) {
   1202		ret = -EINVAL;
   1203		goto out;
   1204	}
   1205
   1206	ret = cm_init_listen(cm_id_priv, service_id, service_mask);
   1207	if (ret)
   1208		goto out;
   1209
   1210	if (!cm_insert_listen(cm_id_priv, NULL)) {
   1211		ret = -EBUSY;
   1212		goto out;
   1213	}
   1214
   1215	cm_id_priv->id.state = IB_CM_LISTEN;
   1216	ret = 0;
   1217
   1218out:
   1219	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
   1220	return ret;
   1221}
   1222EXPORT_SYMBOL(ib_cm_listen);
   1223
   1224/**
   1225 * ib_cm_insert_listen - Create a new listening ib_cm_id and listen on
   1226 *			 the given service ID.
   1227 *
   1228 * If there's an existing ID listening on that same device and service ID,
   1229 * return it.
   1230 *
   1231 * @device: Device associated with the cm_id.  All related communication will
   1232 * be associated with the specified device.
   1233 * @cm_handler: Callback invoked to notify the user of CM events.
   1234 * @service_id: Service identifier matched against incoming connection
   1235 *   and service ID resolution requests.  The service ID should be specified
   1236 *   in network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
   1237 *   assign a service ID to the caller.
   1238 *
   1239 * Callers should call ib_destroy_cm_id when done with the listener ID.
   1240 */
   1241struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
   1242				     ib_cm_handler cm_handler,
   1243				     __be64 service_id)
   1244{
   1245	struct cm_id_private *listen_id_priv;
   1246	struct cm_id_private *cm_id_priv;
   1247	int err = 0;
   1248
   1249	/* Create an ID in advance, since the creation may sleep */
   1250	cm_id_priv = cm_alloc_id_priv(device, cm_handler, NULL);
   1251	if (IS_ERR(cm_id_priv))
   1252		return ERR_CAST(cm_id_priv);
   1253
   1254	err = cm_init_listen(cm_id_priv, service_id, 0);
   1255	if (err) {
   1256		ib_destroy_cm_id(&cm_id_priv->id);
   1257		return ERR_PTR(err);
   1258	}
   1259
   1260	spin_lock_irq(&cm_id_priv->lock);
   1261	listen_id_priv = cm_insert_listen(cm_id_priv, cm_handler);
   1262	if (listen_id_priv != cm_id_priv) {
   1263		spin_unlock_irq(&cm_id_priv->lock);
   1264		ib_destroy_cm_id(&cm_id_priv->id);
   1265		if (!listen_id_priv)
   1266			return ERR_PTR(-EINVAL);
   1267		return &listen_id_priv->id;
   1268	}
   1269	cm_id_priv->id.state = IB_CM_LISTEN;
   1270	spin_unlock_irq(&cm_id_priv->lock);
   1271
   1272	/*
   1273	 * A listen ID does not need to be in the xarray since it does not
   1274	 * receive mads, is not placed in the remote_id or remote_qpn rbtree,
   1275	 * and does not enter timewait.
   1276	 */
   1277
   1278	return &cm_id_priv->id;
   1279}
   1280EXPORT_SYMBOL(ib_cm_insert_listen);
   1281
   1282static __be64 cm_form_tid(struct cm_id_private *cm_id_priv)
   1283{
   1284	u64 hi_tid = 0, low_tid;
   1285
   1286	lockdep_assert_held(&cm_id_priv->lock);
   1287
   1288	low_tid = (u64)cm_id_priv->id.local_id;
   1289	if (!cm_id_priv->av.port)
   1290		return cpu_to_be64(low_tid);
   1291
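       	/* The TID carries the MAD agent's hi_tid in the upper 32 bits and our
       	 * local_id in the lower 32 bits.
       	 */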
   1292	spin_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
   1293	if (cm_id_priv->av.port->mad_agent)
   1294		hi_tid = ((u64)cm_id_priv->av.port->mad_agent->hi_tid) << 32;
   1295	spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
   1296	return cpu_to_be64(hi_tid | low_tid);
   1297}
   1298
   1299static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
   1300			      __be16 attr_id, __be64 tid)
   1301{
   1302	hdr->base_version  = IB_MGMT_BASE_VERSION;
   1303	hdr->mgmt_class	   = IB_MGMT_CLASS_CM;
   1304	hdr->class_version = IB_CM_CLASS_VERSION;
   1305	hdr->method	   = IB_MGMT_METHOD_SEND;
   1306	hdr->attr_id	   = attr_id;
   1307	hdr->tid	   = tid;
   1308}
   1309
   1310static void cm_format_mad_ece_hdr(struct ib_mad_hdr *hdr, __be16 attr_id,
   1311				  __be64 tid, u32 attr_mod)
   1312{
   1313	cm_format_mad_hdr(hdr, attr_id, tid);
   1314	hdr->attr_mod = cpu_to_be32(attr_mod);
   1315}
   1316
   1317static void cm_format_req(struct cm_req_msg *req_msg,
   1318			  struct cm_id_private *cm_id_priv,
   1319			  struct ib_cm_req_param *param)
   1320{
   1321	struct sa_path_rec *pri_path = param->primary_path;
   1322	struct sa_path_rec *alt_path = param->alternate_path;
   1323	bool pri_ext = false;
   1324
   1325	if (pri_path->rec_type == SA_PATH_REC_TYPE_OPA)
   1326		pri_ext = opa_is_extended_lid(pri_path->opa.dlid,
   1327					      pri_path->opa.slid);
   1328
   1329	cm_format_mad_ece_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
   1330			      cm_form_tid(cm_id_priv), param->ece.attr_mod);
   1331
   1332	IBA_SET(CM_REQ_LOCAL_COMM_ID, req_msg,
   1333		be32_to_cpu(cm_id_priv->id.local_id));
   1334	IBA_SET(CM_REQ_SERVICE_ID, req_msg, be64_to_cpu(param->service_id));
   1335	IBA_SET(CM_REQ_LOCAL_CA_GUID, req_msg,
   1336		be64_to_cpu(cm_id_priv->id.device->node_guid));
   1337	IBA_SET(CM_REQ_LOCAL_QPN, req_msg, param->qp_num);
   1338	IBA_SET(CM_REQ_INITIATOR_DEPTH, req_msg, param->initiator_depth);
   1339	IBA_SET(CM_REQ_REMOTE_CM_RESPONSE_TIMEOUT, req_msg,
   1340		param->remote_cm_response_timeout);
   1341	cm_req_set_qp_type(req_msg, param->qp_type);
   1342	IBA_SET(CM_REQ_END_TO_END_FLOW_CONTROL, req_msg, param->flow_control);
   1343	IBA_SET(CM_REQ_STARTING_PSN, req_msg, param->starting_psn);
   1344	IBA_SET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg,
   1345		param->local_cm_response_timeout);
   1346	IBA_SET(CM_REQ_PARTITION_KEY, req_msg,
   1347		be16_to_cpu(param->primary_path->pkey));
   1348	IBA_SET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg,
   1349		param->primary_path->mtu);
   1350	IBA_SET(CM_REQ_MAX_CM_RETRIES, req_msg, param->max_cm_retries);
   1351
   1352	if (param->qp_type != IB_QPT_XRC_INI) {
   1353		IBA_SET(CM_REQ_RESPONDER_RESOURCES, req_msg,
   1354			param->responder_resources);
   1355		IBA_SET(CM_REQ_RETRY_COUNT, req_msg, param->retry_count);
   1356		IBA_SET(CM_REQ_RNR_RETRY_COUNT, req_msg,
   1357			param->rnr_retry_count);
   1358		IBA_SET(CM_REQ_SRQ, req_msg, param->srq);
   1359	}
   1360
   1361	*IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg) =
   1362		pri_path->sgid;
   1363	*IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg) =
   1364		pri_path->dgid;
   1365	if (pri_ext) {
   1366		IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg)
   1367			->global.interface_id =
   1368			OPA_MAKE_ID(be32_to_cpu(pri_path->opa.slid));
   1369		IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg)
   1370			->global.interface_id =
   1371			OPA_MAKE_ID(be32_to_cpu(pri_path->opa.dlid));
   1372	}
   1373	if (pri_path->hop_limit <= 1) {
   1374		IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
   1375			be16_to_cpu(pri_ext ? 0 :
   1376					      htons(ntohl(sa_path_get_slid(
   1377						      pri_path)))));
   1378		IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg,
   1379			be16_to_cpu(pri_ext ? 0 :
   1380					      htons(ntohl(sa_path_get_dlid(
   1381						      pri_path)))));
   1382	} else {
   1383		/* Work-around until there's a way to obtain remote LID info */
   1384		IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
   1385			be16_to_cpu(IB_LID_PERMISSIVE));
   1386		IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg,
   1387			be16_to_cpu(IB_LID_PERMISSIVE));
   1388	}
   1389	IBA_SET(CM_REQ_PRIMARY_FLOW_LABEL, req_msg,
   1390		be32_to_cpu(pri_path->flow_label));
   1391	IBA_SET(CM_REQ_PRIMARY_PACKET_RATE, req_msg, pri_path->rate);
   1392	IBA_SET(CM_REQ_PRIMARY_TRAFFIC_CLASS, req_msg, pri_path->traffic_class);
   1393	IBA_SET(CM_REQ_PRIMARY_HOP_LIMIT, req_msg, pri_path->hop_limit);
   1394	IBA_SET(CM_REQ_PRIMARY_SL, req_msg, pri_path->sl);
   1395	IBA_SET(CM_REQ_PRIMARY_SUBNET_LOCAL, req_msg,
   1396		(pri_path->hop_limit <= 1));
   1397	IBA_SET(CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT, req_msg,
   1398		cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
   1399			       pri_path->packet_life_time));
   1400
   1401	if (alt_path) {
   1402		bool alt_ext = false;
   1403
   1404		if (alt_path->rec_type == SA_PATH_REC_TYPE_OPA)
   1405			alt_ext = opa_is_extended_lid(alt_path->opa.dlid,
   1406						      alt_path->opa.slid);
   1407
   1408		*IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg) =
   1409			alt_path->sgid;
   1410		*IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg) =
   1411			alt_path->dgid;
   1412		if (alt_ext) {
   1413			IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID,
   1414					req_msg)
   1415				->global.interface_id =
   1416				OPA_MAKE_ID(be32_to_cpu(alt_path->opa.slid));
   1417			IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_REMOTE_PORT_GID,
   1418					req_msg)
   1419				->global.interface_id =
   1420				OPA_MAKE_ID(be32_to_cpu(alt_path->opa.dlid));
   1421		}
   1422		if (alt_path->hop_limit <= 1) {
   1423			IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg,
   1424				be16_to_cpu(
   1425					alt_ext ? 0 :
   1426						  htons(ntohl(sa_path_get_slid(
   1427							  alt_path)))));
   1428			IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg,
   1429				be16_to_cpu(
   1430					alt_ext ? 0 :
   1431						  htons(ntohl(sa_path_get_dlid(
   1432							  alt_path)))));
   1433		} else {
   1434			IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg,
   1435				be16_to_cpu(IB_LID_PERMISSIVE));
   1436			IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg,
   1437				be16_to_cpu(IB_LID_PERMISSIVE));
   1438		}
   1439		IBA_SET(CM_REQ_ALTERNATE_FLOW_LABEL, req_msg,
   1440			be32_to_cpu(alt_path->flow_label));
   1441		IBA_SET(CM_REQ_ALTERNATE_PACKET_RATE, req_msg, alt_path->rate);
   1442		IBA_SET(CM_REQ_ALTERNATE_TRAFFIC_CLASS, req_msg,
   1443			alt_path->traffic_class);
   1444		IBA_SET(CM_REQ_ALTERNATE_HOP_LIMIT, req_msg,
   1445			alt_path->hop_limit);
   1446		IBA_SET(CM_REQ_ALTERNATE_SL, req_msg, alt_path->sl);
   1447		IBA_SET(CM_REQ_ALTERNATE_SUBNET_LOCAL, req_msg,
   1448			(alt_path->hop_limit <= 1));
   1449		IBA_SET(CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT, req_msg,
   1450			cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
   1451				       alt_path->packet_life_time));
   1452	}
   1453	IBA_SET(CM_REQ_VENDOR_ID, req_msg, param->ece.vendor_id);
   1454
   1455	if (param->private_data && param->private_data_len)
   1456		IBA_SET_MEM(CM_REQ_PRIVATE_DATA, req_msg, param->private_data,
   1457			    param->private_data_len);
   1458}
   1459
   1460static int cm_validate_req_param(struct ib_cm_req_param *param)
   1461{
   1462	if (!param->primary_path)
   1463		return -EINVAL;
   1464
   1465	if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC &&
   1466	    param->qp_type != IB_QPT_XRC_INI)
   1467		return -EINVAL;
   1468
   1469	if (param->private_data &&
   1470	    param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE)
   1471		return -EINVAL;
   1472
   1473	if (param->alternate_path &&
   1474	    (param->alternate_path->pkey != param->primary_path->pkey ||
   1475	     param->alternate_path->mtu != param->primary_path->mtu))
   1476		return -EINVAL;
   1477
   1478	return 0;
   1479}
   1480
   1481int ib_send_cm_req(struct ib_cm_id *cm_id,
   1482		   struct ib_cm_req_param *param)
   1483{
   1484	struct cm_av av = {}, alt_av = {};
   1485	struct cm_id_private *cm_id_priv;
   1486	struct ib_mad_send_buf *msg;
   1487	struct cm_req_msg *req_msg;
   1488	unsigned long flags;
   1489	int ret;
   1490
   1491	ret = cm_validate_req_param(param);
   1492	if (ret)
   1493		return ret;
   1494
   1495	/* Verify that we're not in timewait. */
   1496	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
   1497	spin_lock_irqsave(&cm_id_priv->lock, flags);
   1498	if (cm_id->state != IB_CM_IDLE || WARN_ON(cm_id_priv->timewait_info)) {
   1499		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
   1500		return -EINVAL;
   1501	}
   1502	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
   1503
   1504	cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
   1505							    id.local_id);
   1506	if (IS_ERR(cm_id_priv->timewait_info)) {
   1507		ret = PTR_ERR(cm_id_priv->timewait_info);
   1508		cm_id_priv->timewait_info = NULL;
   1509		return ret;
   1510	}
   1511
   1512	ret = cm_init_av_by_path(param->primary_path,
   1513				 param->ppath_sgid_attr, &av);
   1514	if (ret)
   1515		return ret;
   1516	if (param->alternate_path) {
   1517		ret = cm_init_av_by_path(param->alternate_path, NULL,
   1518					 &alt_av);
   1519		if (ret) {
   1520			cm_destroy_av(&av);
   1521			return ret;
   1522		}
   1523	}
   1524	cm_id->service_id = param->service_id;
   1525	cm_id->service_mask = ~cpu_to_be64(0);
   1526	cm_id_priv->timeout_ms = cm_convert_to_ms(
   1527				    param->primary_path->packet_life_time) * 2 +
   1528				 cm_convert_to_ms(
   1529				    param->remote_cm_response_timeout);
   1530	cm_id_priv->max_cm_retries = param->max_cm_retries;
   1531	cm_id_priv->initiator_depth = param->initiator_depth;
   1532	cm_id_priv->responder_resources = param->responder_resources;
   1533	cm_id_priv->retry_count = param->retry_count;
   1534	cm_id_priv->path_mtu = param->primary_path->mtu;
   1535	cm_id_priv->pkey = param->primary_path->pkey;
   1536	cm_id_priv->qp_type = param->qp_type;
   1537
   1538	spin_lock_irqsave(&cm_id_priv->lock, flags);
   1539
   1540	cm_move_av_from_path(&cm_id_priv->av, &av);
   1541	if (param->alternate_path)
   1542		cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av);
   1543
   1544	msg = cm_alloc_priv_msg(cm_id_priv);
   1545	if (IS_ERR(msg)) {
   1546		ret = PTR_ERR(msg);
   1547		goto out_unlock;
   1548	}
   1549
   1550	req_msg = (struct cm_req_msg *)msg->mad;
   1551	cm_format_req(req_msg, cm_id_priv, param);
   1552	cm_id_priv->tid = req_msg->hdr.tid;
   1553	msg->timeout_ms = cm_id_priv->timeout_ms;
   1554	msg->context[1] = (void *)(unsigned long)IB_CM_REQ_SENT;
   1555
   1556	cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
   1557	cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
   1558
   1559	trace_icm_send_req(&cm_id_priv->id);
   1560	ret = ib_post_send_mad(msg, NULL);
   1561	if (ret)
   1562		goto out_free;
   1563	BUG_ON(cm_id->state != IB_CM_IDLE);
   1564	cm_id->state = IB_CM_REQ_SENT;
   1565	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
   1566	return 0;
   1567out_free:
   1568	cm_free_priv_msg(msg);
   1569out_unlock:
   1570	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
   1571	return ret;
   1572}
   1573EXPORT_SYMBOL(ib_send_cm_req);
   1574
   1575static int cm_issue_rej(struct cm_port *port,
   1576			struct ib_mad_recv_wc *mad_recv_wc,
   1577			enum ib_cm_rej_reason reason,
   1578			enum cm_msg_response msg_rejected,
   1579			void *ari, u8 ari_length)
   1580{
   1581	struct ib_mad_send_buf *msg = NULL;
   1582	struct cm_rej_msg *rej_msg, *rcv_msg;
   1583	int ret;
   1584
   1585	ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
   1586	if (ret)
   1587		return ret;
   1588
   1589	/* We just need common CM header information.  Cast to any message. */
   1590	rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad;
   1591	rej_msg = (struct cm_rej_msg *) msg->mad;
   1592
   1593	cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid);
   1594	IBA_SET(CM_REJ_REMOTE_COMM_ID, rej_msg,
   1595		IBA_GET(CM_REJ_LOCAL_COMM_ID, rcv_msg));
   1596	IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
   1597		IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg));
   1598	IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, msg_rejected);
   1599	IBA_SET(CM_REJ_REASON, rej_msg, reason);
   1600
   1601	if (ari && ari_length) {
   1602		IBA_SET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg, ari_length);
   1603		IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length);
   1604	}
   1605
   1606	trace_icm_issue_rej(
   1607		IBA_GET(CM_REJ_LOCAL_COMM_ID, rcv_msg),
   1608		IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg));
   1609	ret = ib_post_send_mad(msg, NULL);
   1610	if (ret)
   1611		cm_free_response_msg(msg);
   1612
   1613	return ret;
   1614}
   1615
   1616static bool cm_req_has_alt_path(struct cm_req_msg *req_msg)
   1617{
   1618	return ((cpu_to_be16(
   1619			IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg))) ||
   1620		(ib_is_opa_gid(IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID,
   1621					       req_msg))));
   1622}
   1623
   1624static void cm_path_set_rec_type(struct ib_device *ib_device, u32 port_num,
   1625				 struct sa_path_rec *path, union ib_gid *gid)
   1626{
   1627	if (ib_is_opa_gid(gid) && rdma_cap_opa_ah(ib_device, port_num))
   1628		path->rec_type = SA_PATH_REC_TYPE_OPA;
   1629	else
   1630		path->rec_type = SA_PATH_REC_TYPE_IB;
   1631}
   1632
   1633static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg,
   1634					struct sa_path_rec *primary_path,
   1635					struct sa_path_rec *alt_path)
   1636{
   1637	u32 lid;
   1638
   1639	if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) {
   1640		sa_path_set_dlid(primary_path,
   1641				 IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID,
   1642					 req_msg));
   1643		sa_path_set_slid(primary_path,
   1644				 IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID,
   1645					 req_msg));
   1646	} else {
   1647		lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
   1648			CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg));
   1649		sa_path_set_dlid(primary_path, lid);
   1650
   1651		lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
   1652			CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg));
   1653		sa_path_set_slid(primary_path, lid);
   1654	}
   1655
   1656	if (!cm_req_has_alt_path(req_msg))
   1657		return;
   1658
   1659	if (alt_path->rec_type != SA_PATH_REC_TYPE_OPA) {
   1660		sa_path_set_dlid(alt_path,
   1661				 IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID,
   1662					 req_msg));
   1663		sa_path_set_slid(alt_path,
   1664				 IBA_GET(CM_REQ_ALTERNATE_REMOTE_PORT_LID,
   1665					 req_msg));
   1666	} else {
   1667		lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
   1668			CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg));
   1669		sa_path_set_dlid(alt_path, lid);
   1670
   1671		lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
   1672			CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg));
   1673		sa_path_set_slid(alt_path, lid);
   1674	}
   1675}
   1676
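/*
 * Build the primary (and, if present, alternate) sa_path_rec from a received
 * REQ.  The sender's "local" GID/LID becomes our DGID/DLID and its "remote"
 * GID/LID our SGID/SLID, i.e. the path is viewed from the passive side.
 */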
   1677static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
   1678				     struct sa_path_rec *primary_path,
   1679				     struct sa_path_rec *alt_path)
   1680{
   1681	primary_path->dgid =
   1682		*IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg);
   1683	primary_path->sgid =
   1684		*IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg);
   1685	primary_path->flow_label =
   1686		cpu_to_be32(IBA_GET(CM_REQ_PRIMARY_FLOW_LABEL, req_msg));
   1687	primary_path->hop_limit = IBA_GET(CM_REQ_PRIMARY_HOP_LIMIT, req_msg);
   1688	primary_path->traffic_class =
   1689		IBA_GET(CM_REQ_PRIMARY_TRAFFIC_CLASS, req_msg);
   1690	primary_path->reversible = 1;
   1691	primary_path->pkey =
   1692		cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
   1693	primary_path->sl = IBA_GET(CM_REQ_PRIMARY_SL, req_msg);
   1694	primary_path->mtu_selector = IB_SA_EQ;
   1695	primary_path->mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
   1696	primary_path->rate_selector = IB_SA_EQ;
   1697	primary_path->rate = IBA_GET(CM_REQ_PRIMARY_PACKET_RATE, req_msg);
   1698	primary_path->packet_life_time_selector = IB_SA_EQ;
   1699	primary_path->packet_life_time =
   1700		IBA_GET(CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT, req_msg);
   1701	primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
   1702	primary_path->service_id =
   1703		cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
   1704	if (sa_path_is_roce(primary_path))
   1705		primary_path->roce.route_resolved = false;
   1706
   1707	if (cm_req_has_alt_path(req_msg)) {
   1708		alt_path->dgid = *IBA_GET_MEM_PTR(
   1709			CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg);
   1710		alt_path->sgid = *IBA_GET_MEM_PTR(
   1711			CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg);
   1712		alt_path->flow_label = cpu_to_be32(
   1713			IBA_GET(CM_REQ_ALTERNATE_FLOW_LABEL, req_msg));
   1714		alt_path->hop_limit =
   1715			IBA_GET(CM_REQ_ALTERNATE_HOP_LIMIT, req_msg);
   1716		alt_path->traffic_class =
   1717			IBA_GET(CM_REQ_ALTERNATE_TRAFFIC_CLASS, req_msg);
   1718		alt_path->reversible = 1;
   1719		alt_path->pkey =
   1720			cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
   1721		alt_path->sl = IBA_GET(CM_REQ_ALTERNATE_SL, req_msg);
   1722		alt_path->mtu_selector = IB_SA_EQ;
   1723		alt_path->mtu =
   1724			IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
   1725		alt_path->rate_selector = IB_SA_EQ;
   1726		alt_path->rate = IBA_GET(CM_REQ_ALTERNATE_PACKET_RATE, req_msg);
   1727		alt_path->packet_life_time_selector = IB_SA_EQ;
   1728		alt_path->packet_life_time =
   1729			IBA_GET(CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT, req_msg);
   1730		alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
   1731		alt_path->service_id =
   1732			cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
   1733
   1734		if (sa_path_is_roce(alt_path))
   1735			alt_path->roce.route_resolved = false;
   1736	}
   1737	cm_format_path_lid_from_req(req_msg, primary_path, alt_path);
   1738}
   1739
   1740static u16 cm_get_bth_pkey(struct cm_work *work)
   1741{
   1742	struct ib_device *ib_dev = work->port->cm_dev->ib_device;
   1743	u32 port_num = work->port->port_num;
   1744	u16 pkey_index = work->mad_recv_wc->wc->pkey_index;
   1745	u16 pkey;
   1746	int ret;
   1747
   1748	ret = ib_get_cached_pkey(ib_dev, port_num, pkey_index, &pkey);
   1749	if (ret) {
   1750		dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %u, pkey index %u). %d\n",
   1751				     port_num, pkey_index, ret);
   1752		return 0;
   1753	}
   1754
   1755	return pkey;
   1756}
   1757
   1758/**
   1759 * cm_opa_to_ib_sgid - Convert OPA SGID to IB SGID
   1760 * ULPs (such as IPoIB) do not understand OPA GIDs and will
   1761 * reject them as the local_gid will not match the sgid. Therefore,
   1762 * change the pathrec's SGID to an IB SGID.
   1763 *
   1764 * @work: Work completion
   1765 * @path: Path record
   1766 */
   1767static void cm_opa_to_ib_sgid(struct cm_work *work,
   1768			      struct sa_path_rec *path)
   1769{
   1770	struct ib_device *dev = work->port->cm_dev->ib_device;
   1771	u32 port_num = work->port->port_num;
   1772
   1773	if (rdma_cap_opa_ah(dev, port_num) &&
   1774	    (ib_is_opa_gid(&path->sgid))) {
   1775		union ib_gid sgid;
   1776
   1777		if (rdma_query_gid(dev, port_num, 0, &sgid)) {
   1778			dev_warn(&dev->dev,
   1779				 "Error updating sgid in CM request\n");
   1780			return;
   1781		}
   1782
   1783		path->sgid = sgid;
   1784	}
   1785}
   1786
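/*
 * Translate a received REQ into the ib_cm_req_event_param delivered to the
 * listener, swapping the sender's local/remote and initiator/responder
 * fields into the passive side's point of view.
 */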
   1787static void cm_format_req_event(struct cm_work *work,
   1788				struct cm_id_private *cm_id_priv,
   1789				struct ib_cm_id *listen_id)
   1790{
   1791	struct cm_req_msg *req_msg;
   1792	struct ib_cm_req_event_param *param;
   1793
   1794	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
   1795	param = &work->cm_event.param.req_rcvd;
   1796	param->listen_id = listen_id;
   1797	param->bth_pkey = cm_get_bth_pkey(work);
   1798	param->port = cm_id_priv->av.port->port_num;
   1799	param->primary_path = &work->path[0];
   1800	cm_opa_to_ib_sgid(work, param->primary_path);
   1801	if (cm_req_has_alt_path(req_msg)) {
   1802		param->alternate_path = &work->path[1];
   1803		cm_opa_to_ib_sgid(work, param->alternate_path);
   1804	} else {
   1805		param->alternate_path = NULL;
   1806	}
   1807	param->remote_ca_guid =
   1808		cpu_to_be64(IBA_GET(CM_REQ_LOCAL_CA_GUID, req_msg));
   1809	param->remote_qkey = IBA_GET(CM_REQ_LOCAL_Q_KEY, req_msg);
   1810	param->remote_qpn = IBA_GET(CM_REQ_LOCAL_QPN, req_msg);
   1811	param->qp_type = cm_req_get_qp_type(req_msg);
   1812	param->starting_psn = IBA_GET(CM_REQ_STARTING_PSN, req_msg);
   1813	param->responder_resources = IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg);
   1814	param->initiator_depth = IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg);
   1815	param->local_cm_response_timeout =
   1816		IBA_GET(CM_REQ_REMOTE_CM_RESPONSE_TIMEOUT, req_msg);
   1817	param->flow_control = IBA_GET(CM_REQ_END_TO_END_FLOW_CONTROL, req_msg);
   1818	param->remote_cm_response_timeout =
   1819		IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg);
   1820	param->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg);
   1821	param->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg);
   1822	param->srq = IBA_GET(CM_REQ_SRQ, req_msg);
   1823	param->ppath_sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
   1824	param->ece.vendor_id = IBA_GET(CM_REQ_VENDOR_ID, req_msg);
   1825	param->ece.attr_mod = be32_to_cpu(req_msg->hdr.attr_mod);
   1826
   1827	work->cm_event.private_data =
   1828		IBA_GET_MEM_PTR(CM_REQ_PRIVATE_DATA, req_msg);
   1829}
   1830
   1831static void cm_process_work(struct cm_id_private *cm_id_priv,
   1832			    struct cm_work *work)
   1833{
   1834	int ret;
   1835
   1836	/* We will typically only have the current event to report. */
   1837	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
   1838	cm_free_work(work);
   1839
   1840	while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) {
   1841		spin_lock_irq(&cm_id_priv->lock);
   1842		work = cm_dequeue_work(cm_id_priv);
   1843		spin_unlock_irq(&cm_id_priv->lock);
   1844		if (!work)
   1845			return;
   1846
   1847		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
   1848						&work->cm_event);
   1849		cm_free_work(work);
   1850	}
   1851	cm_deref_id(cm_id_priv);
   1852	if (ret)
   1853		cm_destroy_id(&cm_id_priv->id, ret);
   1854}
   1855
   1856static void cm_format_mra(struct cm_mra_msg *mra_msg,
   1857			  struct cm_id_private *cm_id_priv,
   1858			  enum cm_msg_response msg_mraed, u8 service_timeout,
   1859			  const void *private_data, u8 private_data_len)
   1860{
   1861	cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);
   1862	IBA_SET(CM_MRA_MESSAGE_MRAED, mra_msg, msg_mraed);
   1863	IBA_SET(CM_MRA_LOCAL_COMM_ID, mra_msg,
   1864		be32_to_cpu(cm_id_priv->id.local_id));
   1865	IBA_SET(CM_MRA_REMOTE_COMM_ID, mra_msg,
   1866		be32_to_cpu(cm_id_priv->id.remote_id));
   1867	IBA_SET(CM_MRA_SERVICE_TIMEOUT, mra_msg, service_timeout);
   1868
   1869	if (private_data && private_data_len)
   1870		IBA_SET_MEM(CM_MRA_PRIVATE_DATA, mra_msg, private_data,
   1871			    private_data_len);
   1872}
   1873
   1874static void cm_format_rej(struct cm_rej_msg *rej_msg,
   1875			  struct cm_id_private *cm_id_priv,
   1876			  enum ib_cm_rej_reason reason, void *ari,
   1877			  u8 ari_length, const void *private_data,
   1878			  u8 private_data_len, enum ib_cm_state state)
   1879{
   1880	lockdep_assert_held(&cm_id_priv->lock);
   1881
   1882	cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
   1883	IBA_SET(CM_REJ_REMOTE_COMM_ID, rej_msg,
   1884		be32_to_cpu(cm_id_priv->id.remote_id));
   1885
   1886	switch (state) {
   1887	case IB_CM_REQ_RCVD:
   1888		IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg, be32_to_cpu(0));
   1889		IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REQ);
   1890		break;
   1891	case IB_CM_MRA_REQ_SENT:
   1892		IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
   1893			be32_to_cpu(cm_id_priv->id.local_id));
   1894		IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REQ);
   1895		break;
   1896	case IB_CM_REP_RCVD:
   1897	case IB_CM_MRA_REP_SENT:
   1898		IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
   1899			be32_to_cpu(cm_id_priv->id.local_id));
   1900		IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REP);
   1901		break;
   1902	default:
   1903		IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
   1904			be32_to_cpu(cm_id_priv->id.local_id));
   1905		IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg,
   1906			CM_MSG_RESPONSE_OTHER);
   1907		break;
   1908	}
   1909
   1910	IBA_SET(CM_REJ_REASON, rej_msg, reason);
   1911	if (ari && ari_length) {
   1912		IBA_SET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg, ari_length);
   1913		IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length);
   1914	}
   1915
   1916	if (private_data && private_data_len)
   1917		IBA_SET_MEM(CM_REJ_PRIVATE_DATA, rej_msg, private_data,
   1918			    private_data_len);
   1919}
   1920
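/*
 * A retransmitted REQ for a connection we already know about: count the
 * duplicate and, depending on our state, re-send the MRA or reject the stale
 * connection; in all other states the duplicate is silently dropped.
 */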
   1921static void cm_dup_req_handler(struct cm_work *work,
   1922			       struct cm_id_private *cm_id_priv)
   1923{
   1924	struct ib_mad_send_buf *msg = NULL;
   1925	int ret;
   1926
   1927	atomic_long_inc(
   1928		&work->port->counters[CM_RECV_DUPLICATES][CM_REQ_COUNTER]);
   1929
   1930	/* Quick state check to discard duplicate REQs. */
   1931	spin_lock_irq(&cm_id_priv->lock);
   1932	if (cm_id_priv->id.state == IB_CM_REQ_RCVD) {
   1933		spin_unlock_irq(&cm_id_priv->lock);
   1934		return;
   1935	}
   1936	spin_unlock_irq(&cm_id_priv->lock);
   1937
   1938	ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
   1939	if (ret)
   1940		return;
   1941
   1942	spin_lock_irq(&cm_id_priv->lock);
   1943	switch (cm_id_priv->id.state) {
   1944	case IB_CM_MRA_REQ_SENT:
   1945		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
   1946			      CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout,
   1947			      cm_id_priv->private_data,
   1948			      cm_id_priv->private_data_len);
   1949		break;
   1950	case IB_CM_TIMEWAIT:
   1951		cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv,
   1952			      IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0,
   1953			      IB_CM_TIMEWAIT);
   1954		break;
   1955	default:
   1956		goto unlock;
   1957	}
   1958	spin_unlock_irq(&cm_id_priv->lock);
   1959
   1960	trace_icm_send_dup_req(&cm_id_priv->id);
   1961	ret = ib_post_send_mad(msg, NULL);
   1962	if (ret)
   1963		goto free;
   1964	return;
   1965
   1966unlock:	spin_unlock_irq(&cm_id_priv->lock);
   1967free:	cm_free_response_msg(msg);
   1968}
   1969
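/*
 * Match an incoming REQ against existing state: duplicates go to
 * cm_dup_req_handler(), stale connections are rejected and the old id is
 * sent a DREQ, and a REQ with no listener is rejected with
 * IB_CM_REJ_INVALID_SERVICE_ID.  Otherwise the listening cm_id is returned
 * with a reference held; NULL means the REQ needs no further processing.
 */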
   1970static struct cm_id_private *cm_match_req(struct cm_work *work,
   1971					  struct cm_id_private *cm_id_priv)
   1972{
   1973	struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
   1974	struct cm_timewait_info *timewait_info;
   1975	struct cm_req_msg *req_msg;
   1976
   1977	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
   1978
   1979	/* Check for possible duplicate REQ. */
   1980	spin_lock_irq(&cm.lock);
   1981	timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
   1982	if (timewait_info) {
   1983		cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
   1984					   timewait_info->work.remote_id);
   1985		spin_unlock_irq(&cm.lock);
   1986		if (cur_cm_id_priv) {
   1987			cm_dup_req_handler(work, cur_cm_id_priv);
   1988			cm_deref_id(cur_cm_id_priv);
   1989		}
   1990		return NULL;
   1991	}
   1992
   1993	/* Check for stale connections. */
   1994	timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
   1995	if (timewait_info) {
   1996		cm_remove_remote(cm_id_priv);
   1997		cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
   1998					   timewait_info->work.remote_id);
   1999
   2000		spin_unlock_irq(&cm.lock);
   2001		cm_issue_rej(work->port, work->mad_recv_wc,
   2002			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
   2003			     NULL, 0);
   2004		if (cur_cm_id_priv) {
   2005			ib_send_cm_dreq(&cur_cm_id_priv->id, NULL, 0);
   2006			cm_deref_id(cur_cm_id_priv);
   2007		}
   2008		return NULL;
   2009	}
   2010
   2011	/* Find matching listen request. */
   2012	listen_cm_id_priv = cm_find_listen(
   2013		cm_id_priv->id.device,
   2014		cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg)));
   2015	if (!listen_cm_id_priv) {
   2016		cm_remove_remote(cm_id_priv);
   2017		spin_unlock_irq(&cm.lock);
   2018		cm_issue_rej(work->port, work->mad_recv_wc,
   2019			     IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
   2020			     NULL, 0);
   2021		return NULL;
   2022	}
   2023	spin_unlock_irq(&cm.lock);
   2024	return listen_cm_id_priv;
   2025}
   2026
   2027/*
   2028 * Work-around for inter-subnet connections.  If the LIDs are permissive,
   2029 * we need to override the LID/SL data in the REQ with the LID information
   2030 * in the work completion.
   2031 */
   2032static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
   2033{
   2034	if (!IBA_GET(CM_REQ_PRIMARY_SUBNET_LOCAL, req_msg)) {
   2035		if (cpu_to_be16(IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID,
   2036					req_msg)) == IB_LID_PERMISSIVE) {
   2037			IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
   2038				be16_to_cpu(ib_lid_be16(wc->slid)));
   2039			IBA_SET(CM_REQ_PRIMARY_SL, req_msg, wc->sl);
   2040		}
   2041
   2042		if (cpu_to_be16(IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID,
   2043					req_msg)) == IB_LID_PERMISSIVE)
   2044			IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg,
   2045				wc->dlid_path_bits);
   2046	}
   2047
   2048	if (!IBA_GET(CM_REQ_ALTERNATE_SUBNET_LOCAL, req_msg)) {
   2049		if (cpu_to_be16(IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID,
   2050					req_msg)) == IB_LID_PERMISSIVE) {
   2051			IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg,
   2052				be16_to_cpu(ib_lid_be16(wc->slid)));
   2053			IBA_SET(CM_REQ_ALTERNATE_SL, req_msg, wc->sl);
   2054		}
   2055
   2056		if (cpu_to_be16(IBA_GET(CM_REQ_ALTERNATE_REMOTE_PORT_LID,
   2057					req_msg)) == IB_LID_PERMISSIVE)
   2058			IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg,
   2059				wc->dlid_path_bits);
   2060	}
   2061}
   2062
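/*
 * Receive path for a new REQ: allocate a cm_id for the passive side,
 * populate it from the REQ, match it to a listener, build the path records
 * and address handles, and queue the REQ event for the listener's handler.
 */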
   2063static int cm_req_handler(struct cm_work *work)
   2064{
   2065	struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
   2066	struct cm_req_msg *req_msg;
   2067	const struct ib_global_route *grh;
   2068	const struct ib_gid_attr *gid_attr;
   2069	int ret;
   2070
   2071	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
   2072
   2073	cm_id_priv =
   2074		cm_alloc_id_priv(work->port->cm_dev->ib_device, NULL, NULL);
   2075	if (IS_ERR(cm_id_priv))
   2076		return PTR_ERR(cm_id_priv);
   2077
   2078	cm_id_priv->id.remote_id =
   2079		cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg));
   2080	cm_id_priv->id.service_id =
   2081		cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
   2082	cm_id_priv->id.service_mask = ~cpu_to_be64(0);
   2083	cm_id_priv->tid = req_msg->hdr.tid;
   2084	cm_id_priv->timeout_ms = cm_convert_to_ms(
   2085		IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg));
   2086	cm_id_priv->max_cm_retries = IBA_GET(CM_REQ_MAX_CM_RETRIES, req_msg);
   2087	cm_id_priv->remote_qpn =
   2088		cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
   2089	cm_id_priv->initiator_depth =
   2090		IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg);
   2091	cm_id_priv->responder_resources =
   2092		IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg);
   2093	cm_id_priv->path_mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
   2094	cm_id_priv->pkey = cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
   2095	cm_id_priv->sq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
   2096	cm_id_priv->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg);
   2097	cm_id_priv->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg);
   2098	cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
   2099
   2100	ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
   2101				      work->mad_recv_wc->recv_buf.grh,
   2102				      &cm_id_priv->av);
   2103	if (ret)
   2104		goto destroy;
    2105	cm_id_priv->timewait_info =
    2106		cm_create_timewait_info(cm_id_priv->id.local_id);
   2107	if (IS_ERR(cm_id_priv->timewait_info)) {
   2108		ret = PTR_ERR(cm_id_priv->timewait_info);
   2109		cm_id_priv->timewait_info = NULL;
   2110		goto destroy;
   2111	}
   2112	cm_id_priv->timewait_info->work.remote_id = cm_id_priv->id.remote_id;
   2113	cm_id_priv->timewait_info->remote_ca_guid =
   2114		cpu_to_be64(IBA_GET(CM_REQ_LOCAL_CA_GUID, req_msg));
   2115	cm_id_priv->timewait_info->remote_qpn = cm_id_priv->remote_qpn;
   2116
   2117	/*
   2118	 * Note that the ID pointer is not in the xarray at this point,
   2119	 * so this set is only visible to the local thread.
   2120	 */
   2121	cm_id_priv->id.state = IB_CM_REQ_RCVD;
   2122
   2123	listen_cm_id_priv = cm_match_req(work, cm_id_priv);
   2124	if (!listen_cm_id_priv) {
   2125		trace_icm_no_listener_err(&cm_id_priv->id);
   2126		cm_id_priv->id.state = IB_CM_IDLE;
   2127		ret = -EINVAL;
   2128		goto destroy;
   2129	}
   2130
   2131	memset(&work->path[0], 0, sizeof(work->path[0]));
   2132	if (cm_req_has_alt_path(req_msg))
   2133		memset(&work->path[1], 0, sizeof(work->path[1]));
   2134	grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr);
   2135	gid_attr = grh->sgid_attr;
   2136
   2137	if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE) {
   2138		work->path[0].rec_type =
   2139			sa_conv_gid_to_pathrec_type(gid_attr->gid_type);
   2140	} else {
   2141		cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
   2142		cm_path_set_rec_type(
   2143			work->port->cm_dev->ib_device, work->port->port_num,
   2144			&work->path[0],
   2145			IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID,
   2146					req_msg));
   2147	}
   2148	if (cm_req_has_alt_path(req_msg))
   2149		work->path[1].rec_type = work->path[0].rec_type;
   2150	cm_format_paths_from_req(req_msg, &work->path[0],
   2151				 &work->path[1]);
   2152	if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
   2153		sa_path_set_dmac(&work->path[0],
   2154				 cm_id_priv->av.ah_attr.roce.dmac);
   2155	work->path[0].hop_limit = grh->hop_limit;
   2156
   2157	/* This destroy call is needed to pair with cm_init_av_for_response */
   2158	cm_destroy_av(&cm_id_priv->av);
   2159	ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av);
   2160	if (ret) {
   2161		int err;
   2162
   2163		err = rdma_query_gid(work->port->cm_dev->ib_device,
   2164				     work->port->port_num, 0,
   2165				     &work->path[0].sgid);
   2166		if (err)
   2167			ib_send_cm_rej(&cm_id_priv->id, IB_CM_REJ_INVALID_GID,
   2168				       NULL, 0, NULL, 0);
   2169		else
   2170			ib_send_cm_rej(&cm_id_priv->id, IB_CM_REJ_INVALID_GID,
   2171				       &work->path[0].sgid,
   2172				       sizeof(work->path[0].sgid),
   2173				       NULL, 0);
   2174		goto rejected;
   2175	}
   2176	if (cm_req_has_alt_path(req_msg)) {
   2177		ret = cm_init_av_by_path(&work->path[1], NULL,
   2178					 &cm_id_priv->alt_av);
   2179		if (ret) {
   2180			ib_send_cm_rej(&cm_id_priv->id,
   2181				       IB_CM_REJ_INVALID_ALT_GID,
   2182				       &work->path[0].sgid,
   2183				       sizeof(work->path[0].sgid), NULL, 0);
   2184			goto rejected;
   2185		}
   2186	}
   2187
   2188	cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
   2189	cm_id_priv->id.context = listen_cm_id_priv->id.context;
   2190	cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
   2191
   2192	/* Now MAD handlers can see the new ID */
   2193	spin_lock_irq(&cm_id_priv->lock);
   2194	cm_finalize_id(cm_id_priv);
   2195
   2196	/* Refcount belongs to the event, pairs with cm_process_work() */
   2197	refcount_inc(&cm_id_priv->refcount);
   2198	cm_queue_work_unlock(cm_id_priv, work);
   2199	/*
   2200	 * Since this ID was just created and was not made visible to other MAD
   2201	 * handlers until the cm_finalize_id() above we know that the
   2202	 * cm_process_work() will deliver the event and the listen_cm_id
   2203	 * embedded in the event can be derefed here.
   2204	 */
   2205	cm_deref_id(listen_cm_id_priv);
   2206	return 0;
   2207
   2208rejected:
   2209	cm_deref_id(listen_cm_id_priv);
   2210destroy:
   2211	ib_destroy_cm_id(&cm_id_priv->id);
   2212	return ret;
   2213}
   2214
   2215static void cm_format_rep(struct cm_rep_msg *rep_msg,
   2216			  struct cm_id_private *cm_id_priv,
   2217			  struct ib_cm_rep_param *param)
   2218{
   2219	cm_format_mad_ece_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid,
   2220			      param->ece.attr_mod);
   2221	IBA_SET(CM_REP_LOCAL_COMM_ID, rep_msg,
   2222		be32_to_cpu(cm_id_priv->id.local_id));
   2223	IBA_SET(CM_REP_REMOTE_COMM_ID, rep_msg,
   2224		be32_to_cpu(cm_id_priv->id.remote_id));
   2225	IBA_SET(CM_REP_STARTING_PSN, rep_msg, param->starting_psn);
   2226	IBA_SET(CM_REP_RESPONDER_RESOURCES, rep_msg,
   2227		param->responder_resources);
   2228	IBA_SET(CM_REP_TARGET_ACK_DELAY, rep_msg,
   2229		cm_id_priv->av.port->cm_dev->ack_delay);
   2230	IBA_SET(CM_REP_FAILOVER_ACCEPTED, rep_msg, param->failover_accepted);
   2231	IBA_SET(CM_REP_RNR_RETRY_COUNT, rep_msg, param->rnr_retry_count);
   2232	IBA_SET(CM_REP_LOCAL_CA_GUID, rep_msg,
   2233		be64_to_cpu(cm_id_priv->id.device->node_guid));
   2234
   2235	if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) {
   2236		IBA_SET(CM_REP_INITIATOR_DEPTH, rep_msg,
   2237			param->initiator_depth);
   2238		IBA_SET(CM_REP_END_TO_END_FLOW_CONTROL, rep_msg,
   2239			param->flow_control);
   2240		IBA_SET(CM_REP_SRQ, rep_msg, param->srq);
   2241		IBA_SET(CM_REP_LOCAL_QPN, rep_msg, param->qp_num);
   2242	} else {
   2243		IBA_SET(CM_REP_SRQ, rep_msg, 1);
   2244		IBA_SET(CM_REP_LOCAL_EE_CONTEXT_NUMBER, rep_msg, param->qp_num);
   2245	}
   2246
   2247	IBA_SET(CM_REP_VENDOR_ID_L, rep_msg, param->ece.vendor_id);
   2248	IBA_SET(CM_REP_VENDOR_ID_M, rep_msg, param->ece.vendor_id >> 8);
   2249	IBA_SET(CM_REP_VENDOR_ID_H, rep_msg, param->ece.vendor_id >> 16);
   2250
   2251	if (param->private_data && param->private_data_len)
   2252		IBA_SET_MEM(CM_REP_PRIVATE_DATA, rep_msg, param->private_data,
   2253			    param->private_data_len);
   2254}
   2255
   2256int ib_send_cm_rep(struct ib_cm_id *cm_id,
   2257		   struct ib_cm_rep_param *param)
   2258{
   2259	struct cm_id_private *cm_id_priv;
   2260	struct ib_mad_send_buf *msg;
   2261	struct cm_rep_msg *rep_msg;
   2262	unsigned long flags;
   2263	int ret;
   2264
   2265	if (param->private_data &&
   2266	    param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE)
   2267		return -EINVAL;
   2268
   2269	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
   2270	spin_lock_irqsave(&cm_id_priv->lock, flags);
   2271	if (cm_id->state != IB_CM_REQ_RCVD &&
   2272	    cm_id->state != IB_CM_MRA_REQ_SENT) {
   2273		trace_icm_send_rep_err(cm_id_priv->id.local_id, cm_id->state);
   2274		ret = -EINVAL;
   2275		goto out;
   2276	}
   2277
   2278	msg = cm_alloc_priv_msg(cm_id_priv);
   2279	if (IS_ERR(msg)) {
   2280		ret = PTR_ERR(msg);
   2281		goto out;
   2282	}
   2283
   2284	rep_msg = (struct cm_rep_msg *) msg->mad;
   2285	cm_format_rep(rep_msg, cm_id_priv, param);
   2286	msg->timeout_ms = cm_id_priv->timeout_ms;
   2287	msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
   2288
   2289	trace_icm_send_rep(cm_id);
   2290	ret = ib_post_send_mad(msg, NULL);
   2291	if (ret)
   2292		goto out_free;
   2293
   2294	cm_id->state = IB_CM_REP_SENT;
   2295	cm_id_priv->initiator_depth = param->initiator_depth;
   2296	cm_id_priv->responder_resources = param->responder_resources;
   2297	cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REP_STARTING_PSN, rep_msg));
   2298	WARN_ONCE(param->qp_num & 0xFF000000,
   2299		  "IBTA declares QPN to be 24 bits, but it is 0x%X\n",
   2300		  param->qp_num);
   2301	cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);
   2302	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
   2303	return 0;
   2304
   2305out_free:
   2306	cm_free_priv_msg(msg);
   2307out:
   2308	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
   2309	return ret;
   2310}
   2311EXPORT_SYMBOL(ib_send_cm_rep);
   2312
   2313static void cm_format_rtu(struct cm_rtu_msg *rtu_msg,
   2314			  struct cm_id_private *cm_id_priv,
   2315			  const void *private_data,
   2316			  u8 private_data_len)
   2317{
   2318	cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid);
   2319	IBA_SET(CM_RTU_LOCAL_COMM_ID, rtu_msg,
   2320		be32_to_cpu(cm_id_priv->id.local_id));
   2321	IBA_SET(CM_RTU_REMOTE_COMM_ID, rtu_msg,
   2322		be32_to_cpu(cm_id_priv->id.remote_id));
   2323
   2324	if (private_data && private_data_len)
   2325		IBA_SET_MEM(CM_RTU_PRIVATE_DATA, rtu_msg, private_data,
   2326			    private_data_len);
   2327}
   2328
   2329int ib_send_cm_rtu(struct ib_cm_id *cm_id,
   2330		   const void *private_data,
   2331		   u8 private_data_len)
   2332{
   2333	struct cm_id_private *cm_id_priv;
   2334	struct ib_mad_send_buf *msg;
   2335	unsigned long flags;
   2336	void *data;
   2337	int ret;
   2338
   2339	if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE)
   2340		return -EINVAL;
   2341
   2342	data = cm_copy_private_data(private_data, private_data_len);
   2343	if (IS_ERR(data))
   2344		return PTR_ERR(data);
   2345
   2346	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
   2347	spin_lock_irqsave(&cm_id_priv->lock, flags);
   2348	if (cm_id->state != IB_CM_REP_RCVD &&
   2349	    cm_id->state != IB_CM_MRA_REP_SENT) {
   2350		trace_icm_send_cm_rtu_err(cm_id);
   2351		ret = -EINVAL;
   2352		goto error;
   2353	}
   2354
   2355	msg = cm_alloc_msg(cm_id_priv);
   2356	if (IS_ERR(msg)) {
   2357		ret = PTR_ERR(msg);
   2358		goto error;
   2359	}
   2360
   2361	cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
   2362		      private_data, private_data_len);
   2363
   2364	trace_icm_send_rtu(cm_id);
   2365	ret = ib_post_send_mad(msg, NULL);
   2366	if (ret) {
   2367		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
   2368		cm_free_msg(msg);
   2369		kfree(data);
   2370		return ret;
   2371	}
   2372
   2373	cm_id->state = IB_CM_ESTABLISHED;
   2374	cm_set_private_data(cm_id_priv, data, private_data_len);
   2375	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
   2376	return 0;
   2377
   2378error:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
   2379	kfree(data);
   2380	return ret;
   2381}
   2382EXPORT_SYMBOL(ib_send_cm_rtu);
   2383
   2384static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)
   2385{
   2386	struct cm_rep_msg *rep_msg;
   2387	struct ib_cm_rep_event_param *param;
   2388
   2389	rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
   2390	param = &work->cm_event.param.rep_rcvd;
   2391	param->remote_ca_guid =
   2392		cpu_to_be64(IBA_GET(CM_REP_LOCAL_CA_GUID, rep_msg));
   2393	param->remote_qkey = IBA_GET(CM_REP_LOCAL_Q_KEY, rep_msg);
   2394	param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type));
   2395	param->starting_psn = IBA_GET(CM_REP_STARTING_PSN, rep_msg);
   2396	param->responder_resources = IBA_GET(CM_REP_INITIATOR_DEPTH, rep_msg);
   2397	param->initiator_depth = IBA_GET(CM_REP_RESPONDER_RESOURCES, rep_msg);
   2398	param->target_ack_delay = IBA_GET(CM_REP_TARGET_ACK_DELAY, rep_msg);
   2399	param->failover_accepted = IBA_GET(CM_REP_FAILOVER_ACCEPTED, rep_msg);
   2400	param->flow_control = IBA_GET(CM_REP_END_TO_END_FLOW_CONTROL, rep_msg);
   2401	param->rnr_retry_count = IBA_GET(CM_REP_RNR_RETRY_COUNT, rep_msg);
   2402	param->srq = IBA_GET(CM_REP_SRQ, rep_msg);
   2403	param->ece.vendor_id = IBA_GET(CM_REP_VENDOR_ID_H, rep_msg) << 16;
   2404	param->ece.vendor_id |= IBA_GET(CM_REP_VENDOR_ID_M, rep_msg) << 8;
   2405	param->ece.vendor_id |= IBA_GET(CM_REP_VENDOR_ID_L, rep_msg);
   2406	param->ece.attr_mod = be32_to_cpu(rep_msg->hdr.attr_mod);
   2407
   2408	work->cm_event.private_data =
   2409		IBA_GET_MEM_PTR(CM_REP_PRIVATE_DATA, rep_msg);
   2410}
   2411
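/*
 * A duplicate REP means our RTU or MRA was lost; re-send whichever of the
 * two matches the current state, otherwise drop the REP.
 */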
   2412static void cm_dup_rep_handler(struct cm_work *work)
   2413{
   2414	struct cm_id_private *cm_id_priv;
   2415	struct cm_rep_msg *rep_msg;
   2416	struct ib_mad_send_buf *msg = NULL;
   2417	int ret;
   2418
   2419	rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
   2420	cm_id_priv = cm_acquire_id(
   2421		cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)),
   2422		cpu_to_be32(IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg)));
   2423	if (!cm_id_priv)
   2424		return;
   2425
   2426	atomic_long_inc(
   2427		&work->port->counters[CM_RECV_DUPLICATES][CM_REP_COUNTER]);
   2428	ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
   2429	if (ret)
   2430		goto deref;
   2431
   2432	spin_lock_irq(&cm_id_priv->lock);
   2433	if (cm_id_priv->id.state == IB_CM_ESTABLISHED)
   2434		cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
   2435			      cm_id_priv->private_data,
   2436			      cm_id_priv->private_data_len);
   2437	else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT)
   2438		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
   2439			      CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout,
   2440			      cm_id_priv->private_data,
   2441			      cm_id_priv->private_data_len);
   2442	else
   2443		goto unlock;
   2444	spin_unlock_irq(&cm_id_priv->lock);
   2445
   2446	trace_icm_send_dup_rep(&cm_id_priv->id);
   2447	ret = ib_post_send_mad(msg, NULL);
   2448	if (ret)
   2449		goto free;
   2450	goto deref;
   2451
   2452unlock:	spin_unlock_irq(&cm_id_priv->lock);
   2453free:	cm_free_response_msg(msg);
   2454deref:	cm_deref_id(cm_id_priv);
   2455}
   2456
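/*
 * Receive path for a REP on the active side: duplicates and stale
 * connections are screened out under cm.lock, the remote comm ID, QPN and
 * ack timeouts are recorded, the outstanding send is cancelled and the REP
 * event is queued.
 */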
   2457static int cm_rep_handler(struct cm_work *work)
   2458{
   2459	struct cm_id_private *cm_id_priv;
   2460	struct cm_rep_msg *rep_msg;
   2461	int ret;
   2462	struct cm_id_private *cur_cm_id_priv;
   2463	struct cm_timewait_info *timewait_info;
   2464
   2465	rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
   2466	cm_id_priv = cm_acquire_id(
   2467		cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)), 0);
   2468	if (!cm_id_priv) {
   2469		cm_dup_rep_handler(work);
   2470		trace_icm_remote_no_priv_err(
   2471			 IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
   2472		return -EINVAL;
   2473	}
   2474
   2475	cm_format_rep_event(work, cm_id_priv->qp_type);
   2476
   2477	spin_lock_irq(&cm_id_priv->lock);
   2478	switch (cm_id_priv->id.state) {
   2479	case IB_CM_REQ_SENT:
   2480	case IB_CM_MRA_REQ_RCVD:
   2481		break;
   2482	default:
   2483		ret = -EINVAL;
   2484		trace_icm_rep_unknown_err(
   2485			IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
   2486			IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg),
   2487			cm_id_priv->id.state);
   2488		spin_unlock_irq(&cm_id_priv->lock);
   2489		goto error;
   2490	}
   2491
   2492	cm_id_priv->timewait_info->work.remote_id =
   2493		cpu_to_be32(IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg));
   2494	cm_id_priv->timewait_info->remote_ca_guid =
   2495		cpu_to_be64(IBA_GET(CM_REP_LOCAL_CA_GUID, rep_msg));
   2496	cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
   2497
   2498	spin_lock(&cm.lock);
   2499	/* Check for duplicate REP. */
   2500	if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
   2501		spin_unlock(&cm.lock);
   2502		spin_unlock_irq(&cm_id_priv->lock);
   2503		ret = -EINVAL;
   2504		trace_icm_insert_failed_err(
   2505			 IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
   2506		goto error;
   2507	}
   2508	/* Check for a stale connection. */
   2509	timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
   2510	if (timewait_info) {
   2511		cm_remove_remote(cm_id_priv);
   2512		cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
   2513					   timewait_info->work.remote_id);
   2514
   2515		spin_unlock(&cm.lock);
   2516		spin_unlock_irq(&cm_id_priv->lock);
   2517		cm_issue_rej(work->port, work->mad_recv_wc,
   2518			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
   2519			     NULL, 0);
   2520		ret = -EINVAL;
   2521		trace_icm_staleconn_err(
   2522			IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
   2523			IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
   2524
   2525		if (cur_cm_id_priv) {
   2526			ib_send_cm_dreq(&cur_cm_id_priv->id, NULL, 0);
   2527			cm_deref_id(cur_cm_id_priv);
   2528		}
   2529
   2530		goto error;
   2531	}
   2532	spin_unlock(&cm.lock);
   2533
   2534	cm_id_priv->id.state = IB_CM_REP_RCVD;
   2535	cm_id_priv->id.remote_id =
   2536		cpu_to_be32(IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg));
   2537	cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
   2538	cm_id_priv->initiator_depth =
   2539		IBA_GET(CM_REP_RESPONDER_RESOURCES, rep_msg);
   2540	cm_id_priv->responder_resources =
   2541		IBA_GET(CM_REP_INITIATOR_DEPTH, rep_msg);
   2542	cm_id_priv->sq_psn = cpu_to_be32(IBA_GET(CM_REP_STARTING_PSN, rep_msg));
   2543	cm_id_priv->rnr_retry_count = IBA_GET(CM_REP_RNR_RETRY_COUNT, rep_msg);
   2544	cm_id_priv->target_ack_delay =
   2545		IBA_GET(CM_REP_TARGET_ACK_DELAY, rep_msg);
   2546	cm_id_priv->av.timeout =
   2547			cm_ack_timeout(cm_id_priv->target_ack_delay,
   2548				       cm_id_priv->av.timeout - 1);
   2549	cm_id_priv->alt_av.timeout =
   2550			cm_ack_timeout(cm_id_priv->target_ack_delay,
   2551				       cm_id_priv->alt_av.timeout - 1);
   2552
   2553	ib_cancel_mad(cm_id_priv->msg);
   2554	cm_queue_work_unlock(cm_id_priv, work);
   2555	return 0;
   2556
   2557error:
   2558	cm_deref_id(cm_id_priv);
   2559	return ret;
   2560}
   2561
   2562static int cm_establish_handler(struct cm_work *work)
   2563{
   2564	struct cm_id_private *cm_id_priv;
   2565
   2566	/* See comment in cm_establish about lookup. */
   2567	cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
   2568	if (!cm_id_priv)
   2569		return -EINVAL;
   2570
   2571	spin_lock_irq(&cm_id_priv->lock);
   2572	if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
   2573		spin_unlock_irq(&cm_id_priv->lock);
   2574		goto out;
   2575	}
   2576
   2577	ib_cancel_mad(cm_id_priv->msg);
   2578	cm_queue_work_unlock(cm_id_priv, work);
   2579	return 0;
   2580out:
   2581	cm_deref_id(cm_id_priv);
   2582	return -EINVAL;
   2583}
   2584
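/*
 * An RTU moves the passive side from REP_SENT/MRA_REP_RCVD to ESTABLISHED;
 * RTUs arriving in any other state are counted as duplicates and dropped.
 */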
   2585static int cm_rtu_handler(struct cm_work *work)
   2586{
   2587	struct cm_id_private *cm_id_priv;
   2588	struct cm_rtu_msg *rtu_msg;
   2589
   2590	rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
   2591	cm_id_priv = cm_acquire_id(
   2592		cpu_to_be32(IBA_GET(CM_RTU_REMOTE_COMM_ID, rtu_msg)),
   2593		cpu_to_be32(IBA_GET(CM_RTU_LOCAL_COMM_ID, rtu_msg)));
   2594	if (!cm_id_priv)
   2595		return -EINVAL;
   2596
   2597	work->cm_event.private_data =
   2598		IBA_GET_MEM_PTR(CM_RTU_PRIVATE_DATA, rtu_msg);
   2599
   2600	spin_lock_irq(&cm_id_priv->lock);
   2601	if (cm_id_priv->id.state != IB_CM_REP_SENT &&
   2602	    cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
   2603		spin_unlock_irq(&cm_id_priv->lock);
   2604		atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
   2605						     [CM_RTU_COUNTER]);
   2606		goto out;
   2607	}
   2608	cm_id_priv->id.state = IB_CM_ESTABLISHED;
   2609
   2610	ib_cancel_mad(cm_id_priv->msg);
   2611	cm_queue_work_unlock(cm_id_priv, work);
   2612	return 0;
   2613out:
   2614	cm_deref_id(cm_id_priv);
   2615	return -EINVAL;
   2616}
   2617
   2618static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
   2619			  struct cm_id_private *cm_id_priv,
   2620			  const void *private_data,
   2621			  u8 private_data_len)
   2622{
   2623	cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
   2624			  cm_form_tid(cm_id_priv));
   2625	IBA_SET(CM_DREQ_LOCAL_COMM_ID, dreq_msg,
   2626		be32_to_cpu(cm_id_priv->id.local_id));
   2627	IBA_SET(CM_DREQ_REMOTE_COMM_ID, dreq_msg,
   2628		be32_to_cpu(cm_id_priv->id.remote_id));
   2629	IBA_SET(CM_DREQ_REMOTE_QPN_EECN, dreq_msg,
   2630		be32_to_cpu(cm_id_priv->remote_qpn));
   2631
   2632	if (private_data && private_data_len)
   2633		IBA_SET_MEM(CM_DREQ_PRIVATE_DATA, dreq_msg, private_data,
   2634			    private_data_len);
   2635}
   2636
   2637static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
   2638			       const void *private_data, u8 private_data_len)
   2639{
   2640	struct ib_mad_send_buf *msg;
   2641	int ret;
   2642
   2643	lockdep_assert_held(&cm_id_priv->lock);
   2644
   2645	if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
   2646		return -EINVAL;
   2647
   2648	if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
   2649		trace_icm_dreq_skipped(&cm_id_priv->id);
   2650		return -EINVAL;
   2651	}
   2652
   2653	if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
   2654	    cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
   2655		ib_cancel_mad(cm_id_priv->msg);
   2656
   2657	msg = cm_alloc_priv_msg(cm_id_priv);
   2658	if (IS_ERR(msg)) {
   2659		cm_enter_timewait(cm_id_priv);
   2660		return PTR_ERR(msg);
   2661	}
   2662
   2663	cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
   2664		       private_data, private_data_len);
   2665	msg->timeout_ms = cm_id_priv->timeout_ms;
   2666	msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
   2667
   2668	trace_icm_send_dreq(&cm_id_priv->id);
   2669	ret = ib_post_send_mad(msg, NULL);
   2670	if (ret) {
   2671		cm_enter_timewait(cm_id_priv);
   2672		cm_free_priv_msg(msg);
   2673		return ret;
   2674	}
   2675
   2676	cm_id_priv->id.state = IB_CM_DREQ_SENT;
   2677	return 0;
   2678}
   2679
   2680int ib_send_cm_dreq(struct ib_cm_id *cm_id, const void *private_data,
   2681		    u8 private_data_len)
   2682{
   2683	struct cm_id_private *cm_id_priv =
   2684		container_of(cm_id, struct cm_id_private, id);
   2685	unsigned long flags;
   2686	int ret;
   2687
   2688	spin_lock_irqsave(&cm_id_priv->lock, flags);
   2689	ret = cm_send_dreq_locked(cm_id_priv, private_data, private_data_len);
   2690	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
   2691	return ret;
   2692}
   2693EXPORT_SYMBOL(ib_send_cm_dreq);
   2694
   2695static void cm_format_drep(struct cm_drep_msg *drep_msg,
   2696			  struct cm_id_private *cm_id_priv,
   2697			  const void *private_data,
   2698			  u8 private_data_len)
   2699{
   2700	cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid);
   2701	IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg,
   2702		be32_to_cpu(cm_id_priv->id.local_id));
   2703	IBA_SET(CM_DREP_REMOTE_COMM_ID, drep_msg,
   2704		be32_to_cpu(cm_id_priv->id.remote_id));
   2705
   2706	if (private_data && private_data_len)
   2707		IBA_SET_MEM(CM_DREP_PRIVATE_DATA, drep_msg, private_data,
   2708			    private_data_len);
   2709}
   2710
   2711static int cm_send_drep_locked(struct cm_id_private *cm_id_priv,
   2712			       void *private_data, u8 private_data_len)
   2713{
   2714	struct ib_mad_send_buf *msg;
   2715	int ret;
   2716
   2717	lockdep_assert_held(&cm_id_priv->lock);
   2718
   2719	if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE)
   2720		return -EINVAL;
   2721
   2722	if (cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
   2723		trace_icm_send_drep_err(&cm_id_priv->id);
   2724		kfree(private_data);
   2725		return -EINVAL;
   2726	}
   2727
   2728	cm_set_private_data(cm_id_priv, private_data, private_data_len);
   2729	cm_enter_timewait(cm_id_priv);
   2730
   2731	msg = cm_alloc_msg(cm_id_priv);
   2732	if (IS_ERR(msg))
   2733		return PTR_ERR(msg);
   2734
   2735	cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
   2736		       private_data, private_data_len);
   2737
   2738	trace_icm_send_drep(&cm_id_priv->id);
   2739	ret = ib_post_send_mad(msg, NULL);
   2740	if (ret) {
   2741		cm_free_msg(msg);
   2742		return ret;
   2743	}
   2744	return 0;
   2745}
   2746
   2747int ib_send_cm_drep(struct ib_cm_id *cm_id, const void *private_data,
   2748		    u8 private_data_len)
   2749{
   2750	struct cm_id_private *cm_id_priv =
   2751		container_of(cm_id, struct cm_id_private, id);
   2752	unsigned long flags;
   2753	void *data;
   2754	int ret;
   2755
   2756	data = cm_copy_private_data(private_data, private_data_len);
   2757	if (IS_ERR(data))
   2758		return PTR_ERR(data);
   2759
   2760	spin_lock_irqsave(&cm_id_priv->lock, flags);
   2761	ret = cm_send_drep_locked(cm_id_priv, data, private_data_len);
   2762	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
   2763	return ret;
   2764}
   2765EXPORT_SYMBOL(ib_send_cm_drep);
   2766
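/*
 * Reply to a DREQ that cannot be matched to a cm_id by issuing a DREP
 * straight from the port, mirroring the comm IDs of the incoming message.
 */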
   2767static int cm_issue_drep(struct cm_port *port,
   2768			 struct ib_mad_recv_wc *mad_recv_wc)
   2769{
   2770	struct ib_mad_send_buf *msg = NULL;
   2771	struct cm_dreq_msg *dreq_msg;
   2772	struct cm_drep_msg *drep_msg;
   2773	int ret;
   2774
   2775	ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
   2776	if (ret)
   2777		return ret;
   2778
   2779	dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad;
   2780	drep_msg = (struct cm_drep_msg *) msg->mad;
   2781
   2782	cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid);
   2783	IBA_SET(CM_DREP_REMOTE_COMM_ID, drep_msg,
   2784		IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg));
   2785	IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg,
   2786		IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
   2787
   2788	trace_icm_issue_drep(
   2789		IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
   2790		IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
   2791	ret = ib_post_send_mad(msg, NULL);
   2792	if (ret)
   2793		cm_free_response_msg(msg);
   2794
   2795	return ret;
   2796}
   2797
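/*
 * Handle a received DREQ: with no matching cm_id a DREP is issued directly;
 * otherwise the DREQ is checked against the local QPN and connection state
 * before moving to DREQ_RCVD, and a duplicate arriving in TIMEWAIT is
 * answered by re-sending the DREP.
 */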
   2798static int cm_dreq_handler(struct cm_work *work)
   2799{
   2800	struct cm_id_private *cm_id_priv;
   2801	struct cm_dreq_msg *dreq_msg;
   2802	struct ib_mad_send_buf *msg = NULL;
   2803
   2804	dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
   2805	cm_id_priv = cm_acquire_id(
   2806		cpu_to_be32(IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg)),
   2807		cpu_to_be32(IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg)));
   2808	if (!cm_id_priv) {
   2809		atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
   2810						     [CM_DREQ_COUNTER]);
   2811		cm_issue_drep(work->port, work->mad_recv_wc);
   2812		trace_icm_no_priv_err(
   2813			IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
   2814			IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
   2815		return -EINVAL;
   2816	}
   2817
   2818	work->cm_event.private_data =
   2819		IBA_GET_MEM_PTR(CM_DREQ_PRIVATE_DATA, dreq_msg);
   2820
   2821	spin_lock_irq(&cm_id_priv->lock);
   2822	if (cm_id_priv->local_qpn !=
   2823	    cpu_to_be32(IBA_GET(CM_DREQ_REMOTE_QPN_EECN, dreq_msg)))
   2824		goto unlock;
   2825
   2826	switch (cm_id_priv->id.state) {
   2827	case IB_CM_REP_SENT:
   2828	case IB_CM_DREQ_SENT:
   2829	case IB_CM_MRA_REP_RCVD:
   2830		ib_cancel_mad(cm_id_priv->msg);
   2831		break;
   2832	case IB_CM_ESTABLISHED:
   2833		if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
   2834		    cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
   2835			ib_cancel_mad(cm_id_priv->msg);
   2836		break;
   2837	case IB_CM_TIMEWAIT:
   2838		atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
   2839						     [CM_DREQ_COUNTER]);
   2840		msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
   2841		if (IS_ERR(msg))
   2842			goto unlock;
   2843
   2844		cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
   2845			       cm_id_priv->private_data,
   2846			       cm_id_priv->private_data_len);
   2847		spin_unlock_irq(&cm_id_priv->lock);
   2848
   2849		if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
   2850		    ib_post_send_mad(msg, NULL))
   2851			cm_free_response_msg(msg);
   2852		goto deref;
   2853	case IB_CM_DREQ_RCVD:
   2854		atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
   2855						     [CM_DREQ_COUNTER]);
   2856		goto unlock;
   2857	default:
   2858		trace_icm_dreq_unknown_err(&cm_id_priv->id);
   2859		goto unlock;
   2860	}
   2861	cm_id_priv->id.state = IB_CM_DREQ_RCVD;
   2862	cm_id_priv->tid = dreq_msg->hdr.tid;
   2863	cm_queue_work_unlock(cm_id_priv, work);
   2864	return 0;
   2865
   2866unlock:	spin_unlock_irq(&cm_id_priv->lock);
   2867deref:	cm_deref_id(cm_id_priv);
   2868	return -EINVAL;
   2869}
   2870
   2871static int cm_drep_handler(struct cm_work *work)
   2872{
   2873	struct cm_id_private *cm_id_priv;
   2874	struct cm_drep_msg *drep_msg;
   2875
   2876	drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
   2877	cm_id_priv = cm_acquire_id(
   2878		cpu_to_be32(IBA_GET(CM_DREP_REMOTE_COMM_ID, drep_msg)),
   2879		cpu_to_be32(IBA_GET(CM_DREP_LOCAL_COMM_ID, drep_msg)));
   2880	if (!cm_id_priv)
   2881		return -EINVAL;
   2882
   2883	work->cm_event.private_data =
   2884		IBA_GET_MEM_PTR(CM_DREP_PRIVATE_DATA, drep_msg);
   2885
   2886	spin_lock_irq(&cm_id_priv->lock);
   2887	if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
   2888	    cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
   2889		spin_unlock_irq(&cm_id_priv->lock);
   2890		goto out;
   2891	}
   2892	cm_enter_timewait(cm_id_priv);
   2893
   2894	ib_cancel_mad(cm_id_priv->msg);
   2895	cm_queue_work_unlock(cm_id_priv, work);
   2896	return 0;
   2897out:
   2898	cm_deref_id(cm_id_priv);
   2899	return -EINVAL;
   2900}
   2901
   2902static int cm_send_rej_locked(struct cm_id_private *cm_id_priv,
   2903			      enum ib_cm_rej_reason reason, void *ari,
   2904			      u8 ari_length, const void *private_data,
   2905			      u8 private_data_len)
   2906{
   2907	enum ib_cm_state state = cm_id_priv->id.state;
   2908	struct ib_mad_send_buf *msg;
   2909	int ret;
   2910
   2911	lockdep_assert_held(&cm_id_priv->lock);
   2912
   2913	if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) ||
   2914	    (ari && ari_length > IB_CM_REJ_ARI_LENGTH))
   2915		return -EINVAL;
   2916
   2917	switch (state) {
   2918	case IB_CM_REQ_SENT:
   2919	case IB_CM_MRA_REQ_RCVD:
   2920	case IB_CM_REQ_RCVD:
   2921	case IB_CM_MRA_REQ_SENT:
   2922	case IB_CM_REP_RCVD:
   2923	case IB_CM_MRA_REP_SENT:
   2924		cm_reset_to_idle(cm_id_priv);
   2925		msg = cm_alloc_msg(cm_id_priv);
   2926		if (IS_ERR(msg))
   2927			return PTR_ERR(msg);
   2928		cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv, reason,
   2929			      ari, ari_length, private_data, private_data_len,
   2930			      state);
   2931		break;
   2932	case IB_CM_REP_SENT:
   2933	case IB_CM_MRA_REP_RCVD:
   2934		cm_enter_timewait(cm_id_priv);
   2935		msg = cm_alloc_msg(cm_id_priv);
   2936		if (IS_ERR(msg))
   2937			return PTR_ERR(msg);
   2938		cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv, reason,
   2939			      ari, ari_length, private_data, private_data_len,
   2940			      state);
   2941		break;
   2942	default:
   2943		trace_icm_send_unknown_rej_err(&cm_id_priv->id);
   2944		return -EINVAL;
   2945	}
   2946
   2947	trace_icm_send_rej(&cm_id_priv->id, reason);
   2948	ret = ib_post_send_mad(msg, NULL);
   2949	if (ret) {
   2950		cm_free_msg(msg);
   2951		return ret;
   2952	}
   2953
   2954	return 0;
   2955}
   2956
   2957int ib_send_cm_rej(struct ib_cm_id *cm_id, enum ib_cm_rej_reason reason,
   2958		   void *ari, u8 ari_length, const void *private_data,
   2959		   u8 private_data_len)
   2960{
   2961	struct cm_id_private *cm_id_priv =
   2962		container_of(cm_id, struct cm_id_private, id);
   2963	unsigned long flags;
   2964	int ret;
   2965
   2966	spin_lock_irqsave(&cm_id_priv->lock, flags);
   2967	ret = cm_send_rej_locked(cm_id_priv, reason, ari, ari_length,
   2968				 private_data, private_data_len);
   2969	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
   2970	return ret;
   2971}
   2972EXPORT_SYMBOL(ib_send_cm_rej);
   2973
   2974static void cm_format_rej_event(struct cm_work *work)
   2975{
   2976	struct cm_rej_msg *rej_msg;
   2977	struct ib_cm_rej_event_param *param;
   2978
   2979	rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
   2980	param = &work->cm_event.param.rej_rcvd;
   2981	param->ari = IBA_GET_MEM_PTR(CM_REJ_ARI, rej_msg);
   2982	param->ari_length = IBA_GET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg);
   2983	param->reason = IBA_GET(CM_REJ_REASON, rej_msg);
   2984	work->cm_event.private_data =
   2985		IBA_GET_MEM_PTR(CM_REJ_PRIVATE_DATA, rej_msg);
   2986}
   2987
   2988static struct cm_id_private *cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
   2989{
   2990	struct cm_id_private *cm_id_priv;
   2991	__be32 remote_id;
   2992
   2993	remote_id = cpu_to_be32(IBA_GET(CM_REJ_LOCAL_COMM_ID, rej_msg));
   2994
   2995	if (IBA_GET(CM_REJ_REASON, rej_msg) == IB_CM_REJ_TIMEOUT) {
   2996		cm_id_priv = cm_find_remote_id(
   2997			*((__be64 *)IBA_GET_MEM_PTR(CM_REJ_ARI, rej_msg)),
   2998			remote_id);
   2999	} else if (IBA_GET(CM_REJ_MESSAGE_REJECTED, rej_msg) ==
   3000		   CM_MSG_RESPONSE_REQ)
   3001		cm_id_priv = cm_acquire_id(
   3002			cpu_to_be32(IBA_GET(CM_REJ_REMOTE_COMM_ID, rej_msg)),
   3003			0);
   3004	else
   3005		cm_id_priv = cm_acquire_id(
   3006			cpu_to_be32(IBA_GET(CM_REJ_REMOTE_COMM_ID, rej_msg)),
   3007			remote_id);
   3008
   3009	return cm_id_priv;
   3010}
   3011
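/*
 * A received REJ tears the connection down from whichever state it is in:
 * any outstanding send is cancelled and the id moves either to timewait
 * (stale connections and REP-related states) or back to idle.
 */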
   3012static int cm_rej_handler(struct cm_work *work)
   3013{
   3014	struct cm_id_private *cm_id_priv;
   3015	struct cm_rej_msg *rej_msg;
   3016
   3017	rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
   3018	cm_id_priv = cm_acquire_rejected_id(rej_msg);
   3019	if (!cm_id_priv)
   3020		return -EINVAL;
   3021
   3022	cm_format_rej_event(work);
   3023
   3024	spin_lock_irq(&cm_id_priv->lock);
   3025	switch (cm_id_priv->id.state) {
   3026	case IB_CM_REQ_SENT:
   3027	case IB_CM_MRA_REQ_RCVD:
   3028	case IB_CM_REP_SENT:
   3029	case IB_CM_MRA_REP_RCVD:
   3030		ib_cancel_mad(cm_id_priv->msg);
   3031		fallthrough;
   3032	case IB_CM_REQ_RCVD:
   3033	case IB_CM_MRA_REQ_SENT:
   3034		if (IBA_GET(CM_REJ_REASON, rej_msg) == IB_CM_REJ_STALE_CONN)
   3035			cm_enter_timewait(cm_id_priv);
   3036		else
   3037			cm_reset_to_idle(cm_id_priv);
   3038		break;
   3039	case IB_CM_DREQ_SENT:
   3040		ib_cancel_mad(cm_id_priv->msg);
   3041		fallthrough;
   3042	case IB_CM_REP_RCVD:
   3043	case IB_CM_MRA_REP_SENT:
   3044		cm_enter_timewait(cm_id_priv);
   3045		break;
   3046	case IB_CM_ESTABLISHED:
   3047		if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT ||
   3048		    cm_id_priv->id.lap_state == IB_CM_LAP_SENT) {
   3049			if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
   3050				ib_cancel_mad(cm_id_priv->msg);
   3051			cm_enter_timewait(cm_id_priv);
   3052			break;
   3053		}
   3054		fallthrough;
   3055	default:
   3056		trace_icm_rej_unknown_err(&cm_id_priv->id);
   3057		spin_unlock_irq(&cm_id_priv->lock);
   3058		goto out;
   3059	}
   3060
   3061	cm_queue_work_unlock(cm_id_priv, work);
   3062	return 0;
   3063out:
   3064	cm_deref_id(cm_id_priv);
   3065	return -EINVAL;
   3066}
   3067
   3068int ib_send_cm_mra(struct ib_cm_id *cm_id,
   3069		   u8 service_timeout,
   3070		   const void *private_data,
   3071		   u8 private_data_len)
   3072{
   3073	struct cm_id_private *cm_id_priv;
   3074	struct ib_mad_send_buf *msg;
   3075	enum ib_cm_state cm_state;
   3076	enum ib_cm_lap_state lap_state;
   3077	enum cm_msg_response msg_response;
   3078	void *data;
   3079	unsigned long flags;
   3080	int ret;
   3081
   3082	if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE)
   3083		return -EINVAL;
   3084
   3085	data = cm_copy_private_data(private_data, private_data_len);
   3086	if (IS_ERR(data))
   3087		return PTR_ERR(data);
   3088
   3089	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
   3090
   3091	spin_lock_irqsave(&cm_id_priv->lock, flags);
   3092	switch (cm_id_priv->id.state) {
   3093	case IB_CM_REQ_RCVD:
   3094		cm_state = IB_CM_MRA_REQ_SENT;
   3095		lap_state = cm_id->lap_state;
   3096		msg_response = CM_MSG_RESPONSE_REQ;
   3097		break;
   3098	case IB_CM_REP_RCVD:
   3099		cm_state = IB_CM_MRA_REP_SENT;
   3100		lap_state = cm_id->lap_state;
   3101		msg_response = CM_MSG_RESPONSE_REP;
   3102		break;
   3103	case IB_CM_ESTABLISHED:
   3104		if (cm_id->lap_state == IB_CM_LAP_RCVD) {
   3105			cm_state = cm_id->state;
   3106			lap_state = IB_CM_MRA_LAP_SENT;
   3107			msg_response = CM_MSG_RESPONSE_OTHER;
   3108			break;
   3109		}
   3110		fallthrough;
   3111	default:
   3112		trace_icm_send_mra_unknown_err(&cm_id_priv->id);
   3113		ret = -EINVAL;
   3114		goto error_unlock;
   3115	}
   3116
   3117	if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
   3118		msg = cm_alloc_msg(cm_id_priv);
   3119		if (IS_ERR(msg)) {
   3120			ret = PTR_ERR(msg);
   3121			goto error_unlock;
   3122		}
   3123
   3124		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
   3125			      msg_response, service_timeout,
   3126			      private_data, private_data_len);
   3127		trace_icm_send_mra(cm_id);
   3128		ret = ib_post_send_mad(msg, NULL);
   3129		if (ret)
   3130			goto error_free_msg;
   3131	}
   3132
   3133	cm_id->state = cm_state;
   3134	cm_id->lap_state = lap_state;
   3135	cm_id_priv->service_timeout = service_timeout;
   3136	cm_set_private_data(cm_id_priv, data, private_data_len);
   3137	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
   3138	return 0;
   3139
   3140error_free_msg:
   3141	cm_free_msg(msg);
   3142error_unlock:
   3143	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
   3144	kfree(data);
   3145	return ret;
   3146}
   3147EXPORT_SYMBOL(ib_send_cm_mra);
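/*
 * Illustrative consumer-side sketch (not part of this module), assuming a
 * hypothetical ULP connection handler: when a received REQ cannot be
 * answered immediately, an MRA buys time before the REP is sent.  The
 * handler name, the use of the cm_id, and the service_timeout value (an
 * IBTA-encoded timeout exponent) below are placeholders.
 */
static int example_cm_req_handler(struct ib_cm_id *cm_id,
				  const struct ib_cm_event *event)
{
	int ret;

	if (event->event != IB_CM_REQ_RECEIVED)
		return 0;

	/* Tell the peer to keep waiting; no private data attached. */
	ret = ib_send_cm_mra(cm_id, 16, NULL, 0);
	if (ret)
		return ret;

	/* ... queue the REQ for asynchronous processing, send the REP later ... */
	return 0;
}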
   3148
   3149static struct cm_id_private *cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
   3150{
   3151	switch (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg)) {
   3152	case CM_MSG_RESPONSE_REQ:
   3153		return cm_acquire_id(
   3154			cpu_to_be32(IBA_GET(CM_MRA_REMOTE_COMM_ID, mra_msg)),
   3155			0);
   3156	case CM_MSG_RESPONSE_REP:
   3157	case CM_MSG_RESPONSE_OTHER:
   3158		return cm_acquire_id(
   3159			cpu_to_be32(IBA_GET(CM_MRA_REMOTE_COMM_ID, mra_msg)),
   3160			cpu_to_be32(IBA_GET(CM_MRA_LOCAL_COMM_ID, mra_msg)));
   3161	default:
   3162		return NULL;
   3163	}
   3164}
   3165
   3166static int cm_mra_handler(struct cm_work *work)
   3167{
   3168	struct cm_id_private *cm_id_priv;
   3169	struct cm_mra_msg *mra_msg;
   3170	int timeout;
   3171
   3172	mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
   3173	cm_id_priv = cm_acquire_mraed_id(mra_msg);
   3174	if (!cm_id_priv)
   3175		return -EINVAL;
   3176
   3177	work->cm_event.private_data =
   3178		IBA_GET_MEM_PTR(CM_MRA_PRIVATE_DATA, mra_msg);
   3179	work->cm_event.param.mra_rcvd.service_timeout =
   3180		IBA_GET(CM_MRA_SERVICE_TIMEOUT, mra_msg);
   3181	timeout = cm_convert_to_ms(IBA_GET(CM_MRA_SERVICE_TIMEOUT, mra_msg)) +
   3182		  cm_convert_to_ms(cm_id_priv->av.timeout);
   3183
   3184	spin_lock_irq(&cm_id_priv->lock);
   3185	switch (cm_id_priv->id.state) {
   3186	case IB_CM_REQ_SENT:
   3187		if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
   3188			    CM_MSG_RESPONSE_REQ ||
   3189		    ib_modify_mad(cm_id_priv->msg, timeout))
   3190			goto out;
   3191		cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
   3192		break;
   3193	case IB_CM_REP_SENT:
   3194		if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
   3195			    CM_MSG_RESPONSE_REP ||
   3196		    ib_modify_mad(cm_id_priv->msg, timeout))
   3197			goto out;
   3198		cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
   3199		break;
   3200	case IB_CM_ESTABLISHED:
   3201		if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
   3202			    CM_MSG_RESPONSE_OTHER ||
   3203		    cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
   3204		    ib_modify_mad(cm_id_priv->msg, timeout)) {
   3205			if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
   3206				atomic_long_inc(
   3207					&work->port->counters[CM_RECV_DUPLICATES]
   3208							     [CM_MRA_COUNTER]);
   3209			goto out;
   3210		}
   3211		cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
   3212		break;
   3213	case IB_CM_MRA_REQ_RCVD:
   3214	case IB_CM_MRA_REP_RCVD:
   3215		atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
   3216						     [CM_MRA_COUNTER]);
   3217		fallthrough;
   3218	default:
   3219		trace_icm_mra_unknown_err(&cm_id_priv->id);
   3220		goto out;
   3221	}
   3222
   3223	cm_id_priv->msg->context[1] = (void *) (unsigned long)
   3224				      cm_id_priv->id.state;
   3225	cm_queue_work_unlock(cm_id_priv, work);
   3226	return 0;
   3227out:
   3228	spin_unlock_irq(&cm_id_priv->lock);
   3229	cm_deref_id(cm_id_priv);
   3230	return -EINVAL;
   3231}
   3232
   3233static void cm_format_path_lid_from_lap(struct cm_lap_msg *lap_msg,
   3234					struct sa_path_rec *path)
   3235{
   3236	u32 lid;
   3237
   3238	if (path->rec_type != SA_PATH_REC_TYPE_OPA) {
   3239		sa_path_set_dlid(path, IBA_GET(CM_LAP_ALTERNATE_LOCAL_PORT_LID,
   3240					       lap_msg));
   3241		sa_path_set_slid(path, IBA_GET(CM_LAP_ALTERNATE_REMOTE_PORT_LID,
   3242					       lap_msg));
   3243	} else {
   3244		lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
   3245			CM_LAP_ALTERNATE_LOCAL_PORT_GID, lap_msg));
   3246		sa_path_set_dlid(path, lid);
   3247
   3248		lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
   3249			CM_LAP_ALTERNATE_REMOTE_PORT_GID, lap_msg));
   3250		sa_path_set_slid(path, lid);
   3251	}
   3252}
   3253
   3254static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
   3255				    struct sa_path_rec *path,
   3256				    struct cm_lap_msg *lap_msg)
   3257{
   3258	path->dgid = *IBA_GET_MEM_PTR(CM_LAP_ALTERNATE_LOCAL_PORT_GID, lap_msg);
   3259	path->sgid =
   3260		*IBA_GET_MEM_PTR(CM_LAP_ALTERNATE_REMOTE_PORT_GID, lap_msg);
   3261	path->flow_label =
   3262		cpu_to_be32(IBA_GET(CM_LAP_ALTERNATE_FLOW_LABEL, lap_msg));
   3263	path->hop_limit = IBA_GET(CM_LAP_ALTERNATE_HOP_LIMIT, lap_msg);
   3264	path->traffic_class = IBA_GET(CM_LAP_ALTERNATE_TRAFFIC_CLASS, lap_msg);
   3265	path->reversible = 1;
   3266	path->pkey = cm_id_priv->pkey;
   3267	path->sl = IBA_GET(CM_LAP_ALTERNATE_SL, lap_msg);
   3268	path->mtu_selector = IB_SA_EQ;
   3269	path->mtu = cm_id_priv->path_mtu;
   3270	path->rate_selector = IB_SA_EQ;
   3271	path->rate = IBA_GET(CM_LAP_ALTERNATE_PACKET_RATE, lap_msg);
   3272	path->packet_life_time_selector = IB_SA_EQ;
   3273	path->packet_life_time =
   3274		IBA_GET(CM_LAP_ALTERNATE_LOCAL_ACK_TIMEOUT, lap_msg);
   3275	path->packet_life_time -= (path->packet_life_time > 0);
   3276	cm_format_path_lid_from_lap(lap_msg, path);
   3277}
   3278
   3279static int cm_lap_handler(struct cm_work *work)
   3280{
   3281	struct cm_id_private *cm_id_priv;
   3282	struct cm_lap_msg *lap_msg;
   3283	struct ib_cm_lap_event_param *param;
   3284	struct ib_mad_send_buf *msg = NULL;
   3285	struct rdma_ah_attr ah_attr;
   3286	struct cm_av alt_av = {};
   3287	int ret;
   3288
    3289	/* Alternate path messages are currently not supported for the
    3290	 * RoCE link layer.
    3291	 */
   3292	if (rdma_protocol_roce(work->port->cm_dev->ib_device,
   3293			       work->port->port_num))
   3294		return -EINVAL;
   3295
   3296	/* todo: verify LAP request and send reject APR if invalid. */
   3297	lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
   3298	cm_id_priv = cm_acquire_id(
   3299		cpu_to_be32(IBA_GET(CM_LAP_REMOTE_COMM_ID, lap_msg)),
   3300		cpu_to_be32(IBA_GET(CM_LAP_LOCAL_COMM_ID, lap_msg)));
   3301	if (!cm_id_priv)
   3302		return -EINVAL;
   3303
   3304	param = &work->cm_event.param.lap_rcvd;
    3305	memset(&work->path[0], 0, sizeof(work->path[0]));
   3306	cm_path_set_rec_type(work->port->cm_dev->ib_device,
   3307			     work->port->port_num, &work->path[0],
   3308			     IBA_GET_MEM_PTR(CM_LAP_ALTERNATE_LOCAL_PORT_GID,
   3309					     lap_msg));
   3310	param->alternate_path = &work->path[0];
   3311	cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
   3312	work->cm_event.private_data =
   3313		IBA_GET_MEM_PTR(CM_LAP_PRIVATE_DATA, lap_msg);
   3314
   3315	ret = ib_init_ah_attr_from_wc(work->port->cm_dev->ib_device,
   3316				      work->port->port_num,
   3317				      work->mad_recv_wc->wc,
   3318				      work->mad_recv_wc->recv_buf.grh,
   3319				      &ah_attr);
   3320	if (ret)
   3321		goto deref;
   3322
   3323	ret = cm_init_av_by_path(param->alternate_path, NULL, &alt_av);
   3324	if (ret) {
   3325		rdma_destroy_ah_attr(&ah_attr);
   3326		goto deref;
   3327	}
   3328
   3329	spin_lock_irq(&cm_id_priv->lock);
   3330	cm_init_av_for_lap(work->port, work->mad_recv_wc->wc,
   3331			   &ah_attr, &cm_id_priv->av);
   3332	cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av);
   3333
   3334	if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
   3335		goto unlock;
   3336
   3337	switch (cm_id_priv->id.lap_state) {
   3338	case IB_CM_LAP_UNINIT:
   3339	case IB_CM_LAP_IDLE:
   3340		break;
   3341	case IB_CM_MRA_LAP_SENT:
   3342		atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
   3343						     [CM_LAP_COUNTER]);
   3344		msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
   3345		if (IS_ERR(msg))
   3346			goto unlock;
   3347
   3348		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
   3349			      CM_MSG_RESPONSE_OTHER,
   3350			      cm_id_priv->service_timeout,
   3351			      cm_id_priv->private_data,
   3352			      cm_id_priv->private_data_len);
   3353		spin_unlock_irq(&cm_id_priv->lock);
   3354
   3355		if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
   3356		    ib_post_send_mad(msg, NULL))
   3357			cm_free_response_msg(msg);
   3358		goto deref;
   3359	case IB_CM_LAP_RCVD:
   3360		atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
   3361						     [CM_LAP_COUNTER]);
   3362		goto unlock;
   3363	default:
   3364		goto unlock;
   3365	}
   3366
   3367	cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
   3368	cm_id_priv->tid = lap_msg->hdr.tid;
   3369	cm_queue_work_unlock(cm_id_priv, work);
   3370	return 0;
   3371
   3372unlock:	spin_unlock_irq(&cm_id_priv->lock);
   3373deref:	cm_deref_id(cm_id_priv);
   3374	return -EINVAL;
   3375}
   3376
   3377static int cm_apr_handler(struct cm_work *work)
   3378{
   3379	struct cm_id_private *cm_id_priv;
   3380	struct cm_apr_msg *apr_msg;
   3381
    3382	/* Alternate path messages are currently not supported for the
    3383	 * RoCE link layer.
    3384	 */
   3385	if (rdma_protocol_roce(work->port->cm_dev->ib_device,
   3386			       work->port->port_num))
   3387		return -EINVAL;
   3388
   3389	apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
   3390	cm_id_priv = cm_acquire_id(
   3391		cpu_to_be32(IBA_GET(CM_APR_REMOTE_COMM_ID, apr_msg)),
   3392		cpu_to_be32(IBA_GET(CM_APR_LOCAL_COMM_ID, apr_msg)));
   3393	if (!cm_id_priv)
   3394		return -EINVAL; /* Unmatched reply. */
   3395
   3396	work->cm_event.param.apr_rcvd.ap_status =
   3397		IBA_GET(CM_APR_AR_STATUS, apr_msg);
   3398	work->cm_event.param.apr_rcvd.apr_info =
   3399		IBA_GET_MEM_PTR(CM_APR_ADDITIONAL_INFORMATION, apr_msg);
   3400	work->cm_event.param.apr_rcvd.info_len =
   3401		IBA_GET(CM_APR_ADDITIONAL_INFORMATION_LENGTH, apr_msg);
   3402	work->cm_event.private_data =
   3403		IBA_GET_MEM_PTR(CM_APR_PRIVATE_DATA, apr_msg);
   3404
   3405	spin_lock_irq(&cm_id_priv->lock);
   3406	if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
   3407	    (cm_id_priv->id.lap_state != IB_CM_LAP_SENT &&
   3408	     cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) {
   3409		spin_unlock_irq(&cm_id_priv->lock);
   3410		goto out;
   3411	}
   3412	cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
   3413	ib_cancel_mad(cm_id_priv->msg);
   3414	cm_queue_work_unlock(cm_id_priv, work);
   3415	return 0;
   3416out:
   3417	cm_deref_id(cm_id_priv);
   3418	return -EINVAL;
   3419}
   3420
   3421static int cm_timewait_handler(struct cm_work *work)
   3422{
   3423	struct cm_timewait_info *timewait_info;
   3424	struct cm_id_private *cm_id_priv;
   3425
   3426	timewait_info = container_of(work, struct cm_timewait_info, work);
   3427	spin_lock_irq(&cm.lock);
   3428	list_del(&timewait_info->list);
   3429	spin_unlock_irq(&cm.lock);
   3430
   3431	cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
   3432				   timewait_info->work.remote_id);
   3433	if (!cm_id_priv)
   3434		return -EINVAL;
   3435
   3436	spin_lock_irq(&cm_id_priv->lock);
   3437	if (cm_id_priv->id.state != IB_CM_TIMEWAIT ||
   3438	    cm_id_priv->remote_qpn != timewait_info->remote_qpn) {
   3439		spin_unlock_irq(&cm_id_priv->lock);
   3440		goto out;
   3441	}
   3442	cm_id_priv->id.state = IB_CM_IDLE;
   3443	cm_queue_work_unlock(cm_id_priv, work);
   3444	return 0;
   3445out:
   3446	cm_deref_id(cm_id_priv);
   3447	return -EINVAL;
   3448}
   3449
   3450static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
   3451			       struct cm_id_private *cm_id_priv,
   3452			       struct ib_cm_sidr_req_param *param)
   3453{
   3454	cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
   3455			  cm_form_tid(cm_id_priv));
   3456	IBA_SET(CM_SIDR_REQ_REQUESTID, sidr_req_msg,
   3457		be32_to_cpu(cm_id_priv->id.local_id));
   3458	IBA_SET(CM_SIDR_REQ_PARTITION_KEY, sidr_req_msg,
   3459		be16_to_cpu(param->path->pkey));
   3460	IBA_SET(CM_SIDR_REQ_SERVICEID, sidr_req_msg,
   3461		be64_to_cpu(param->service_id));
   3462
   3463	if (param->private_data && param->private_data_len)
   3464		IBA_SET_MEM(CM_SIDR_REQ_PRIVATE_DATA, sidr_req_msg,
   3465			    param->private_data, param->private_data_len);
   3466}
   3467
   3468int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
   3469			struct ib_cm_sidr_req_param *param)
   3470{
   3471	struct cm_id_private *cm_id_priv;
   3472	struct ib_mad_send_buf *msg;
   3473	struct cm_av av = {};
   3474	unsigned long flags;
   3475	int ret;
   3476
   3477	if (!param->path || (param->private_data &&
   3478	     param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE))
   3479		return -EINVAL;
   3480
   3481	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
   3482	ret = cm_init_av_by_path(param->path, param->sgid_attr, &av);
   3483	if (ret)
   3484		return ret;
   3485
   3486	spin_lock_irqsave(&cm_id_priv->lock, flags);
   3487	cm_move_av_from_path(&cm_id_priv->av, &av);
   3488	cm_id->service_id = param->service_id;
   3489	cm_id->service_mask = ~cpu_to_be64(0);
   3490	cm_id_priv->timeout_ms = param->timeout_ms;
   3491	cm_id_priv->max_cm_retries = param->max_cm_retries;
   3492	if (cm_id->state != IB_CM_IDLE) {
   3493		ret = -EINVAL;
   3494		goto out_unlock;
   3495	}
   3496
   3497	msg = cm_alloc_priv_msg(cm_id_priv);
   3498	if (IS_ERR(msg)) {
   3499		ret = PTR_ERR(msg);
   3500		goto out_unlock;
   3501	}
   3502
   3503	cm_format_sidr_req((struct cm_sidr_req_msg *)msg->mad, cm_id_priv,
   3504			   param);
   3505	msg->timeout_ms = cm_id_priv->timeout_ms;
   3506	msg->context[1] = (void *)(unsigned long)IB_CM_SIDR_REQ_SENT;
   3507
   3508	trace_icm_send_sidr_req(&cm_id_priv->id);
   3509	ret = ib_post_send_mad(msg, NULL);
   3510	if (ret)
   3511		goto out_free;
   3512	cm_id->state = IB_CM_SIDR_REQ_SENT;
   3513	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
   3514	return 0;
   3515out_free:
   3516	cm_free_priv_msg(msg);
   3517out_unlock:
   3518	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
   3519	return ret;
   3520}
   3521EXPORT_SYMBOL(ib_send_cm_sidr_req);
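/*
 * Illustrative consumer-side sketch (not part of this module): issuing a
 * SIDR REQ to resolve a remote UD service.  The cm_id is assumed to have
 * been created with ib_create_cm_id(); the path record, service ID, timeout
 * and retry count below are placeholders.
 */
static int example_send_sidr_req(struct ib_cm_id *cm_id,
				 struct sa_path_rec *path,
				 __be64 service_id)
{
	struct ib_cm_sidr_req_param param = {
		.path		= path,		/* resolved via the SA */
		.service_id	= service_id,
		.timeout_ms	= 1000,
		.max_cm_retries	= 3,
	};

	return ib_send_cm_sidr_req(cm_id, &param);
}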
   3522
   3523static void cm_format_sidr_req_event(struct cm_work *work,
   3524				     const struct cm_id_private *rx_cm_id,
   3525				     struct ib_cm_id *listen_id)
   3526{
   3527	struct cm_sidr_req_msg *sidr_req_msg;
   3528	struct ib_cm_sidr_req_event_param *param;
   3529
   3530	sidr_req_msg = (struct cm_sidr_req_msg *)
   3531				work->mad_recv_wc->recv_buf.mad;
   3532	param = &work->cm_event.param.sidr_req_rcvd;
   3533	param->pkey = IBA_GET(CM_SIDR_REQ_PARTITION_KEY, sidr_req_msg);
   3534	param->listen_id = listen_id;
   3535	param->service_id =
   3536		cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg));
   3537	param->bth_pkey = cm_get_bth_pkey(work);
   3538	param->port = work->port->port_num;
   3539	param->sgid_attr = rx_cm_id->av.ah_attr.grh.sgid_attr;
   3540	work->cm_event.private_data =
   3541		IBA_GET_MEM_PTR(CM_SIDR_REQ_PRIVATE_DATA, sidr_req_msg);
   3542}
   3543
   3544static int cm_sidr_req_handler(struct cm_work *work)
   3545{
   3546	struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
   3547	struct cm_sidr_req_msg *sidr_req_msg;
   3548	struct ib_wc *wc;
   3549	int ret;
   3550
   3551	cm_id_priv =
   3552		cm_alloc_id_priv(work->port->cm_dev->ib_device, NULL, NULL);
   3553	if (IS_ERR(cm_id_priv))
   3554		return PTR_ERR(cm_id_priv);
   3555
   3556	/* Record SGID/SLID and request ID for lookup. */
   3557	sidr_req_msg = (struct cm_sidr_req_msg *)
   3558				work->mad_recv_wc->recv_buf.mad;
   3559
   3560	cm_id_priv->id.remote_id =
   3561		cpu_to_be32(IBA_GET(CM_SIDR_REQ_REQUESTID, sidr_req_msg));
   3562	cm_id_priv->id.service_id =
   3563		cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg));
   3564	cm_id_priv->id.service_mask = ~cpu_to_be64(0);
   3565	cm_id_priv->tid = sidr_req_msg->hdr.tid;
   3566
   3567	wc = work->mad_recv_wc->wc;
   3568	cm_id_priv->sidr_slid = wc->slid;
   3569	ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
   3570				      work->mad_recv_wc->recv_buf.grh,
   3571				      &cm_id_priv->av);
   3572	if (ret)
   3573		goto out;
   3574
   3575	spin_lock_irq(&cm.lock);
   3576	listen_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
   3577	if (listen_cm_id_priv) {
   3578		spin_unlock_irq(&cm.lock);
   3579		atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
   3580						     [CM_SIDR_REQ_COUNTER]);
   3581		goto out; /* Duplicate message. */
   3582	}
   3583	cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
   3584	listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
   3585					   cm_id_priv->id.service_id);
   3586	if (!listen_cm_id_priv) {
   3587		spin_unlock_irq(&cm.lock);
   3588		ib_send_cm_sidr_rep(&cm_id_priv->id,
   3589				    &(struct ib_cm_sidr_rep_param){
   3590					    .status = IB_SIDR_UNSUPPORTED });
   3591		goto out; /* No match. */
   3592	}
   3593	spin_unlock_irq(&cm.lock);
   3594
   3595	cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
   3596	cm_id_priv->id.context = listen_cm_id_priv->id.context;
   3597
   3598	/*
   3599	 * A SIDR ID does not need to be in the xarray since it does not receive
   3600	 * mads, is not placed in the remote_id or remote_qpn rbtree, and does
   3601	 * not enter timewait.
   3602	 */
   3603
   3604	cm_format_sidr_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
   3605	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
   3606	cm_free_work(work);
   3607	/*
   3608	 * A pointer to the listen_cm_id is held in the event, so this deref
   3609	 * must be after the event is delivered above.
   3610	 */
   3611	cm_deref_id(listen_cm_id_priv);
   3612	if (ret)
   3613		cm_destroy_id(&cm_id_priv->id, ret);
   3614	return 0;
   3615out:
   3616	ib_destroy_cm_id(&cm_id_priv->id);
   3617	return -EINVAL;
   3618}
   3619
   3620static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
   3621			       struct cm_id_private *cm_id_priv,
   3622			       struct ib_cm_sidr_rep_param *param)
   3623{
   3624	cm_format_mad_ece_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
   3625			      cm_id_priv->tid, param->ece.attr_mod);
   3626	IBA_SET(CM_SIDR_REP_REQUESTID, sidr_rep_msg,
   3627		be32_to_cpu(cm_id_priv->id.remote_id));
   3628	IBA_SET(CM_SIDR_REP_STATUS, sidr_rep_msg, param->status);
   3629	IBA_SET(CM_SIDR_REP_QPN, sidr_rep_msg, param->qp_num);
   3630	IBA_SET(CM_SIDR_REP_SERVICEID, sidr_rep_msg,
   3631		be64_to_cpu(cm_id_priv->id.service_id));
   3632	IBA_SET(CM_SIDR_REP_Q_KEY, sidr_rep_msg, param->qkey);
   3633	IBA_SET(CM_SIDR_REP_VENDOR_ID_L, sidr_rep_msg,
   3634		param->ece.vendor_id & 0xFF);
   3635	IBA_SET(CM_SIDR_REP_VENDOR_ID_H, sidr_rep_msg,
   3636		(param->ece.vendor_id >> 8) & 0xFF);
   3637
   3638	if (param->info && param->info_length)
   3639		IBA_SET_MEM(CM_SIDR_REP_ADDITIONAL_INFORMATION, sidr_rep_msg,
   3640			    param->info, param->info_length);
   3641
   3642	if (param->private_data && param->private_data_len)
   3643		IBA_SET_MEM(CM_SIDR_REP_PRIVATE_DATA, sidr_rep_msg,
   3644			    param->private_data, param->private_data_len);
   3645}
   3646
   3647static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
   3648				   struct ib_cm_sidr_rep_param *param)
   3649{
   3650	struct ib_mad_send_buf *msg;
   3651	unsigned long flags;
   3652	int ret;
   3653
   3654	lockdep_assert_held(&cm_id_priv->lock);
   3655
   3656	if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) ||
   3657	    (param->private_data &&
   3658	     param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE))
   3659		return -EINVAL;
   3660
   3661	if (cm_id_priv->id.state != IB_CM_SIDR_REQ_RCVD)
   3662		return -EINVAL;
   3663
   3664	msg = cm_alloc_msg(cm_id_priv);
   3665	if (IS_ERR(msg))
   3666		return PTR_ERR(msg);
   3667
   3668	cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
   3669			   param);
   3670	trace_icm_send_sidr_rep(&cm_id_priv->id);
   3671	ret = ib_post_send_mad(msg, NULL);
   3672	if (ret) {
   3673		cm_free_msg(msg);
   3674		return ret;
   3675	}
   3676	cm_id_priv->id.state = IB_CM_IDLE;
   3677	spin_lock_irqsave(&cm.lock, flags);
   3678	if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) {
   3679		rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
   3680		RB_CLEAR_NODE(&cm_id_priv->sidr_id_node);
   3681	}
   3682	spin_unlock_irqrestore(&cm.lock, flags);
   3683	return 0;
   3684}
   3685
   3686int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
   3687			struct ib_cm_sidr_rep_param *param)
   3688{
   3689	struct cm_id_private *cm_id_priv =
   3690		container_of(cm_id, struct cm_id_private, id);
   3691	unsigned long flags;
   3692	int ret;
   3693
   3694	spin_lock_irqsave(&cm_id_priv->lock, flags);
   3695	ret = cm_send_sidr_rep_locked(cm_id_priv, param);
   3696	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
   3697	return ret;
   3698}
   3699EXPORT_SYMBOL(ib_send_cm_sidr_rep);
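/*
 * Illustrative listener-side sketch (not part of this module): answering a
 * received SIDR REQ with the QP number and Q_Key of a hypothetical service
 * UD QP stashed in the cm_id context.  IB_SIDR_SUCCESS reports that the
 * service was found; the rejection case with IB_SIDR_UNSUPPORTED is shown in
 * cm_sidr_req_handler() above.  The Q_Key value is a placeholder.
 */
static int example_sidr_req_handler(struct ib_cm_id *cm_id,
				    const struct ib_cm_event *event)
{
	struct ib_qp *qp = cm_id->context;	/* hypothetical service UD QP */
	struct ib_cm_sidr_rep_param rep = {};

	if (event->event != IB_CM_SIDR_REQ_RECEIVED)
		return 0;

	rep.status = IB_SIDR_SUCCESS;
	rep.qp_num = qp->qp_num;
	rep.qkey   = 0x11111111;		/* placeholder Q_Key */

	return ib_send_cm_sidr_rep(cm_id, &rep);
}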
   3700
   3701static void cm_format_sidr_rep_event(struct cm_work *work,
   3702				     const struct cm_id_private *cm_id_priv)
   3703{
   3704	struct cm_sidr_rep_msg *sidr_rep_msg;
   3705	struct ib_cm_sidr_rep_event_param *param;
   3706
   3707	sidr_rep_msg = (struct cm_sidr_rep_msg *)
   3708				work->mad_recv_wc->recv_buf.mad;
   3709	param = &work->cm_event.param.sidr_rep_rcvd;
   3710	param->status = IBA_GET(CM_SIDR_REP_STATUS, sidr_rep_msg);
   3711	param->qkey = IBA_GET(CM_SIDR_REP_Q_KEY, sidr_rep_msg);
   3712	param->qpn = IBA_GET(CM_SIDR_REP_QPN, sidr_rep_msg);
   3713	param->info = IBA_GET_MEM_PTR(CM_SIDR_REP_ADDITIONAL_INFORMATION,
   3714				      sidr_rep_msg);
   3715	param->info_len = IBA_GET(CM_SIDR_REP_ADDITIONAL_INFORMATION_LENGTH,
   3716				  sidr_rep_msg);
   3717	param->sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
   3718	work->cm_event.private_data =
   3719		IBA_GET_MEM_PTR(CM_SIDR_REP_PRIVATE_DATA, sidr_rep_msg);
   3720}
   3721
   3722static int cm_sidr_rep_handler(struct cm_work *work)
   3723{
   3724	struct cm_sidr_rep_msg *sidr_rep_msg;
   3725	struct cm_id_private *cm_id_priv;
   3726
   3727	sidr_rep_msg = (struct cm_sidr_rep_msg *)
   3728				work->mad_recv_wc->recv_buf.mad;
   3729	cm_id_priv = cm_acquire_id(
   3730		cpu_to_be32(IBA_GET(CM_SIDR_REP_REQUESTID, sidr_rep_msg)), 0);
   3731	if (!cm_id_priv)
   3732		return -EINVAL; /* Unmatched reply. */
   3733
   3734	spin_lock_irq(&cm_id_priv->lock);
   3735	if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) {
   3736		spin_unlock_irq(&cm_id_priv->lock);
   3737		goto out;
   3738	}
   3739	cm_id_priv->id.state = IB_CM_IDLE;
   3740	ib_cancel_mad(cm_id_priv->msg);
   3741	spin_unlock_irq(&cm_id_priv->lock);
   3742
   3743	cm_format_sidr_rep_event(work, cm_id_priv);
   3744	cm_process_work(cm_id_priv, work);
   3745	return 0;
   3746out:
   3747	cm_deref_id(cm_id_priv);
   3748	return -EINVAL;
   3749}
   3750
   3751static void cm_process_send_error(struct cm_id_private *cm_id_priv,
   3752				  struct ib_mad_send_buf *msg,
   3753				  enum ib_cm_state state,
   3754				  enum ib_wc_status wc_status)
   3755{
   3756	struct ib_cm_event cm_event = {};
   3757	int ret;
   3758
   3759	/* Discard old sends or ones without a response. */
   3760	spin_lock_irq(&cm_id_priv->lock);
   3761	if (msg != cm_id_priv->msg) {
   3762		spin_unlock_irq(&cm_id_priv->lock);
   3763		cm_free_msg(msg);
   3764		return;
   3765	}
   3766	cm_free_priv_msg(msg);
   3767
   3768	if (state != cm_id_priv->id.state || wc_status == IB_WC_SUCCESS ||
   3769	    wc_status == IB_WC_WR_FLUSH_ERR)
   3770		goto out_unlock;
   3771
   3772	trace_icm_mad_send_err(state, wc_status);
   3773	switch (state) {
   3774	case IB_CM_REQ_SENT:
   3775	case IB_CM_MRA_REQ_RCVD:
   3776		cm_reset_to_idle(cm_id_priv);
   3777		cm_event.event = IB_CM_REQ_ERROR;
   3778		break;
   3779	case IB_CM_REP_SENT:
   3780	case IB_CM_MRA_REP_RCVD:
   3781		cm_reset_to_idle(cm_id_priv);
   3782		cm_event.event = IB_CM_REP_ERROR;
   3783		break;
   3784	case IB_CM_DREQ_SENT:
   3785		cm_enter_timewait(cm_id_priv);
   3786		cm_event.event = IB_CM_DREQ_ERROR;
   3787		break;
   3788	case IB_CM_SIDR_REQ_SENT:
   3789		cm_id_priv->id.state = IB_CM_IDLE;
   3790		cm_event.event = IB_CM_SIDR_REQ_ERROR;
   3791		break;
   3792	default:
   3793		goto out_unlock;
   3794	}
   3795	spin_unlock_irq(&cm_id_priv->lock);
   3796	cm_event.param.send_status = wc_status;
   3797
   3798	/* No other events can occur on the cm_id at this point. */
   3799	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
   3800	if (ret)
   3801		ib_destroy_cm_id(&cm_id_priv->id);
   3802	return;
   3803out_unlock:
   3804	spin_unlock_irq(&cm_id_priv->lock);
   3805}
   3806
   3807static void cm_send_handler(struct ib_mad_agent *mad_agent,
   3808			    struct ib_mad_send_wc *mad_send_wc)
   3809{
   3810	struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
   3811	struct cm_id_private *cm_id_priv = msg->context[0];
   3812	enum ib_cm_state state =
   3813		(enum ib_cm_state)(unsigned long)msg->context[1];
   3814	struct cm_port *port;
   3815	u16 attr_index;
   3816
   3817	port = mad_agent->context;
   3818	attr_index = be16_to_cpu(((struct ib_mad_hdr *)
   3819				  msg->mad)->attr_id) - CM_ATTR_ID_OFFSET;
   3820
   3821	/*
   3822	 * If the send was in response to a received message (context[0] is not
   3823	 * set to a cm_id), and is not a REJ, then it is a send that was
   3824	 * manually retried.
   3825	 */
   3826	if (!cm_id_priv && (attr_index != CM_REJ_COUNTER))
   3827		msg->retries = 1;
   3828
   3829	atomic_long_add(1 + msg->retries, &port->counters[CM_XMIT][attr_index]);
   3830	if (msg->retries)
   3831		atomic_long_add(msg->retries,
   3832				&port->counters[CM_XMIT_RETRIES][attr_index]);
   3833
   3834	if (cm_id_priv)
   3835		cm_process_send_error(cm_id_priv, msg, state,
   3836				      mad_send_wc->status);
   3837	else
   3838		cm_free_response_msg(msg);
   3839}
   3840
   3841static void cm_work_handler(struct work_struct *_work)
   3842{
   3843	struct cm_work *work = container_of(_work, struct cm_work, work.work);
   3844	int ret;
   3845
   3846	switch (work->cm_event.event) {
   3847	case IB_CM_REQ_RECEIVED:
   3848		ret = cm_req_handler(work);
   3849		break;
   3850	case IB_CM_MRA_RECEIVED:
   3851		ret = cm_mra_handler(work);
   3852		break;
   3853	case IB_CM_REJ_RECEIVED:
   3854		ret = cm_rej_handler(work);
   3855		break;
   3856	case IB_CM_REP_RECEIVED:
   3857		ret = cm_rep_handler(work);
   3858		break;
   3859	case IB_CM_RTU_RECEIVED:
   3860		ret = cm_rtu_handler(work);
   3861		break;
   3862	case IB_CM_USER_ESTABLISHED:
   3863		ret = cm_establish_handler(work);
   3864		break;
   3865	case IB_CM_DREQ_RECEIVED:
   3866		ret = cm_dreq_handler(work);
   3867		break;
   3868	case IB_CM_DREP_RECEIVED:
   3869		ret = cm_drep_handler(work);
   3870		break;
   3871	case IB_CM_SIDR_REQ_RECEIVED:
   3872		ret = cm_sidr_req_handler(work);
   3873		break;
   3874	case IB_CM_SIDR_REP_RECEIVED:
   3875		ret = cm_sidr_rep_handler(work);
   3876		break;
   3877	case IB_CM_LAP_RECEIVED:
   3878		ret = cm_lap_handler(work);
   3879		break;
   3880	case IB_CM_APR_RECEIVED:
   3881		ret = cm_apr_handler(work);
   3882		break;
   3883	case IB_CM_TIMEWAIT_EXIT:
   3884		ret = cm_timewait_handler(work);
   3885		break;
   3886	default:
   3887		trace_icm_handler_err(work->cm_event.event);
   3888		ret = -EINVAL;
   3889		break;
   3890	}
   3891	if (ret)
   3892		cm_free_work(work);
   3893}
   3894
   3895static int cm_establish(struct ib_cm_id *cm_id)
   3896{
   3897	struct cm_id_private *cm_id_priv;
   3898	struct cm_work *work;
   3899	unsigned long flags;
   3900	int ret = 0;
   3901	struct cm_device *cm_dev;
   3902
   3903	cm_dev = ib_get_client_data(cm_id->device, &cm_client);
   3904	if (!cm_dev)
   3905		return -ENODEV;
   3906
   3907	work = kmalloc(sizeof *work, GFP_ATOMIC);
   3908	if (!work)
   3909		return -ENOMEM;
   3910
   3911	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
   3912	spin_lock_irqsave(&cm_id_priv->lock, flags);
   3913	switch (cm_id->state) {
   3914	case IB_CM_REP_SENT:
   3915	case IB_CM_MRA_REP_RCVD:
   3916		cm_id->state = IB_CM_ESTABLISHED;
   3917		break;
   3918	case IB_CM_ESTABLISHED:
   3919		ret = -EISCONN;
   3920		break;
   3921	default:
   3922		trace_icm_establish_err(cm_id);
   3923		ret = -EINVAL;
   3924		break;
   3925	}
   3926	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
   3927
   3928	if (ret) {
   3929		kfree(work);
   3930		goto out;
   3931	}
   3932
   3933	/*
   3934	 * The CM worker thread may try to destroy the cm_id before it
   3935	 * can execute this work item.  To prevent potential deadlock,
   3936	 * we need to find the cm_id once we're in the context of the
   3937	 * worker thread, rather than holding a reference on it.
   3938	 */
   3939	INIT_DELAYED_WORK(&work->work, cm_work_handler);
   3940	work->local_id = cm_id->local_id;
   3941	work->remote_id = cm_id->remote_id;
   3942	work->mad_recv_wc = NULL;
   3943	work->cm_event.event = IB_CM_USER_ESTABLISHED;
   3944
   3945	/* Check if the device started its remove_one */
   3946	spin_lock_irqsave(&cm.lock, flags);
   3947	if (!cm_dev->going_down) {
   3948		queue_delayed_work(cm.wq, &work->work, 0);
   3949	} else {
   3950		kfree(work);
   3951		ret = -ENODEV;
   3952	}
   3953	spin_unlock_irqrestore(&cm.lock, flags);
   3954
   3955out:
   3956	return ret;
   3957}
   3958
   3959static int cm_migrate(struct ib_cm_id *cm_id)
   3960{
   3961	struct cm_id_private *cm_id_priv;
   3962	unsigned long flags;
   3963	int ret = 0;
   3964
   3965	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
   3966	spin_lock_irqsave(&cm_id_priv->lock, flags);
   3967	if (cm_id->state == IB_CM_ESTABLISHED &&
   3968	    (cm_id->lap_state == IB_CM_LAP_UNINIT ||
   3969	     cm_id->lap_state == IB_CM_LAP_IDLE)) {
   3970		cm_id->lap_state = IB_CM_LAP_IDLE;
   3971		cm_id_priv->av = cm_id_priv->alt_av;
   3972	} else
   3973		ret = -EINVAL;
   3974	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
   3975
   3976	return ret;
   3977}
   3978
   3979int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
   3980{
   3981	int ret;
   3982
   3983	switch (event) {
   3984	case IB_EVENT_COMM_EST:
   3985		ret = cm_establish(cm_id);
   3986		break;
   3987	case IB_EVENT_PATH_MIG:
   3988		ret = cm_migrate(cm_id);
   3989		break;
   3990	default:
   3991		ret = -EINVAL;
   3992	}
   3993	return ret;
   3994}
   3995EXPORT_SYMBOL(ib_cm_notify);
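/*
 * Illustrative consumer-side sketch (not part of this module): forwarding a
 * communication-established QP async event to the CM, so the passive side
 * can reach IB_CM_ESTABLISHED even if the RTU was lost.  Passing the cm_id
 * as the QP event context is a convention of this example only.
 */
static void example_qp_event_handler(struct ib_event *event, void *context)
{
	struct ib_cm_id *cm_id = context;	/* set at QP creation time */

	if (event->event == IB_EVENT_COMM_EST)
		ib_cm_notify(cm_id, IB_EVENT_COMM_EST);
}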
   3996
   3997static void cm_recv_handler(struct ib_mad_agent *mad_agent,
   3998			    struct ib_mad_send_buf *send_buf,
   3999			    struct ib_mad_recv_wc *mad_recv_wc)
   4000{
   4001	struct cm_port *port = mad_agent->context;
   4002	struct cm_work *work;
   4003	enum ib_cm_event_type event;
   4004	bool alt_path = false;
   4005	u16 attr_id;
   4006	int paths = 0;
   4007	int going_down = 0;
   4008
   4009	switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
   4010	case CM_REQ_ATTR_ID:
   4011		alt_path = cm_req_has_alt_path((struct cm_req_msg *)
   4012						mad_recv_wc->recv_buf.mad);
   4013		paths = 1 + (alt_path != 0);
   4014		event = IB_CM_REQ_RECEIVED;
   4015		break;
   4016	case CM_MRA_ATTR_ID:
   4017		event = IB_CM_MRA_RECEIVED;
   4018		break;
   4019	case CM_REJ_ATTR_ID:
   4020		event = IB_CM_REJ_RECEIVED;
   4021		break;
   4022	case CM_REP_ATTR_ID:
   4023		event = IB_CM_REP_RECEIVED;
   4024		break;
   4025	case CM_RTU_ATTR_ID:
   4026		event = IB_CM_RTU_RECEIVED;
   4027		break;
   4028	case CM_DREQ_ATTR_ID:
   4029		event = IB_CM_DREQ_RECEIVED;
   4030		break;
   4031	case CM_DREP_ATTR_ID:
   4032		event = IB_CM_DREP_RECEIVED;
   4033		break;
   4034	case CM_SIDR_REQ_ATTR_ID:
   4035		event = IB_CM_SIDR_REQ_RECEIVED;
   4036		break;
   4037	case CM_SIDR_REP_ATTR_ID:
   4038		event = IB_CM_SIDR_REP_RECEIVED;
   4039		break;
   4040	case CM_LAP_ATTR_ID:
   4041		paths = 1;
   4042		event = IB_CM_LAP_RECEIVED;
   4043		break;
   4044	case CM_APR_ATTR_ID:
   4045		event = IB_CM_APR_RECEIVED;
   4046		break;
   4047	default:
   4048		ib_free_recv_mad(mad_recv_wc);
   4049		return;
   4050	}
   4051
   4052	attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id);
   4053	atomic_long_inc(&port->counters[CM_RECV][attr_id - CM_ATTR_ID_OFFSET]);
   4054
   4055	work = kmalloc(struct_size(work, path, paths), GFP_KERNEL);
   4056	if (!work) {
   4057		ib_free_recv_mad(mad_recv_wc);
   4058		return;
   4059	}
   4060
   4061	INIT_DELAYED_WORK(&work->work, cm_work_handler);
   4062	work->cm_event.event = event;
   4063	work->mad_recv_wc = mad_recv_wc;
   4064	work->port = port;
   4065
   4066	/* Check if the device started its remove_one */
   4067	spin_lock_irq(&cm.lock);
   4068	if (!port->cm_dev->going_down)
   4069		queue_delayed_work(cm.wq, &work->work, 0);
   4070	else
   4071		going_down = 1;
   4072	spin_unlock_irq(&cm.lock);
   4073
   4074	if (going_down) {
   4075		kfree(work);
   4076		ib_free_recv_mad(mad_recv_wc);
   4077	}
   4078}
   4079
   4080static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
   4081				struct ib_qp_attr *qp_attr,
   4082				int *qp_attr_mask)
   4083{
   4084	unsigned long flags;
   4085	int ret;
   4086
   4087	spin_lock_irqsave(&cm_id_priv->lock, flags);
   4088	switch (cm_id_priv->id.state) {
   4089	case IB_CM_REQ_SENT:
   4090	case IB_CM_MRA_REQ_RCVD:
   4091	case IB_CM_REQ_RCVD:
   4092	case IB_CM_MRA_REQ_SENT:
   4093	case IB_CM_REP_RCVD:
   4094	case IB_CM_MRA_REP_SENT:
   4095	case IB_CM_REP_SENT:
   4096	case IB_CM_MRA_REP_RCVD:
   4097	case IB_CM_ESTABLISHED:
   4098		*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
   4099				IB_QP_PKEY_INDEX | IB_QP_PORT;
   4100		qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
   4101		if (cm_id_priv->responder_resources)
   4102			qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
   4103						    IB_ACCESS_REMOTE_ATOMIC;
   4104		qp_attr->pkey_index = cm_id_priv->av.pkey_index;
   4105		if (cm_id_priv->av.port)
   4106			qp_attr->port_num = cm_id_priv->av.port->port_num;
   4107		ret = 0;
   4108		break;
   4109	default:
   4110		trace_icm_qp_init_err(&cm_id_priv->id);
   4111		ret = -EINVAL;
   4112		break;
   4113	}
   4114	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
   4115	return ret;
   4116}
   4117
   4118static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
   4119			       struct ib_qp_attr *qp_attr,
   4120			       int *qp_attr_mask)
   4121{
   4122	unsigned long flags;
   4123	int ret;
   4124
   4125	spin_lock_irqsave(&cm_id_priv->lock, flags);
   4126	switch (cm_id_priv->id.state) {
   4127	case IB_CM_REQ_RCVD:
   4128	case IB_CM_MRA_REQ_SENT:
   4129	case IB_CM_REP_RCVD:
   4130	case IB_CM_MRA_REP_SENT:
   4131	case IB_CM_REP_SENT:
   4132	case IB_CM_MRA_REP_RCVD:
   4133	case IB_CM_ESTABLISHED:
   4134		*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
   4135				IB_QP_DEST_QPN | IB_QP_RQ_PSN;
   4136		qp_attr->ah_attr = cm_id_priv->av.ah_attr;
   4137		qp_attr->path_mtu = cm_id_priv->path_mtu;
   4138		qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
   4139		qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
   4140		if (cm_id_priv->qp_type == IB_QPT_RC ||
   4141		    cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
   4142			*qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
   4143					 IB_QP_MIN_RNR_TIMER;
   4144			qp_attr->max_dest_rd_atomic =
   4145					cm_id_priv->responder_resources;
   4146			qp_attr->min_rnr_timer = 0;
   4147		}
   4148		if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr) &&
   4149		    cm_id_priv->alt_av.port) {
   4150			*qp_attr_mask |= IB_QP_ALT_PATH;
   4151			qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
   4152			qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
   4153			qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
   4154			qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
   4155		}
   4156		ret = 0;
   4157		break;
   4158	default:
   4159		trace_icm_qp_rtr_err(&cm_id_priv->id);
   4160		ret = -EINVAL;
   4161		break;
   4162	}
   4163	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
   4164	return ret;
   4165}
   4166
   4167static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
   4168			       struct ib_qp_attr *qp_attr,
   4169			       int *qp_attr_mask)
   4170{
   4171	unsigned long flags;
   4172	int ret;
   4173
   4174	spin_lock_irqsave(&cm_id_priv->lock, flags);
   4175	switch (cm_id_priv->id.state) {
   4176	/* Allow transition to RTS before sending REP */
   4177	case IB_CM_REQ_RCVD:
   4178	case IB_CM_MRA_REQ_SENT:
   4179
   4180	case IB_CM_REP_RCVD:
   4181	case IB_CM_MRA_REP_SENT:
   4182	case IB_CM_REP_SENT:
   4183	case IB_CM_MRA_REP_RCVD:
   4184	case IB_CM_ESTABLISHED:
   4185		if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {
   4186			*qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
   4187			qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
   4188			switch (cm_id_priv->qp_type) {
   4189			case IB_QPT_RC:
   4190			case IB_QPT_XRC_INI:
   4191				*qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
   4192						 IB_QP_MAX_QP_RD_ATOMIC;
   4193				qp_attr->retry_cnt = cm_id_priv->retry_count;
   4194				qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
   4195				qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
   4196				fallthrough;
   4197			case IB_QPT_XRC_TGT:
   4198				*qp_attr_mask |= IB_QP_TIMEOUT;
   4199				qp_attr->timeout = cm_id_priv->av.timeout;
   4200				break;
   4201			default:
   4202				break;
   4203			}
   4204			if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) {
   4205				*qp_attr_mask |= IB_QP_PATH_MIG_STATE;
   4206				qp_attr->path_mig_state = IB_MIG_REARM;
   4207			}
   4208		} else {
   4209			*qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
   4210			if (cm_id_priv->alt_av.port)
   4211				qp_attr->alt_port_num =
   4212					cm_id_priv->alt_av.port->port_num;
   4213			qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
   4214			qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
   4215			qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
   4216			qp_attr->path_mig_state = IB_MIG_REARM;
   4217		}
   4218		ret = 0;
   4219		break;
   4220	default:
   4221		trace_icm_qp_rts_err(&cm_id_priv->id);
   4222		ret = -EINVAL;
   4223		break;
   4224	}
   4225	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
   4226	return ret;
   4227}
   4228
   4229int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
   4230		       struct ib_qp_attr *qp_attr,
   4231		       int *qp_attr_mask)
   4232{
   4233	struct cm_id_private *cm_id_priv;
   4234	int ret;
   4235
   4236	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
   4237	switch (qp_attr->qp_state) {
   4238	case IB_QPS_INIT:
   4239		ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask);
   4240		break;
   4241	case IB_QPS_RTR:
   4242		ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask);
   4243		break;
   4244	case IB_QPS_RTS:
   4245		ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask);
   4246		break;
   4247	default:
   4248		ret = -EINVAL;
   4249		break;
   4250	}
   4251	return ret;
   4252}
   4253EXPORT_SYMBOL(ib_cm_init_qp_attr);
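/*
 * Illustrative consumer-side sketch (not part of this module): the usual
 * INIT -> RTR -> RTS sequence for a connected QP, letting the CM fill in the
 * attributes it negotiated for each transition.  The helper name is a
 * placeholder and error handling is collapsed for brevity.
 */
static int example_modify_qp_states(struct ib_cm_id *cm_id, struct ib_qp *qp)
{
	static const enum ib_qp_state states[] = {
		IB_QPS_INIT, IB_QPS_RTR, IB_QPS_RTS
	};
	struct ib_qp_attr attr;
	int mask, ret, i;

	for (i = 0; i < ARRAY_SIZE(states); i++) {
		memset(&attr, 0, sizeof(attr));
		attr.qp_state = states[i];
		ret = ib_cm_init_qp_attr(cm_id, &attr, &mask);
		if (ret)
			return ret;
		ret = ib_modify_qp(qp, &attr, mask);
		if (ret)
			return ret;
	}
	return 0;
}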
   4254
   4255static ssize_t cm_show_counter(struct ib_device *ibdev, u32 port_num,
   4256			       struct ib_port_attribute *attr, char *buf)
   4257{
   4258	struct cm_counter_attribute *cm_attr =
   4259		container_of(attr, struct cm_counter_attribute, attr);
   4260	struct cm_device *cm_dev = ib_get_client_data(ibdev, &cm_client);
   4261
   4262	if (WARN_ON(!cm_dev))
   4263		return -EINVAL;
   4264
   4265	return sysfs_emit(
   4266		buf, "%ld\n",
   4267		atomic_long_read(
   4268			&cm_dev->port[port_num - 1]
   4269				 ->counters[cm_attr->group][cm_attr->index]));
   4270}
   4271
   4272#define CM_COUNTER_ATTR(_name, _group, _index)                                 \
   4273	{                                                                      \
   4274		.attr = __ATTR(_name, 0444, cm_show_counter, NULL),            \
   4275		.group = _group, .index = _index                               \
   4276	}
   4277
   4278#define CM_COUNTER_GROUP(_group, _name)                                        \
   4279	static struct cm_counter_attribute cm_counter_attr_##_group[] = {      \
   4280		CM_COUNTER_ATTR(req, _group, CM_REQ_COUNTER),                  \
   4281		CM_COUNTER_ATTR(mra, _group, CM_MRA_COUNTER),                  \
   4282		CM_COUNTER_ATTR(rej, _group, CM_REJ_COUNTER),                  \
   4283		CM_COUNTER_ATTR(rep, _group, CM_REP_COUNTER),                  \
   4284		CM_COUNTER_ATTR(rtu, _group, CM_RTU_COUNTER),                  \
   4285		CM_COUNTER_ATTR(dreq, _group, CM_DREQ_COUNTER),                \
   4286		CM_COUNTER_ATTR(drep, _group, CM_DREP_COUNTER),                \
   4287		CM_COUNTER_ATTR(sidr_req, _group, CM_SIDR_REQ_COUNTER),        \
   4288		CM_COUNTER_ATTR(sidr_rep, _group, CM_SIDR_REP_COUNTER),        \
   4289		CM_COUNTER_ATTR(lap, _group, CM_LAP_COUNTER),                  \
   4290		CM_COUNTER_ATTR(apr, _group, CM_APR_COUNTER),                  \
   4291	};                                                                     \
   4292	static struct attribute *cm_counter_attrs_##_group[] = {               \
   4293		&cm_counter_attr_##_group[0].attr.attr,                        \
   4294		&cm_counter_attr_##_group[1].attr.attr,                        \
   4295		&cm_counter_attr_##_group[2].attr.attr,                        \
   4296		&cm_counter_attr_##_group[3].attr.attr,                        \
   4297		&cm_counter_attr_##_group[4].attr.attr,                        \
   4298		&cm_counter_attr_##_group[5].attr.attr,                        \
   4299		&cm_counter_attr_##_group[6].attr.attr,                        \
   4300		&cm_counter_attr_##_group[7].attr.attr,                        \
   4301		&cm_counter_attr_##_group[8].attr.attr,                        \
   4302		&cm_counter_attr_##_group[9].attr.attr,                        \
   4303		&cm_counter_attr_##_group[10].attr.attr,                       \
   4304		NULL,                                                          \
   4305	};                                                                     \
   4306	static const struct attribute_group cm_counter_group_##_group = {      \
   4307		.name = _name,                                                 \
   4308		.attrs = cm_counter_attrs_##_group,                            \
   4309	};
   4310
   4311CM_COUNTER_GROUP(CM_XMIT, "cm_tx_msgs")
   4312CM_COUNTER_GROUP(CM_XMIT_RETRIES, "cm_tx_retries")
   4313CM_COUNTER_GROUP(CM_RECV, "cm_rx_msgs")
   4314CM_COUNTER_GROUP(CM_RECV_DUPLICATES, "cm_rx_duplicates")
   4315
   4316static const struct attribute_group *cm_counter_groups[] = {
   4317	&cm_counter_group_CM_XMIT,
   4318	&cm_counter_group_CM_XMIT_RETRIES,
   4319	&cm_counter_group_CM_RECV,
   4320	&cm_counter_group_CM_RECV_DUPLICATES,
   4321	NULL,
   4322};
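/*
 * These groups are registered once per CM-capable port and typically appear
 * in sysfs as, e.g., /sys/class/infiniband/<device>/ports/<port>/cm_rx_msgs/req
 * (the exact location follows from how the core registers port client
 * groups); each attribute reads back the corresponding atomic counter above.
 */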
   4323
   4324static int cm_add_one(struct ib_device *ib_device)
   4325{
   4326	struct cm_device *cm_dev;
   4327	struct cm_port *port;
   4328	struct ib_mad_reg_req reg_req = {
   4329		.mgmt_class = IB_MGMT_CLASS_CM,
   4330		.mgmt_class_version = IB_CM_CLASS_VERSION,
   4331	};
   4332	struct ib_port_modify port_modify = {
   4333		.set_port_cap_mask = IB_PORT_CM_SUP
   4334	};
   4335	unsigned long flags;
   4336	int ret;
   4337	int count = 0;
   4338	u32 i;
   4339
   4340	cm_dev = kzalloc(struct_size(cm_dev, port, ib_device->phys_port_cnt),
   4341			 GFP_KERNEL);
   4342	if (!cm_dev)
   4343		return -ENOMEM;
   4344
   4345	kref_init(&cm_dev->kref);
   4346	spin_lock_init(&cm_dev->mad_agent_lock);
   4347	cm_dev->ib_device = ib_device;
   4348	cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
   4349	cm_dev->going_down = 0;
   4350
   4351	ib_set_client_data(ib_device, &cm_client, cm_dev);
   4352
   4353	set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
   4354	rdma_for_each_port (ib_device, i) {
   4355		if (!rdma_cap_ib_cm(ib_device, i))
   4356			continue;
   4357
   4358		port = kzalloc(sizeof *port, GFP_KERNEL);
   4359		if (!port) {
   4360			ret = -ENOMEM;
   4361			goto error1;
   4362		}
   4363
   4364		cm_dev->port[i-1] = port;
   4365		port->cm_dev = cm_dev;
   4366		port->port_num = i;
   4367
   4368		ret = ib_port_register_client_groups(ib_device, i,
   4369						     cm_counter_groups);
   4370		if (ret)
   4371			goto error1;
   4372
   4373		port->mad_agent = ib_register_mad_agent(ib_device, i,
   4374							IB_QPT_GSI,
   4375							&reg_req,
   4376							0,
   4377							cm_send_handler,
   4378							cm_recv_handler,
   4379							port,
   4380							0);
   4381		if (IS_ERR(port->mad_agent)) {
   4382			ret = PTR_ERR(port->mad_agent);
   4383			goto error2;
   4384		}
   4385
   4386		ret = ib_modify_port(ib_device, i, 0, &port_modify);
   4387		if (ret)
   4388			goto error3;
   4389
   4390		count++;
   4391	}
   4392
   4393	if (!count) {
   4394		ret = -EOPNOTSUPP;
   4395		goto free;
   4396	}
   4397
   4398	write_lock_irqsave(&cm.device_lock, flags);
   4399	list_add_tail(&cm_dev->list, &cm.device_list);
   4400	write_unlock_irqrestore(&cm.device_lock, flags);
   4401	return 0;
   4402
   4403error3:
   4404	ib_unregister_mad_agent(port->mad_agent);
   4405error2:
   4406	ib_port_unregister_client_groups(ib_device, i, cm_counter_groups);
   4407error1:
   4408	port_modify.set_port_cap_mask = 0;
   4409	port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
   4410	while (--i) {
   4411		if (!rdma_cap_ib_cm(ib_device, i))
   4412			continue;
   4413
   4414		port = cm_dev->port[i-1];
   4415		ib_modify_port(ib_device, port->port_num, 0, &port_modify);
   4416		ib_unregister_mad_agent(port->mad_agent);
   4417		ib_port_unregister_client_groups(ib_device, i,
   4418						 cm_counter_groups);
   4419	}
   4420free:
   4421	cm_device_put(cm_dev);
   4422	return ret;
   4423}
   4424
   4425static void cm_remove_one(struct ib_device *ib_device, void *client_data)
   4426{
   4427	struct cm_device *cm_dev = client_data;
   4428	struct cm_port *port;
   4429	struct ib_port_modify port_modify = {
   4430		.clr_port_cap_mask = IB_PORT_CM_SUP
   4431	};
   4432	unsigned long flags;
   4433	u32 i;
   4434
   4435	write_lock_irqsave(&cm.device_lock, flags);
   4436	list_del(&cm_dev->list);
   4437	write_unlock_irqrestore(&cm.device_lock, flags);
   4438
   4439	spin_lock_irq(&cm.lock);
   4440	cm_dev->going_down = 1;
   4441	spin_unlock_irq(&cm.lock);
   4442
   4443	rdma_for_each_port (ib_device, i) {
   4444		struct ib_mad_agent *mad_agent;
   4445
   4446		if (!rdma_cap_ib_cm(ib_device, i))
   4447			continue;
   4448
   4449		port = cm_dev->port[i-1];
   4450		mad_agent = port->mad_agent;
   4451		ib_modify_port(ib_device, port->port_num, 0, &port_modify);
    4452		/*
    4453		 * Flush the queue here, after going_down has been set; this
    4454		 * ensures that no new work will be queued by the recv handler,
    4455		 * after which we can call ib_unregister_mad_agent().
    4456		 */
   4457		flush_workqueue(cm.wq);
   4458		/*
   4459		 * The above ensures no call paths from the work are running,
   4460		 * the remaining paths all take the mad_agent_lock.
   4461		 */
   4462		spin_lock(&cm_dev->mad_agent_lock);
   4463		port->mad_agent = NULL;
   4464		spin_unlock(&cm_dev->mad_agent_lock);
   4465		ib_unregister_mad_agent(mad_agent);
   4466		ib_port_unregister_client_groups(ib_device, i,
   4467						 cm_counter_groups);
   4468	}
   4469
   4470	cm_device_put(cm_dev);
   4471}
   4472
   4473static int __init ib_cm_init(void)
   4474{
   4475	int ret;
   4476
   4477	INIT_LIST_HEAD(&cm.device_list);
   4478	rwlock_init(&cm.device_lock);
   4479	spin_lock_init(&cm.lock);
   4480	cm.listen_service_table = RB_ROOT;
   4481	cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
   4482	cm.remote_id_table = RB_ROOT;
   4483	cm.remote_qp_table = RB_ROOT;
   4484	cm.remote_sidr_table = RB_ROOT;
   4485	xa_init_flags(&cm.local_id_table, XA_FLAGS_ALLOC);
   4486	get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
   4487	INIT_LIST_HEAD(&cm.timewait_list);
   4488
   4489	cm.wq = alloc_workqueue("ib_cm", 0, 1);
   4490	if (!cm.wq) {
   4491		ret = -ENOMEM;
   4492		goto error2;
   4493	}
   4494
   4495	ret = ib_register_client(&cm_client);
   4496	if (ret)
   4497		goto error3;
   4498
   4499	return 0;
   4500error3:
   4501	destroy_workqueue(cm.wq);
   4502error2:
   4503	return ret;
   4504}
   4505
   4506static void __exit ib_cm_cleanup(void)
   4507{
   4508	struct cm_timewait_info *timewait_info, *tmp;
   4509
   4510	spin_lock_irq(&cm.lock);
   4511	list_for_each_entry(timewait_info, &cm.timewait_list, list)
   4512		cancel_delayed_work(&timewait_info->work.work);
   4513	spin_unlock_irq(&cm.lock);
   4514
   4515	ib_unregister_client(&cm_client);
   4516	destroy_workqueue(cm.wq);
   4517
   4518	list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) {
   4519		list_del(&timewait_info->list);
   4520		kfree(timewait_info);
   4521	}
   4522
   4523	WARN_ON(!xa_empty(&cm.local_id_table));
   4524}
   4525
   4526module_init(ib_cm_init);
   4527module_exit(ib_cm_cleanup);