cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

mad.c (90293B)


      1/*
      2 * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved.
      3 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
      4 * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
      5 * Copyright (c) 2009 HNR Consulting. All rights reserved.
      6 * Copyright (c) 2014,2018 Intel Corporation.  All rights reserved.
      7 *
      8 * This software is available to you under a choice of one of two
      9 * licenses.  You may choose to be licensed under the terms of the GNU
     10 * General Public License (GPL) Version 2, available from the file
     11 * COPYING in the main directory of this source tree, or the
     12 * OpenIB.org BSD license below:
     13 *
     14 *     Redistribution and use in source and binary forms, with or
     15 *     without modification, are permitted provided that the following
     16 *     conditions are met:
     17 *
     18 *      - Redistributions of source code must retain the above
     19 *        copyright notice, this list of conditions and the following
     20 *        disclaimer.
     21 *
     22 *      - Redistributions in binary form must reproduce the above
     23 *        copyright notice, this list of conditions and the following
     24 *        disclaimer in the documentation and/or other materials
     25 *        provided with the distribution.
     26 *
     27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
     30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
     31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
     32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     34 * SOFTWARE.
     35 *
     36 */
     37
     38#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
     39
     40#include <linux/dma-mapping.h>
     41#include <linux/slab.h>
     42#include <linux/module.h>
     43#include <linux/security.h>
     44#include <linux/xarray.h>
     45#include <rdma/ib_cache.h>
     46
     47#include "mad_priv.h"
     48#include "core_priv.h"
     49#include "mad_rmpp.h"
     50#include "smi.h"
     51#include "opa_smi.h"
     52#include "agent.h"
     53
     54#define CREATE_TRACE_POINTS
     55#include <trace/events/ib_mad.h>
     56
     57#ifdef CONFIG_TRACEPOINTS
     58static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr,
     59			  struct ib_mad_qp_info *qp_info,
     60			  struct trace_event_raw_ib_mad_send_template *entry)
     61{
     62	u16 pkey;
     63	struct ib_device *dev = qp_info->port_priv->device;
     64	u32 pnum = qp_info->port_priv->port_num;
     65	struct ib_ud_wr *wr = &mad_send_wr->send_wr;
     66	struct rdma_ah_attr attr = {};
     67
     68	rdma_query_ah(wr->ah, &attr);
     69
     70	/* These are common */
     71	entry->sl = attr.sl;
     72	ib_query_pkey(dev, pnum, wr->pkey_index, &pkey);
     73	entry->pkey = pkey;
     74	entry->rqpn = wr->remote_qpn;
     75	entry->rqkey = wr->remote_qkey;
     76	entry->dlid = rdma_ah_get_dlid(&attr);
     77}
     78#endif
     79
     80static int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
     81static int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
     82
     83module_param_named(send_queue_size, mad_sendq_size, int, 0444);
     84MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests");
     85module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
     86MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
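
/*
 * A minimal usage sketch (not part of the original file): both parameters
 * are registered read-only (0444), so they would be given at load time of
 * the module that mad.c is linked into (ib_core.ko in mainline trees), e.g.
 * via a modprobe.d entry:
 *
 *	options ib_core send_queue_size=256 recv_queue_size=1024
 *
 * The current values are visible under /sys/module/<module>/parameters/.
 */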
     87
     88static DEFINE_XARRAY_ALLOC1(ib_mad_clients);
     89static u32 ib_mad_client_next;
     90static struct list_head ib_mad_port_list;
     91
     92/* Port list lock */
     93static DEFINE_SPINLOCK(ib_mad_port_list_lock);
     94
     95/* Forward declarations */
     96static int method_in_use(struct ib_mad_mgmt_method_table **method,
     97			 struct ib_mad_reg_req *mad_reg_req);
     98static void remove_mad_reg_req(struct ib_mad_agent_private *priv);
     99static struct ib_mad_agent_private *find_mad_agent(
    100					struct ib_mad_port_private *port_priv,
    101					const struct ib_mad_hdr *mad);
    102static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
    103				    struct ib_mad_private *mad);
    104static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv);
    105static void timeout_sends(struct work_struct *work);
    106static void local_completions(struct work_struct *work);
    107static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
    108			      struct ib_mad_agent_private *agent_priv,
    109			      u8 mgmt_class);
    110static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
    111			   struct ib_mad_agent_private *agent_priv);
    112static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
    113			      struct ib_wc *wc);
    114static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc);
    115
    116/*
     117 * Returns an ib_mad_port_private structure or NULL for a device/port
    118 * Assumes ib_mad_port_list_lock is being held
    119 */
    120static inline struct ib_mad_port_private *
    121__ib_get_mad_port(struct ib_device *device, u32 port_num)
    122{
    123	struct ib_mad_port_private *entry;
    124
    125	list_for_each_entry(entry, &ib_mad_port_list, port_list) {
    126		if (entry->device == device && entry->port_num == port_num)
    127			return entry;
    128	}
    129	return NULL;
    130}
    131
    132/*
     133 * Wrapper function to return an ib_mad_port_private structure or NULL
    134 * for a device/port
    135 */
    136static inline struct ib_mad_port_private *
    137ib_get_mad_port(struct ib_device *device, u32 port_num)
    138{
    139	struct ib_mad_port_private *entry;
    140	unsigned long flags;
    141
    142	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
    143	entry = __ib_get_mad_port(device, port_num);
    144	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
    145
    146	return entry;
    147}
    148
    149static inline u8 convert_mgmt_class(u8 mgmt_class)
    150{
    151	/* Alias IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE to 0 */
    152	return mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ?
    153		0 : mgmt_class;
    154}
    155
    156static int get_spl_qp_index(enum ib_qp_type qp_type)
    157{
    158	switch (qp_type) {
    159	case IB_QPT_SMI:
    160		return 0;
    161	case IB_QPT_GSI:
    162		return 1;
    163	default:
    164		return -1;
    165	}
    166}
    167
    168static int vendor_class_index(u8 mgmt_class)
    169{
    170	return mgmt_class - IB_MGMT_CLASS_VENDOR_RANGE2_START;
    171}
    172
    173static int is_vendor_class(u8 mgmt_class)
    174{
    175	if ((mgmt_class < IB_MGMT_CLASS_VENDOR_RANGE2_START) ||
    176	    (mgmt_class > IB_MGMT_CLASS_VENDOR_RANGE2_END))
    177		return 0;
    178	return 1;
    179}
    180
    181static int is_vendor_oui(char *oui)
    182{
    183	if (oui[0] || oui[1] || oui[2])
    184		return 1;
    185	return 0;
    186}
    187
    188static int is_vendor_method_in_use(
    189		struct ib_mad_mgmt_vendor_class *vendor_class,
    190		struct ib_mad_reg_req *mad_reg_req)
    191{
    192	struct ib_mad_mgmt_method_table *method;
    193	int i;
    194
    195	for (i = 0; i < MAX_MGMT_OUI; i++) {
    196		if (!memcmp(vendor_class->oui[i], mad_reg_req->oui, 3)) {
    197			method = vendor_class->method_table[i];
    198			if (method) {
    199				if (method_in_use(&method, mad_reg_req))
    200					return 1;
    201				else
    202					break;
    203			}
    204		}
    205	}
    206	return 0;
    207}
    208
    209int ib_response_mad(const struct ib_mad_hdr *hdr)
    210{
    211	return ((hdr->method & IB_MGMT_METHOD_RESP) ||
    212		(hdr->method == IB_MGMT_METHOD_TRAP_REPRESS) ||
    213		((hdr->mgmt_class == IB_MGMT_CLASS_BM) &&
    214		 (hdr->attr_mod & IB_BM_ATTR_MOD_RESP)));
    215}
    216EXPORT_SYMBOL(ib_response_mad);
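
/*
 * Illustrative sketch (hypothetical helper, not from the original file):
 * a receive path can use ib_response_mad() to decide whether an incoming
 * MAD answers an outstanding request or starts a new transaction.
 */
#if 0
static void example_classify(const struct ib_mad_recv_wc *mad_recv_wc)
{
	const struct ib_mad_hdr *hdr = &mad_recv_wc->recv_buf.mad->mad_hdr;

	if (ib_response_mad(hdr)) {
		/* match against a pending request by transaction ID */
	} else {
		/* handle as a new request, possibly generating a reply */
	}
}
#endif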
    217
    218/*
    219 * ib_register_mad_agent - Register to send/receive MADs
    220 *
    221 * Context: Process context.
    222 */
    223struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
    224					   u32 port_num,
    225					   enum ib_qp_type qp_type,
    226					   struct ib_mad_reg_req *mad_reg_req,
    227					   u8 rmpp_version,
    228					   ib_mad_send_handler send_handler,
    229					   ib_mad_recv_handler recv_handler,
    230					   void *context,
    231					   u32 registration_flags)
    232{
    233	struct ib_mad_port_private *port_priv;
    234	struct ib_mad_agent *ret = ERR_PTR(-EINVAL);
    235	struct ib_mad_agent_private *mad_agent_priv;
    236	struct ib_mad_reg_req *reg_req = NULL;
    237	struct ib_mad_mgmt_class_table *class;
    238	struct ib_mad_mgmt_vendor_class_table *vendor;
    239	struct ib_mad_mgmt_vendor_class *vendor_class;
    240	struct ib_mad_mgmt_method_table *method;
    241	int ret2, qpn;
    242	u8 mgmt_class, vclass;
    243
    244	if ((qp_type == IB_QPT_SMI && !rdma_cap_ib_smi(device, port_num)) ||
    245	    (qp_type == IB_QPT_GSI && !rdma_cap_ib_cm(device, port_num)))
    246		return ERR_PTR(-EPROTONOSUPPORT);
    247
    248	/* Validate parameters */
    249	qpn = get_spl_qp_index(qp_type);
    250	if (qpn == -1) {
    251		dev_dbg_ratelimited(&device->dev, "%s: invalid QP Type %d\n",
    252				    __func__, qp_type);
    253		goto error1;
    254	}
    255
    256	if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) {
    257		dev_dbg_ratelimited(&device->dev,
    258				    "%s: invalid RMPP Version %u\n",
    259				    __func__, rmpp_version);
    260		goto error1;
    261	}
    262
    263	/* Validate MAD registration request if supplied */
    264	if (mad_reg_req) {
    265		if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) {
    266			dev_dbg_ratelimited(&device->dev,
    267					    "%s: invalid Class Version %u\n",
    268					    __func__,
    269					    mad_reg_req->mgmt_class_version);
    270			goto error1;
    271		}
    272		if (!recv_handler) {
    273			dev_dbg_ratelimited(&device->dev,
    274					    "%s: no recv_handler\n", __func__);
    275			goto error1;
    276		}
    277		if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
    278			/*
    279			 * IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only
    280			 * one in this range currently allowed
    281			 */
    282			if (mad_reg_req->mgmt_class !=
    283			    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
    284				dev_dbg_ratelimited(&device->dev,
    285					"%s: Invalid Mgmt Class 0x%x\n",
    286					__func__, mad_reg_req->mgmt_class);
    287				goto error1;
    288			}
    289		} else if (mad_reg_req->mgmt_class == 0) {
    290			/*
    291			 * Class 0 is reserved in IBA and is used for
    292			 * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
    293			 */
    294			dev_dbg_ratelimited(&device->dev,
    295					    "%s: Invalid Mgmt Class 0\n",
    296					    __func__);
    297			goto error1;
    298		} else if (is_vendor_class(mad_reg_req->mgmt_class)) {
    299			/*
    300			 * If class is in "new" vendor range,
    301			 * ensure supplied OUI is not zero
    302			 */
    303			if (!is_vendor_oui(mad_reg_req->oui)) {
    304				dev_dbg_ratelimited(&device->dev,
    305					"%s: No OUI specified for class 0x%x\n",
    306					__func__,
    307					mad_reg_req->mgmt_class);
    308				goto error1;
    309			}
    310		}
    311		/* Make sure class supplied is consistent with RMPP */
    312		if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
    313			if (rmpp_version) {
    314				dev_dbg_ratelimited(&device->dev,
    315					"%s: RMPP version for non-RMPP class 0x%x\n",
    316					__func__, mad_reg_req->mgmt_class);
    317				goto error1;
    318			}
    319		}
    320
    321		/* Make sure class supplied is consistent with QP type */
    322		if (qp_type == IB_QPT_SMI) {
    323			if ((mad_reg_req->mgmt_class !=
    324					IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
    325			    (mad_reg_req->mgmt_class !=
    326					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
    327				dev_dbg_ratelimited(&device->dev,
    328					"%s: Invalid SM QP type: class 0x%x\n",
    329					__func__, mad_reg_req->mgmt_class);
    330				goto error1;
    331			}
    332		} else {
    333			if ((mad_reg_req->mgmt_class ==
    334					IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
    335			    (mad_reg_req->mgmt_class ==
    336					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
    337				dev_dbg_ratelimited(&device->dev,
    338					"%s: Invalid GS QP type: class 0x%x\n",
    339					__func__, mad_reg_req->mgmt_class);
    340				goto error1;
    341			}
    342		}
    343	} else {
    344		/* No registration request supplied */
    345		if (!send_handler)
    346			goto error1;
    347		if (registration_flags & IB_MAD_USER_RMPP)
    348			goto error1;
    349	}
    350
    351	/* Validate device and port */
    352	port_priv = ib_get_mad_port(device, port_num);
    353	if (!port_priv) {
    354		dev_dbg_ratelimited(&device->dev, "%s: Invalid port %u\n",
    355				    __func__, port_num);
    356		ret = ERR_PTR(-ENODEV);
    357		goto error1;
    358	}
    359
    360	/* Verify the QP requested is supported. For example, Ethernet devices
    361	 * will not have QP0.
    362	 */
    363	if (!port_priv->qp_info[qpn].qp) {
    364		dev_dbg_ratelimited(&device->dev, "%s: QP %d not supported\n",
    365				    __func__, qpn);
    366		ret = ERR_PTR(-EPROTONOSUPPORT);
    367		goto error1;
    368	}
    369
    370	/* Allocate structures */
    371	mad_agent_priv = kzalloc(sizeof *mad_agent_priv, GFP_KERNEL);
    372	if (!mad_agent_priv) {
    373		ret = ERR_PTR(-ENOMEM);
    374		goto error1;
    375	}
    376
    377	if (mad_reg_req) {
    378		reg_req = kmemdup(mad_reg_req, sizeof *reg_req, GFP_KERNEL);
    379		if (!reg_req) {
    380			ret = ERR_PTR(-ENOMEM);
    381			goto error3;
    382		}
    383	}
    384
    385	/* Now, fill in the various structures */
    386	mad_agent_priv->qp_info = &port_priv->qp_info[qpn];
    387	mad_agent_priv->reg_req = reg_req;
    388	mad_agent_priv->agent.rmpp_version = rmpp_version;
    389	mad_agent_priv->agent.device = device;
    390	mad_agent_priv->agent.recv_handler = recv_handler;
    391	mad_agent_priv->agent.send_handler = send_handler;
    392	mad_agent_priv->agent.context = context;
    393	mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp;
    394	mad_agent_priv->agent.port_num = port_num;
    395	mad_agent_priv->agent.flags = registration_flags;
    396	spin_lock_init(&mad_agent_priv->lock);
    397	INIT_LIST_HEAD(&mad_agent_priv->send_list);
    398	INIT_LIST_HEAD(&mad_agent_priv->wait_list);
    399	INIT_LIST_HEAD(&mad_agent_priv->done_list);
    400	INIT_LIST_HEAD(&mad_agent_priv->rmpp_list);
    401	INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends);
    402	INIT_LIST_HEAD(&mad_agent_priv->local_list);
    403	INIT_WORK(&mad_agent_priv->local_work, local_completions);
    404	refcount_set(&mad_agent_priv->refcount, 1);
    405	init_completion(&mad_agent_priv->comp);
    406
    407	ret2 = ib_mad_agent_security_setup(&mad_agent_priv->agent, qp_type);
    408	if (ret2) {
    409		ret = ERR_PTR(ret2);
    410		goto error4;
    411	}
    412
    413	/*
    414	 * The mlx4 driver uses the top byte to distinguish which virtual
    415	 * function generated the MAD, so we must avoid using it.
    416	 */
    417	ret2 = xa_alloc_cyclic(&ib_mad_clients, &mad_agent_priv->agent.hi_tid,
    418			mad_agent_priv, XA_LIMIT(0, (1 << 24) - 1),
    419			&ib_mad_client_next, GFP_KERNEL);
    420	if (ret2 < 0) {
    421		ret = ERR_PTR(ret2);
    422		goto error5;
    423	}
    424
    425	/*
    426	 * Make sure MAD registration (if supplied)
    427	 * is non overlapping with any existing ones
    428	 */
    429	spin_lock_irq(&port_priv->reg_lock);
    430	if (mad_reg_req) {
    431		mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class);
    432		if (!is_vendor_class(mgmt_class)) {
    433			class = port_priv->version[mad_reg_req->
    434						   mgmt_class_version].class;
    435			if (class) {
    436				method = class->method_table[mgmt_class];
    437				if (method) {
    438					if (method_in_use(&method,
    439							   mad_reg_req))
    440						goto error6;
    441				}
    442			}
    443			ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv,
    444						  mgmt_class);
    445		} else {
    446			/* "New" vendor class range */
    447			vendor = port_priv->version[mad_reg_req->
    448						    mgmt_class_version].vendor;
    449			if (vendor) {
    450				vclass = vendor_class_index(mgmt_class);
    451				vendor_class = vendor->vendor_class[vclass];
    452				if (vendor_class) {
    453					if (is_vendor_method_in_use(
    454							vendor_class,
    455							mad_reg_req))
    456						goto error6;
    457				}
    458			}
    459			ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv);
    460		}
    461		if (ret2) {
    462			ret = ERR_PTR(ret2);
    463			goto error6;
    464		}
    465	}
    466	spin_unlock_irq(&port_priv->reg_lock);
    467
    468	trace_ib_mad_create_agent(mad_agent_priv);
    469	return &mad_agent_priv->agent;
    470error6:
    471	spin_unlock_irq(&port_priv->reg_lock);
    472	xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
    473error5:
    474	ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
    475error4:
    476	kfree(reg_req);
    477error3:
    478	kfree(mad_agent_priv);
    479error1:
    480	return ret;
    481}
    482EXPORT_SYMBOL(ib_register_mad_agent);
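
/*
 * A minimal registration sketch (hypothetical caller and handlers, values
 * illustrative): a GSI client fills in an ib_mad_reg_req naming the class,
 * class version and methods it wants to receive, then registers its
 * handlers.  Passing a NULL mad_reg_req is also valid for agents that only
 * send MADs and receive the matching responses.
 */
#if 0
static struct ib_mad_agent *example_register(struct ib_device *device,
					     u32 port_num)
{
	struct ib_mad_reg_req reg_req = {
		.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT,
		.mgmt_class_version = 1,
	};

	set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
	set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);

	return ib_register_mad_agent(device, port_num, IB_QPT_GSI, &reg_req,
				     0 /* rmpp_version */,
				     example_send_handler,
				     example_recv_handler,
				     NULL /* context */,
				     0 /* registration_flags */);
}
#endif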
    483
    484static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
    485{
    486	if (refcount_dec_and_test(&mad_agent_priv->refcount))
    487		complete(&mad_agent_priv->comp);
    488}
    489
    490static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
    491{
    492	struct ib_mad_port_private *port_priv;
    493
    494	/* Note that we could still be handling received MADs */
    495	trace_ib_mad_unregister_agent(mad_agent_priv);
    496
    497	/*
    498	 * Canceling all sends results in dropping received response
    499	 * MADs, preventing us from queuing additional work
    500	 */
    501	cancel_mads(mad_agent_priv);
    502	port_priv = mad_agent_priv->qp_info->port_priv;
    503	cancel_delayed_work(&mad_agent_priv->timed_work);
    504
    505	spin_lock_irq(&port_priv->reg_lock);
    506	remove_mad_reg_req(mad_agent_priv);
    507	spin_unlock_irq(&port_priv->reg_lock);
    508	xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
    509
    510	flush_workqueue(port_priv->wq);
    511
    512	deref_mad_agent(mad_agent_priv);
    513	wait_for_completion(&mad_agent_priv->comp);
    514	ib_cancel_rmpp_recvs(mad_agent_priv);
    515
    516	ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
    517
    518	kfree(mad_agent_priv->reg_req);
    519	kfree_rcu(mad_agent_priv, rcu);
    520}
    521
    522/*
    523 * ib_unregister_mad_agent - Unregisters a client from using MAD services
    524 *
    525 * Context: Process context.
    526 */
    527void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
    528{
    529	struct ib_mad_agent_private *mad_agent_priv;
    530
    531	mad_agent_priv = container_of(mad_agent,
    532				      struct ib_mad_agent_private,
    533				      agent);
    534	unregister_mad_agent(mad_agent_priv);
    535}
    536EXPORT_SYMBOL(ib_unregister_mad_agent);
    537
    538static void dequeue_mad(struct ib_mad_list_head *mad_list)
    539{
    540	struct ib_mad_queue *mad_queue;
    541	unsigned long flags;
    542
    543	mad_queue = mad_list->mad_queue;
    544	spin_lock_irqsave(&mad_queue->lock, flags);
    545	list_del(&mad_list->list);
    546	mad_queue->count--;
    547	spin_unlock_irqrestore(&mad_queue->lock, flags);
    548}
    549
    550static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid,
    551		u16 pkey_index, u32 port_num, struct ib_wc *wc)
    552{
    553	memset(wc, 0, sizeof *wc);
    554	wc->wr_cqe = cqe;
    555	wc->status = IB_WC_SUCCESS;
    556	wc->opcode = IB_WC_RECV;
    557	wc->pkey_index = pkey_index;
    558	wc->byte_len = sizeof(struct ib_mad) + sizeof(struct ib_grh);
    559	wc->src_qp = IB_QP0;
    560	wc->qp = qp;
    561	wc->slid = slid;
    562	wc->sl = 0;
    563	wc->dlid_path_bits = 0;
    564	wc->port_num = port_num;
    565}
    566
    567static size_t mad_priv_size(const struct ib_mad_private *mp)
    568{
    569	return sizeof(struct ib_mad_private) + mp->mad_size;
    570}
    571
    572static struct ib_mad_private *alloc_mad_private(size_t mad_size, gfp_t flags)
    573{
    574	size_t size = sizeof(struct ib_mad_private) + mad_size;
    575	struct ib_mad_private *ret = kzalloc(size, flags);
    576
    577	if (ret)
    578		ret->mad_size = mad_size;
    579
    580	return ret;
    581}
    582
    583static size_t port_mad_size(const struct ib_mad_port_private *port_priv)
    584{
    585	return rdma_max_mad_size(port_priv->device, port_priv->port_num);
    586}
    587
    588static size_t mad_priv_dma_size(const struct ib_mad_private *mp)
    589{
    590	return sizeof(struct ib_grh) + mp->mad_size;
    591}
    592
    593/*
    594 * Return 0 if SMP is to be sent
    595 * Return 1 if SMP was consumed locally (whether or not solicited)
    596 * Return < 0 if error
    597 */
    598static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
    599				  struct ib_mad_send_wr_private *mad_send_wr)
    600{
    601	int ret = 0;
    602	struct ib_smp *smp = mad_send_wr->send_buf.mad;
    603	struct opa_smp *opa_smp = (struct opa_smp *)smp;
    604	unsigned long flags;
    605	struct ib_mad_local_private *local;
    606	struct ib_mad_private *mad_priv;
    607	struct ib_mad_port_private *port_priv;
    608	struct ib_mad_agent_private *recv_mad_agent = NULL;
    609	struct ib_device *device = mad_agent_priv->agent.device;
    610	u32 port_num;
    611	struct ib_wc mad_wc;
    612	struct ib_ud_wr *send_wr = &mad_send_wr->send_wr;
    613	size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv);
    614	u16 out_mad_pkey_index = 0;
    615	u16 drslid;
    616	bool opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device,
    617				    mad_agent_priv->qp_info->port_priv->port_num);
    618
    619	if (rdma_cap_ib_switch(device) &&
    620	    smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
    621		port_num = send_wr->port_num;
    622	else
    623		port_num = mad_agent_priv->agent.port_num;
    624
    625	/*
    626	 * Directed route handling starts if the initial LID routed part of
    627	 * a request or the ending LID routed part of a response is empty.
    628	 * If we are at the start of the LID routed part, don't update the
    629	 * hop_ptr or hop_cnt.  See section 14.2.2, Vol 1 IB spec.
    630	 */
    631	if (opa && smp->class_version == OPA_SM_CLASS_VERSION) {
    632		u32 opa_drslid;
    633
    634		trace_ib_mad_handle_out_opa_smi(opa_smp);
    635
    636		if ((opa_get_smp_direction(opa_smp)
    637		     ? opa_smp->route.dr.dr_dlid : opa_smp->route.dr.dr_slid) ==
    638		     OPA_LID_PERMISSIVE &&
    639		     opa_smi_handle_dr_smp_send(opa_smp,
    640						rdma_cap_ib_switch(device),
    641						port_num) == IB_SMI_DISCARD) {
    642			ret = -EINVAL;
    643			dev_err(&device->dev, "OPA Invalid directed route\n");
    644			goto out;
    645		}
    646		opa_drslid = be32_to_cpu(opa_smp->route.dr.dr_slid);
    647		if (opa_drslid != be32_to_cpu(OPA_LID_PERMISSIVE) &&
    648		    opa_drslid & 0xffff0000) {
    649			ret = -EINVAL;
    650			dev_err(&device->dev, "OPA Invalid dr_slid 0x%x\n",
    651			       opa_drslid);
    652			goto out;
    653		}
    654		drslid = (u16)(opa_drslid & 0x0000ffff);
    655
    656		/* Check to post send on QP or process locally */
    657		if (opa_smi_check_local_smp(opa_smp, device) == IB_SMI_DISCARD &&
    658		    opa_smi_check_local_returning_smp(opa_smp, device) == IB_SMI_DISCARD)
    659			goto out;
    660	} else {
    661		trace_ib_mad_handle_out_ib_smi(smp);
    662
    663		if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) ==
    664		     IB_LID_PERMISSIVE &&
    665		     smi_handle_dr_smp_send(smp, rdma_cap_ib_switch(device), port_num) ==
    666		     IB_SMI_DISCARD) {
    667			ret = -EINVAL;
    668			dev_err(&device->dev, "Invalid directed route\n");
    669			goto out;
    670		}
    671		drslid = be16_to_cpu(smp->dr_slid);
    672
    673		/* Check to post send on QP or process locally */
    674		if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD &&
    675		    smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD)
    676			goto out;
    677	}
    678
    679	local = kmalloc(sizeof *local, GFP_ATOMIC);
    680	if (!local) {
    681		ret = -ENOMEM;
    682		goto out;
    683	}
    684	local->mad_priv = NULL;
    685	local->recv_mad_agent = NULL;
    686	mad_priv = alloc_mad_private(mad_size, GFP_ATOMIC);
    687	if (!mad_priv) {
    688		ret = -ENOMEM;
    689		kfree(local);
    690		goto out;
    691	}
    692
    693	build_smp_wc(mad_agent_priv->agent.qp,
    694		     send_wr->wr.wr_cqe, drslid,
    695		     send_wr->pkey_index,
    696		     send_wr->port_num, &mad_wc);
    697
    698	if (opa && smp->base_version == OPA_MGMT_BASE_VERSION) {
    699		mad_wc.byte_len = mad_send_wr->send_buf.hdr_len
    700					+ mad_send_wr->send_buf.data_len
    701					+ sizeof(struct ib_grh);
    702	}
    703
    704	/* No GRH for DR SMP */
    705	ret = device->ops.process_mad(device, 0, port_num, &mad_wc, NULL,
    706				      (const struct ib_mad *)smp,
    707				      (struct ib_mad *)mad_priv->mad, &mad_size,
    708				      &out_mad_pkey_index);
    709	switch (ret) {
    710	case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY:
    711		if (ib_response_mad((const struct ib_mad_hdr *)mad_priv->mad) &&
    712		    mad_agent_priv->agent.recv_handler) {
    713			local->mad_priv = mad_priv;
    714			local->recv_mad_agent = mad_agent_priv;
    715			/*
    716			 * Reference MAD agent until receive
    717			 * side of local completion handled
    718			 */
    719			refcount_inc(&mad_agent_priv->refcount);
    720		} else
    721			kfree(mad_priv);
    722		break;
    723	case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED:
    724		kfree(mad_priv);
    725		break;
    726	case IB_MAD_RESULT_SUCCESS:
    727		/* Treat like an incoming receive MAD */
    728		port_priv = ib_get_mad_port(mad_agent_priv->agent.device,
    729					    mad_agent_priv->agent.port_num);
    730		if (port_priv) {
    731			memcpy(mad_priv->mad, smp, mad_priv->mad_size);
    732			recv_mad_agent = find_mad_agent(port_priv,
    733						        (const struct ib_mad_hdr *)mad_priv->mad);
    734		}
    735		if (!port_priv || !recv_mad_agent) {
    736			/*
    737			 * No receiving agent so drop packet and
    738			 * generate send completion.
    739			 */
    740			kfree(mad_priv);
    741			break;
    742		}
    743		local->mad_priv = mad_priv;
    744		local->recv_mad_agent = recv_mad_agent;
    745		break;
    746	default:
    747		kfree(mad_priv);
    748		kfree(local);
    749		ret = -EINVAL;
    750		goto out;
    751	}
    752
    753	local->mad_send_wr = mad_send_wr;
    754	if (opa) {
    755		local->mad_send_wr->send_wr.pkey_index = out_mad_pkey_index;
    756		local->return_wc_byte_len = mad_size;
    757	}
    758	/* Reference MAD agent until send side of local completion handled */
    759	refcount_inc(&mad_agent_priv->refcount);
    760	/* Queue local completion to local list */
    761	spin_lock_irqsave(&mad_agent_priv->lock, flags);
    762	list_add_tail(&local->completion_list, &mad_agent_priv->local_list);
    763	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
    764	queue_work(mad_agent_priv->qp_info->port_priv->wq,
    765		   &mad_agent_priv->local_work);
    766	ret = 1;
    767out:
    768	return ret;
    769}
    770
    771static int get_pad_size(int hdr_len, int data_len, size_t mad_size)
    772{
    773	int seg_size, pad;
    774
    775	seg_size = mad_size - hdr_len;
    776	if (data_len && seg_size) {
    777		pad = seg_size - data_len % seg_size;
    778		return pad == seg_size ? 0 : pad;
    779	} else
    780		return seg_size;
    781}
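
/*
 * Worked example (numbers only illustrative): for an IB MAD, mad_size is
 * sizeof(struct ib_mad) == 256.  With hdr_len == 56 (IB_MGMT_SA_HDR) each
 * segment carries 200 payload bytes, so data_len == 120 is padded with 80
 * bytes to fill one segment, while data_len == 400 fills two segments
 * exactly and needs no padding.
 */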
    782
    783static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr)
    784{
    785	struct ib_rmpp_segment *s, *t;
    786
    787	list_for_each_entry_safe(s, t, &mad_send_wr->rmpp_list, list) {
    788		list_del(&s->list);
    789		kfree(s);
    790	}
    791}
    792
    793static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
    794				size_t mad_size, gfp_t gfp_mask)
    795{
    796	struct ib_mad_send_buf *send_buf = &send_wr->send_buf;
    797	struct ib_rmpp_mad *rmpp_mad = send_buf->mad;
    798	struct ib_rmpp_segment *seg = NULL;
    799	int left, seg_size, pad;
    800
    801	send_buf->seg_size = mad_size - send_buf->hdr_len;
    802	send_buf->seg_rmpp_size = mad_size - IB_MGMT_RMPP_HDR;
    803	seg_size = send_buf->seg_size;
    804	pad = send_wr->pad;
    805
    806	/* Allocate data segments. */
    807	for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
    808		seg = kmalloc(sizeof(*seg) + seg_size, gfp_mask);
    809		if (!seg) {
    810			free_send_rmpp_list(send_wr);
    811			return -ENOMEM;
    812		}
    813		seg->num = ++send_buf->seg_count;
    814		list_add_tail(&seg->list, &send_wr->rmpp_list);
    815	}
    816
    817	/* Zero any padding */
    818	if (pad)
    819		memset(seg->data + seg_size - pad, 0, pad);
    820
    821	rmpp_mad->rmpp_hdr.rmpp_version = send_wr->mad_agent_priv->
    822					  agent.rmpp_version;
    823	rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
    824	ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
    825
    826	send_wr->cur_seg = container_of(send_wr->rmpp_list.next,
    827					struct ib_rmpp_segment, list);
    828	send_wr->last_ack_seg = send_wr->cur_seg;
    829	return 0;
    830}
    831
    832int ib_mad_kernel_rmpp_agent(const struct ib_mad_agent *agent)
    833{
    834	return agent->rmpp_version && !(agent->flags & IB_MAD_USER_RMPP);
    835}
    836EXPORT_SYMBOL(ib_mad_kernel_rmpp_agent);
    837
    838struct ib_mad_send_buf *ib_create_send_mad(struct ib_mad_agent *mad_agent,
    839					   u32 remote_qpn, u16 pkey_index,
    840					   int rmpp_active, int hdr_len,
    841					   int data_len, gfp_t gfp_mask,
    842					   u8 base_version)
    843{
    844	struct ib_mad_agent_private *mad_agent_priv;
    845	struct ib_mad_send_wr_private *mad_send_wr;
    846	int pad, message_size, ret, size;
    847	void *buf;
    848	size_t mad_size;
    849	bool opa;
    850
    851	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
    852				      agent);
    853
    854	opa = rdma_cap_opa_mad(mad_agent->device, mad_agent->port_num);
    855
    856	if (opa && base_version == OPA_MGMT_BASE_VERSION)
    857		mad_size = sizeof(struct opa_mad);
    858	else
    859		mad_size = sizeof(struct ib_mad);
    860
    861	pad = get_pad_size(hdr_len, data_len, mad_size);
    862	message_size = hdr_len + data_len + pad;
    863
    864	if (ib_mad_kernel_rmpp_agent(mad_agent)) {
    865		if (!rmpp_active && message_size > mad_size)
    866			return ERR_PTR(-EINVAL);
    867	} else
    868		if (rmpp_active || message_size > mad_size)
    869			return ERR_PTR(-EINVAL);
    870
    871	size = rmpp_active ? hdr_len : mad_size;
    872	buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask);
    873	if (!buf)
    874		return ERR_PTR(-ENOMEM);
    875
    876	mad_send_wr = buf + size;
    877	INIT_LIST_HEAD(&mad_send_wr->rmpp_list);
    878	mad_send_wr->send_buf.mad = buf;
    879	mad_send_wr->send_buf.hdr_len = hdr_len;
    880	mad_send_wr->send_buf.data_len = data_len;
    881	mad_send_wr->pad = pad;
    882
    883	mad_send_wr->mad_agent_priv = mad_agent_priv;
    884	mad_send_wr->sg_list[0].length = hdr_len;
    885	mad_send_wr->sg_list[0].lkey = mad_agent->qp->pd->local_dma_lkey;
    886
    887	/* OPA MADs don't have to be the full 2048 bytes */
    888	if (opa && base_version == OPA_MGMT_BASE_VERSION &&
    889	    data_len < mad_size - hdr_len)
    890		mad_send_wr->sg_list[1].length = data_len;
    891	else
    892		mad_send_wr->sg_list[1].length = mad_size - hdr_len;
    893
    894	mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey;
    895
    896	mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
    897
    898	mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
    899	mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list;
    900	mad_send_wr->send_wr.wr.num_sge = 2;
    901	mad_send_wr->send_wr.wr.opcode = IB_WR_SEND;
    902	mad_send_wr->send_wr.wr.send_flags = IB_SEND_SIGNALED;
    903	mad_send_wr->send_wr.remote_qpn = remote_qpn;
    904	mad_send_wr->send_wr.remote_qkey = IB_QP_SET_QKEY;
    905	mad_send_wr->send_wr.pkey_index = pkey_index;
    906
    907	if (rmpp_active) {
    908		ret = alloc_send_rmpp_list(mad_send_wr, mad_size, gfp_mask);
    909		if (ret) {
    910			kfree(buf);
    911			return ERR_PTR(ret);
    912		}
    913	}
    914
    915	mad_send_wr->send_buf.mad_agent = mad_agent;
    916	refcount_inc(&mad_agent_priv->refcount);
    917	return &mad_send_wr->send_buf;
    918}
    919EXPORT_SYMBOL(ib_create_send_mad);
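
/*
 * A minimal allocation sketch (hypothetical caller, values illustrative):
 * a single non-RMPP SA-class MAD on an IB port must fit the 256-byte
 * ib_mad layout, so hdr_len + data_len + pad may not exceed mad_size.
 * The caller still fills in msg->ah, msg->timeout_ms and msg->retries
 * before posting.
 */
#if 0
static int example_alloc(struct ib_mad_agent *agent, u32 remote_qpn,
			 u16 pkey_index, struct ib_mad_send_buf **msg)
{
	*msg = ib_create_send_mad(agent, remote_qpn, pkey_index,
				  0 /* rmpp_active */, IB_MGMT_SA_HDR,
				  IB_MGMT_SA_DATA, GFP_KERNEL,
				  IB_MGMT_BASE_VERSION);
	if (IS_ERR(*msg))
		return PTR_ERR(*msg);
	return 0;
}
#endif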
    920
    921int ib_get_mad_data_offset(u8 mgmt_class)
    922{
    923	if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM)
    924		return IB_MGMT_SA_HDR;
    925	else if ((mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) ||
    926		 (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) ||
    927		 (mgmt_class == IB_MGMT_CLASS_BIS))
    928		return IB_MGMT_DEVICE_HDR;
    929	else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
    930		 (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))
    931		return IB_MGMT_VENDOR_HDR;
    932	else
    933		return IB_MGMT_MAD_HDR;
    934}
    935EXPORT_SYMBOL(ib_get_mad_data_offset);
    936
    937int ib_is_mad_class_rmpp(u8 mgmt_class)
    938{
    939	if ((mgmt_class == IB_MGMT_CLASS_SUBN_ADM) ||
    940	    (mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) ||
    941	    (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) ||
    942	    (mgmt_class == IB_MGMT_CLASS_BIS) ||
    943	    ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
    944	     (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)))
    945		return 1;
    946	return 0;
    947}
    948EXPORT_SYMBOL(ib_is_mad_class_rmpp);
    949
    950void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num)
    951{
    952	struct ib_mad_send_wr_private *mad_send_wr;
    953	struct list_head *list;
    954
    955	mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
    956				   send_buf);
    957	list = &mad_send_wr->cur_seg->list;
    958
    959	if (mad_send_wr->cur_seg->num < seg_num) {
    960		list_for_each_entry(mad_send_wr->cur_seg, list, list)
    961			if (mad_send_wr->cur_seg->num == seg_num)
    962				break;
    963	} else if (mad_send_wr->cur_seg->num > seg_num) {
    964		list_for_each_entry_reverse(mad_send_wr->cur_seg, list, list)
    965			if (mad_send_wr->cur_seg->num == seg_num)
    966				break;
    967	}
    968	return mad_send_wr->cur_seg->data;
    969}
    970EXPORT_SYMBOL(ib_get_rmpp_segment);
    971
    972static inline void *ib_get_payload(struct ib_mad_send_wr_private *mad_send_wr)
    973{
    974	if (mad_send_wr->send_buf.seg_count)
    975		return ib_get_rmpp_segment(&mad_send_wr->send_buf,
    976					   mad_send_wr->seg_num);
    977	else
    978		return mad_send_wr->send_buf.mad +
    979		       mad_send_wr->send_buf.hdr_len;
    980}
    981
    982void ib_free_send_mad(struct ib_mad_send_buf *send_buf)
    983{
    984	struct ib_mad_agent_private *mad_agent_priv;
    985	struct ib_mad_send_wr_private *mad_send_wr;
    986
    987	mad_agent_priv = container_of(send_buf->mad_agent,
    988				      struct ib_mad_agent_private, agent);
    989	mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
    990				   send_buf);
    991
    992	free_send_rmpp_list(mad_send_wr);
    993	kfree(send_buf->mad);
    994	deref_mad_agent(mad_agent_priv);
    995}
    996EXPORT_SYMBOL(ib_free_send_mad);
    997
    998int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
    999{
   1000	struct ib_mad_qp_info *qp_info;
   1001	struct list_head *list;
   1002	struct ib_mad_agent *mad_agent;
   1003	struct ib_sge *sge;
   1004	unsigned long flags;
   1005	int ret;
   1006
   1007	/* Set WR ID to find mad_send_wr upon completion */
   1008	qp_info = mad_send_wr->mad_agent_priv->qp_info;
   1009	mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
   1010	mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
   1011	mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
   1012
   1013	mad_agent = mad_send_wr->send_buf.mad_agent;
   1014	sge = mad_send_wr->sg_list;
   1015	sge[0].addr = ib_dma_map_single(mad_agent->device,
   1016					mad_send_wr->send_buf.mad,
   1017					sge[0].length,
   1018					DMA_TO_DEVICE);
   1019	if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr)))
   1020		return -ENOMEM;
   1021
   1022	mad_send_wr->header_mapping = sge[0].addr;
   1023
   1024	sge[1].addr = ib_dma_map_single(mad_agent->device,
   1025					ib_get_payload(mad_send_wr),
   1026					sge[1].length,
   1027					DMA_TO_DEVICE);
   1028	if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) {
   1029		ib_dma_unmap_single(mad_agent->device,
   1030				    mad_send_wr->header_mapping,
   1031				    sge[0].length, DMA_TO_DEVICE);
   1032		return -ENOMEM;
   1033	}
   1034	mad_send_wr->payload_mapping = sge[1].addr;
   1035
   1036	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
   1037	if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
   1038		trace_ib_mad_ib_send_mad(mad_send_wr, qp_info);
   1039		ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr,
   1040				   NULL);
   1041		list = &qp_info->send_queue.list;
   1042	} else {
   1043		ret = 0;
   1044		list = &qp_info->overflow_list;
   1045	}
   1046
   1047	if (!ret) {
   1048		qp_info->send_queue.count++;
   1049		list_add_tail(&mad_send_wr->mad_list.list, list);
   1050	}
   1051	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
   1052	if (ret) {
   1053		ib_dma_unmap_single(mad_agent->device,
   1054				    mad_send_wr->header_mapping,
   1055				    sge[0].length, DMA_TO_DEVICE);
   1056		ib_dma_unmap_single(mad_agent->device,
   1057				    mad_send_wr->payload_mapping,
   1058				    sge[1].length, DMA_TO_DEVICE);
   1059	}
   1060	return ret;
   1061}
   1062
   1063/*
   1064 * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
   1065 *  with the registered client
   1066 */
   1067int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
   1068		     struct ib_mad_send_buf **bad_send_buf)
   1069{
   1070	struct ib_mad_agent_private *mad_agent_priv;
   1071	struct ib_mad_send_buf *next_send_buf;
   1072	struct ib_mad_send_wr_private *mad_send_wr;
   1073	unsigned long flags;
   1074	int ret = -EINVAL;
   1075
   1076	/* Walk list of send WRs and post each on send list */
   1077	for (; send_buf; send_buf = next_send_buf) {
   1078		mad_send_wr = container_of(send_buf,
   1079					   struct ib_mad_send_wr_private,
   1080					   send_buf);
   1081		mad_agent_priv = mad_send_wr->mad_agent_priv;
   1082
   1083		ret = ib_mad_enforce_security(mad_agent_priv,
   1084					      mad_send_wr->send_wr.pkey_index);
   1085		if (ret)
   1086			goto error;
   1087
   1088		if (!send_buf->mad_agent->send_handler ||
   1089		    (send_buf->timeout_ms &&
   1090		     !send_buf->mad_agent->recv_handler)) {
   1091			ret = -EINVAL;
   1092			goto error;
   1093		}
   1094
   1095		if (!ib_is_mad_class_rmpp(((struct ib_mad_hdr *) send_buf->mad)->mgmt_class)) {
   1096			if (mad_agent_priv->agent.rmpp_version) {
   1097				ret = -EINVAL;
   1098				goto error;
   1099			}
   1100		}
   1101
   1102		/*
   1103		 * Save pointer to next work request to post in case the
   1104		 * current one completes, and the user modifies the work
   1105		 * request associated with the completion
   1106		 */
   1107		next_send_buf = send_buf->next;
   1108		mad_send_wr->send_wr.ah = send_buf->ah;
   1109
   1110		if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class ==
   1111		    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
   1112			ret = handle_outgoing_dr_smp(mad_agent_priv,
   1113						     mad_send_wr);
   1114			if (ret < 0)		/* error */
   1115				goto error;
   1116			else if (ret == 1)	/* locally consumed */
   1117				continue;
   1118		}
   1119
   1120		mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid;
   1121		/* Timeout will be updated after send completes */
   1122		mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms);
   1123		mad_send_wr->max_retries = send_buf->retries;
   1124		mad_send_wr->retries_left = send_buf->retries;
   1125		send_buf->retries = 0;
   1126		/* Reference for work request to QP + response */
   1127		mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
   1128		mad_send_wr->status = IB_WC_SUCCESS;
   1129
   1130		/* Reference MAD agent until send completes */
   1131		refcount_inc(&mad_agent_priv->refcount);
   1132		spin_lock_irqsave(&mad_agent_priv->lock, flags);
   1133		list_add_tail(&mad_send_wr->agent_list,
   1134			      &mad_agent_priv->send_list);
   1135		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
   1136
   1137		if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
   1138			ret = ib_send_rmpp_mad(mad_send_wr);
   1139			if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED)
   1140				ret = ib_send_mad(mad_send_wr);
   1141		} else
   1142			ret = ib_send_mad(mad_send_wr);
   1143		if (ret < 0) {
   1144			/* Fail send request */
   1145			spin_lock_irqsave(&mad_agent_priv->lock, flags);
   1146			list_del(&mad_send_wr->agent_list);
   1147			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
   1148			deref_mad_agent(mad_agent_priv);
   1149			goto error;
   1150		}
   1151	}
   1152	return 0;
   1153error:
   1154	if (bad_send_buf)
   1155		*bad_send_buf = send_buf;
   1156	return ret;
   1157}
   1158EXPORT_SYMBOL(ib_post_send_mad);
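
/*
 * Posting sketch (hypothetical caller, continuing the allocation sketch
 * above): on failure *bad_send_buf points at the buffer whose post failed;
 * that buffer and any later ones in the chain remain owned by the caller
 * and must be freed by it, while buffers already posted are completed
 * through the agent's send_handler, which typically frees them with
 * ib_free_send_mad().
 */
#if 0
static int example_post(struct ib_mad_send_buf *msg)
{
	struct ib_mad_send_buf *bad;
	int ret;

	ret = ib_post_send_mad(msg, &bad);
	if (ret)
		ib_free_send_mad(msg);	/* single buffer: nothing was posted */
	return ret;
}
#endif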
   1159
   1160/*
   1161 * ib_free_recv_mad - Returns data buffers used to receive
   1162 *  a MAD to the access layer
   1163 */
   1164void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc)
   1165{
   1166	struct ib_mad_recv_buf *mad_recv_buf, *temp_recv_buf;
   1167	struct ib_mad_private_header *mad_priv_hdr;
   1168	struct ib_mad_private *priv;
   1169	struct list_head free_list;
   1170
   1171	INIT_LIST_HEAD(&free_list);
   1172	list_splice_init(&mad_recv_wc->rmpp_list, &free_list);
   1173
   1174	list_for_each_entry_safe(mad_recv_buf, temp_recv_buf,
   1175					&free_list, list) {
   1176		mad_recv_wc = container_of(mad_recv_buf, struct ib_mad_recv_wc,
   1177					   recv_buf);
   1178		mad_priv_hdr = container_of(mad_recv_wc,
   1179					    struct ib_mad_private_header,
   1180					    recv_wc);
   1181		priv = container_of(mad_priv_hdr, struct ib_mad_private,
   1182				    header);
   1183		kfree(priv);
   1184	}
   1185}
   1186EXPORT_SYMBOL(ib_free_recv_mad);
   1187
   1188static int method_in_use(struct ib_mad_mgmt_method_table **method,
   1189			 struct ib_mad_reg_req *mad_reg_req)
   1190{
   1191	int i;
   1192
   1193	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) {
   1194		if ((*method)->agent[i]) {
   1195			pr_err("Method %d already in use\n", i);
   1196			return -EINVAL;
   1197		}
   1198	}
   1199	return 0;
   1200}
   1201
   1202static int allocate_method_table(struct ib_mad_mgmt_method_table **method)
   1203{
   1204	/* Allocate management method table */
   1205	*method = kzalloc(sizeof **method, GFP_ATOMIC);
   1206	return (*method) ? 0 : (-ENOMEM);
   1207}
   1208
   1209/*
   1210 * Check to see if there are any methods still in use
   1211 */
   1212static int check_method_table(struct ib_mad_mgmt_method_table *method)
   1213{
   1214	int i;
   1215
   1216	for (i = 0; i < IB_MGMT_MAX_METHODS; i++)
   1217		if (method->agent[i])
   1218			return 1;
   1219	return 0;
   1220}
   1221
   1222/*
   1223 * Check to see if there are any method tables for this class still in use
   1224 */
   1225static int check_class_table(struct ib_mad_mgmt_class_table *class)
   1226{
   1227	int i;
   1228
   1229	for (i = 0; i < MAX_MGMT_CLASS; i++)
   1230		if (class->method_table[i])
   1231			return 1;
   1232	return 0;
   1233}
   1234
   1235static int check_vendor_class(struct ib_mad_mgmt_vendor_class *vendor_class)
   1236{
   1237	int i;
   1238
   1239	for (i = 0; i < MAX_MGMT_OUI; i++)
   1240		if (vendor_class->method_table[i])
   1241			return 1;
   1242	return 0;
   1243}
   1244
   1245static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class,
   1246			   const char *oui)
   1247{
   1248	int i;
   1249
   1250	for (i = 0; i < MAX_MGMT_OUI; i++)
   1251		/* Is there matching OUI for this vendor class ? */
   1252		if (!memcmp(vendor_class->oui[i], oui, 3))
   1253			return i;
   1254
   1255	return -1;
   1256}
   1257
   1258static int check_vendor_table(struct ib_mad_mgmt_vendor_class_table *vendor)
   1259{
   1260	int i;
   1261
   1262	for (i = 0; i < MAX_MGMT_VENDOR_RANGE2; i++)
   1263		if (vendor->vendor_class[i])
   1264			return 1;
   1265
   1266	return 0;
   1267}
   1268
   1269static void remove_methods_mad_agent(struct ib_mad_mgmt_method_table *method,
   1270				     struct ib_mad_agent_private *agent)
   1271{
   1272	int i;
   1273
   1274	/* Remove any methods for this mad agent */
   1275	for (i = 0; i < IB_MGMT_MAX_METHODS; i++)
   1276		if (method->agent[i] == agent)
   1277			method->agent[i] = NULL;
   1278}
   1279
   1280static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
   1281			      struct ib_mad_agent_private *agent_priv,
   1282			      u8 mgmt_class)
   1283{
   1284	struct ib_mad_port_private *port_priv;
   1285	struct ib_mad_mgmt_class_table **class;
   1286	struct ib_mad_mgmt_method_table **method;
   1287	int i, ret;
   1288
   1289	port_priv = agent_priv->qp_info->port_priv;
   1290	class = &port_priv->version[mad_reg_req->mgmt_class_version].class;
   1291	if (!*class) {
   1292		/* Allocate management class table for "new" class version */
   1293		*class = kzalloc(sizeof **class, GFP_ATOMIC);
   1294		if (!*class) {
   1295			ret = -ENOMEM;
   1296			goto error1;
   1297		}
   1298
   1299		/* Allocate method table for this management class */
   1300		method = &(*class)->method_table[mgmt_class];
   1301		if ((ret = allocate_method_table(method)))
   1302			goto error2;
   1303	} else {
   1304		method = &(*class)->method_table[mgmt_class];
   1305		if (!*method) {
   1306			/* Allocate method table for this management class */
   1307			if ((ret = allocate_method_table(method)))
   1308				goto error1;
   1309		}
   1310	}
   1311
   1312	/* Now, make sure methods are not already in use */
   1313	if (method_in_use(method, mad_reg_req))
   1314		goto error3;
   1315
   1316	/* Finally, add in methods being registered */
   1317	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
   1318		(*method)->agent[i] = agent_priv;
   1319
   1320	return 0;
   1321
   1322error3:
   1323	/* Remove any methods for this mad agent */
   1324	remove_methods_mad_agent(*method, agent_priv);
   1325	/* Now, check to see if there are any methods in use */
   1326	if (!check_method_table(*method)) {
   1327		/* If not, release management method table */
   1328		kfree(*method);
   1329		*method = NULL;
   1330	}
   1331	ret = -EINVAL;
   1332	goto error1;
   1333error2:
   1334	kfree(*class);
   1335	*class = NULL;
   1336error1:
   1337	return ret;
   1338}
   1339
   1340static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
   1341			   struct ib_mad_agent_private *agent_priv)
   1342{
   1343	struct ib_mad_port_private *port_priv;
   1344	struct ib_mad_mgmt_vendor_class_table **vendor_table;
   1345	struct ib_mad_mgmt_vendor_class_table *vendor = NULL;
   1346	struct ib_mad_mgmt_vendor_class *vendor_class = NULL;
   1347	struct ib_mad_mgmt_method_table **method;
   1348	int i, ret = -ENOMEM;
   1349	u8 vclass;
   1350
   1351	/* "New" vendor (with OUI) class */
   1352	vclass = vendor_class_index(mad_reg_req->mgmt_class);
   1353	port_priv = agent_priv->qp_info->port_priv;
   1354	vendor_table = &port_priv->version[
   1355				mad_reg_req->mgmt_class_version].vendor;
   1356	if (!*vendor_table) {
   1357		/* Allocate mgmt vendor class table for "new" class version */
   1358		vendor = kzalloc(sizeof *vendor, GFP_ATOMIC);
   1359		if (!vendor)
   1360			goto error1;
   1361
   1362		*vendor_table = vendor;
   1363	}
   1364	if (!(*vendor_table)->vendor_class[vclass]) {
   1365		/* Allocate table for this management vendor class */
   1366		vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC);
   1367		if (!vendor_class)
   1368			goto error2;
   1369
   1370		(*vendor_table)->vendor_class[vclass] = vendor_class;
   1371	}
   1372	for (i = 0; i < MAX_MGMT_OUI; i++) {
   1373		/* Is there matching OUI for this vendor class ? */
   1374		if (!memcmp((*vendor_table)->vendor_class[vclass]->oui[i],
   1375			    mad_reg_req->oui, 3)) {
   1376			method = &(*vendor_table)->vendor_class[
   1377						vclass]->method_table[i];
   1378			if (!*method)
   1379				goto error3;
   1380			goto check_in_use;
   1381		}
   1382	}
   1383	for (i = 0; i < MAX_MGMT_OUI; i++) {
   1384		/* OUI slot available ? */
   1385		if (!is_vendor_oui((*vendor_table)->vendor_class[
   1386				vclass]->oui[i])) {
   1387			method = &(*vendor_table)->vendor_class[
   1388				vclass]->method_table[i];
   1389			/* Allocate method table for this OUI */
   1390			if (!*method) {
   1391				ret = allocate_method_table(method);
   1392				if (ret)
   1393					goto error3;
   1394			}
   1395			memcpy((*vendor_table)->vendor_class[vclass]->oui[i],
   1396			       mad_reg_req->oui, 3);
   1397			goto check_in_use;
   1398		}
   1399	}
   1400	dev_err(&agent_priv->agent.device->dev, "All OUI slots in use\n");
   1401	goto error3;
   1402
   1403check_in_use:
   1404	/* Now, make sure methods are not already in use */
   1405	if (method_in_use(method, mad_reg_req))
   1406		goto error4;
   1407
   1408	/* Finally, add in methods being registered */
   1409	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
   1410		(*method)->agent[i] = agent_priv;
   1411
   1412	return 0;
   1413
   1414error4:
   1415	/* Remove any methods for this mad agent */
   1416	remove_methods_mad_agent(*method, agent_priv);
   1417	/* Now, check to see if there are any methods in use */
   1418	if (!check_method_table(*method)) {
   1419		/* If not, release management method table */
   1420		kfree(*method);
   1421		*method = NULL;
   1422	}
   1423	ret = -EINVAL;
   1424error3:
   1425	if (vendor_class) {
   1426		(*vendor_table)->vendor_class[vclass] = NULL;
   1427		kfree(vendor_class);
   1428	}
   1429error2:
   1430	if (vendor) {
   1431		*vendor_table = NULL;
   1432		kfree(vendor);
   1433	}
   1434error1:
   1435	return ret;
   1436}
   1437
   1438static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv)
   1439{
   1440	struct ib_mad_port_private *port_priv;
   1441	struct ib_mad_mgmt_class_table *class;
   1442	struct ib_mad_mgmt_method_table *method;
   1443	struct ib_mad_mgmt_vendor_class_table *vendor;
   1444	struct ib_mad_mgmt_vendor_class *vendor_class;
   1445	int index;
   1446	u8 mgmt_class;
   1447
   1448	/*
   1449	 * Was MAD registration request supplied
   1450	 * with original registration ?
   1451	 */
   1452	if (!agent_priv->reg_req)
   1453		goto out;
   1454
   1455	port_priv = agent_priv->qp_info->port_priv;
   1456	mgmt_class = convert_mgmt_class(agent_priv->reg_req->mgmt_class);
   1457	class = port_priv->version[
   1458			agent_priv->reg_req->mgmt_class_version].class;
   1459	if (!class)
   1460		goto vendor_check;
   1461
   1462	method = class->method_table[mgmt_class];
   1463	if (method) {
   1464		/* Remove any methods for this mad agent */
   1465		remove_methods_mad_agent(method, agent_priv);
   1466		/* Now, check to see if there are any methods still in use */
   1467		if (!check_method_table(method)) {
   1468			/* If not, release management method table */
   1469			kfree(method);
   1470			class->method_table[mgmt_class] = NULL;
   1471			/* Any management classes left ? */
   1472			if (!check_class_table(class)) {
   1473				/* If not, release management class table */
   1474				kfree(class);
   1475				port_priv->version[
   1476					agent_priv->reg_req->
   1477					mgmt_class_version].class = NULL;
   1478			}
   1479		}
   1480	}
   1481
   1482vendor_check:
   1483	if (!is_vendor_class(mgmt_class))
   1484		goto out;
   1485
   1486	/* normalize mgmt_class to vendor range 2 */
   1487	mgmt_class = vendor_class_index(agent_priv->reg_req->mgmt_class);
   1488	vendor = port_priv->version[
   1489			agent_priv->reg_req->mgmt_class_version].vendor;
   1490
   1491	if (!vendor)
   1492		goto out;
   1493
   1494	vendor_class = vendor->vendor_class[mgmt_class];
   1495	if (vendor_class) {
   1496		index = find_vendor_oui(vendor_class, agent_priv->reg_req->oui);
   1497		if (index < 0)
   1498			goto out;
   1499		method = vendor_class->method_table[index];
   1500		if (method) {
   1501			/* Remove any methods for this mad agent */
   1502			remove_methods_mad_agent(method, agent_priv);
   1503			/*
   1504			 * Now, check to see if there are
   1505			 * any methods still in use
   1506			 */
   1507			if (!check_method_table(method)) {
   1508				/* If not, release management method table */
   1509				kfree(method);
   1510				vendor_class->method_table[index] = NULL;
   1511				memset(vendor_class->oui[index], 0, 3);
   1512				/* Any OUIs left ? */
   1513				if (!check_vendor_class(vendor_class)) {
   1514					/* If not, release vendor class table */
   1515					kfree(vendor_class);
   1516					vendor->vendor_class[mgmt_class] = NULL;
   1517					/* Any other vendor classes left ? */
   1518					if (!check_vendor_table(vendor)) {
   1519						kfree(vendor);
   1520						port_priv->version[
   1521							agent_priv->reg_req->
   1522							mgmt_class_version].
   1523							vendor = NULL;
   1524					}
   1525				}
   1526			}
   1527		}
   1528	}
   1529
   1530out:
   1531	return;
   1532}
   1533
   1534static struct ib_mad_agent_private *
   1535find_mad_agent(struct ib_mad_port_private *port_priv,
   1536	       const struct ib_mad_hdr *mad_hdr)
   1537{
   1538	struct ib_mad_agent_private *mad_agent = NULL;
   1539	unsigned long flags;
   1540
   1541	if (ib_response_mad(mad_hdr)) {
   1542		u32 hi_tid;
   1543
   1544		/*
   1545		 * Routing is based on high 32 bits of transaction ID
   1546		 * of MAD.
   1547		 */
   1548		hi_tid = be64_to_cpu(mad_hdr->tid) >> 32;
   1549		rcu_read_lock();
   1550		mad_agent = xa_load(&ib_mad_clients, hi_tid);
   1551		if (mad_agent && !refcount_inc_not_zero(&mad_agent->refcount))
   1552			mad_agent = NULL;
   1553		rcu_read_unlock();
   1554	} else {
   1555		struct ib_mad_mgmt_class_table *class;
   1556		struct ib_mad_mgmt_method_table *method;
   1557		struct ib_mad_mgmt_vendor_class_table *vendor;
   1558		struct ib_mad_mgmt_vendor_class *vendor_class;
   1559		const struct ib_vendor_mad *vendor_mad;
   1560		int index;
   1561
   1562		spin_lock_irqsave(&port_priv->reg_lock, flags);
   1563		/*
   1564		 * Routing is based on version, class, and method
   1565		 * For "newer" vendor MADs, also based on OUI
   1566		 */
   1567		if (mad_hdr->class_version >= MAX_MGMT_VERSION)
   1568			goto out;
   1569		if (!is_vendor_class(mad_hdr->mgmt_class)) {
   1570			class = port_priv->version[
   1571					mad_hdr->class_version].class;
   1572			if (!class)
   1573				goto out;
   1574			if (convert_mgmt_class(mad_hdr->mgmt_class) >=
   1575			    ARRAY_SIZE(class->method_table))
   1576				goto out;
   1577			method = class->method_table[convert_mgmt_class(
   1578							mad_hdr->mgmt_class)];
   1579			if (method)
   1580				mad_agent = method->agent[mad_hdr->method &
   1581							  ~IB_MGMT_METHOD_RESP];
   1582		} else {
   1583			vendor = port_priv->version[
   1584					mad_hdr->class_version].vendor;
   1585			if (!vendor)
   1586				goto out;
   1587			vendor_class = vendor->vendor_class[vendor_class_index(
   1588						mad_hdr->mgmt_class)];
   1589			if (!vendor_class)
   1590				goto out;
   1591			/* Find matching OUI */
   1592			vendor_mad = (const struct ib_vendor_mad *)mad_hdr;
   1593			index = find_vendor_oui(vendor_class, vendor_mad->oui);
   1594			if (index == -1)
   1595				goto out;
   1596			method = vendor_class->method_table[index];
   1597			if (method) {
   1598				mad_agent = method->agent[mad_hdr->method &
   1599							  ~IB_MGMT_METHOD_RESP];
   1600			}
   1601		}
   1602		if (mad_agent)
   1603			refcount_inc(&mad_agent->refcount);
   1604out:
   1605		spin_unlock_irqrestore(&port_priv->reg_lock, flags);
   1606	}
   1607
   1608	if (mad_agent && !mad_agent->agent.recv_handler) {
   1609		dev_notice(&port_priv->device->dev,
   1610			   "No receive handler for client %p on port %u\n",
   1611			   &mad_agent->agent, port_priv->port_num);
   1612		deref_mad_agent(mad_agent);
   1613		mad_agent = NULL;
   1614	}
   1615
   1616	return mad_agent;
   1617}
   1618
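/*
 * Sanity check an incoming MAD: the base version must be understood,
 * SMI classes are only accepted on QP0, every other class only on a
 * QP other than QP0, and CM attributes other than ClassPortInfo must
 * use the Send method.
 */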
   1619static int validate_mad(const struct ib_mad_hdr *mad_hdr,
   1620			const struct ib_mad_qp_info *qp_info,
   1621			bool opa)
   1622{
   1623	int valid = 0;
   1624	u32 qp_num = qp_info->qp->qp_num;
   1625
   1626	/* Make sure MAD base version is understood */
   1627	if (mad_hdr->base_version != IB_MGMT_BASE_VERSION &&
   1628	    (!opa || mad_hdr->base_version != OPA_MGMT_BASE_VERSION)) {
   1629		pr_err("MAD received with unsupported base version %u %s\n",
   1630		       mad_hdr->base_version, opa ? "(opa)" : "");
   1631		goto out;
   1632	}
   1633
   1634	/* Filter SMI packets sent to other than QP0 */
   1635	if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
   1636	    (mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
   1637		if (qp_num == 0)
   1638			valid = 1;
   1639	} else {
   1640		/* CM attributes other than ClassPortInfo only use Send method */
   1641		if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_CM) &&
   1642		    (mad_hdr->attr_id != IB_MGMT_CLASSPORTINFO_ATTR_ID) &&
   1643		    (mad_hdr->method != IB_MGMT_METHOD_SEND))
   1644			goto out;
   1645		/* Filter GSI packets sent to QP0 */
   1646		if (qp_num != 0)
   1647			valid = 1;
   1648	}
   1649
   1650out:
   1651	return valid;
   1652}
   1653
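/*
 * Returns true when the MAD can be matched as ordinary data: either
 * RMPP is not handled in the kernel for this agent, RMPP is not
 * active on the MAD, or this is an RMPP DATA segment.
 */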
   1654static int is_rmpp_data_mad(const struct ib_mad_agent_private *mad_agent_priv,
   1655			    const struct ib_mad_hdr *mad_hdr)
   1656{
   1657	struct ib_rmpp_mad *rmpp_mad;
   1658
   1659	rmpp_mad = (struct ib_rmpp_mad *)mad_hdr;
   1660	return !mad_agent_priv->agent.rmpp_version ||
   1661		!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) ||
   1662		!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
   1663				    IB_MGMT_RMPP_FLAG_ACTIVE) ||
   1664		(rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA);
   1665}
   1666
   1667static inline int rcv_has_same_class(const struct ib_mad_send_wr_private *wr,
   1668				     const struct ib_mad_recv_wc *rwc)
   1669{
   1670	return ((struct ib_mad_hdr *)(wr->send_buf.mad))->mgmt_class ==
   1671		rwc->recv_buf.mad->mad_hdr.mgmt_class;
   1672}
   1673
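/*
 * Check whether a sent MAD and a received MAD involve the same peer.
 * When we sent the request and received the response, our source GID
 * (or LID path bits) must match the destination of the receive; when
 * we sent the response, our destination must match the source of the
 * received request.
 */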
   1674static inline int
   1675rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_priv,
   1676		 const struct ib_mad_send_wr_private *wr,
   1677		 const struct ib_mad_recv_wc *rwc)
   1678{
   1679	struct rdma_ah_attr attr;
   1680	u8 send_resp, rcv_resp;
   1681	union ib_gid sgid;
   1682	struct ib_device *device = mad_agent_priv->agent.device;
   1683	u32 port_num = mad_agent_priv->agent.port_num;
   1684	u8 lmc;
   1685	bool has_grh;
   1686
   1687	send_resp = ib_response_mad((struct ib_mad_hdr *)wr->send_buf.mad);
   1688	rcv_resp = ib_response_mad(&rwc->recv_buf.mad->mad_hdr);
   1689
   1690	if (send_resp == rcv_resp)
    1691		/* both requests or both responses: assume GIDs differ */
   1692		return 0;
   1693
   1694	if (rdma_query_ah(wr->send_buf.ah, &attr))
   1695		/* Assume not equal, to avoid false positives. */
   1696		return 0;
   1697
   1698	has_grh = !!(rdma_ah_get_ah_flags(&attr) & IB_AH_GRH);
   1699	if (has_grh != !!(rwc->wc->wc_flags & IB_WC_GRH))
   1700		/* one has GID, other does not.  Assume different */
   1701		return 0;
   1702
   1703	if (!send_resp && rcv_resp) {
    1704		/* we sent the request, this is the response */
   1705		if (!has_grh) {
   1706			if (ib_get_cached_lmc(device, port_num, &lmc))
   1707				return 0;
   1708			return (!lmc || !((rdma_ah_get_path_bits(&attr) ^
   1709					   rwc->wc->dlid_path_bits) &
   1710					  ((1 << lmc) - 1)));
   1711		} else {
   1712			const struct ib_global_route *grh =
   1713					rdma_ah_read_grh(&attr);
   1714
   1715			if (rdma_query_gid(device, port_num,
   1716					   grh->sgid_index, &sgid))
   1717				return 0;
   1718			return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
   1719				       16);
   1720		}
   1721	}
   1722
   1723	if (!has_grh)
   1724		return rdma_ah_get_dlid(&attr) == rwc->wc->slid;
   1725	else
   1726		return !memcmp(rdma_ah_read_grh(&attr)->dgid.raw,
   1727			       rwc->recv_buf.grh->sgid.raw,
   1728			       16);
   1729}
   1730
   1731static inline int is_direct(u8 class)
   1732{
   1733	return (class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE);
   1734}
   1735
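/*
 * Find the send WR that a received response completes, matching on
 * TID, management class and (except for directed-route MADs) the
 * GID/LID of the peer.  Both the wait list and the send list are
 * searched since a response can arrive before the send completion.
 * Returns NULL if no match is found or the request was canceled.
 */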
   1736struct ib_mad_send_wr_private*
   1737ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv,
   1738		 const struct ib_mad_recv_wc *wc)
   1739{
   1740	struct ib_mad_send_wr_private *wr;
   1741	const struct ib_mad_hdr *mad_hdr;
   1742
   1743	mad_hdr = &wc->recv_buf.mad->mad_hdr;
   1744
   1745	list_for_each_entry(wr, &mad_agent_priv->wait_list, agent_list) {
   1746		if ((wr->tid == mad_hdr->tid) &&
   1747		    rcv_has_same_class(wr, wc) &&
   1748		    /*
   1749		     * Don't check GID for direct routed MADs.
   1750		     * These might have permissive LIDs.
   1751		     */
   1752		    (is_direct(mad_hdr->mgmt_class) ||
   1753		     rcv_has_same_gid(mad_agent_priv, wr, wc)))
   1754			return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
   1755	}
   1756
   1757	/*
   1758	 * It's possible to receive the response before we've
   1759	 * been notified that the send has completed
   1760	 */
   1761	list_for_each_entry(wr, &mad_agent_priv->send_list, agent_list) {
   1762		if (is_rmpp_data_mad(mad_agent_priv, wr->send_buf.mad) &&
   1763		    wr->tid == mad_hdr->tid &&
   1764		    wr->timeout &&
   1765		    rcv_has_same_class(wr, wc) &&
   1766		    /*
   1767		     * Don't check GID for direct routed MADs.
   1768		     * These might have permissive LIDs.
   1769		     */
   1770		    (is_direct(mad_hdr->mgmt_class) ||
   1771		     rcv_has_same_gid(mad_agent_priv, wr, wc)))
   1772			/* Verify request has not been canceled */
   1773			return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
   1774	}
   1775	return NULL;
   1776}
   1777
   1778void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr)
   1779{
   1780	mad_send_wr->timeout = 0;
   1781	if (mad_send_wr->refcount == 1)
   1782		list_move_tail(&mad_send_wr->agent_list,
   1783			      &mad_send_wr->mad_agent_priv->done_list);
   1784}
   1785
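/*
 * Deliver a received MAD to an agent: enforce pkey security, let the
 * kernel RMPP code reassemble multi-segment transfers, and for
 * responses complete the matching send before handing the MAD to the
 * agent's recv_handler.
 */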
   1786static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
   1787				 struct ib_mad_recv_wc *mad_recv_wc)
   1788{
   1789	struct ib_mad_send_wr_private *mad_send_wr;
   1790	struct ib_mad_send_wc mad_send_wc;
   1791	unsigned long flags;
   1792	int ret;
   1793
   1794	INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
   1795	ret = ib_mad_enforce_security(mad_agent_priv,
   1796				      mad_recv_wc->wc->pkey_index);
   1797	if (ret) {
   1798		ib_free_recv_mad(mad_recv_wc);
   1799		deref_mad_agent(mad_agent_priv);
   1800		return;
   1801	}
   1802
   1803	list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
   1804	if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
   1805		mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv,
   1806						      mad_recv_wc);
   1807		if (!mad_recv_wc) {
   1808			deref_mad_agent(mad_agent_priv);
   1809			return;
   1810		}
   1811	}
   1812
   1813	/* Complete corresponding request */
   1814	if (ib_response_mad(&mad_recv_wc->recv_buf.mad->mad_hdr)) {
   1815		spin_lock_irqsave(&mad_agent_priv->lock, flags);
   1816		mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
   1817		if (!mad_send_wr) {
   1818			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
   1819			if (!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)
   1820			   && ib_is_mad_class_rmpp(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class)
   1821			   && (ib_get_rmpp_flags(&((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr)
   1822					& IB_MGMT_RMPP_FLAG_ACTIVE)) {
   1823				/* user rmpp is in effect
   1824				 * and this is an active RMPP MAD
   1825				 */
   1826				mad_agent_priv->agent.recv_handler(
   1827						&mad_agent_priv->agent, NULL,
   1828						mad_recv_wc);
   1829				deref_mad_agent(mad_agent_priv);
   1830			} else {
   1831				/* not user rmpp, revert to normal behavior and
   1832				 * drop the mad
   1833				 */
   1834				ib_free_recv_mad(mad_recv_wc);
   1835				deref_mad_agent(mad_agent_priv);
   1836				return;
   1837			}
   1838		} else {
   1839			ib_mark_mad_done(mad_send_wr);
   1840			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
   1841
   1842			/* Defined behavior is to complete response before request */
   1843			mad_agent_priv->agent.recv_handler(
   1844					&mad_agent_priv->agent,
   1845					&mad_send_wr->send_buf,
   1846					mad_recv_wc);
   1847			deref_mad_agent(mad_agent_priv);
   1848
   1849			mad_send_wc.status = IB_WC_SUCCESS;
   1850			mad_send_wc.vendor_err = 0;
   1851			mad_send_wc.send_buf = &mad_send_wr->send_buf;
   1852			ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
   1853		}
   1854	} else {
   1855		mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, NULL,
   1856						   mad_recv_wc);
   1857		deref_mad_agent(mad_agent_priv);
   1858	}
   1859}
   1860
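/*
 * Process a directed-route SMP received on QP0.  The SMP is either
 * discarded, handled locally, or (on a switch) forwarded out the port
 * indicated by its return path.
 */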
   1861static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv,
   1862				     const struct ib_mad_qp_info *qp_info,
   1863				     const struct ib_wc *wc,
   1864				     u32 port_num,
   1865				     struct ib_mad_private *recv,
   1866				     struct ib_mad_private *response)
   1867{
   1868	enum smi_forward_action retsmi;
   1869	struct ib_smp *smp = (struct ib_smp *)recv->mad;
   1870
   1871	trace_ib_mad_handle_ib_smi(smp);
   1872
   1873	if (smi_handle_dr_smp_recv(smp,
   1874				   rdma_cap_ib_switch(port_priv->device),
   1875				   port_num,
   1876				   port_priv->device->phys_port_cnt) ==
   1877				   IB_SMI_DISCARD)
   1878		return IB_SMI_DISCARD;
   1879
   1880	retsmi = smi_check_forward_dr_smp(smp);
   1881	if (retsmi == IB_SMI_LOCAL)
   1882		return IB_SMI_HANDLE;
   1883
   1884	if (retsmi == IB_SMI_SEND) { /* don't forward */
   1885		if (smi_handle_dr_smp_send(smp,
   1886					   rdma_cap_ib_switch(port_priv->device),
   1887					   port_num) == IB_SMI_DISCARD)
   1888			return IB_SMI_DISCARD;
   1889
   1890		if (smi_check_local_smp(smp, port_priv->device) == IB_SMI_DISCARD)
   1891			return IB_SMI_DISCARD;
   1892	} else if (rdma_cap_ib_switch(port_priv->device)) {
   1893		/* forward case for switches */
   1894		memcpy(response, recv, mad_priv_size(response));
   1895		response->header.recv_wc.wc = &response->header.wc;
   1896		response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad;
   1897		response->header.recv_wc.recv_buf.grh = &response->grh;
   1898
   1899		agent_send_response((const struct ib_mad_hdr *)response->mad,
   1900				    &response->grh, wc,
   1901				    port_priv->device,
   1902				    smi_get_fwd_port(smp),
   1903				    qp_info->qp->qp_num,
   1904				    response->mad_size,
   1905				    false);
   1906
   1907		return IB_SMI_DISCARD;
   1908	}
   1909	return IB_SMI_HANDLE;
   1910}
   1911
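/*
 * Turn an unmatched Get/Set request into a GetResp carrying the
 * "unsupported method/attribute" status so the sender is not left
 * waiting.  Returns false for all other methods, which are simply
 * dropped.
 */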
   1912static bool generate_unmatched_resp(const struct ib_mad_private *recv,
   1913				    struct ib_mad_private *response,
   1914				    size_t *resp_len, bool opa)
   1915{
   1916	const struct ib_mad_hdr *recv_hdr = (const struct ib_mad_hdr *)recv->mad;
   1917	struct ib_mad_hdr *resp_hdr = (struct ib_mad_hdr *)response->mad;
   1918
   1919	if (recv_hdr->method == IB_MGMT_METHOD_GET ||
   1920	    recv_hdr->method == IB_MGMT_METHOD_SET) {
   1921		memcpy(response, recv, mad_priv_size(response));
   1922		response->header.recv_wc.wc = &response->header.wc;
   1923		response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad;
   1924		response->header.recv_wc.recv_buf.grh = &response->grh;
   1925		resp_hdr->method = IB_MGMT_METHOD_GET_RESP;
   1926		resp_hdr->status = cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB);
   1927		if (recv_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
   1928			resp_hdr->status |= IB_SMP_DIRECTION;
   1929
   1930		if (opa && recv_hdr->base_version == OPA_MGMT_BASE_VERSION) {
   1931			if (recv_hdr->mgmt_class ==
   1932			    IB_MGMT_CLASS_SUBN_LID_ROUTED ||
   1933			    recv_hdr->mgmt_class ==
   1934			    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
   1935				*resp_len = opa_get_smp_header_size(
   1936							(struct opa_smp *)recv->mad);
   1937			else
   1938				*resp_len = sizeof(struct ib_mad_hdr);
   1939		}
   1940
   1941		return true;
   1942	} else {
   1943		return false;
   1944	}
   1945}
   1946
   1947static enum smi_action
   1948handle_opa_smi(struct ib_mad_port_private *port_priv,
   1949	       struct ib_mad_qp_info *qp_info,
   1950	       struct ib_wc *wc,
   1951	       u32 port_num,
   1952	       struct ib_mad_private *recv,
   1953	       struct ib_mad_private *response)
   1954{
   1955	enum smi_forward_action retsmi;
   1956	struct opa_smp *smp = (struct opa_smp *)recv->mad;
   1957
   1958	trace_ib_mad_handle_opa_smi(smp);
   1959
   1960	if (opa_smi_handle_dr_smp_recv(smp,
   1961				   rdma_cap_ib_switch(port_priv->device),
   1962				   port_num,
   1963				   port_priv->device->phys_port_cnt) ==
   1964				   IB_SMI_DISCARD)
   1965		return IB_SMI_DISCARD;
   1966
   1967	retsmi = opa_smi_check_forward_dr_smp(smp);
   1968	if (retsmi == IB_SMI_LOCAL)
   1969		return IB_SMI_HANDLE;
   1970
   1971	if (retsmi == IB_SMI_SEND) { /* don't forward */
   1972		if (opa_smi_handle_dr_smp_send(smp,
   1973					   rdma_cap_ib_switch(port_priv->device),
   1974					   port_num) == IB_SMI_DISCARD)
   1975			return IB_SMI_DISCARD;
   1976
   1977		if (opa_smi_check_local_smp(smp, port_priv->device) ==
   1978		    IB_SMI_DISCARD)
   1979			return IB_SMI_DISCARD;
   1980
   1981	} else if (rdma_cap_ib_switch(port_priv->device)) {
   1982		/* forward case for switches */
   1983		memcpy(response, recv, mad_priv_size(response));
   1984		response->header.recv_wc.wc = &response->header.wc;
   1985		response->header.recv_wc.recv_buf.opa_mad =
   1986				(struct opa_mad *)response->mad;
   1987		response->header.recv_wc.recv_buf.grh = &response->grh;
   1988
   1989		agent_send_response((const struct ib_mad_hdr *)response->mad,
   1990				    &response->grh, wc,
   1991				    port_priv->device,
   1992				    opa_smi_get_fwd_port(smp),
   1993				    qp_info->qp->qp_num,
   1994				    recv->header.wc.byte_len,
   1995				    true);
   1996
   1997		return IB_SMI_DISCARD;
   1998	}
   1999
   2000	return IB_SMI_HANDLE;
   2001}
   2002
   2003static enum smi_action
   2004handle_smi(struct ib_mad_port_private *port_priv,
   2005	   struct ib_mad_qp_info *qp_info,
   2006	   struct ib_wc *wc,
   2007	   u32 port_num,
   2008	   struct ib_mad_private *recv,
   2009	   struct ib_mad_private *response,
   2010	   bool opa)
   2011{
   2012	struct ib_mad_hdr *mad_hdr = (struct ib_mad_hdr *)recv->mad;
   2013
   2014	if (opa && mad_hdr->base_version == OPA_MGMT_BASE_VERSION &&
   2015	    mad_hdr->class_version == OPA_SM_CLASS_VERSION)
   2016		return handle_opa_smi(port_priv, qp_info, wc, port_num, recv,
   2017				      response);
   2018
   2019	return handle_ib_smi(port_priv, qp_info, wc, port_num, recv, response);
   2020}
   2021
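/*
 * Completion handler for receive work requests.  Unmaps the buffer,
 * validates the MAD, gives the SMI code and the driver's process_mad
 * a chance to consume or answer it, otherwise dispatches it to the
 * matching agent, and finally reposts a receive buffer on the QP.
 */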
   2022static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
   2023{
   2024	struct ib_mad_port_private *port_priv = cq->cq_context;
   2025	struct ib_mad_list_head *mad_list =
   2026		container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
   2027	struct ib_mad_qp_info *qp_info;
   2028	struct ib_mad_private_header *mad_priv_hdr;
   2029	struct ib_mad_private *recv, *response = NULL;
   2030	struct ib_mad_agent_private *mad_agent;
   2031	u32 port_num;
   2032	int ret = IB_MAD_RESULT_SUCCESS;
   2033	size_t mad_size;
   2034	u16 resp_mad_pkey_index = 0;
   2035	bool opa;
   2036
   2037	if (list_empty_careful(&port_priv->port_list))
   2038		return;
   2039
   2040	if (wc->status != IB_WC_SUCCESS) {
   2041		/*
   2042		 * Receive errors indicate that the QP has entered the error
   2043		 * state - error handling/shutdown code will cleanup
   2044		 */
   2045		return;
   2046	}
   2047
   2048	qp_info = mad_list->mad_queue->qp_info;
   2049	dequeue_mad(mad_list);
   2050
   2051	opa = rdma_cap_opa_mad(qp_info->port_priv->device,
   2052			       qp_info->port_priv->port_num);
   2053
   2054	mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header,
   2055				    mad_list);
   2056	recv = container_of(mad_priv_hdr, struct ib_mad_private, header);
   2057	ib_dma_unmap_single(port_priv->device,
   2058			    recv->header.mapping,
   2059			    mad_priv_dma_size(recv),
   2060			    DMA_FROM_DEVICE);
   2061
    2062	/* Set up MAD receive work completion from "normal" work completion */
   2063	recv->header.wc = *wc;
   2064	recv->header.recv_wc.wc = &recv->header.wc;
   2065
   2066	if (opa && ((struct ib_mad_hdr *)(recv->mad))->base_version == OPA_MGMT_BASE_VERSION) {
   2067		recv->header.recv_wc.mad_len = wc->byte_len - sizeof(struct ib_grh);
   2068		recv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad);
   2069	} else {
   2070		recv->header.recv_wc.mad_len = sizeof(struct ib_mad);
   2071		recv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad);
   2072	}
   2073
   2074	recv->header.recv_wc.recv_buf.mad = (struct ib_mad *)recv->mad;
   2075	recv->header.recv_wc.recv_buf.grh = &recv->grh;
   2076
   2077	/* Validate MAD */
   2078	if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa))
   2079		goto out;
   2080
   2081	trace_ib_mad_recv_done_handler(qp_info, wc,
   2082				       (struct ib_mad_hdr *)recv->mad);
   2083
   2084	mad_size = recv->mad_size;
   2085	response = alloc_mad_private(mad_size, GFP_KERNEL);
   2086	if (!response)
   2087		goto out;
   2088
   2089	if (rdma_cap_ib_switch(port_priv->device))
   2090		port_num = wc->port_num;
   2091	else
   2092		port_num = port_priv->port_num;
   2093
   2094	if (((struct ib_mad_hdr *)recv->mad)->mgmt_class ==
   2095	    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
   2096		if (handle_smi(port_priv, qp_info, wc, port_num, recv,
   2097			       response, opa)
   2098		    == IB_SMI_DISCARD)
   2099			goto out;
   2100	}
   2101
   2102	/* Give driver "right of first refusal" on incoming MAD */
   2103	if (port_priv->device->ops.process_mad) {
   2104		ret = port_priv->device->ops.process_mad(
   2105			port_priv->device, 0, port_priv->port_num, wc,
   2106			&recv->grh, (const struct ib_mad *)recv->mad,
   2107			(struct ib_mad *)response->mad, &mad_size,
   2108			&resp_mad_pkey_index);
   2109
   2110		if (opa)
   2111			wc->pkey_index = resp_mad_pkey_index;
   2112
   2113		if (ret & IB_MAD_RESULT_SUCCESS) {
   2114			if (ret & IB_MAD_RESULT_CONSUMED)
   2115				goto out;
   2116			if (ret & IB_MAD_RESULT_REPLY) {
   2117				agent_send_response((const struct ib_mad_hdr *)response->mad,
   2118						    &recv->grh, wc,
   2119						    port_priv->device,
   2120						    port_num,
   2121						    qp_info->qp->qp_num,
   2122						    mad_size, opa);
   2123				goto out;
   2124			}
   2125		}
   2126	}
   2127
   2128	mad_agent = find_mad_agent(port_priv, (const struct ib_mad_hdr *)recv->mad);
   2129	if (mad_agent) {
   2130		trace_ib_mad_recv_done_agent(mad_agent);
   2131		ib_mad_complete_recv(mad_agent, &recv->header.recv_wc);
    2132		/*
    2133		 * recv is now owned by ib_mad_complete_recv(): it is freed
    2134		 * there on error or handed off via the agent's recv_handler
    2135		 */
   2136		recv = NULL;
   2137	} else if ((ret & IB_MAD_RESULT_SUCCESS) &&
   2138		   generate_unmatched_resp(recv, response, &mad_size, opa)) {
   2139		agent_send_response((const struct ib_mad_hdr *)response->mad, &recv->grh, wc,
   2140				    port_priv->device, port_num,
   2141				    qp_info->qp->qp_num, mad_size, opa);
   2142	}
   2143
   2144out:
   2145	/* Post another receive request for this QP */
   2146	if (response) {
   2147		ib_mad_post_receive_mads(qp_info, response);
   2148		kfree(recv);
   2149	} else
   2150		ib_mad_post_receive_mads(qp_info, recv);
   2151}
   2152
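/*
 * If the wait list is empty, cancel the agent's timeout work;
 * otherwise bring the delayed work forward when the new head of the
 * list times out earlier than currently scheduled.  Called with the
 * agent lock held.
 */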
   2153static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
   2154{
   2155	struct ib_mad_send_wr_private *mad_send_wr;
   2156	unsigned long delay;
   2157
   2158	if (list_empty(&mad_agent_priv->wait_list)) {
   2159		cancel_delayed_work(&mad_agent_priv->timed_work);
   2160	} else {
   2161		mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
   2162					 struct ib_mad_send_wr_private,
   2163					 agent_list);
   2164
   2165		if (time_after(mad_agent_priv->timeout,
   2166			       mad_send_wr->timeout)) {
   2167			mad_agent_priv->timeout = mad_send_wr->timeout;
   2168			delay = mad_send_wr->timeout - jiffies;
   2169			if ((long)delay <= 0)
   2170				delay = 1;
   2171			mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
   2172					 &mad_agent_priv->timed_work, delay);
   2173		}
   2174	}
   2175}
   2176
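/*
 * Move a send WR onto the agent's wait list, kept sorted by absolute
 * timeout, and bring the timeout work forward if this send becomes
 * the new head of the list.  Called with the agent lock held.
 */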
   2177static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
   2178{
   2179	struct ib_mad_agent_private *mad_agent_priv;
   2180	struct ib_mad_send_wr_private *temp_mad_send_wr;
   2181	struct list_head *list_item;
   2182	unsigned long delay;
   2183
   2184	mad_agent_priv = mad_send_wr->mad_agent_priv;
   2185	list_del(&mad_send_wr->agent_list);
   2186
   2187	delay = mad_send_wr->timeout;
   2188	mad_send_wr->timeout += jiffies;
   2189
   2190	if (delay) {
   2191		list_for_each_prev(list_item, &mad_agent_priv->wait_list) {
   2192			temp_mad_send_wr = list_entry(list_item,
   2193						struct ib_mad_send_wr_private,
   2194						agent_list);
   2195			if (time_after(mad_send_wr->timeout,
   2196				       temp_mad_send_wr->timeout))
   2197				break;
   2198		}
   2199	} else {
   2200		list_item = &mad_agent_priv->wait_list;
   2201	}
   2202
   2203	list_add(&mad_send_wr->agent_list, list_item);
   2204
   2205	/* Reschedule a work item if we have a shorter timeout */
   2206	if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list)
   2207		mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
   2208				 &mad_agent_priv->timed_work, delay);
   2209}
   2210
   2211void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
   2212			  unsigned long timeout_ms)
   2213{
   2214	mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
   2215	wait_for_response(mad_send_wr);
   2216}
   2217
   2218/*
   2219 * Process a send work completion
   2220 */
   2221void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
   2222			     struct ib_mad_send_wc *mad_send_wc)
   2223{
   2224	struct ib_mad_agent_private	*mad_agent_priv;
   2225	unsigned long			flags;
   2226	int				ret;
   2227
   2228	mad_agent_priv = mad_send_wr->mad_agent_priv;
   2229	spin_lock_irqsave(&mad_agent_priv->lock, flags);
   2230	if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
   2231		ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc);
   2232		if (ret == IB_RMPP_RESULT_CONSUMED)
   2233			goto done;
   2234	} else
   2235		ret = IB_RMPP_RESULT_UNHANDLED;
   2236
   2237	if (mad_send_wc->status != IB_WC_SUCCESS &&
   2238	    mad_send_wr->status == IB_WC_SUCCESS) {
   2239		mad_send_wr->status = mad_send_wc->status;
   2240		mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
   2241	}
   2242
   2243	if (--mad_send_wr->refcount > 0) {
   2244		if (mad_send_wr->refcount == 1 && mad_send_wr->timeout &&
   2245		    mad_send_wr->status == IB_WC_SUCCESS) {
   2246			wait_for_response(mad_send_wr);
   2247		}
   2248		goto done;
   2249	}
   2250
   2251	/* Remove send from MAD agent and notify client of completion */
   2252	list_del(&mad_send_wr->agent_list);
   2253	adjust_timeout(mad_agent_priv);
   2254	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
   2255
   2256	if (mad_send_wr->status != IB_WC_SUCCESS)
   2257		mad_send_wc->status = mad_send_wr->status;
   2258	if (ret == IB_RMPP_RESULT_INTERNAL)
   2259		ib_rmpp_send_handler(mad_send_wc);
   2260	else
   2261		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
   2262						   mad_send_wc);
   2263
   2264	/* Release reference on agent taken when sending */
   2265	deref_mad_agent(mad_agent_priv);
   2266	return;
   2267done:
   2268	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
   2269}
   2270
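/*
 * Completion handler for send work requests.  Unmaps the header and
 * payload buffers, completes the send WR, and posts the next send
 * from the overflow list once the send queue has room again.
 */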
   2271static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc)
   2272{
   2273	struct ib_mad_port_private *port_priv = cq->cq_context;
   2274	struct ib_mad_list_head *mad_list =
   2275		container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
   2276	struct ib_mad_send_wr_private	*mad_send_wr, *queued_send_wr;
   2277	struct ib_mad_qp_info		*qp_info;
   2278	struct ib_mad_queue		*send_queue;
   2279	struct ib_mad_send_wc		mad_send_wc;
   2280	unsigned long flags;
   2281	int ret;
   2282
   2283	if (list_empty_careful(&port_priv->port_list))
   2284		return;
   2285
   2286	if (wc->status != IB_WC_SUCCESS) {
   2287		if (!ib_mad_send_error(port_priv, wc))
   2288			return;
   2289	}
   2290
   2291	mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
   2292				   mad_list);
   2293	send_queue = mad_list->mad_queue;
   2294	qp_info = send_queue->qp_info;
   2295
   2296	trace_ib_mad_send_done_agent(mad_send_wr->mad_agent_priv);
   2297	trace_ib_mad_send_done_handler(mad_send_wr, wc);
   2298
   2299retry:
   2300	ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
   2301			    mad_send_wr->header_mapping,
   2302			    mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
   2303	ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
   2304			    mad_send_wr->payload_mapping,
   2305			    mad_send_wr->sg_list[1].length, DMA_TO_DEVICE);
   2306	queued_send_wr = NULL;
   2307	spin_lock_irqsave(&send_queue->lock, flags);
   2308	list_del(&mad_list->list);
   2309
   2310	/* Move queued send to the send queue */
   2311	if (send_queue->count-- > send_queue->max_active) {
   2312		mad_list = container_of(qp_info->overflow_list.next,
   2313					struct ib_mad_list_head, list);
   2314		queued_send_wr = container_of(mad_list,
   2315					struct ib_mad_send_wr_private,
   2316					mad_list);
   2317		list_move_tail(&mad_list->list, &send_queue->list);
   2318	}
   2319	spin_unlock_irqrestore(&send_queue->lock, flags);
   2320
   2321	mad_send_wc.send_buf = &mad_send_wr->send_buf;
   2322	mad_send_wc.status = wc->status;
   2323	mad_send_wc.vendor_err = wc->vendor_err;
   2324	ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
   2325
   2326	if (queued_send_wr) {
   2327		trace_ib_mad_send_done_resend(queued_send_wr, qp_info);
   2328		ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr,
   2329				   NULL);
   2330		if (ret) {
   2331			dev_err(&port_priv->device->dev,
   2332				"ib_post_send failed: %d\n", ret);
   2333			mad_send_wr = queued_send_wr;
   2334			wc->status = IB_WC_LOC_QP_OP_ERR;
   2335			goto retry;
   2336		}
   2337	}
   2338}
   2339
   2340static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info)
   2341{
   2342	struct ib_mad_send_wr_private *mad_send_wr;
   2343	struct ib_mad_list_head *mad_list;
   2344	unsigned long flags;
   2345
   2346	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
   2347	list_for_each_entry(mad_list, &qp_info->send_queue.list, list) {
   2348		mad_send_wr = container_of(mad_list,
   2349					   struct ib_mad_send_wr_private,
   2350					   mad_list);
   2351		mad_send_wr->retry = 1;
   2352	}
   2353	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
   2354}
   2355
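/*
 * Handle a failed send completion.  A flushed send marked for retry
 * is simply reposted; any other error moves the QP from SQE back to
 * RTS and marks the queued sends for retry.  Returns true when the
 * completion should still be reported to the client.
 */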
   2356static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
   2357		struct ib_wc *wc)
   2358{
   2359	struct ib_mad_list_head *mad_list =
   2360		container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
   2361	struct ib_mad_qp_info *qp_info = mad_list->mad_queue->qp_info;
   2362	struct ib_mad_send_wr_private *mad_send_wr;
   2363	int ret;
   2364
   2365	/*
   2366	 * Send errors will transition the QP to SQE - move
   2367	 * QP to RTS and repost flushed work requests
   2368	 */
   2369	mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
   2370				   mad_list);
   2371	if (wc->status == IB_WC_WR_FLUSH_ERR) {
   2372		if (mad_send_wr->retry) {
   2373			/* Repost send */
   2374			mad_send_wr->retry = 0;
   2375			trace_ib_mad_error_handler(mad_send_wr, qp_info);
   2376			ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
   2377					   NULL);
   2378			if (!ret)
   2379				return false;
   2380		}
   2381	} else {
   2382		struct ib_qp_attr *attr;
   2383
   2384		/* Transition QP to RTS and fail offending send */
   2385		attr = kmalloc(sizeof *attr, GFP_KERNEL);
   2386		if (attr) {
   2387			attr->qp_state = IB_QPS_RTS;
   2388			attr->cur_qp_state = IB_QPS_SQE;
   2389			ret = ib_modify_qp(qp_info->qp, attr,
   2390					   IB_QP_STATE | IB_QP_CUR_STATE);
   2391			kfree(attr);
   2392			if (ret)
   2393				dev_err(&port_priv->device->dev,
   2394					"%s - ib_modify_qp to RTS: %d\n",
   2395					__func__, ret);
   2396			else
   2397				mark_sends_for_retry(qp_info);
   2398		}
   2399	}
   2400
   2401	return true;
   2402}
   2403
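/*
 * Cancel the outstanding work of an agent: mark active sends as
 * flushed, then pull everything off the wait list and report those
 * requests to the client with IB_WC_WR_FLUSH_ERR.
 */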
   2404static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
   2405{
   2406	unsigned long flags;
   2407	struct ib_mad_send_wr_private *mad_send_wr, *temp_mad_send_wr;
   2408	struct ib_mad_send_wc mad_send_wc;
   2409	struct list_head cancel_list;
   2410
   2411	INIT_LIST_HEAD(&cancel_list);
   2412
   2413	spin_lock_irqsave(&mad_agent_priv->lock, flags);
   2414	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
   2415				 &mad_agent_priv->send_list, agent_list) {
   2416		if (mad_send_wr->status == IB_WC_SUCCESS) {
   2417			mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
   2418			mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
   2419		}
   2420	}
   2421
   2422	/* Empty wait list to prevent receives from finding a request */
   2423	list_splice_init(&mad_agent_priv->wait_list, &cancel_list);
   2424	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
   2425
   2426	/* Report all cancelled requests */
   2427	mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
   2428	mad_send_wc.vendor_err = 0;
   2429
   2430	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
   2431				 &cancel_list, agent_list) {
   2432		mad_send_wc.send_buf = &mad_send_wr->send_buf;
   2433		list_del(&mad_send_wr->agent_list);
   2434		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
   2435						   &mad_send_wc);
   2436		deref_mad_agent(mad_agent_priv);
   2437	}
   2438}
   2439
   2440static struct ib_mad_send_wr_private*
   2441find_send_wr(struct ib_mad_agent_private *mad_agent_priv,
   2442	     struct ib_mad_send_buf *send_buf)
   2443{
   2444	struct ib_mad_send_wr_private *mad_send_wr;
   2445
   2446	list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
   2447			    agent_list) {
   2448		if (&mad_send_wr->send_buf == send_buf)
   2449			return mad_send_wr;
   2450	}
   2451
   2452	list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
   2453			    agent_list) {
   2454		if (is_rmpp_data_mad(mad_agent_priv,
   2455				     mad_send_wr->send_buf.mad) &&
   2456		    &mad_send_wr->send_buf == send_buf)
   2457			return mad_send_wr;
   2458	}
   2459	return NULL;
   2460}
   2461
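/*
 * Adjust the timeout of an outstanding send; a timeout_ms of 0
 * cancels it (ib_cancel_mad() is typically just this call with a
 * timeout of 0), e.g. ib_modify_mad(send_buf, 0).
 */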
   2462int ib_modify_mad(struct ib_mad_send_buf *send_buf, u32 timeout_ms)
   2463{
   2464	struct ib_mad_agent_private *mad_agent_priv;
   2465	struct ib_mad_send_wr_private *mad_send_wr;
   2466	unsigned long flags;
   2467	int active;
   2468
   2469	if (!send_buf)
   2470		return -EINVAL;
   2471
   2472	mad_agent_priv = container_of(send_buf->mad_agent,
   2473				      struct ib_mad_agent_private, agent);
   2474	spin_lock_irqsave(&mad_agent_priv->lock, flags);
   2475	mad_send_wr = find_send_wr(mad_agent_priv, send_buf);
   2476	if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) {
   2477		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
   2478		return -EINVAL;
   2479	}
   2480
   2481	active = (!mad_send_wr->timeout || mad_send_wr->refcount > 1);
   2482	if (!timeout_ms) {
   2483		mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
   2484		mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
   2485	}
   2486
   2487	mad_send_wr->send_buf.timeout_ms = timeout_ms;
   2488	if (active)
   2489		mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
   2490	else
   2491		ib_reset_mad_timeout(mad_send_wr, timeout_ms);
   2492
   2493	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
   2494	return 0;
   2495}
   2496EXPORT_SYMBOL(ib_modify_mad);
   2497
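/*
 * Work handler for MADs that were completed locally rather than sent
 * on the wire: deliver the receive side to the destination agent,
 * then report the send completion to the originating agent.
 */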
   2498static void local_completions(struct work_struct *work)
   2499{
   2500	struct ib_mad_agent_private *mad_agent_priv;
   2501	struct ib_mad_local_private *local;
   2502	struct ib_mad_agent_private *recv_mad_agent;
   2503	unsigned long flags;
   2504	int free_mad;
   2505	struct ib_wc wc;
   2506	struct ib_mad_send_wc mad_send_wc;
   2507	bool opa;
   2508
   2509	mad_agent_priv =
   2510		container_of(work, struct ib_mad_agent_private, local_work);
   2511
   2512	opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device,
   2513			       mad_agent_priv->qp_info->port_priv->port_num);
   2514
   2515	spin_lock_irqsave(&mad_agent_priv->lock, flags);
   2516	while (!list_empty(&mad_agent_priv->local_list)) {
   2517		local = list_entry(mad_agent_priv->local_list.next,
   2518				   struct ib_mad_local_private,
   2519				   completion_list);
   2520		list_del(&local->completion_list);
   2521		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
   2522		free_mad = 0;
   2523		if (local->mad_priv) {
   2524			u8 base_version;
   2525			recv_mad_agent = local->recv_mad_agent;
   2526			if (!recv_mad_agent) {
   2527				dev_err(&mad_agent_priv->agent.device->dev,
   2528					"No receive MAD agent for local completion\n");
   2529				free_mad = 1;
   2530				goto local_send_completion;
   2531			}
   2532
   2533			/*
   2534			 * Defined behavior is to complete response
   2535			 * before request
   2536			 */
   2537			build_smp_wc(recv_mad_agent->agent.qp,
   2538				     local->mad_send_wr->send_wr.wr.wr_cqe,
   2539				     be16_to_cpu(IB_LID_PERMISSIVE),
   2540				     local->mad_send_wr->send_wr.pkey_index,
   2541				     recv_mad_agent->agent.port_num, &wc);
   2542
   2543			local->mad_priv->header.recv_wc.wc = &wc;
   2544
   2545			base_version = ((struct ib_mad_hdr *)(local->mad_priv->mad))->base_version;
   2546			if (opa && base_version == OPA_MGMT_BASE_VERSION) {
   2547				local->mad_priv->header.recv_wc.mad_len = local->return_wc_byte_len;
   2548				local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad);
   2549			} else {
   2550				local->mad_priv->header.recv_wc.mad_len = sizeof(struct ib_mad);
   2551				local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad);
   2552			}
   2553
   2554			INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.rmpp_list);
   2555			list_add(&local->mad_priv->header.recv_wc.recv_buf.list,
   2556				 &local->mad_priv->header.recv_wc.rmpp_list);
   2557			local->mad_priv->header.recv_wc.recv_buf.grh = NULL;
   2558			local->mad_priv->header.recv_wc.recv_buf.mad =
   2559						(struct ib_mad *)local->mad_priv->mad;
   2560			recv_mad_agent->agent.recv_handler(
   2561						&recv_mad_agent->agent,
   2562						&local->mad_send_wr->send_buf,
   2563						&local->mad_priv->header.recv_wc);
   2564			spin_lock_irqsave(&recv_mad_agent->lock, flags);
   2565			deref_mad_agent(recv_mad_agent);
   2566			spin_unlock_irqrestore(&recv_mad_agent->lock, flags);
   2567		}
   2568
   2569local_send_completion:
   2570		/* Complete send */
   2571		mad_send_wc.status = IB_WC_SUCCESS;
   2572		mad_send_wc.vendor_err = 0;
   2573		mad_send_wc.send_buf = &local->mad_send_wr->send_buf;
   2574		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
   2575						   &mad_send_wc);
   2576
   2577		spin_lock_irqsave(&mad_agent_priv->lock, flags);
   2578		deref_mad_agent(mad_agent_priv);
   2579		if (free_mad)
   2580			kfree(local->mad_priv);
   2581		kfree(local);
   2582	}
   2583	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
   2584}
   2585
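/*
 * Resend a MAD whose response timed out.  Returns 0 when the send was
 * requeued, -ETIMEDOUT once the retries are exhausted, or another
 * error if the resend itself failed.
 */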
   2586static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
   2587{
   2588	int ret;
   2589
   2590	if (!mad_send_wr->retries_left)
   2591		return -ETIMEDOUT;
   2592
   2593	mad_send_wr->retries_left--;
   2594	mad_send_wr->send_buf.retries++;
   2595
   2596	mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
   2597
   2598	if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) {
   2599		ret = ib_retry_rmpp(mad_send_wr);
   2600		switch (ret) {
   2601		case IB_RMPP_RESULT_UNHANDLED:
   2602			ret = ib_send_mad(mad_send_wr);
   2603			break;
   2604		case IB_RMPP_RESULT_CONSUMED:
   2605			ret = 0;
   2606			break;
   2607		default:
   2608			ret = -ECOMM;
   2609			break;
   2610		}
   2611	} else
   2612		ret = ib_send_mad(mad_send_wr);
   2613
   2614	if (!ret) {
   2615		mad_send_wr->refcount++;
   2616		list_add_tail(&mad_send_wr->agent_list,
   2617			      &mad_send_wr->mad_agent_priv->send_list);
   2618	}
   2619	return ret;
   2620}
   2621
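/*
 * Delayed work handler for the agent's wait list: retry sends whose
 * timeout has expired, report the ones that are out of retries with
 * IB_WC_RESP_TIMEOUT_ERR, and reschedule for the next pending
 * timeout.
 */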
   2622static void timeout_sends(struct work_struct *work)
   2623{
   2624	struct ib_mad_agent_private *mad_agent_priv;
   2625	struct ib_mad_send_wr_private *mad_send_wr;
   2626	struct ib_mad_send_wc mad_send_wc;
   2627	unsigned long flags, delay;
   2628
   2629	mad_agent_priv = container_of(work, struct ib_mad_agent_private,
   2630				      timed_work.work);
   2631	mad_send_wc.vendor_err = 0;
   2632
   2633	spin_lock_irqsave(&mad_agent_priv->lock, flags);
   2634	while (!list_empty(&mad_agent_priv->wait_list)) {
   2635		mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
   2636					 struct ib_mad_send_wr_private,
   2637					 agent_list);
   2638
   2639		if (time_after(mad_send_wr->timeout, jiffies)) {
   2640			delay = mad_send_wr->timeout - jiffies;
   2641			if ((long)delay <= 0)
   2642				delay = 1;
   2643			queue_delayed_work(mad_agent_priv->qp_info->
   2644					   port_priv->wq,
   2645					   &mad_agent_priv->timed_work, delay);
   2646			break;
   2647		}
   2648
   2649		list_del(&mad_send_wr->agent_list);
   2650		if (mad_send_wr->status == IB_WC_SUCCESS &&
   2651		    !retry_send(mad_send_wr))
   2652			continue;
   2653
   2654		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
   2655
   2656		if (mad_send_wr->status == IB_WC_SUCCESS)
   2657			mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR;
   2658		else
   2659			mad_send_wc.status = mad_send_wr->status;
   2660		mad_send_wc.send_buf = &mad_send_wr->send_buf;
   2661		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
   2662						   &mad_send_wc);
   2663
   2664		deref_mad_agent(mad_agent_priv);
   2665		spin_lock_irqsave(&mad_agent_priv->lock, flags);
   2666	}
   2667	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
   2668}
   2669
   2670/*
   2671 * Allocate receive MADs and post receive WRs for them
   2672 */
   2673static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
   2674				    struct ib_mad_private *mad)
   2675{
   2676	unsigned long flags;
   2677	int post, ret;
   2678	struct ib_mad_private *mad_priv;
   2679	struct ib_sge sg_list;
   2680	struct ib_recv_wr recv_wr;
   2681	struct ib_mad_queue *recv_queue = &qp_info->recv_queue;
   2682
   2683	/* Initialize common scatter list fields */
   2684	sg_list.lkey = qp_info->port_priv->pd->local_dma_lkey;
   2685
   2686	/* Initialize common receive WR fields */
   2687	recv_wr.next = NULL;
   2688	recv_wr.sg_list = &sg_list;
   2689	recv_wr.num_sge = 1;
   2690
   2691	do {
   2692		/* Allocate and map receive buffer */
   2693		if (mad) {
   2694			mad_priv = mad;
   2695			mad = NULL;
   2696		} else {
   2697			mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv),
   2698						     GFP_ATOMIC);
   2699			if (!mad_priv) {
   2700				ret = -ENOMEM;
   2701				break;
   2702			}
   2703		}
   2704		sg_list.length = mad_priv_dma_size(mad_priv);
   2705		sg_list.addr = ib_dma_map_single(qp_info->port_priv->device,
   2706						 &mad_priv->grh,
   2707						 mad_priv_dma_size(mad_priv),
   2708						 DMA_FROM_DEVICE);
   2709		if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device,
   2710						  sg_list.addr))) {
   2711			kfree(mad_priv);
   2712			ret = -ENOMEM;
   2713			break;
   2714		}
   2715		mad_priv->header.mapping = sg_list.addr;
   2716		mad_priv->header.mad_list.mad_queue = recv_queue;
   2717		mad_priv->header.mad_list.cqe.done = ib_mad_recv_done;
   2718		recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe;
   2719
   2720		/* Post receive WR */
   2721		spin_lock_irqsave(&recv_queue->lock, flags);
   2722		post = (++recv_queue->count < recv_queue->max_active);
   2723		list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list);
   2724		spin_unlock_irqrestore(&recv_queue->lock, flags);
   2725		ret = ib_post_recv(qp_info->qp, &recv_wr, NULL);
   2726		if (ret) {
   2727			spin_lock_irqsave(&recv_queue->lock, flags);
   2728			list_del(&mad_priv->header.mad_list.list);
   2729			recv_queue->count--;
   2730			spin_unlock_irqrestore(&recv_queue->lock, flags);
   2731			ib_dma_unmap_single(qp_info->port_priv->device,
   2732					    mad_priv->header.mapping,
   2733					    mad_priv_dma_size(mad_priv),
   2734					    DMA_FROM_DEVICE);
   2735			kfree(mad_priv);
   2736			dev_err(&qp_info->port_priv->device->dev,
   2737				"ib_post_recv failed: %d\n", ret);
   2738			break;
   2739		}
   2740	} while (post);
   2741
   2742	return ret;
   2743}
   2744
   2745/*
   2746 * Return all the posted receive MADs
   2747 */
   2748static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info)
   2749{
   2750	struct ib_mad_private_header *mad_priv_hdr;
   2751	struct ib_mad_private *recv;
   2752	struct ib_mad_list_head *mad_list;
   2753
   2754	if (!qp_info->qp)
   2755		return;
   2756
   2757	while (!list_empty(&qp_info->recv_queue.list)) {
   2758
   2759		mad_list = list_entry(qp_info->recv_queue.list.next,
   2760				      struct ib_mad_list_head, list);
   2761		mad_priv_hdr = container_of(mad_list,
   2762					    struct ib_mad_private_header,
   2763					    mad_list);
   2764		recv = container_of(mad_priv_hdr, struct ib_mad_private,
   2765				    header);
   2766
   2767		/* Remove from posted receive MAD list */
   2768		list_del(&mad_list->list);
   2769
   2770		ib_dma_unmap_single(qp_info->port_priv->device,
   2771				    recv->header.mapping,
   2772				    mad_priv_dma_size(recv),
   2773				    DMA_FROM_DEVICE);
   2774		kfree(recv);
   2775	}
   2776
   2777	qp_info->recv_queue.count = 0;
   2778}
   2779
   2780/*
   2781 * Start the port
   2782 */
   2783static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
   2784{
   2785	int ret, i;
   2786	struct ib_qp_attr *attr;
   2787	struct ib_qp *qp;
   2788	u16 pkey_index;
   2789
   2790	attr = kmalloc(sizeof *attr, GFP_KERNEL);
   2791	if (!attr)
   2792		return -ENOMEM;
   2793
   2794	ret = ib_find_pkey(port_priv->device, port_priv->port_num,
   2795			   IB_DEFAULT_PKEY_FULL, &pkey_index);
   2796	if (ret)
   2797		pkey_index = 0;
   2798
   2799	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
   2800		qp = port_priv->qp_info[i].qp;
   2801		if (!qp)
   2802			continue;
   2803
   2804		/*
   2805		 * PKey index for QP1 is irrelevant but
   2806		 * one is needed for the Reset to Init transition
   2807		 */
   2808		attr->qp_state = IB_QPS_INIT;
   2809		attr->pkey_index = pkey_index;
   2810		attr->qkey = (qp->qp_num == 0) ? 0 : IB_QP1_QKEY;
   2811		ret = ib_modify_qp(qp, attr, IB_QP_STATE |
   2812					     IB_QP_PKEY_INDEX | IB_QP_QKEY);
   2813		if (ret) {
   2814			dev_err(&port_priv->device->dev,
   2815				"Couldn't change QP%d state to INIT: %d\n",
   2816				i, ret);
   2817			goto out;
   2818		}
   2819
   2820		attr->qp_state = IB_QPS_RTR;
   2821		ret = ib_modify_qp(qp, attr, IB_QP_STATE);
   2822		if (ret) {
   2823			dev_err(&port_priv->device->dev,
   2824				"Couldn't change QP%d state to RTR: %d\n",
   2825				i, ret);
   2826			goto out;
   2827		}
   2828
   2829		attr->qp_state = IB_QPS_RTS;
   2830		attr->sq_psn = IB_MAD_SEND_Q_PSN;
   2831		ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN);
   2832		if (ret) {
   2833			dev_err(&port_priv->device->dev,
   2834				"Couldn't change QP%d state to RTS: %d\n",
   2835				i, ret);
   2836			goto out;
   2837		}
   2838	}
   2839
   2840	ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
   2841	if (ret) {
   2842		dev_err(&port_priv->device->dev,
   2843			"Failed to request completion notification: %d\n",
   2844			ret);
   2845		goto out;
   2846	}
   2847
   2848	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
   2849		if (!port_priv->qp_info[i].qp)
   2850			continue;
   2851
   2852		ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL);
   2853		if (ret) {
   2854			dev_err(&port_priv->device->dev,
   2855				"Couldn't post receive WRs\n");
   2856			goto out;
   2857		}
   2858	}
   2859out:
   2860	kfree(attr);
   2861	return ret;
   2862}
   2863
   2864static void qp_event_handler(struct ib_event *event, void *qp_context)
   2865{
   2866	struct ib_mad_qp_info	*qp_info = qp_context;
   2867
   2868	/* It's worse than that! He's dead, Jim! */
   2869	dev_err(&qp_info->port_priv->device->dev,
   2870		"Fatal error (%d) on MAD QP (%u)\n",
   2871		event->event, qp_info->qp->qp_num);
   2872}
   2873
   2874static void init_mad_queue(struct ib_mad_qp_info *qp_info,
   2875			   struct ib_mad_queue *mad_queue)
   2876{
   2877	mad_queue->qp_info = qp_info;
   2878	mad_queue->count = 0;
   2879	spin_lock_init(&mad_queue->lock);
   2880	INIT_LIST_HEAD(&mad_queue->list);
   2881}
   2882
   2883static void init_mad_qp(struct ib_mad_port_private *port_priv,
   2884			struct ib_mad_qp_info *qp_info)
   2885{
   2886	qp_info->port_priv = port_priv;
   2887	init_mad_queue(qp_info, &qp_info->send_queue);
   2888	init_mad_queue(qp_info, &qp_info->recv_queue);
   2889	INIT_LIST_HEAD(&qp_info->overflow_list);
   2890}
   2891
   2892static int create_mad_qp(struct ib_mad_qp_info *qp_info,
   2893			 enum ib_qp_type qp_type)
   2894{
   2895	struct ib_qp_init_attr	qp_init_attr;
   2896	int ret;
   2897
   2898	memset(&qp_init_attr, 0, sizeof qp_init_attr);
   2899	qp_init_attr.send_cq = qp_info->port_priv->cq;
   2900	qp_init_attr.recv_cq = qp_info->port_priv->cq;
   2901	qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
   2902	qp_init_attr.cap.max_send_wr = mad_sendq_size;
   2903	qp_init_attr.cap.max_recv_wr = mad_recvq_size;
   2904	qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG;
   2905	qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG;
   2906	qp_init_attr.qp_type = qp_type;
   2907	qp_init_attr.port_num = qp_info->port_priv->port_num;
   2908	qp_init_attr.qp_context = qp_info;
   2909	qp_init_attr.event_handler = qp_event_handler;
   2910	qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr);
   2911	if (IS_ERR(qp_info->qp)) {
   2912		dev_err(&qp_info->port_priv->device->dev,
   2913			"Couldn't create ib_mad QP%d\n",
   2914			get_spl_qp_index(qp_type));
   2915		ret = PTR_ERR(qp_info->qp);
   2916		goto error;
   2917	}
   2918	/* Use minimum queue sizes unless the CQ is resized */
   2919	qp_info->send_queue.max_active = mad_sendq_size;
   2920	qp_info->recv_queue.max_active = mad_recvq_size;
   2921	return 0;
   2922
   2923error:
   2924	return ret;
   2925}
   2926
   2927static void destroy_mad_qp(struct ib_mad_qp_info *qp_info)
   2928{
   2929	if (!qp_info->qp)
   2930		return;
   2931
   2932	ib_destroy_qp(qp_info->qp);
   2933}
   2934
   2935/*
   2936 * Open the port
    2937 * Create the QPs, PD, and CQ if needed
   2938 */
   2939static int ib_mad_port_open(struct ib_device *device,
   2940			    u32 port_num)
   2941{
   2942	int ret, cq_size;
   2943	struct ib_mad_port_private *port_priv;
   2944	unsigned long flags;
   2945	char name[sizeof "ib_mad123"];
   2946	int has_smi;
   2947
   2948	if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE))
   2949		return -EFAULT;
   2950
   2951	if (WARN_ON(rdma_cap_opa_mad(device, port_num) &&
   2952		    rdma_max_mad_size(device, port_num) < OPA_MGMT_MAD_SIZE))
   2953		return -EFAULT;
   2954
   2955	/* Create new device info */
   2956	port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
   2957	if (!port_priv)
   2958		return -ENOMEM;
   2959
   2960	port_priv->device = device;
   2961	port_priv->port_num = port_num;
   2962	spin_lock_init(&port_priv->reg_lock);
   2963	init_mad_qp(port_priv, &port_priv->qp_info[0]);
   2964	init_mad_qp(port_priv, &port_priv->qp_info[1]);
   2965
   2966	cq_size = mad_sendq_size + mad_recvq_size;
   2967	has_smi = rdma_cap_ib_smi(device, port_num);
   2968	if (has_smi)
   2969		cq_size *= 2;
   2970
   2971	port_priv->pd = ib_alloc_pd(device, 0);
   2972	if (IS_ERR(port_priv->pd)) {
   2973		dev_err(&device->dev, "Couldn't create ib_mad PD\n");
   2974		ret = PTR_ERR(port_priv->pd);
   2975		goto error3;
   2976	}
   2977
   2978	port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
   2979			IB_POLL_UNBOUND_WORKQUEUE);
   2980	if (IS_ERR(port_priv->cq)) {
   2981		dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
   2982		ret = PTR_ERR(port_priv->cq);
   2983		goto error4;
   2984	}
   2985
   2986	if (has_smi) {
   2987		ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI);
   2988		if (ret)
   2989			goto error6;
   2990	}
   2991	ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI);
   2992	if (ret)
   2993		goto error7;
   2994
   2995	snprintf(name, sizeof(name), "ib_mad%u", port_num);
   2996	port_priv->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
   2997	if (!port_priv->wq) {
   2998		ret = -ENOMEM;
   2999		goto error8;
   3000	}
   3001
   3002	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
   3003	list_add_tail(&port_priv->port_list, &ib_mad_port_list);
   3004	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
   3005
   3006	ret = ib_mad_port_start(port_priv);
   3007	if (ret) {
   3008		dev_err(&device->dev, "Couldn't start port\n");
   3009		goto error9;
   3010	}
   3011
   3012	return 0;
   3013
   3014error9:
   3015	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
   3016	list_del_init(&port_priv->port_list);
   3017	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
   3018
   3019	destroy_workqueue(port_priv->wq);
   3020error8:
   3021	destroy_mad_qp(&port_priv->qp_info[1]);
   3022error7:
   3023	destroy_mad_qp(&port_priv->qp_info[0]);
   3024error6:
   3025	ib_free_cq(port_priv->cq);
   3026	cleanup_recv_queue(&port_priv->qp_info[1]);
   3027	cleanup_recv_queue(&port_priv->qp_info[0]);
   3028error4:
   3029	ib_dealloc_pd(port_priv->pd);
   3030error3:
   3031	kfree(port_priv);
   3032
   3033	return ret;
   3034}
   3035
   3036/*
   3037 * Close the port
   3038 * If there are no classes using the port, free the port
    3039 * resources (CQ, PD, QPs) and remove the port's info structure
   3040 */
   3041static int ib_mad_port_close(struct ib_device *device, u32 port_num)
   3042{
   3043	struct ib_mad_port_private *port_priv;
   3044	unsigned long flags;
   3045
   3046	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
   3047	port_priv = __ib_get_mad_port(device, port_num);
   3048	if (port_priv == NULL) {
   3049		spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
   3050		dev_err(&device->dev, "Port %u not found\n", port_num);
   3051		return -ENODEV;
   3052	}
   3053	list_del_init(&port_priv->port_list);
   3054	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
   3055
   3056	destroy_workqueue(port_priv->wq);
   3057	destroy_mad_qp(&port_priv->qp_info[1]);
   3058	destroy_mad_qp(&port_priv->qp_info[0]);
   3059	ib_free_cq(port_priv->cq);
   3060	ib_dealloc_pd(port_priv->pd);
   3061	cleanup_recv_queue(&port_priv->qp_info[1]);
   3062	cleanup_recv_queue(&port_priv->qp_info[0]);
   3063	/* XXX: Handle deallocation of MAD registration tables */
   3064
   3065	kfree(port_priv);
   3066
   3067	return 0;
   3068}
   3069
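/*
 * Client ->add callback: bring up MAD and agent services on every
 * port of the device that supports IB MAD.  Fails with -EOPNOTSUPP
 * when no port does; on other errors the ports opened so far are
 * closed again.
 */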
   3070static int ib_mad_init_device(struct ib_device *device)
   3071{
   3072	int start, i;
   3073	unsigned int count = 0;
   3074	int ret;
   3075
   3076	start = rdma_start_port(device);
   3077
   3078	for (i = start; i <= rdma_end_port(device); i++) {
   3079		if (!rdma_cap_ib_mad(device, i))
   3080			continue;
   3081
   3082		ret = ib_mad_port_open(device, i);
   3083		if (ret) {
   3084			dev_err(&device->dev, "Couldn't open port %d\n", i);
   3085			goto error;
   3086		}
   3087		ret = ib_agent_port_open(device, i);
   3088		if (ret) {
   3089			dev_err(&device->dev,
   3090				"Couldn't open port %d for agents\n", i);
   3091			goto error_agent;
   3092		}
   3093		count++;
   3094	}
   3095	if (!count)
   3096		return -EOPNOTSUPP;
   3097
   3098	return 0;
   3099
   3100error_agent:
   3101	if (ib_mad_port_close(device, i))
   3102		dev_err(&device->dev, "Couldn't close port %d\n", i);
   3103
   3104error:
   3105	while (--i >= start) {
   3106		if (!rdma_cap_ib_mad(device, i))
   3107			continue;
   3108
   3109		if (ib_agent_port_close(device, i))
   3110			dev_err(&device->dev,
   3111				"Couldn't close port %d for agents\n", i);
   3112		if (ib_mad_port_close(device, i))
   3113			dev_err(&device->dev, "Couldn't close port %d\n", i);
   3114	}
   3115	return ret;
   3116}
   3117
   3118static void ib_mad_remove_device(struct ib_device *device, void *client_data)
   3119{
   3120	unsigned int i;
   3121
   3122	rdma_for_each_port (device, i) {
   3123		if (!rdma_cap_ib_mad(device, i))
   3124			continue;
   3125
   3126		if (ib_agent_port_close(device, i))
   3127			dev_err(&device->dev,
   3128				"Couldn't close port %u for agents\n", i);
   3129		if (ib_mad_port_close(device, i))
   3130			dev_err(&device->dev, "Couldn't close port %u\n", i);
   3131	}
   3132}
   3133
   3134static struct ib_client mad_client = {
   3135	.name   = "mad",
   3136	.add = ib_mad_init_device,
   3137	.remove = ib_mad_remove_device
   3138};
   3139
   3140int ib_mad_init(void)
   3141{
   3142	mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE);
   3143	mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE);
   3144
   3145	mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE);
   3146	mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE);
   3147
   3148	INIT_LIST_HEAD(&ib_mad_port_list);
   3149
   3150	if (ib_register_client(&mad_client)) {
   3151		pr_err("Couldn't register ib_mad client\n");
   3152		return -EINVAL;
   3153	}
   3154
   3155	return 0;
   3156}
   3157
   3158void ib_mad_cleanup(void)
   3159{
   3160	ib_unregister_client(&mad_client);
   3161}