cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

main.c (90459B)


      1/*
      2 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
      3 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
      4 *
      5 * This software is available to you under a choice of one of two
      6 * licenses.  You may choose to be licensed under the terms of the GNU
      7 * General Public License (GPL) Version 2, available from the file
      8 * COPYING in the main directory of this source tree, or the
      9 * OpenIB.org BSD license below:
     10 *
     11 *     Redistribution and use in source and binary forms, with or
     12 *     without modification, are permitted provided that the following
     13 *     conditions are met:
     14 *
     15 *      - Redistributions of source code must retain the above
     16 *        copyright notice, this list of conditions and the following
     17 *        disclaimer.
     18 *
     19 *      - Redistributions in binary form must reproduce the above
     20 *        copyright notice, this list of conditions and the following
     21 *        disclaimer in the documentation and/or other materials
     22 *        provided with the distribution.
     23 *
     24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
     27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
     28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
     29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     31 * SOFTWARE.
     32 */
     33
     34#include <linux/module.h>
     35#include <linux/init.h>
     36#include <linux/slab.h>
     37#include <linux/errno.h>
     38#include <linux/netdevice.h>
     39#include <linux/inetdevice.h>
     40#include <linux/rtnetlink.h>
     41#include <linux/if_vlan.h>
     42#include <linux/sched/mm.h>
     43#include <linux/sched/task.h>
     44
     45#include <net/ipv6.h>
     46#include <net/addrconf.h>
     47#include <net/devlink.h>
     48
     49#include <rdma/ib_smi.h>
     50#include <rdma/ib_user_verbs.h>
     51#include <rdma/ib_addr.h>
     52#include <rdma/ib_cache.h>
     53
     54#include <net/bonding.h>
     55
     56#include <linux/mlx4/driver.h>
     57#include <linux/mlx4/cmd.h>
     58#include <linux/mlx4/qp.h>
     59
     60#include "mlx4_ib.h"
     61#include <rdma/mlx4-abi.h>
     62
     63#define DRV_NAME	MLX4_IB_DRV_NAME
     64#define DRV_VERSION	"4.0-0"
     65
     66#define MLX4_IB_FLOW_MAX_PRIO 0xFFF
     67#define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF
     68#define MLX4_IB_CARD_REV_A0   0xA0
     69
     70MODULE_AUTHOR("Roland Dreier");
     71MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
     72MODULE_LICENSE("Dual BSD/GPL");
     73
     74int mlx4_ib_sm_guid_assign = 0;
     75module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
     76MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 0)");
     77
     78static const char mlx4_ib_version[] =
     79	DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
     80	DRV_VERSION "\n";
     81
     82static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
     83static enum rdma_link_layer mlx4_ib_port_link_layer(struct ib_device *device,
     84						    u32 port_num);
     85
     86static struct workqueue_struct *wq;
     87
     88static int check_flow_steering_support(struct mlx4_dev *dev)
     89{
     90	int eth_num_ports = 0;
     91	int ib_num_ports = 0;
     92
     93	int dmfs = dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED;
     94
     95	if (dmfs) {
     96		int i;
     97		mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
     98			eth_num_ports++;
     99		mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
    100			ib_num_ports++;
    101		dmfs &= (!ib_num_ports ||
    102			 (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB)) &&
    103			(!eth_num_ports ||
    104			 (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN));
    105		if (ib_num_ports && mlx4_is_mfunc(dev)) {
    106			pr_warn("Device managed flow steering is unavailable for IB port in multifunction env.\n");
    107			dmfs = 0;
    108		}
    109	}
    110	return dmfs;
    111}
    112
    113static int num_ib_ports(struct mlx4_dev *dev)
    114{
    115	int ib_ports = 0;
    116	int i;
    117
    118	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
    119		ib_ports++;
    120
    121	return ib_ports;
    122}
    123
    124static struct net_device *mlx4_ib_get_netdev(struct ib_device *device,
    125					     u32 port_num)
    126{
    127	struct mlx4_ib_dev *ibdev = to_mdev(device);
    128	struct net_device *dev;
    129
    130	rcu_read_lock();
    131	dev = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port_num);
    132
    133	if (dev) {
    134		if (mlx4_is_bonded(ibdev->dev)) {
    135			struct net_device *upper = NULL;
    136
    137			upper = netdev_master_upper_dev_get_rcu(dev);
    138			if (upper) {
    139				struct net_device *active;
    140
    141				active = bond_option_active_slave_get_rcu(netdev_priv(upper));
    142				if (active)
    143					dev = active;
    144			}
    145		}
    146	}
    147	if (dev)
    148		dev_hold(dev);
    149
    150	rcu_read_unlock();
    151	return dev;
    152}
    153
    154static int mlx4_ib_update_gids_v1(struct gid_entry *gids,
    155				  struct mlx4_ib_dev *ibdev,
    156				  u32 port_num)
    157{
    158	struct mlx4_cmd_mailbox *mailbox;
    159	int err;
    160	struct mlx4_dev *dev = ibdev->dev;
    161	int i;
    162	union ib_gid *gid_tbl;
    163
    164	mailbox = mlx4_alloc_cmd_mailbox(dev);
    165	if (IS_ERR(mailbox))
    166		return -ENOMEM;
    167
    168	gid_tbl = mailbox->buf;
    169
    170	for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
    171		memcpy(&gid_tbl[i], &gids[i].gid, sizeof(union ib_gid));
    172
    173	err = mlx4_cmd(dev, mailbox->dma,
    174		       MLX4_SET_PORT_GID_TABLE << 8 | port_num,
    175		       1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
    176		       MLX4_CMD_WRAPPED);
    177	if (mlx4_is_bonded(dev))
    178		err += mlx4_cmd(dev, mailbox->dma,
    179				MLX4_SET_PORT_GID_TABLE << 8 | 2,
    180				1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
    181				MLX4_CMD_WRAPPED);
    182
    183	mlx4_free_cmd_mailbox(dev, mailbox);
    184	return err;
    185}
    186
    187static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
    188				     struct mlx4_ib_dev *ibdev,
    189				     u32 port_num)
    190{
    191	struct mlx4_cmd_mailbox *mailbox;
    192	int err;
    193	struct mlx4_dev *dev = ibdev->dev;
    194	int i;
    195	struct {
    196		union ib_gid	gid;
    197		__be32		rsrvd1[2];
    198		__be16		rsrvd2;
    199		u8		type;
    200		u8		version;
    201		__be32		rsrvd3;
    202	} *gid_tbl;
    203
    204	mailbox = mlx4_alloc_cmd_mailbox(dev);
    205	if (IS_ERR(mailbox))
    206		return -ENOMEM;
    207
    208	gid_tbl = mailbox->buf;
    209	for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
    210		memcpy(&gid_tbl[i].gid, &gids[i].gid, sizeof(union ib_gid));
    211		if (gids[i].gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
    212			gid_tbl[i].version = 2;
    213			if (!ipv6_addr_v4mapped((struct in6_addr *)&gids[i].gid))
    214				gid_tbl[i].type = 1;
    215		}
    216	}
    217
    218	err = mlx4_cmd(dev, mailbox->dma,
    219		       MLX4_SET_PORT_ROCE_ADDR << 8 | port_num,
    220		       1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
    221		       MLX4_CMD_WRAPPED);
    222	if (mlx4_is_bonded(dev))
    223		err += mlx4_cmd(dev, mailbox->dma,
    224				MLX4_SET_PORT_ROCE_ADDR << 8 | 2,
    225				1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
    226				MLX4_CMD_WRAPPED);
    227
    228	mlx4_free_cmd_mailbox(dev, mailbox);
    229	return err;
    230}
    231
    232static int mlx4_ib_update_gids(struct gid_entry *gids,
    233			       struct mlx4_ib_dev *ibdev,
    234			       u32 port_num)
    235{
    236	if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
    237		return mlx4_ib_update_gids_v1_v2(gids, ibdev, port_num);
    238
    239	return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
    240}
    241
    242static void free_gid_entry(struct gid_entry *entry)
    243{
    244	memset(&entry->gid, 0, sizeof(entry->gid));
    245	kfree(entry->ctx);
    246	entry->ctx = NULL;
    247}
    248
    249static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
    250{
    251	struct mlx4_ib_dev *ibdev = to_mdev(attr->device);
    252	struct mlx4_ib_iboe *iboe = &ibdev->iboe;
    253	struct mlx4_port_gid_table   *port_gid_table;
    254	int free = -1, found = -1;
    255	int ret = 0;
    256	int hw_update = 0;
    257	int i;
    258	struct gid_entry *gids = NULL;
    259	u16 vlan_id = 0xffff;
    260	u8 mac[ETH_ALEN];
    261
    262	if (!rdma_cap_roce_gid_table(attr->device, attr->port_num))
    263		return -EINVAL;
    264
    265	if (attr->port_num > MLX4_MAX_PORTS)
    266		return -EINVAL;
    267
    268	if (!context)
    269		return -EINVAL;
    270
    271	ret = rdma_read_gid_l2_fields(attr, &vlan_id, &mac[0]);
    272	if (ret)
    273		return ret;
    274	port_gid_table = &iboe->gids[attr->port_num - 1];
    275	spin_lock_bh(&iboe->lock);
    276	for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
    277		if (!memcmp(&port_gid_table->gids[i].gid,
    278			    &attr->gid, sizeof(attr->gid)) &&
    279		    port_gid_table->gids[i].gid_type == attr->gid_type &&
    280		    port_gid_table->gids[i].vlan_id == vlan_id)  {
    281			found = i;
    282			break;
    283		}
    284		if (free < 0 && rdma_is_zero_gid(&port_gid_table->gids[i].gid))
    285			free = i; /* HW has space */
    286	}
    287
    288	if (found < 0) {
    289		if (free < 0) {
    290			ret = -ENOSPC;
    291		} else {
    292			port_gid_table->gids[free].ctx = kmalloc(sizeof(*port_gid_table->gids[free].ctx), GFP_ATOMIC);
    293			if (!port_gid_table->gids[free].ctx) {
    294				ret = -ENOMEM;
    295			} else {
    296				*context = port_gid_table->gids[free].ctx;
    297				memcpy(&port_gid_table->gids[free].gid,
    298				       &attr->gid, sizeof(attr->gid));
    299				port_gid_table->gids[free].gid_type = attr->gid_type;
    300				port_gid_table->gids[free].vlan_id = vlan_id;
    301				port_gid_table->gids[free].ctx->real_index = free;
    302				port_gid_table->gids[free].ctx->refcount = 1;
    303				hw_update = 1;
    304			}
    305		}
    306	} else {
    307		struct gid_cache_context *ctx = port_gid_table->gids[found].ctx;
    308		*context = ctx;
    309		ctx->refcount++;
    310	}
    311	if (!ret && hw_update) {
    312		gids = kmalloc_array(MLX4_MAX_PORT_GIDS, sizeof(*gids),
    313				     GFP_ATOMIC);
    314		if (!gids) {
    315			ret = -ENOMEM;
    316			*context = NULL;
    317			free_gid_entry(&port_gid_table->gids[free]);
    318		} else {
    319			for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
    320				memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
    321				gids[i].gid_type = port_gid_table->gids[i].gid_type;
    322			}
    323		}
    324	}
    325	spin_unlock_bh(&iboe->lock);
    326
    327	if (!ret && hw_update) {
    328		ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num);
    329		if (ret) {
    330			spin_lock_bh(&iboe->lock);
    331			*context = NULL;
    332			free_gid_entry(&port_gid_table->gids[free]);
    333			spin_unlock_bh(&iboe->lock);
    334		}
    335		kfree(gids);
    336	}
    337
    338	return ret;
    339}
    340
    341static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context)
    342{
    343	struct gid_cache_context *ctx = *context;
    344	struct mlx4_ib_dev *ibdev = to_mdev(attr->device);
    345	struct mlx4_ib_iboe *iboe = &ibdev->iboe;
    346	struct mlx4_port_gid_table   *port_gid_table;
    347	int ret = 0;
    348	int hw_update = 0;
    349	struct gid_entry *gids = NULL;
    350
    351	if (!rdma_cap_roce_gid_table(attr->device, attr->port_num))
    352		return -EINVAL;
    353
    354	if (attr->port_num > MLX4_MAX_PORTS)
    355		return -EINVAL;
    356
    357	port_gid_table = &iboe->gids[attr->port_num - 1];
    358	spin_lock_bh(&iboe->lock);
    359	if (ctx) {
    360		ctx->refcount--;
    361		if (!ctx->refcount) {
    362			unsigned int real_index = ctx->real_index;
    363
    364			free_gid_entry(&port_gid_table->gids[real_index]);
    365			hw_update = 1;
    366		}
    367	}
    368	if (!ret && hw_update) {
    369		int i;
    370
    371		gids = kmalloc_array(MLX4_MAX_PORT_GIDS, sizeof(*gids),
    372				     GFP_ATOMIC);
    373		if (!gids) {
    374			ret = -ENOMEM;
    375		} else {
    376			for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
    377				memcpy(&gids[i].gid,
    378				       &port_gid_table->gids[i].gid,
    379				       sizeof(union ib_gid));
    380				gids[i].gid_type =
    381				    port_gid_table->gids[i].gid_type;
    382			}
    383		}
    384	}
    385	spin_unlock_bh(&iboe->lock);
    386
    387	if (!ret && hw_update) {
    388		ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num);
    389		kfree(gids);
    390	}
    391	return ret;
    392}
    393
    394int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
    395				    const struct ib_gid_attr *attr)
    396{
    397	struct mlx4_ib_iboe *iboe = &ibdev->iboe;
    398	struct gid_cache_context *ctx = NULL;
    399	struct mlx4_port_gid_table   *port_gid_table;
    400	int real_index = -EINVAL;
    401	int i;
    402	unsigned long flags;
    403	u32 port_num = attr->port_num;
    404
    405	if (port_num > MLX4_MAX_PORTS)
    406		return -EINVAL;
    407
    408	if (mlx4_is_bonded(ibdev->dev))
    409		port_num = 1;
    410
    411	if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
    412		return attr->index;
    413
    414	spin_lock_irqsave(&iboe->lock, flags);
    415	port_gid_table = &iboe->gids[port_num - 1];
    416
    417	for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
    418		if (!memcmp(&port_gid_table->gids[i].gid,
    419			    &attr->gid, sizeof(attr->gid)) &&
    420		    attr->gid_type == port_gid_table->gids[i].gid_type) {
    421			ctx = port_gid_table->gids[i].ctx;
    422			break;
    423		}
    424	if (ctx)
    425		real_index = ctx->real_index;
    426	spin_unlock_irqrestore(&iboe->lock, flags);
    427	return real_index;
    428}
    429
    430static int mlx4_ib_query_device(struct ib_device *ibdev,
    431				struct ib_device_attr *props,
    432				struct ib_udata *uhw)
    433{
    434	struct mlx4_ib_dev *dev = to_mdev(ibdev);
    435	struct ib_smp *in_mad  = NULL;
    436	struct ib_smp *out_mad = NULL;
    437	int err;
    438	int have_ib_ports;
    439	struct mlx4_uverbs_ex_query_device cmd;
    440	struct mlx4_uverbs_ex_query_device_resp resp = {};
    441	struct mlx4_clock_params clock_params;
    442
    443	if (uhw->inlen) {
    444		if (uhw->inlen < sizeof(cmd))
    445			return -EINVAL;
    446
    447		err = ib_copy_from_udata(&cmd, uhw, sizeof(cmd));
    448		if (err)
    449			return err;
    450
    451		if (cmd.comp_mask)
    452			return -EINVAL;
    453
    454		if (cmd.reserved)
    455			return -EINVAL;
    456	}
    457
    458	resp.response_length = offsetof(typeof(resp), response_length) +
    459		sizeof(resp.response_length);
    460	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
    461	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
    462	err = -ENOMEM;
    463	if (!in_mad || !out_mad)
    464		goto out;
    465
    466	ib_init_query_mad(in_mad);
    467	in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
    468
    469	err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS,
    470			   1, NULL, NULL, in_mad, out_mad);
    471	if (err)
    472		goto out;
    473
    474	memset(props, 0, sizeof *props);
    475
    476	have_ib_ports = num_ib_ports(dev->dev);
    477
    478	props->fw_ver = dev->dev->caps.fw_ver;
    479	props->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT |
    480		IB_DEVICE_PORT_ACTIVE_EVENT		|
    481		IB_DEVICE_SYS_IMAGE_GUID		|
    482		IB_DEVICE_RC_RNR_NAK_GEN;
    483	props->kernel_cap_flags = IBK_BLOCK_MULTICAST_LOOPBACK;
    484	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
    485		props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
    486	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
    487		props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
    488	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM && have_ib_ports)
    489		props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
    490	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT)
    491		props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
    492	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
    493		props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
    494	if (dev->dev->caps.max_gso_sz &&
    495	    (dev->dev->rev_id != MLX4_IB_CARD_REV_A0) &&
    496	    (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH))
    497		props->kernel_cap_flags |= IBK_UD_TSO;
    498	if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
    499		props->kernel_cap_flags |= IBK_LOCAL_DMA_LKEY;
    500	if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) &&
    501	    (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
    502	    (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
    503		props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
    504	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)
    505		props->device_cap_flags |= IB_DEVICE_XRC;
    506	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW)
    507		props->device_cap_flags |= IB_DEVICE_MEM_WINDOW;
    508	if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
    509		if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_WIN_TYPE_2B)
    510			props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
    511		else
    512			props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
    513	}
    514	if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
    515		props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
    516
    517	props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
    518
    519	props->vendor_id	   = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
    520		0xffffff;
    521	props->vendor_part_id	   = dev->dev->persist->pdev->device;
    522	props->hw_ver		   = be32_to_cpup((__be32 *) (out_mad->data + 32));
    523	memcpy(&props->sys_image_guid, out_mad->data +	4, 8);
    524
    525	props->max_mr_size	   = ~0ull;
    526	props->page_size_cap	   = dev->dev->caps.page_size_cap;
    527	props->max_qp		   = dev->dev->quotas.qp;
    528	props->max_qp_wr	   = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
    529	props->max_send_sge =
    530		min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg);
    531	props->max_recv_sge =
    532		min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg);
    533	props->max_sge_rd = MLX4_MAX_SGE_RD;
    534	props->max_cq		   = dev->dev->quotas.cq;
    535	props->max_cqe		   = dev->dev->caps.max_cqes;
    536	props->max_mr		   = dev->dev->quotas.mpt;
    537	props->max_pd		   = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
    538	props->max_qp_rd_atom	   = dev->dev->caps.max_qp_dest_rdma;
    539	props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
    540	props->max_res_rd_atom	   = props->max_qp_rd_atom * props->max_qp;
    541	props->max_srq		   = dev->dev->quotas.srq;
    542	props->max_srq_wr	   = dev->dev->caps.max_srq_wqes - 1;
    543	props->max_srq_sge	   = dev->dev->caps.max_srq_sge;
    544	props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES;
    545	props->local_ca_ack_delay  = dev->dev->caps.local_ca_ack_delay;
    546	props->atomic_cap	   = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
    547		IB_ATOMIC_HCA : IB_ATOMIC_NONE;
    548	props->masked_atomic_cap   = props->atomic_cap;
    549	props->max_pkeys	   = dev->dev->caps.pkey_table_len[1];
    550	props->max_mcast_grp	   = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
    551	props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
    552	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
    553					   props->max_mcast_grp;
    554	props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL;
    555	props->timestamp_mask = 0xFFFFFFFFFFFFULL;
    556	props->max_ah = INT_MAX;
    557
    558	if (mlx4_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET ||
    559	    mlx4_ib_port_link_layer(ibdev, 2) == IB_LINK_LAYER_ETHERNET) {
    560		if (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) {
    561			props->rss_caps.max_rwq_indirection_tables =
    562				props->max_qp;
    563			props->rss_caps.max_rwq_indirection_table_size =
    564				dev->dev->caps.max_rss_tbl_sz;
    565			props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET;
    566			props->max_wq_type_rq = props->max_qp;
    567		}
    568
    569		if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)
    570			props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
    571	}
    572
    573	props->cq_caps.max_cq_moderation_count = MLX4_MAX_CQ_COUNT;
    574	props->cq_caps.max_cq_moderation_period = MLX4_MAX_CQ_PERIOD;
    575
    576	if (uhw->outlen >= resp.response_length + sizeof(resp.hca_core_clock_offset)) {
    577		resp.response_length += sizeof(resp.hca_core_clock_offset);
    578		if (!mlx4_get_internal_clock_params(dev->dev, &clock_params)) {
    579			resp.comp_mask |= MLX4_IB_QUERY_DEV_RESP_MASK_CORE_CLOCK_OFFSET;
    580			resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE;
    581		}
    582	}
    583
    584	if (uhw->outlen >= resp.response_length +
    585	    sizeof(resp.max_inl_recv_sz)) {
    586		resp.response_length += sizeof(resp.max_inl_recv_sz);
    587		resp.max_inl_recv_sz  = dev->dev->caps.max_rq_sg *
    588			sizeof(struct mlx4_wqe_data_seg);
    589	}
    590
    591	if (offsetofend(typeof(resp), rss_caps) <= uhw->outlen) {
    592		if (props->rss_caps.supported_qpts) {
    593			resp.rss_caps.rx_hash_function =
    594				MLX4_IB_RX_HASH_FUNC_TOEPLITZ;
    595
    596			resp.rss_caps.rx_hash_fields_mask =
    597				MLX4_IB_RX_HASH_SRC_IPV4 |
    598				MLX4_IB_RX_HASH_DST_IPV4 |
    599				MLX4_IB_RX_HASH_SRC_IPV6 |
    600				MLX4_IB_RX_HASH_DST_IPV6 |
    601				MLX4_IB_RX_HASH_SRC_PORT_TCP |
    602				MLX4_IB_RX_HASH_DST_PORT_TCP |
    603				MLX4_IB_RX_HASH_SRC_PORT_UDP |
    604				MLX4_IB_RX_HASH_DST_PORT_UDP;
    605
    606			if (dev->dev->caps.tunnel_offload_mode ==
    607			    MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)
    608				resp.rss_caps.rx_hash_fields_mask |=
    609					MLX4_IB_RX_HASH_INNER;
    610		}
    611		resp.response_length = offsetof(typeof(resp), rss_caps) +
    612				       sizeof(resp.rss_caps);
    613	}
    614
    615	if (offsetofend(typeof(resp), tso_caps) <= uhw->outlen) {
    616		if (dev->dev->caps.max_gso_sz &&
    617		    ((mlx4_ib_port_link_layer(ibdev, 1) ==
    618		    IB_LINK_LAYER_ETHERNET) ||
    619		    (mlx4_ib_port_link_layer(ibdev, 2) ==
    620		    IB_LINK_LAYER_ETHERNET))) {
    621			resp.tso_caps.max_tso = dev->dev->caps.max_gso_sz;
    622			resp.tso_caps.supported_qpts |=
    623				1 << IB_QPT_RAW_PACKET;
    624		}
    625		resp.response_length = offsetof(typeof(resp), tso_caps) +
    626				       sizeof(resp.tso_caps);
    627	}
    628
    629	if (uhw->outlen) {
    630		err = ib_copy_to_udata(uhw, &resp, resp.response_length);
    631		if (err)
    632			goto out;
    633	}
    634out:
    635	kfree(in_mad);
    636	kfree(out_mad);
    637
    638	return err;
    639}
    640
    641static enum rdma_link_layer
    642mlx4_ib_port_link_layer(struct ib_device *device, u32 port_num)
    643{
    644	struct mlx4_dev *dev = to_mdev(device)->dev;
    645
    646	return dev->caps.port_mask[port_num] == MLX4_PORT_TYPE_IB ?
    647		IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
    648}
    649
    650static int ib_link_query_port(struct ib_device *ibdev, u32 port,
    651			      struct ib_port_attr *props, int netw_view)
    652{
    653	struct ib_smp *in_mad  = NULL;
    654	struct ib_smp *out_mad = NULL;
    655	int ext_active_speed;
    656	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
    657	int err = -ENOMEM;
    658
    659	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
    660	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
    661	if (!in_mad || !out_mad)
    662		goto out;
    663
    664	ib_init_query_mad(in_mad);
    665	in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
    666	in_mad->attr_mod = cpu_to_be32(port);
    667
    668	if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
    669		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
    670
    671	err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
    672				in_mad, out_mad);
    673	if (err)
    674		goto out;
    675
    676
    677	props->lid		= be16_to_cpup((__be16 *) (out_mad->data + 16));
    678	props->lmc		= out_mad->data[34] & 0x7;
    679	props->sm_lid		= be16_to_cpup((__be16 *) (out_mad->data + 18));
    680	props->sm_sl		= out_mad->data[36] & 0xf;
    681	props->state		= out_mad->data[32] & 0xf;
    682	props->phys_state	= out_mad->data[33] >> 4;
    683	props->port_cap_flags	= be32_to_cpup((__be32 *) (out_mad->data + 20));
    684	if (netw_view)
    685		props->gid_tbl_len = out_mad->data[50];
    686	else
    687		props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
    688	props->max_msg_sz	= to_mdev(ibdev)->dev->caps.max_msg_sz;
    689	props->pkey_tbl_len	= to_mdev(ibdev)->dev->caps.pkey_table_len[port];
    690	props->bad_pkey_cntr	= be16_to_cpup((__be16 *) (out_mad->data + 46));
    691	props->qkey_viol_cntr	= be16_to_cpup((__be16 *) (out_mad->data + 48));
    692	props->active_width	= out_mad->data[31] & 0xf;
    693	props->active_speed	= out_mad->data[35] >> 4;
    694	props->max_mtu		= out_mad->data[41] & 0xf;
    695	props->active_mtu	= out_mad->data[36] >> 4;
    696	props->subnet_timeout	= out_mad->data[51] & 0x1f;
    697	props->max_vl_num	= out_mad->data[37] >> 4;
    698	props->init_type_reply	= out_mad->data[41] >> 4;
    699
    700	/* Check if extended speeds (EDR/FDR/...) are supported */
    701	if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
    702		ext_active_speed = out_mad->data[62] >> 4;
    703
    704		switch (ext_active_speed) {
    705		case 1:
    706			props->active_speed = IB_SPEED_FDR;
    707			break;
    708		case 2:
    709			props->active_speed = IB_SPEED_EDR;
    710			break;
    711		}
    712	}
    713
     714	/* If reported active speed is QDR, check if it is FDR-10 */
    715	if (props->active_speed == IB_SPEED_QDR) {
    716		ib_init_query_mad(in_mad);
    717		in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
    718		in_mad->attr_mod = cpu_to_be32(port);
    719
    720		err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port,
    721				   NULL, NULL, in_mad, out_mad);
    722		if (err)
    723			goto out;
    724
    725		/* Checking LinkSpeedActive for FDR-10 */
    726		if (out_mad->data[15] & 0x1)
    727			props->active_speed = IB_SPEED_FDR10;
    728	}
    729
    730	/* Avoid wrong speed value returned by FW if the IB link is down. */
    731	if (props->state == IB_PORT_DOWN)
    732		 props->active_speed = IB_SPEED_SDR;
    733
    734out:
    735	kfree(in_mad);
    736	kfree(out_mad);
    737	return err;
    738}
    739
    740static u8 state_to_phys_state(enum ib_port_state state)
    741{
    742	return state == IB_PORT_ACTIVE ?
    743		IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
    744}
    745
    746static int eth_link_query_port(struct ib_device *ibdev, u32 port,
    747			       struct ib_port_attr *props)
    748{
    749
    750	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
    751	struct mlx4_ib_iboe *iboe = &mdev->iboe;
    752	struct net_device *ndev;
    753	enum ib_mtu tmp;
    754	struct mlx4_cmd_mailbox *mailbox;
    755	int err = 0;
    756	int is_bonded = mlx4_is_bonded(mdev->dev);
    757
    758	mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
    759	if (IS_ERR(mailbox))
    760		return PTR_ERR(mailbox);
    761
    762	err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0,
    763			   MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
    764			   MLX4_CMD_WRAPPED);
    765	if (err)
    766		goto out;
    767
    768	props->active_width	=  (((u8 *)mailbox->buf)[5] == 0x40) ||
    769				   (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ?
    770					   IB_WIDTH_4X : IB_WIDTH_1X;
    771	props->active_speed	=  (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ?
    772					   IB_SPEED_FDR : IB_SPEED_QDR;
    773	props->port_cap_flags	= IB_PORT_CM_SUP;
    774	props->ip_gids = true;
    775	props->gid_tbl_len	= mdev->dev->caps.gid_table_len[port];
    776	props->max_msg_sz	= mdev->dev->caps.max_msg_sz;
    777	if (mdev->dev->caps.pkey_table_len[port])
    778		props->pkey_tbl_len = 1;
    779	props->max_mtu		= IB_MTU_4096;
    780	props->max_vl_num	= 2;
    781	props->state		= IB_PORT_DOWN;
    782	props->phys_state	= state_to_phys_state(props->state);
    783	props->active_mtu	= IB_MTU_256;
    784	spin_lock_bh(&iboe->lock);
    785	ndev = iboe->netdevs[port - 1];
    786	if (ndev && is_bonded) {
    787		rcu_read_lock(); /* required to get upper dev */
    788		ndev = netdev_master_upper_dev_get_rcu(ndev);
    789		rcu_read_unlock();
    790	}
    791	if (!ndev)
    792		goto out_unlock;
    793
    794	tmp = iboe_get_mtu(ndev->mtu);
    795	props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256;
    796
    797	props->state		= (netif_running(ndev) && netif_carrier_ok(ndev)) ?
    798					IB_PORT_ACTIVE : IB_PORT_DOWN;
    799	props->phys_state	= state_to_phys_state(props->state);
    800out_unlock:
    801	spin_unlock_bh(&iboe->lock);
    802out:
    803	mlx4_free_cmd_mailbox(mdev->dev, mailbox);
    804	return err;
    805}
    806
    807int __mlx4_ib_query_port(struct ib_device *ibdev, u32 port,
    808			 struct ib_port_attr *props, int netw_view)
    809{
    810	int err;
    811
    812	/* props being zeroed by the caller, avoid zeroing it here */
    813
    814	err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
    815		ib_link_query_port(ibdev, port, props, netw_view) :
    816				eth_link_query_port(ibdev, port, props);
    817
    818	return err;
    819}
    820
    821static int mlx4_ib_query_port(struct ib_device *ibdev, u32 port,
    822			      struct ib_port_attr *props)
    823{
    824	/* returns host view */
    825	return __mlx4_ib_query_port(ibdev, port, props, 0);
    826}
    827
    828int __mlx4_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
    829			union ib_gid *gid, int netw_view)
    830{
    831	struct ib_smp *in_mad  = NULL;
    832	struct ib_smp *out_mad = NULL;
    833	int err = -ENOMEM;
    834	struct mlx4_ib_dev *dev = to_mdev(ibdev);
    835	int clear = 0;
    836	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
    837
    838	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
    839	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
    840	if (!in_mad || !out_mad)
    841		goto out;
    842
    843	ib_init_query_mad(in_mad);
    844	in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
    845	in_mad->attr_mod = cpu_to_be32(port);
    846
    847	if (mlx4_is_mfunc(dev->dev) && netw_view)
    848		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
    849
    850	err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad);
    851	if (err)
    852		goto out;
    853
    854	memcpy(gid->raw, out_mad->data + 8, 8);
    855
    856	if (mlx4_is_mfunc(dev->dev) && !netw_view) {
    857		if (index) {
    858			/* For any index > 0, return the null guid */
    859			err = 0;
    860			clear = 1;
    861			goto out;
    862		}
    863	}
    864
    865	ib_init_query_mad(in_mad);
    866	in_mad->attr_id  = IB_SMP_ATTR_GUID_INFO;
    867	in_mad->attr_mod = cpu_to_be32(index / 8);
    868
    869	err = mlx4_MAD_IFC(dev, mad_ifc_flags, port,
    870			   NULL, NULL, in_mad, out_mad);
    871	if (err)
    872		goto out;
    873
    874	memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
    875
    876out:
    877	if (clear)
    878		memset(gid->raw + 8, 0, 8);
    879	kfree(in_mad);
    880	kfree(out_mad);
    881	return err;
    882}
    883
    884static int mlx4_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
    885			     union ib_gid *gid)
    886{
    887	if (rdma_protocol_ib(ibdev, port))
    888		return __mlx4_ib_query_gid(ibdev, port, index, gid, 0);
    889	return 0;
    890}
    891
    892static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u32 port,
    893			       u64 *sl2vl_tbl)
    894{
    895	union sl2vl_tbl_to_u64 sl2vl64;
    896	struct ib_smp *in_mad  = NULL;
    897	struct ib_smp *out_mad = NULL;
    898	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
    899	int err = -ENOMEM;
    900	int jj;
    901
    902	if (mlx4_is_slave(to_mdev(ibdev)->dev)) {
    903		*sl2vl_tbl = 0;
    904		return 0;
    905	}
    906
    907	in_mad  = kzalloc(sizeof(*in_mad), GFP_KERNEL);
    908	out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
    909	if (!in_mad || !out_mad)
    910		goto out;
    911
    912	ib_init_query_mad(in_mad);
    913	in_mad->attr_id  = IB_SMP_ATTR_SL_TO_VL_TABLE;
    914	in_mad->attr_mod = 0;
    915
    916	if (mlx4_is_mfunc(to_mdev(ibdev)->dev))
    917		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
    918
    919	err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
    920			   in_mad, out_mad);
    921	if (err)
    922		goto out;
    923
    924	for (jj = 0; jj < 8; jj++)
    925		sl2vl64.sl8[jj] = ((struct ib_smp *)out_mad)->data[jj];
    926	*sl2vl_tbl = sl2vl64.sl64;
    927
    928out:
    929	kfree(in_mad);
    930	kfree(out_mad);
    931	return err;
    932}
    933
    934static void mlx4_init_sl2vl_tbl(struct mlx4_ib_dev *mdev)
    935{
    936	u64 sl2vl;
    937	int i;
    938	int err;
    939
    940	for (i = 1; i <= mdev->dev->caps.num_ports; i++) {
    941		if (mdev->dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
    942			continue;
    943		err = mlx4_ib_query_sl2vl(&mdev->ib_dev, i, &sl2vl);
    944		if (err) {
    945			pr_err("Unable to get default sl to vl mapping for port %d.  Using all zeroes (%d)\n",
    946			       i, err);
    947			sl2vl = 0;
    948		}
    949		atomic64_set(&mdev->sl2vl[i - 1], sl2vl);
    950	}
    951}
    952
    953int __mlx4_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
    954			 u16 *pkey, int netw_view)
    955{
    956	struct ib_smp *in_mad  = NULL;
    957	struct ib_smp *out_mad = NULL;
    958	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
    959	int err = -ENOMEM;
    960
    961	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
    962	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
    963	if (!in_mad || !out_mad)
    964		goto out;
    965
    966	ib_init_query_mad(in_mad);
    967	in_mad->attr_id  = IB_SMP_ATTR_PKEY_TABLE;
    968	in_mad->attr_mod = cpu_to_be32(index / 32);
    969
    970	if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
    971		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
    972
    973	err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
    974			   in_mad, out_mad);
    975	if (err)
    976		goto out;
    977
    978	*pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);
    979
    980out:
    981	kfree(in_mad);
    982	kfree(out_mad);
    983	return err;
    984}
    985
    986static int mlx4_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
    987			      u16 *pkey)
    988{
    989	return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0);
    990}
    991
    992static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
    993				 struct ib_device_modify *props)
    994{
    995	struct mlx4_cmd_mailbox *mailbox;
    996	unsigned long flags;
    997
    998	if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
    999		return -EOPNOTSUPP;
   1000
   1001	if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
   1002		return 0;
   1003
   1004	if (mlx4_is_slave(to_mdev(ibdev)->dev))
   1005		return -EOPNOTSUPP;
   1006
   1007	spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags);
   1008	memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
   1009	spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags);
   1010
   1011	/*
   1012	 * If possible, pass node desc to FW, so it can generate
   1013	 * a 144 trap.  If cmd fails, just ignore.
   1014	 */
   1015	mailbox = mlx4_alloc_cmd_mailbox(to_mdev(ibdev)->dev);
   1016	if (IS_ERR(mailbox))
   1017		return 0;
   1018
   1019	memcpy(mailbox->buf, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
   1020	mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
   1021		 MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
   1022
   1023	mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox);
   1024
   1025	return 0;
   1026}
   1027
   1028static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u32 port,
   1029			    int reset_qkey_viols, u32 cap_mask)
   1030{
   1031	struct mlx4_cmd_mailbox *mailbox;
   1032	int err;
   1033
   1034	mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
   1035	if (IS_ERR(mailbox))
   1036		return PTR_ERR(mailbox);
   1037
   1038	if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
   1039		*(u8 *) mailbox->buf	     = !!reset_qkey_viols << 6;
   1040		((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask);
   1041	} else {
   1042		((u8 *) mailbox->buf)[3]     = !!reset_qkey_viols;
   1043		((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask);
   1044	}
   1045
   1046	err = mlx4_cmd(dev->dev, mailbox->dma, port, MLX4_SET_PORT_IB_OPCODE,
   1047		       MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
   1048		       MLX4_CMD_WRAPPED);
   1049
   1050	mlx4_free_cmd_mailbox(dev->dev, mailbox);
   1051	return err;
   1052}
   1053
   1054static int mlx4_ib_modify_port(struct ib_device *ibdev, u32 port, int mask,
   1055			       struct ib_port_modify *props)
   1056{
   1057	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
   1058	u8 is_eth = mdev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
   1059	struct ib_port_attr attr;
   1060	u32 cap_mask;
   1061	int err;
   1062
   1063	/* return OK if this is RoCE. CM calls ib_modify_port() regardless
   1064	 * of whether port link layer is ETH or IB. For ETH ports, qkey
   1065	 * violations and port capabilities are not meaningful.
   1066	 */
   1067	if (is_eth)
   1068		return 0;
   1069
   1070	mutex_lock(&mdev->cap_mask_mutex);
   1071
   1072	err = ib_query_port(ibdev, port, &attr);
   1073	if (err)
   1074		goto out;
   1075
   1076	cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
   1077		~props->clr_port_cap_mask;
   1078
   1079	err = mlx4_ib_SET_PORT(mdev, port,
   1080			       !!(mask & IB_PORT_RESET_QKEY_CNTR),
   1081			       cap_mask);
   1082
   1083out:
   1084	mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
   1085	return err;
   1086}
   1087
   1088static int mlx4_ib_alloc_ucontext(struct ib_ucontext *uctx,
   1089				  struct ib_udata *udata)
   1090{
   1091	struct ib_device *ibdev = uctx->device;
   1092	struct mlx4_ib_dev *dev = to_mdev(ibdev);
   1093	struct mlx4_ib_ucontext *context = to_mucontext(uctx);
   1094	struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
   1095	struct mlx4_ib_alloc_ucontext_resp resp;
   1096	int err;
   1097
   1098	if (!dev->ib_active)
   1099		return -EAGAIN;
   1100
   1101	if (ibdev->ops.uverbs_abi_ver ==
   1102	    MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
   1103		resp_v3.qp_tab_size      = dev->dev->caps.num_qps;
   1104		resp_v3.bf_reg_size      = dev->dev->caps.bf_reg_size;
   1105		resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
   1106	} else {
   1107		resp.dev_caps	      = dev->dev->caps.userspace_caps;
   1108		resp.qp_tab_size      = dev->dev->caps.num_qps;
   1109		resp.bf_reg_size      = dev->dev->caps.bf_reg_size;
   1110		resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
   1111		resp.cqe_size	      = dev->dev->caps.cqe_size;
   1112	}
   1113
   1114	err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar);
   1115	if (err)
   1116		return err;
   1117
   1118	INIT_LIST_HEAD(&context->db_page_list);
   1119	mutex_init(&context->db_page_mutex);
   1120
   1121	INIT_LIST_HEAD(&context->wqn_ranges_list);
   1122	mutex_init(&context->wqn_ranges_mutex);
   1123
   1124	if (ibdev->ops.uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
   1125		err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
   1126	else
   1127		err = ib_copy_to_udata(udata, &resp, sizeof(resp));
   1128
   1129	if (err) {
   1130		mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
   1131		return -EFAULT;
   1132	}
   1133
   1134	return err;
   1135}
   1136
   1137static void mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
   1138{
   1139	struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
   1140
   1141	mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar);
   1142}
   1143
   1144static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
   1145{
   1146}
   1147
   1148static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
   1149{
   1150	struct mlx4_ib_dev *dev = to_mdev(context->device);
   1151
   1152	switch (vma->vm_pgoff) {
   1153	case 0:
   1154		return rdma_user_mmap_io(context, vma,
   1155					 to_mucontext(context)->uar.pfn,
   1156					 PAGE_SIZE,
   1157					 pgprot_noncached(vma->vm_page_prot),
   1158					 NULL);
   1159
   1160	case 1:
   1161		if (dev->dev->caps.bf_reg_size == 0)
   1162			return -EINVAL;
   1163		return rdma_user_mmap_io(
   1164			context, vma,
   1165			to_mucontext(context)->uar.pfn +
   1166				dev->dev->caps.num_uars,
   1167			PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot),
   1168			NULL);
   1169
   1170	case 3: {
   1171		struct mlx4_clock_params params;
   1172		int ret;
   1173
   1174		ret = mlx4_get_internal_clock_params(dev->dev, &params);
   1175		if (ret)
   1176			return ret;
   1177
   1178		return rdma_user_mmap_io(
   1179			context, vma,
   1180			(pci_resource_start(dev->dev->persist->pdev,
   1181					    params.bar) +
   1182			 params.offset) >>
   1183				PAGE_SHIFT,
   1184			PAGE_SIZE, pgprot_noncached(vma->vm_page_prot),
   1185			NULL);
   1186	}
   1187
   1188	default:
   1189		return -EINVAL;
   1190	}
   1191}
   1192
   1193static int mlx4_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
   1194{
   1195	struct mlx4_ib_pd *pd = to_mpd(ibpd);
   1196	struct ib_device *ibdev = ibpd->device;
   1197	int err;
   1198
   1199	err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn);
   1200	if (err)
   1201		return err;
   1202
   1203	if (udata && ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) {
   1204		mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
   1205		return -EFAULT;
   1206	}
   1207	return 0;
   1208}
   1209
   1210static int mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
   1211{
   1212	mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
   1213	return 0;
   1214}
   1215
   1216static int mlx4_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
   1217{
   1218	struct mlx4_ib_dev *dev = to_mdev(ibxrcd->device);
   1219	struct mlx4_ib_xrcd *xrcd = to_mxrcd(ibxrcd);
   1220	struct ib_cq_init_attr cq_attr = {};
   1221	int err;
   1222
   1223	if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
   1224		return -EOPNOTSUPP;
   1225
   1226	err = mlx4_xrcd_alloc(dev->dev, &xrcd->xrcdn);
   1227	if (err)
   1228		return err;
   1229
   1230	xrcd->pd = ib_alloc_pd(ibxrcd->device, 0);
   1231	if (IS_ERR(xrcd->pd)) {
   1232		err = PTR_ERR(xrcd->pd);
   1233		goto err2;
   1234	}
   1235
   1236	cq_attr.cqe = 1;
   1237	xrcd->cq = ib_create_cq(ibxrcd->device, NULL, NULL, xrcd, &cq_attr);
   1238	if (IS_ERR(xrcd->cq)) {
   1239		err = PTR_ERR(xrcd->cq);
   1240		goto err3;
   1241	}
   1242
   1243	return 0;
   1244
   1245err3:
   1246	ib_dealloc_pd(xrcd->pd);
   1247err2:
   1248	mlx4_xrcd_free(dev->dev, xrcd->xrcdn);
   1249	return err;
   1250}
   1251
   1252static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
   1253{
   1254	ib_destroy_cq(to_mxrcd(xrcd)->cq);
   1255	ib_dealloc_pd(to_mxrcd(xrcd)->pd);
   1256	mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
   1257	return 0;
   1258}
   1259
   1260static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
   1261{
   1262	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
   1263	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
   1264	struct mlx4_ib_gid_entry *ge;
   1265
   1266	ge = kzalloc(sizeof *ge, GFP_KERNEL);
   1267	if (!ge)
   1268		return -ENOMEM;
   1269
   1270	ge->gid = *gid;
   1271	if (mlx4_ib_add_mc(mdev, mqp, gid)) {
   1272		ge->port = mqp->port;
   1273		ge->added = 1;
   1274	}
   1275
   1276	mutex_lock(&mqp->mutex);
   1277	list_add_tail(&ge->list, &mqp->gid_list);
   1278	mutex_unlock(&mqp->mutex);
   1279
   1280	return 0;
   1281}
   1282
   1283static void mlx4_ib_delete_counters_table(struct mlx4_ib_dev *ibdev,
   1284					  struct mlx4_ib_counters *ctr_table)
   1285{
   1286	struct counter_index *counter, *tmp_count;
   1287
   1288	mutex_lock(&ctr_table->mutex);
   1289	list_for_each_entry_safe(counter, tmp_count, &ctr_table->counters_list,
   1290				 list) {
   1291		if (counter->allocated)
   1292			mlx4_counter_free(ibdev->dev, counter->index);
   1293		list_del(&counter->list);
   1294		kfree(counter);
   1295	}
   1296	mutex_unlock(&ctr_table->mutex);
   1297}
   1298
   1299int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
   1300		   union ib_gid *gid)
   1301{
   1302	struct net_device *ndev;
   1303	int ret = 0;
   1304
   1305	if (!mqp->port)
   1306		return 0;
   1307
   1308	spin_lock_bh(&mdev->iboe.lock);
   1309	ndev = mdev->iboe.netdevs[mqp->port - 1];
   1310	if (ndev)
   1311		dev_hold(ndev);
   1312	spin_unlock_bh(&mdev->iboe.lock);
   1313
   1314	if (ndev) {
   1315		ret = 1;
   1316		dev_put(ndev);
   1317	}
   1318
   1319	return ret;
   1320}
   1321
   1322struct mlx4_ib_steering {
   1323	struct list_head list;
   1324	struct mlx4_flow_reg_id reg_id;
   1325	union ib_gid gid;
   1326};
   1327
   1328#define LAST_ETH_FIELD vlan_tag
   1329#define LAST_IB_FIELD sl
   1330#define LAST_IPV4_FIELD dst_ip
   1331#define LAST_TCP_UDP_FIELD src_port
   1332
   1333/* Field is the last supported field */
   1334#define FIELDS_NOT_SUPPORTED(filter, field)\
   1335	memchr_inv((void *)&filter.field  +\
   1336		   sizeof(filter.field), 0,\
   1337		   sizeof(filter) -\
   1338		   offsetof(typeof(filter), field) -\
   1339		   sizeof(filter.field))
   1340
   1341static int parse_flow_attr(struct mlx4_dev *dev,
   1342			   u32 qp_num,
   1343			   union ib_flow_spec *ib_spec,
   1344			   struct _rule_hw *mlx4_spec)
   1345{
   1346	enum mlx4_net_trans_rule_id type;
   1347
   1348	switch (ib_spec->type) {
   1349	case IB_FLOW_SPEC_ETH:
   1350		if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
   1351			return -ENOTSUPP;
   1352
   1353		type = MLX4_NET_TRANS_RULE_ID_ETH;
   1354		memcpy(mlx4_spec->eth.dst_mac, ib_spec->eth.val.dst_mac,
   1355		       ETH_ALEN);
   1356		memcpy(mlx4_spec->eth.dst_mac_msk, ib_spec->eth.mask.dst_mac,
   1357		       ETH_ALEN);
   1358		mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag;
   1359		mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag;
   1360		break;
   1361	case IB_FLOW_SPEC_IB:
   1362		if (FIELDS_NOT_SUPPORTED(ib_spec->ib.mask, LAST_IB_FIELD))
   1363			return -ENOTSUPP;
   1364
   1365		type = MLX4_NET_TRANS_RULE_ID_IB;
   1366		mlx4_spec->ib.l3_qpn =
   1367			cpu_to_be32(qp_num);
   1368		mlx4_spec->ib.qpn_mask =
   1369			cpu_to_be32(MLX4_IB_FLOW_QPN_MASK);
   1370		break;
   1371
   1372
   1373	case IB_FLOW_SPEC_IPV4:
   1374		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
   1375			return -ENOTSUPP;
   1376
   1377		type = MLX4_NET_TRANS_RULE_ID_IPV4;
   1378		mlx4_spec->ipv4.src_ip = ib_spec->ipv4.val.src_ip;
   1379		mlx4_spec->ipv4.src_ip_msk = ib_spec->ipv4.mask.src_ip;
   1380		mlx4_spec->ipv4.dst_ip = ib_spec->ipv4.val.dst_ip;
   1381		mlx4_spec->ipv4.dst_ip_msk = ib_spec->ipv4.mask.dst_ip;
   1382		break;
   1383
   1384	case IB_FLOW_SPEC_TCP:
   1385	case IB_FLOW_SPEC_UDP:
   1386		if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, LAST_TCP_UDP_FIELD))
   1387			return -ENOTSUPP;
   1388
   1389		type = ib_spec->type == IB_FLOW_SPEC_TCP ?
   1390					MLX4_NET_TRANS_RULE_ID_TCP :
   1391					MLX4_NET_TRANS_RULE_ID_UDP;
   1392		mlx4_spec->tcp_udp.dst_port = ib_spec->tcp_udp.val.dst_port;
   1393		mlx4_spec->tcp_udp.dst_port_msk = ib_spec->tcp_udp.mask.dst_port;
   1394		mlx4_spec->tcp_udp.src_port = ib_spec->tcp_udp.val.src_port;
   1395		mlx4_spec->tcp_udp.src_port_msk = ib_spec->tcp_udp.mask.src_port;
   1396		break;
   1397
   1398	default:
   1399		return -EINVAL;
   1400	}
   1401	if (mlx4_map_sw_to_hw_steering_id(dev, type) < 0 ||
   1402	    mlx4_hw_rule_sz(dev, type) < 0)
   1403		return -EINVAL;
   1404	mlx4_spec->id = cpu_to_be16(mlx4_map_sw_to_hw_steering_id(dev, type));
   1405	mlx4_spec->size = mlx4_hw_rule_sz(dev, type) >> 2;
   1406	return mlx4_hw_rule_sz(dev, type);
   1407}
   1408
   1409struct default_rules {
   1410	__u32 mandatory_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
   1411	__u32 mandatory_not_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
   1412	__u32 rules_create_list[IB_FLOW_SPEC_SUPPORT_LAYERS];
   1413	__u8  link_layer;
   1414};
   1415static const struct default_rules default_table[] = {
   1416	{
   1417		.mandatory_fields = {IB_FLOW_SPEC_IPV4},
   1418		.mandatory_not_fields = {IB_FLOW_SPEC_ETH},
   1419		.rules_create_list = {IB_FLOW_SPEC_IB},
   1420		.link_layer = IB_LINK_LAYER_INFINIBAND
   1421	}
   1422};
   1423
   1424static int __mlx4_ib_default_rules_match(struct ib_qp *qp,
   1425					 struct ib_flow_attr *flow_attr)
   1426{
   1427	int i, j, k;
   1428	void *ib_flow;
   1429	const struct default_rules *pdefault_rules = default_table;
   1430	u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port);
   1431
   1432	for (i = 0; i < ARRAY_SIZE(default_table); i++, pdefault_rules++) {
   1433		__u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS];
   1434		memset(&field_types, 0, sizeof(field_types));
   1435
   1436		if (link_layer != pdefault_rules->link_layer)
   1437			continue;
   1438
   1439		ib_flow = flow_attr + 1;
   1440		/* we assume the specs are sorted */
   1441		for (j = 0, k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS &&
   1442		     j < flow_attr->num_of_specs; k++) {
   1443			union ib_flow_spec *current_flow =
   1444				(union ib_flow_spec *)ib_flow;
   1445
   1446			/* same layer but different type */
   1447			if (((current_flow->type & IB_FLOW_SPEC_LAYER_MASK) ==
   1448			     (pdefault_rules->mandatory_fields[k] &
   1449			      IB_FLOW_SPEC_LAYER_MASK)) &&
   1450			    (current_flow->type !=
   1451			     pdefault_rules->mandatory_fields[k]))
   1452				goto out;
   1453
   1454			/* same layer, try match next one */
   1455			if (current_flow->type ==
   1456			    pdefault_rules->mandatory_fields[k]) {
   1457				j++;
   1458				ib_flow +=
   1459					((union ib_flow_spec *)ib_flow)->size;
   1460			}
   1461		}
   1462
   1463		ib_flow = flow_attr + 1;
   1464		for (j = 0; j < flow_attr->num_of_specs;
   1465		     j++, ib_flow += ((union ib_flow_spec *)ib_flow)->size)
   1466			for (k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS; k++)
   1467				/* same layer and same type */
   1468				if (((union ib_flow_spec *)ib_flow)->type ==
   1469				    pdefault_rules->mandatory_not_fields[k])
   1470					goto out;
   1471
   1472		return i;
   1473	}
   1474out:
   1475	return -1;
   1476}
   1477
   1478static int __mlx4_ib_create_default_rules(
   1479		struct mlx4_ib_dev *mdev,
   1480		struct ib_qp *qp,
   1481		const struct default_rules *pdefault_rules,
   1482		struct _rule_hw *mlx4_spec) {
   1483	int size = 0;
   1484	int i;
   1485
   1486	for (i = 0; i < ARRAY_SIZE(pdefault_rules->rules_create_list); i++) {
   1487		union ib_flow_spec ib_spec = {};
   1488		int ret;
   1489
   1490		switch (pdefault_rules->rules_create_list[i]) {
   1491		case 0:
   1492			/* no rule */
   1493			continue;
   1494		case IB_FLOW_SPEC_IB:
   1495			ib_spec.type = IB_FLOW_SPEC_IB;
   1496			ib_spec.size = sizeof(struct ib_flow_spec_ib);
   1497
   1498			break;
   1499		default:
   1500			/* invalid rule */
   1501			return -EINVAL;
   1502		}
    1503		/* We must put an empty rule; the qpn is ignored */
   1504		ret = parse_flow_attr(mdev->dev, 0, &ib_spec,
   1505				      mlx4_spec);
   1506		if (ret < 0) {
   1507			pr_info("invalid parsing\n");
   1508			return -EINVAL;
   1509		}
   1510
   1511		mlx4_spec = (void *)mlx4_spec + ret;
   1512		size += ret;
   1513	}
   1514	return size;
   1515}
   1516
   1517static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
   1518			  int domain,
   1519			  enum mlx4_net_trans_promisc_mode flow_type,
   1520			  u64 *reg_id)
   1521{
   1522	int ret, i;
   1523	int size = 0;
   1524	void *ib_flow;
   1525	struct mlx4_ib_dev *mdev = to_mdev(qp->device);
   1526	struct mlx4_cmd_mailbox *mailbox;
   1527	struct mlx4_net_trans_rule_hw_ctrl *ctrl;
   1528	int default_flow;
   1529
   1530	if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) {
   1531		pr_err("Invalid priority value %d\n", flow_attr->priority);
   1532		return -EINVAL;
   1533	}
   1534
   1535	if (mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0)
   1536		return -EINVAL;
   1537
   1538	mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
   1539	if (IS_ERR(mailbox))
   1540		return PTR_ERR(mailbox);
   1541	ctrl = mailbox->buf;
   1542
   1543	ctrl->prio = cpu_to_be16(domain | flow_attr->priority);
   1544	ctrl->type = mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type);
   1545	ctrl->port = flow_attr->port;
   1546	ctrl->qpn = cpu_to_be32(qp->qp_num);
   1547
   1548	ib_flow = flow_attr + 1;
   1549	size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
   1550	/* Add default flows */
   1551	default_flow = __mlx4_ib_default_rules_match(qp, flow_attr);
   1552	if (default_flow >= 0) {
   1553		ret = __mlx4_ib_create_default_rules(
   1554				mdev, qp, default_table + default_flow,
   1555				mailbox->buf + size);
   1556		if (ret < 0) {
   1557			mlx4_free_cmd_mailbox(mdev->dev, mailbox);
   1558			return -EINVAL;
   1559		}
   1560		size += ret;
   1561	}
   1562	for (i = 0; i < flow_attr->num_of_specs; i++) {
   1563		ret = parse_flow_attr(mdev->dev, qp->qp_num, ib_flow,
   1564				      mailbox->buf + size);
   1565		if (ret < 0) {
   1566			mlx4_free_cmd_mailbox(mdev->dev, mailbox);
   1567			return -EINVAL;
   1568		}
   1569		ib_flow += ((union ib_flow_spec *) ib_flow)->size;
   1570		size += ret;
   1571	}
   1572
   1573	if (mlx4_is_master(mdev->dev) && flow_type == MLX4_FS_REGULAR &&
   1574	    flow_attr->num_of_specs == 1) {
   1575		struct _rule_hw *rule_header = (struct _rule_hw *)(ctrl + 1);
   1576		enum ib_flow_spec_type header_spec =
   1577			((union ib_flow_spec *)(flow_attr + 1))->type;
   1578
   1579		if (header_spec == IB_FLOW_SPEC_ETH)
   1580			mlx4_handle_eth_header_mcast_prio(ctrl, rule_header);
   1581	}
   1582
   1583	ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0,
   1584			   MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
   1585			   MLX4_CMD_NATIVE);
   1586	if (ret == -ENOMEM)
   1587		pr_err("mcg table is full. Fail to register network rule.\n");
   1588	else if (ret == -ENXIO)
   1589		pr_err("Device managed flow steering is disabled. Fail to register network rule.\n");
   1590	else if (ret)
   1591		pr_err("Invalid argument. Fail to register network rule.\n");
   1592
   1593	mlx4_free_cmd_mailbox(mdev->dev, mailbox);
   1594	return ret;
   1595}
   1596
   1597static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id)
   1598{
   1599	int err;
   1600	err = mlx4_cmd(dev, reg_id, 0, 0,
   1601		       MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
   1602		       MLX4_CMD_NATIVE);
   1603	if (err)
   1604		pr_err("Fail to detach network rule. registration id = 0x%llx\n",
   1605		       reg_id);
   1606	return err;
   1607}
   1608
   1609static int mlx4_ib_tunnel_steer_add(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
   1610				    u64 *reg_id)
   1611{
   1612	void *ib_flow;
   1613	union ib_flow_spec *ib_spec;
   1614	struct mlx4_dev	*dev = to_mdev(qp->device)->dev;
   1615	int err = 0;
   1616
   1617	if (dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN ||
   1618	    dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC)
   1619		return 0; /* do nothing */
   1620
   1621	ib_flow = flow_attr + 1;
   1622	ib_spec = (union ib_flow_spec *)ib_flow;
   1623
   1624	if (ib_spec->type !=  IB_FLOW_SPEC_ETH || flow_attr->num_of_specs != 1)
   1625		return 0; /* do nothing */
   1626
   1627	err = mlx4_tunnel_steer_add(to_mdev(qp->device)->dev, ib_spec->eth.val.dst_mac,
   1628				    flow_attr->port, qp->qp_num,
   1629				    MLX4_DOMAIN_UVERBS | (flow_attr->priority & 0xff),
   1630				    reg_id);
   1631	return err;
   1632}
   1633
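        /*
         * Map an IB_FLOW_ATTR_FLAGS_DONT_TRAP attribute onto the mlx4 sniffer
         * modes: with no spec (or an all-zero ETH dst_mac mask) both the MC
         * and UC sniffers are requested; otherwise the mask may cover only
         * the multicast bit and the dst_mac value selects MC or UC.
         */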
   1634static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev,
   1635				      struct ib_flow_attr *flow_attr,
   1636				      enum mlx4_net_trans_promisc_mode *type)
   1637{
   1638	int err = 0;
   1639
   1640	if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER) ||
   1641	    (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC) ||
   1642	    (flow_attr->num_of_specs > 1) || (flow_attr->priority != 0)) {
   1643		return -EOPNOTSUPP;
   1644	}
   1645
   1646	if (flow_attr->num_of_specs == 0) {
   1647		type[0] = MLX4_FS_MC_SNIFFER;
   1648		type[1] = MLX4_FS_UC_SNIFFER;
   1649	} else {
   1650		union ib_flow_spec *ib_spec;
   1651
   1652		ib_spec = (union ib_flow_spec *)(flow_attr + 1);
   1653		if (ib_spec->type !=  IB_FLOW_SPEC_ETH)
   1654			return -EINVAL;
   1655
    1656		/* if the mask is all zeros, sniff both MC and UC */
   1657		if (is_zero_ether_addr(ib_spec->eth.mask.dst_mac)) {
   1658			type[0] = MLX4_FS_MC_SNIFFER;
   1659			type[1] = MLX4_FS_UC_SNIFFER;
   1660		} else {
   1661			u8 mac[ETH_ALEN] = {ib_spec->eth.mask.dst_mac[0] ^ 0x01,
   1662					    ib_spec->eth.mask.dst_mac[1],
   1663					    ib_spec->eth.mask.dst_mac[2],
   1664					    ib_spec->eth.mask.dst_mac[3],
   1665					    ib_spec->eth.mask.dst_mac[4],
   1666					    ib_spec->eth.mask.dst_mac[5]};
   1667
    1668			/* The xor above flipped only the multicast bit; a non-empty
    1669			 * mask is valid only if that bit is set and the rest are zero.
    1670			 */
   1671			if (!is_zero_ether_addr(&mac[0]))
   1672				return -EINVAL;
   1673
   1674			if (is_multicast_ether_addr(ib_spec->eth.val.dst_mac))
   1675				type[0] = MLX4_FS_MC_SNIFFER;
   1676			else
   1677				type[0] = MLX4_FS_UC_SNIFFER;
   1678		}
   1679	}
   1680
   1681	return err;
   1682}
   1683
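        /*
         * uverbs create_flow entry point: translate the ib_flow_attr type
         * into one or two mlx4 steering rule types, attach them (plus mirror
         * rules on port 2 when the ports are bonded), and for NORMAL
         * attributes also try to attach a VXLAN tunnel steering rule.
         */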
   1684static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
   1685					   struct ib_flow_attr *flow_attr,
   1686					   struct ib_udata *udata)
   1687{
   1688	int err = 0, i = 0, j = 0;
   1689	struct mlx4_ib_flow *mflow;
   1690	enum mlx4_net_trans_promisc_mode type[2];
   1691	struct mlx4_dev *dev = (to_mdev(qp->device))->dev;
   1692	int is_bonded = mlx4_is_bonded(dev);
   1693
   1694	if (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP)
   1695		return ERR_PTR(-EOPNOTSUPP);
   1696
   1697	if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) &&
   1698	    (flow_attr->type != IB_FLOW_ATTR_NORMAL))
   1699		return ERR_PTR(-EOPNOTSUPP);
   1700
   1701	if (udata &&
   1702	    udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen))
   1703		return ERR_PTR(-EOPNOTSUPP);
   1704
   1705	memset(type, 0, sizeof(type));
   1706
   1707	mflow = kzalloc(sizeof(*mflow), GFP_KERNEL);
   1708	if (!mflow) {
   1709		err = -ENOMEM;
   1710		goto err_free;
   1711	}
   1712
   1713	switch (flow_attr->type) {
   1714	case IB_FLOW_ATTR_NORMAL:
    1715		/* If the don't-trap flag (continue match) is set, then under
    1716		 * specific conditions traffic is replicated to the given qp
    1717		 * without being stolen from it.
    1718		 */
   1719		if (unlikely(flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)) {
   1720			err = mlx4_ib_add_dont_trap_rule(dev,
   1721							 flow_attr,
   1722							 type);
   1723			if (err)
   1724				goto err_free;
   1725		} else {
   1726			type[0] = MLX4_FS_REGULAR;
   1727		}
   1728		break;
   1729
   1730	case IB_FLOW_ATTR_ALL_DEFAULT:
   1731		type[0] = MLX4_FS_ALL_DEFAULT;
   1732		break;
   1733
   1734	case IB_FLOW_ATTR_MC_DEFAULT:
   1735		type[0] = MLX4_FS_MC_DEFAULT;
   1736		break;
   1737
   1738	case IB_FLOW_ATTR_SNIFFER:
   1739		type[0] = MLX4_FS_MIRROR_RX_PORT;
   1740		type[1] = MLX4_FS_MIRROR_SX_PORT;
   1741		break;
   1742
   1743	default:
   1744		err = -EINVAL;
   1745		goto err_free;
   1746	}
   1747
   1748	while (i < ARRAY_SIZE(type) && type[i]) {
   1749		err = __mlx4_ib_create_flow(qp, flow_attr, MLX4_DOMAIN_UVERBS,
   1750					    type[i], &mflow->reg_id[i].id);
   1751		if (err)
   1752			goto err_create_flow;
   1753		if (is_bonded) {
   1754			/* Application always sees one port so the mirror rule
   1755			 * must be on port #2
   1756			 */
   1757			flow_attr->port = 2;
   1758			err = __mlx4_ib_create_flow(qp, flow_attr,
   1759						    MLX4_DOMAIN_UVERBS, type[j],
   1760						    &mflow->reg_id[j].mirror);
   1761			flow_attr->port = 1;
   1762			if (err)
   1763				goto err_create_flow;
   1764			j++;
   1765		}
   1766
   1767		i++;
   1768	}
   1769
   1770	if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) {
   1771		err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
   1772					       &mflow->reg_id[i].id);
   1773		if (err)
   1774			goto err_create_flow;
   1775
   1776		if (is_bonded) {
   1777			flow_attr->port = 2;
   1778			err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
   1779						       &mflow->reg_id[j].mirror);
   1780			flow_attr->port = 1;
   1781			if (err)
   1782				goto err_create_flow;
   1783			j++;
   1784		}
    1785		/* tunnel rule registered; advance to the next reg_id slot */
   1786		i++;
   1787	}
   1788
   1789	return &mflow->ibflow;
   1790
   1791err_create_flow:
   1792	while (i) {
   1793		(void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
   1794					     mflow->reg_id[i].id);
   1795		i--;
   1796	}
   1797
   1798	while (j) {
   1799		(void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
   1800					     mflow->reg_id[j].mirror);
   1801		j--;
   1802	}
   1803err_free:
   1804	kfree(mflow);
   1805	return ERR_PTR(err);
   1806}
   1807
   1808static int mlx4_ib_destroy_flow(struct ib_flow *flow_id)
   1809{
   1810	int err, ret = 0;
   1811	int i = 0;
   1812	struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device);
   1813	struct mlx4_ib_flow *mflow = to_mflow(flow_id);
   1814
   1815	while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i].id) {
   1816		err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i].id);
   1817		if (err)
   1818			ret = err;
   1819		if (mflow->reg_id[i].mirror) {
   1820			err = __mlx4_ib_destroy_flow(mdev->dev,
   1821						     mflow->reg_id[i].mirror);
   1822			if (err)
   1823				ret = err;
   1824		}
   1825		i++;
   1826	}
   1827
   1828	kfree(mflow);
   1829	return ret;
   1830}
   1831
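        /*
         * Attach a QP to a multicast group. Under device-managed flow
         * steering the returned reg_id is kept on the QP's steering_rules
         * list so that detach can find it later; with bonded ports a mirror
         * attach is performed on the other port as well.
         */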
   1832static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
   1833{
   1834	int err;
   1835	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
   1836	struct mlx4_dev	*dev = mdev->dev;
   1837	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
   1838	struct mlx4_ib_steering *ib_steering = NULL;
   1839	enum mlx4_protocol prot = MLX4_PROT_IB_IPV6;
   1840	struct mlx4_flow_reg_id	reg_id;
   1841
   1842	if (mdev->dev->caps.steering_mode ==
   1843	    MLX4_STEERING_MODE_DEVICE_MANAGED) {
   1844		ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL);
   1845		if (!ib_steering)
   1846			return -ENOMEM;
   1847	}
   1848
   1849	err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
   1850				    !!(mqp->flags &
   1851				       MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
   1852				    prot, &reg_id.id);
   1853	if (err) {
   1854		pr_err("multicast attach op failed, err %d\n", err);
   1855		goto err_malloc;
   1856	}
   1857
   1858	reg_id.mirror = 0;
   1859	if (mlx4_is_bonded(dev)) {
   1860		err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw,
   1861					    (mqp->port == 1) ? 2 : 1,
   1862					    !!(mqp->flags &
   1863					    MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
   1864					    prot, &reg_id.mirror);
   1865		if (err)
   1866			goto err_add;
   1867	}
   1868
   1869	err = add_gid_entry(ibqp, gid);
   1870	if (err)
   1871		goto err_add;
   1872
   1873	if (ib_steering) {
   1874		memcpy(ib_steering->gid.raw, gid->raw, 16);
   1875		ib_steering->reg_id = reg_id;
   1876		mutex_lock(&mqp->mutex);
   1877		list_add(&ib_steering->list, &mqp->steering_rules);
   1878		mutex_unlock(&mqp->mutex);
   1879	}
   1880	return 0;
   1881
   1882err_add:
   1883	mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
   1884			      prot, reg_id.id);
   1885	if (reg_id.mirror)
   1886		mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
   1887				      prot, reg_id.mirror);
   1888err_malloc:
   1889	kfree(ib_steering);
   1890
   1891	return err;
   1892}
   1893
   1894static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw)
   1895{
   1896	struct mlx4_ib_gid_entry *ge;
   1897	struct mlx4_ib_gid_entry *tmp;
   1898	struct mlx4_ib_gid_entry *ret = NULL;
   1899
   1900	list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
   1901		if (!memcmp(raw, ge->gid.raw, 16)) {
   1902			ret = ge;
   1903			break;
   1904		}
   1905	}
   1906
   1907	return ret;
   1908}
   1909
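        /*
         * Detach a QP from a multicast group: look up the reg_id saved by
         * mlx4_ib_mcg_attach (device-managed steering only), detach the rule
         * (and its mirror when bonded), and drop the matching gid list entry.
         */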
   1910static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
   1911{
   1912	int err;
   1913	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
   1914	struct mlx4_dev *dev = mdev->dev;
   1915	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
   1916	struct net_device *ndev;
   1917	struct mlx4_ib_gid_entry *ge;
   1918	struct mlx4_flow_reg_id reg_id = {0, 0};
   1919	enum mlx4_protocol prot =  MLX4_PROT_IB_IPV6;
   1920
   1921	if (mdev->dev->caps.steering_mode ==
   1922	    MLX4_STEERING_MODE_DEVICE_MANAGED) {
   1923		struct mlx4_ib_steering *ib_steering;
   1924
   1925		mutex_lock(&mqp->mutex);
   1926		list_for_each_entry(ib_steering, &mqp->steering_rules, list) {
   1927			if (!memcmp(ib_steering->gid.raw, gid->raw, 16)) {
   1928				list_del(&ib_steering->list);
   1929				break;
   1930			}
   1931		}
   1932		mutex_unlock(&mqp->mutex);
   1933		if (&ib_steering->list == &mqp->steering_rules) {
   1934			pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n");
   1935			return -EINVAL;
   1936		}
   1937		reg_id = ib_steering->reg_id;
   1938		kfree(ib_steering);
   1939	}
   1940
   1941	err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
   1942				    prot, reg_id.id);
   1943	if (err)
   1944		return err;
   1945
   1946	if (mlx4_is_bonded(dev)) {
   1947		err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
   1948					    prot, reg_id.mirror);
   1949		if (err)
   1950			return err;
   1951	}
   1952
   1953	mutex_lock(&mqp->mutex);
   1954	ge = find_gid_entry(mqp, gid->raw);
   1955	if (ge) {
   1956		spin_lock_bh(&mdev->iboe.lock);
   1957		ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL;
   1958		if (ndev)
   1959			dev_hold(ndev);
   1960		spin_unlock_bh(&mdev->iboe.lock);
   1961		if (ndev)
   1962			dev_put(ndev);
   1963		list_del(&ge->list);
   1964		kfree(ge);
   1965	} else
   1966		pr_warn("could not find mgid entry\n");
   1967
   1968	mutex_unlock(&mqp->mutex);
   1969
   1970	return 0;
   1971}
   1972
   1973static int init_node_data(struct mlx4_ib_dev *dev)
   1974{
   1975	struct ib_smp *in_mad  = NULL;
   1976	struct ib_smp *out_mad = NULL;
   1977	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
   1978	int err = -ENOMEM;
   1979
   1980	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
   1981	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
   1982	if (!in_mad || !out_mad)
   1983		goto out;
   1984
   1985	ib_init_query_mad(in_mad);
   1986	in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
   1987	if (mlx4_is_master(dev->dev))
   1988		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
   1989
   1990	err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
   1991	if (err)
   1992		goto out;
   1993
   1994	memcpy(dev->ib_dev.node_desc, out_mad->data, IB_DEVICE_NODE_DESC_MAX);
   1995
   1996	in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
   1997
   1998	err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
   1999	if (err)
   2000		goto out;
   2001
   2002	dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
   2003	memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
   2004
   2005out:
   2006	kfree(in_mad);
   2007	kfree(out_mad);
   2008	return err;
   2009}
   2010
   2011static ssize_t hca_type_show(struct device *device,
   2012			     struct device_attribute *attr, char *buf)
   2013{
   2014	struct mlx4_ib_dev *dev =
   2015		rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev);
   2016
   2017	return sysfs_emit(buf, "MT%d\n", dev->dev->persist->pdev->device);
   2018}
   2019static DEVICE_ATTR_RO(hca_type);
   2020
   2021static ssize_t hw_rev_show(struct device *device,
   2022			   struct device_attribute *attr, char *buf)
   2023{
   2024	struct mlx4_ib_dev *dev =
   2025		rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev);
   2026
   2027	return sysfs_emit(buf, "%x\n", dev->dev->rev_id);
   2028}
   2029static DEVICE_ATTR_RO(hw_rev);
   2030
   2031static ssize_t board_id_show(struct device *device,
   2032			     struct device_attribute *attr, char *buf)
   2033{
   2034	struct mlx4_ib_dev *dev =
   2035		rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev);
   2036
   2037	return sysfs_emit(buf, "%.*s\n", MLX4_BOARD_ID_LEN, dev->dev->board_id);
   2038}
   2039static DEVICE_ATTR_RO(board_id);
   2040
   2041static struct attribute *mlx4_class_attributes[] = {
   2042	&dev_attr_hw_rev.attr,
   2043	&dev_attr_hca_type.attr,
   2044	&dev_attr_board_id.attr,
   2045	NULL
   2046};
   2047
   2048static const struct attribute_group mlx4_attr_group = {
   2049	.attrs = mlx4_class_attributes,
   2050};
   2051
   2052struct diag_counter {
   2053	const char *name;
   2054	u32 offset;
   2055};
   2056
   2057#define DIAG_COUNTER(_name, _offset)			\
   2058	{ .name = #_name, .offset = _offset }
   2059
   2060static const struct diag_counter diag_basic[] = {
   2061	DIAG_COUNTER(rq_num_lle, 0x00),
   2062	DIAG_COUNTER(sq_num_lle, 0x04),
   2063	DIAG_COUNTER(rq_num_lqpoe, 0x08),
   2064	DIAG_COUNTER(sq_num_lqpoe, 0x0C),
   2065	DIAG_COUNTER(rq_num_lpe, 0x18),
   2066	DIAG_COUNTER(sq_num_lpe, 0x1C),
   2067	DIAG_COUNTER(rq_num_wrfe, 0x20),
   2068	DIAG_COUNTER(sq_num_wrfe, 0x24),
   2069	DIAG_COUNTER(sq_num_mwbe, 0x2C),
   2070	DIAG_COUNTER(sq_num_bre, 0x34),
   2071	DIAG_COUNTER(sq_num_rire, 0x44),
   2072	DIAG_COUNTER(rq_num_rire, 0x48),
   2073	DIAG_COUNTER(sq_num_rae, 0x4C),
   2074	DIAG_COUNTER(rq_num_rae, 0x50),
   2075	DIAG_COUNTER(sq_num_roe, 0x54),
   2076	DIAG_COUNTER(sq_num_tree, 0x5C),
   2077	DIAG_COUNTER(sq_num_rree, 0x64),
   2078	DIAG_COUNTER(rq_num_rnr, 0x68),
   2079	DIAG_COUNTER(sq_num_rnr, 0x6C),
   2080	DIAG_COUNTER(rq_num_oos, 0x100),
   2081	DIAG_COUNTER(sq_num_oos, 0x104),
   2082};
   2083
   2084static const struct diag_counter diag_ext[] = {
   2085	DIAG_COUNTER(rq_num_dup, 0x130),
   2086	DIAG_COUNTER(sq_num_to, 0x134),
   2087};
   2088
   2089static const struct diag_counter diag_device_only[] = {
   2090	DIAG_COUNTER(num_cqovf, 0x1A0),
   2091	DIAG_COUNTER(rq_num_udsdprd, 0x118),
   2092};
   2093
   2094static struct rdma_hw_stats *
   2095mlx4_ib_alloc_hw_device_stats(struct ib_device *ibdev)
   2096{
   2097	struct mlx4_ib_dev *dev = to_mdev(ibdev);
   2098	struct mlx4_ib_diag_counters *diag = dev->diag_counters;
   2099
   2100	if (!diag[0].descs)
   2101		return NULL;
   2102
   2103	return rdma_alloc_hw_stats_struct(diag[0].descs, diag[0].num_counters,
   2104					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
   2105}
   2106
   2107static struct rdma_hw_stats *
   2108mlx4_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
   2109{
   2110	struct mlx4_ib_dev *dev = to_mdev(ibdev);
   2111	struct mlx4_ib_diag_counters *diag = dev->diag_counters;
   2112
   2113	if (!diag[1].descs)
   2114		return NULL;
   2115
   2116	return rdma_alloc_hw_stats_struct(diag[1].descs, diag[1].num_counters,
   2117					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
   2118}
   2119
   2120static int mlx4_ib_get_hw_stats(struct ib_device *ibdev,
   2121				struct rdma_hw_stats *stats,
   2122				u32 port, int index)
   2123{
   2124	struct mlx4_ib_dev *dev = to_mdev(ibdev);
   2125	struct mlx4_ib_diag_counters *diag = dev->diag_counters;
   2126	u32 hw_value[ARRAY_SIZE(diag_device_only) +
   2127		ARRAY_SIZE(diag_ext) + ARRAY_SIZE(diag_basic)] = {};
   2128	int ret;
   2129	int i;
   2130
   2131	ret = mlx4_query_diag_counters(dev->dev,
   2132				       MLX4_OP_MOD_QUERY_TRANSPORT_CI_ERRORS,
   2133				       diag[!!port].offset, hw_value,
   2134				       diag[!!port].num_counters, port);
   2135
   2136	if (ret)
   2137		return ret;
   2138
   2139	for (i = 0; i < diag[!!port].num_counters; i++)
   2140		stats->value[i] = hw_value[i];
   2141
   2142	return diag[!!port].num_counters;
   2143}
   2144
   2145static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev,
   2146					 struct rdma_stat_desc **pdescs,
   2147					 u32 **offset, u32 *num, bool port)
   2148{
   2149	u32 num_counters;
   2150
   2151	num_counters = ARRAY_SIZE(diag_basic);
   2152
   2153	if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT)
   2154		num_counters += ARRAY_SIZE(diag_ext);
   2155
   2156	if (!port)
   2157		num_counters += ARRAY_SIZE(diag_device_only);
   2158
   2159	*pdescs = kcalloc(num_counters, sizeof(struct rdma_stat_desc),
   2160			  GFP_KERNEL);
   2161	if (!*pdescs)
   2162		return -ENOMEM;
   2163
   2164	*offset = kcalloc(num_counters, sizeof(**offset), GFP_KERNEL);
   2165	if (!*offset)
   2166		goto err;
   2167
   2168	*num = num_counters;
   2169
   2170	return 0;
   2171
   2172err:
   2173	kfree(*pdescs);
   2174	return -ENOMEM;
   2175}
   2176
   2177static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev,
   2178				       struct rdma_stat_desc *descs,
   2179				       u32 *offset, bool port)
   2180{
   2181	int i;
   2182	int j;
   2183
   2184	for (i = 0, j = 0; i < ARRAY_SIZE(diag_basic); i++, j++) {
   2185		descs[i].name = diag_basic[i].name;
   2186		offset[i] = diag_basic[i].offset;
   2187	}
   2188
   2189	if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT) {
   2190		for (i = 0; i < ARRAY_SIZE(diag_ext); i++, j++) {
   2191			descs[j].name = diag_ext[i].name;
   2192			offset[j] = diag_ext[i].offset;
   2193		}
   2194	}
   2195
   2196	if (!port) {
   2197		for (i = 0; i < ARRAY_SIZE(diag_device_only); i++, j++) {
   2198			descs[j].name = diag_device_only[i].name;
   2199			offset[j] = diag_device_only[i].offset;
   2200		}
   2201	}
   2202}
   2203
   2204static const struct ib_device_ops mlx4_ib_hw_stats_ops = {
   2205	.alloc_hw_device_stats = mlx4_ib_alloc_hw_device_stats,
   2206	.alloc_hw_port_stats = mlx4_ib_alloc_hw_port_stats,
   2207	.get_hw_stats = mlx4_ib_get_hw_stats,
   2208};
   2209
   2210static const struct ib_device_ops mlx4_ib_hw_stats_ops1 = {
   2211	.alloc_hw_device_stats = mlx4_ib_alloc_hw_device_stats,
   2212	.get_hw_stats = mlx4_ib_get_hw_stats,
   2213};
   2214
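        /*
         * Allocate descriptors and HW offsets for the device-wide and (when
         * supported) per-port diagnostic counters, then register the matching
         * hw_stats ops. Slaves expose no diagnostic counters.
         */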
   2215static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
   2216{
   2217	struct mlx4_ib_diag_counters *diag = ibdev->diag_counters;
   2218	int i;
   2219	int ret;
   2220	bool per_port = !!(ibdev->dev->caps.flags2 &
   2221		MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT);
   2222
   2223	if (mlx4_is_slave(ibdev->dev))
   2224		return 0;
   2225
   2226	for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
    2227		/*
    2228		 * i == 1 means we are building port counters; without per-port
    2229		 * support, register stats ops lacking the port stats callback.
    2230		 */
   2231		if (i && !per_port) {
   2232			ib_set_device_ops(&ibdev->ib_dev,
   2233					  &mlx4_ib_hw_stats_ops1);
   2234
   2235			return 0;
   2236		}
   2237
   2238		ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].descs,
   2239						    &diag[i].offset,
   2240						    &diag[i].num_counters, i);
   2241		if (ret)
   2242			goto err_alloc;
   2243
   2244		mlx4_ib_fill_diag_counters(ibdev, diag[i].descs,
   2245					   diag[i].offset, i);
   2246	}
   2247
   2248	ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_hw_stats_ops);
   2249
   2250	return 0;
   2251
   2252err_alloc:
   2253	if (i) {
   2254		kfree(diag[i - 1].descs);
   2255		kfree(diag[i - 1].offset);
   2256	}
   2257
   2258	return ret;
   2259}
   2260
   2261static void mlx4_ib_diag_cleanup(struct mlx4_ib_dev *ibdev)
   2262{
   2263	int i;
   2264
   2265	for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
   2266		kfree(ibdev->diag_counters[i].offset);
   2267		kfree(ibdev->diag_counters[i].descs);
   2268	}
   2269}
   2270
   2271#define MLX4_IB_INVALID_MAC	((u64)-1)
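        /*
         * Cache the netdev MAC for the port and, on multi-function devices,
         * re-register the source MAC used by the proxy QP1 so that it follows
         * netdev address changes; the previously registered MAC is released.
         */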
   2272static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
   2273			       struct net_device *dev,
   2274			       int port)
   2275{
   2276	u64 new_smac = 0;
   2277	u64 release_mac = MLX4_IB_INVALID_MAC;
   2278	struct mlx4_ib_qp *qp;
   2279
   2280	new_smac = ether_addr_to_u64(dev->dev_addr);
   2281	atomic64_set(&ibdev->iboe.mac[port - 1], new_smac);
   2282
    2283	/* no need to update QP1 or register the mac in non-SRIOV */
   2284	if (!mlx4_is_mfunc(ibdev->dev))
   2285		return;
   2286
   2287	mutex_lock(&ibdev->qp1_proxy_lock[port - 1]);
   2288	qp = ibdev->qp1_proxy[port - 1];
   2289	if (qp) {
   2290		int new_smac_index;
   2291		u64 old_smac;
   2292		struct mlx4_update_qp_params update_params;
   2293
   2294		mutex_lock(&qp->mutex);
   2295		old_smac = qp->pri.smac;
   2296		if (new_smac == old_smac)
   2297			goto unlock;
   2298
   2299		new_smac_index = mlx4_register_mac(ibdev->dev, port, new_smac);
   2300
   2301		if (new_smac_index < 0)
   2302			goto unlock;
   2303
   2304		update_params.smac_index = new_smac_index;
   2305		if (mlx4_update_qp(ibdev->dev, qp->mqp.qpn, MLX4_UPDATE_QP_SMAC,
   2306				   &update_params)) {
   2307			release_mac = new_smac;
   2308			goto unlock;
   2309		}
   2310		/* if old port was zero, no mac was yet registered for this QP */
   2311		if (qp->pri.smac_port)
   2312			release_mac = old_smac;
   2313		qp->pri.smac = new_smac;
   2314		qp->pri.smac_port = port;
   2315		qp->pri.smac_index = new_smac_index;
   2316	}
   2317
   2318unlock:
   2319	if (release_mac != MLX4_IB_INVALID_MAC)
   2320		mlx4_unregister_mac(ibdev->dev, port, release_mac);
   2321	if (qp)
   2322		mutex_unlock(&qp->mutex);
   2323	mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]);
   2324}
   2325
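        /*
         * Runs from the netdev notifier under RTNL: refresh the cached netdev
         * for every IBoE port, dispatch PORT_ACTIVE/PORT_ERR events on link
         * state transitions, and trigger a QP1 MAC update when the matching
         * netdev changed its address or came up.
         */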
   2326static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
   2327				 struct net_device *dev,
   2328				 unsigned long event)
   2329
   2330{
   2331	struct mlx4_ib_iboe *iboe;
   2332	int update_qps_port = -1;
   2333	int port;
   2334
   2335	ASSERT_RTNL();
   2336
   2337	iboe = &ibdev->iboe;
   2338
   2339	spin_lock_bh(&iboe->lock);
   2340	mlx4_foreach_ib_transport_port(port, ibdev->dev) {
   2341
   2342		iboe->netdevs[port - 1] =
   2343			mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
   2344
   2345		if (dev == iboe->netdevs[port - 1] &&
   2346		    (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER ||
   2347		     event == NETDEV_UP || event == NETDEV_CHANGE))
   2348			update_qps_port = port;
   2349
   2350		if (dev == iboe->netdevs[port - 1] &&
   2351		    (event == NETDEV_UP || event == NETDEV_DOWN)) {
   2352			enum ib_port_state port_state;
   2353			struct ib_event ibev = { };
   2354
   2355			if (ib_get_cached_port_state(&ibdev->ib_dev, port,
   2356						     &port_state))
   2357				continue;
   2358
   2359			if (event == NETDEV_UP &&
   2360			    (port_state != IB_PORT_ACTIVE ||
   2361			     iboe->last_port_state[port - 1] != IB_PORT_DOWN))
   2362				continue;
   2363			if (event == NETDEV_DOWN &&
   2364			    (port_state != IB_PORT_DOWN ||
   2365			     iboe->last_port_state[port - 1] != IB_PORT_ACTIVE))
   2366				continue;
   2367			iboe->last_port_state[port - 1] = port_state;
   2368
   2369			ibev.device = &ibdev->ib_dev;
   2370			ibev.element.port_num = port;
   2371			ibev.event = event == NETDEV_UP ? IB_EVENT_PORT_ACTIVE :
   2372							  IB_EVENT_PORT_ERR;
   2373			ib_dispatch_event(&ibev);
   2374		}
   2375
   2376	}
   2377	spin_unlock_bh(&iboe->lock);
   2378
   2379	if (update_qps_port > 0)
   2380		mlx4_ib_update_qps(ibdev, dev, update_qps_port);
   2381}
   2382
   2383static int mlx4_ib_netdev_event(struct notifier_block *this,
   2384				unsigned long event, void *ptr)
   2385{
   2386	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
   2387	struct mlx4_ib_dev *ibdev;
   2388
   2389	if (!net_eq(dev_net(dev), &init_net))
   2390		return NOTIFY_DONE;
   2391
   2392	ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
   2393	mlx4_ib_scan_netdevs(ibdev, dev, event);
   2394
   2395	return NOTIFY_DONE;
   2396}
   2397
   2398static void init_pkeys(struct mlx4_ib_dev *ibdev)
   2399{
   2400	int port;
   2401	int slave;
   2402	int i;
   2403
   2404	if (mlx4_is_master(ibdev->dev)) {
   2405		for (slave = 0; slave <= ibdev->dev->persist->num_vfs;
   2406		     ++slave) {
   2407			for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
   2408				for (i = 0;
   2409				     i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
   2410				     ++i) {
   2411					ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] =
   2412					/* master has the identity virt2phys pkey mapping */
   2413						(slave == mlx4_master_func_num(ibdev->dev) || !i) ? i :
   2414							ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1;
   2415					mlx4_sync_pkey_table(ibdev->dev, slave, port, i,
   2416							     ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]);
   2417				}
   2418			}
   2419		}
   2420		/* initialize pkey cache */
   2421		for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
   2422			for (i = 0;
   2423			     i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
   2424			     ++i)
   2425				ibdev->pkeys.phys_pkey_cache[port-1][i] =
   2426					(i) ? 0 : 0xFFFF;
   2427		}
   2428	}
   2429}
   2430
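        /*
         * Assign dedicated completion EQs to the IB device, skipping EQs that
         * are shared between ports, and advertise how many were obtained via
         * num_comp_vectors.
         */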
   2431static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
   2432{
   2433	int i, j, eq = 0, total_eqs = 0;
   2434
   2435	ibdev->eq_table = kcalloc(dev->caps.num_comp_vectors,
   2436				  sizeof(ibdev->eq_table[0]), GFP_KERNEL);
   2437	if (!ibdev->eq_table)
   2438		return;
   2439
   2440	for (i = 1; i <= dev->caps.num_ports; i++) {
   2441		for (j = 0; j < mlx4_get_eqs_per_port(dev, i);
   2442		     j++, total_eqs++) {
   2443			if (i > 1 &&  mlx4_is_eq_shared(dev, total_eqs))
   2444				continue;
   2445			ibdev->eq_table[eq] = total_eqs;
   2446			if (!mlx4_assign_eq(dev, i,
   2447					    &ibdev->eq_table[eq]))
   2448				eq++;
   2449			else
   2450				ibdev->eq_table[eq] = -1;
   2451		}
   2452	}
   2453
   2454	for (i = eq; i < dev->caps.num_comp_vectors;
   2455	     ibdev->eq_table[i++] = -1)
   2456		;
   2457
   2458	/* Advertise the new number of EQs to clients */
   2459	ibdev->ib_dev.num_comp_vectors = eq;
   2460}
   2461
   2462static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
   2463{
   2464	int i;
   2465	int total_eqs = ibdev->ib_dev.num_comp_vectors;
   2466
   2467	/* no eqs were allocated */
   2468	if (!ibdev->eq_table)
   2469		return;
   2470
   2471	/* Reset the advertised EQ number */
   2472	ibdev->ib_dev.num_comp_vectors = 0;
   2473
   2474	for (i = 0; i < total_eqs; i++)
   2475		mlx4_release_eq(dev, ibdev->eq_table[i]);
   2476
   2477	kfree(ibdev->eq_table);
   2478	ibdev->eq_table = NULL;
   2479}
   2480
   2481static int mlx4_port_immutable(struct ib_device *ibdev, u32 port_num,
   2482			       struct ib_port_immutable *immutable)
   2483{
   2484	struct ib_port_attr attr;
   2485	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
   2486	int err;
   2487
   2488	if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
   2489		immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
   2490		immutable->max_mad_size = IB_MGMT_MAD_SIZE;
   2491	} else {
   2492		if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
   2493			immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
   2494		if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
   2495			immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
   2496				RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
   2497		immutable->core_cap_flags |= RDMA_CORE_PORT_RAW_PACKET;
   2498		if (immutable->core_cap_flags & (RDMA_CORE_PORT_IBA_ROCE |
   2499		    RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP))
   2500			immutable->max_mad_size = IB_MGMT_MAD_SIZE;
   2501	}
   2502
   2503	err = ib_query_port(ibdev, port_num, &attr);
   2504	if (err)
   2505		return err;
   2506
   2507	immutable->pkey_tbl_len = attr.pkey_tbl_len;
   2508	immutable->gid_tbl_len = attr.gid_tbl_len;
   2509
   2510	return 0;
   2511}
   2512
   2513static void get_fw_ver_str(struct ib_device *device, char *str)
   2514{
   2515	struct mlx4_ib_dev *dev =
   2516		container_of(device, struct mlx4_ib_dev, ib_dev);
   2517	snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d",
   2518		 (int) (dev->dev->caps.fw_ver >> 32),
   2519		 (int) (dev->dev->caps.fw_ver >> 16) & 0xffff,
   2520		 (int) dev->dev->caps.fw_ver & 0xffff);
   2521}
   2522
   2523static const struct ib_device_ops mlx4_ib_dev_ops = {
   2524	.owner = THIS_MODULE,
   2525	.driver_id = RDMA_DRIVER_MLX4,
   2526	.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION,
   2527
   2528	.add_gid = mlx4_ib_add_gid,
   2529	.alloc_mr = mlx4_ib_alloc_mr,
   2530	.alloc_pd = mlx4_ib_alloc_pd,
   2531	.alloc_ucontext = mlx4_ib_alloc_ucontext,
   2532	.attach_mcast = mlx4_ib_mcg_attach,
   2533	.create_ah = mlx4_ib_create_ah,
   2534	.create_cq = mlx4_ib_create_cq,
   2535	.create_qp = mlx4_ib_create_qp,
   2536	.create_srq = mlx4_ib_create_srq,
   2537	.dealloc_pd = mlx4_ib_dealloc_pd,
   2538	.dealloc_ucontext = mlx4_ib_dealloc_ucontext,
   2539	.del_gid = mlx4_ib_del_gid,
   2540	.dereg_mr = mlx4_ib_dereg_mr,
   2541	.destroy_ah = mlx4_ib_destroy_ah,
   2542	.destroy_cq = mlx4_ib_destroy_cq,
   2543	.destroy_qp = mlx4_ib_destroy_qp,
   2544	.destroy_srq = mlx4_ib_destroy_srq,
   2545	.detach_mcast = mlx4_ib_mcg_detach,
   2546	.device_group = &mlx4_attr_group,
   2547	.disassociate_ucontext = mlx4_ib_disassociate_ucontext,
   2548	.drain_rq = mlx4_ib_drain_rq,
   2549	.drain_sq = mlx4_ib_drain_sq,
   2550	.get_dev_fw_str = get_fw_ver_str,
   2551	.get_dma_mr = mlx4_ib_get_dma_mr,
   2552	.get_link_layer = mlx4_ib_port_link_layer,
   2553	.get_netdev = mlx4_ib_get_netdev,
   2554	.get_port_immutable = mlx4_port_immutable,
   2555	.map_mr_sg = mlx4_ib_map_mr_sg,
   2556	.mmap = mlx4_ib_mmap,
   2557	.modify_cq = mlx4_ib_modify_cq,
   2558	.modify_device = mlx4_ib_modify_device,
   2559	.modify_port = mlx4_ib_modify_port,
   2560	.modify_qp = mlx4_ib_modify_qp,
   2561	.modify_srq = mlx4_ib_modify_srq,
   2562	.poll_cq = mlx4_ib_poll_cq,
   2563	.post_recv = mlx4_ib_post_recv,
   2564	.post_send = mlx4_ib_post_send,
   2565	.post_srq_recv = mlx4_ib_post_srq_recv,
   2566	.process_mad = mlx4_ib_process_mad,
   2567	.query_ah = mlx4_ib_query_ah,
   2568	.query_device = mlx4_ib_query_device,
   2569	.query_gid = mlx4_ib_query_gid,
   2570	.query_pkey = mlx4_ib_query_pkey,
   2571	.query_port = mlx4_ib_query_port,
   2572	.query_qp = mlx4_ib_query_qp,
   2573	.query_srq = mlx4_ib_query_srq,
   2574	.reg_user_mr = mlx4_ib_reg_user_mr,
   2575	.req_notify_cq = mlx4_ib_arm_cq,
   2576	.rereg_user_mr = mlx4_ib_rereg_user_mr,
   2577	.resize_cq = mlx4_ib_resize_cq,
   2578
   2579	INIT_RDMA_OBJ_SIZE(ib_ah, mlx4_ib_ah, ibah),
   2580	INIT_RDMA_OBJ_SIZE(ib_cq, mlx4_ib_cq, ibcq),
   2581	INIT_RDMA_OBJ_SIZE(ib_pd, mlx4_ib_pd, ibpd),
   2582	INIT_RDMA_OBJ_SIZE(ib_qp, mlx4_ib_qp, ibqp),
   2583	INIT_RDMA_OBJ_SIZE(ib_srq, mlx4_ib_srq, ibsrq),
   2584	INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx4_ib_ucontext, ibucontext),
   2585};
   2586
   2587static const struct ib_device_ops mlx4_ib_dev_wq_ops = {
   2588	.create_rwq_ind_table = mlx4_ib_create_rwq_ind_table,
   2589	.create_wq = mlx4_ib_create_wq,
   2590	.destroy_rwq_ind_table = mlx4_ib_destroy_rwq_ind_table,
   2591	.destroy_wq = mlx4_ib_destroy_wq,
   2592	.modify_wq = mlx4_ib_modify_wq,
   2593
   2594	INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mlx4_ib_rwq_ind_table,
   2595			   ib_rwq_ind_tbl),
   2596};
   2597
   2598static const struct ib_device_ops mlx4_ib_dev_mw_ops = {
   2599	.alloc_mw = mlx4_ib_alloc_mw,
   2600	.dealloc_mw = mlx4_ib_dealloc_mw,
   2601
   2602	INIT_RDMA_OBJ_SIZE(ib_mw, mlx4_ib_mw, ibmw),
   2603};
   2604
   2605static const struct ib_device_ops mlx4_ib_dev_xrc_ops = {
   2606	.alloc_xrcd = mlx4_ib_alloc_xrcd,
   2607	.dealloc_xrcd = mlx4_ib_dealloc_xrcd,
   2608
   2609	INIT_RDMA_OBJ_SIZE(ib_xrcd, mlx4_ib_xrcd, ibxrcd),
   2610};
   2611
   2612static const struct ib_device_ops mlx4_ib_dev_fs_ops = {
   2613	.create_flow = mlx4_ib_create_flow,
   2614	.destroy_flow = mlx4_ib_destroy_flow,
   2615};
   2616
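        /*
         * Probe callback from the mlx4 core: allocate and register the IB
         * device and set up UAR/PD, EQs, counters, steering resources,
         * diagnostic counters, MAD/SR-IOV support and the netdev notifier.
         */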
   2617static void *mlx4_ib_add(struct mlx4_dev *dev)
   2618{
   2619	struct mlx4_ib_dev *ibdev;
   2620	int num_ports = 0;
   2621	int i, j;
   2622	int err;
   2623	struct mlx4_ib_iboe *iboe;
   2624	int ib_num_ports = 0;
   2625	int num_req_counters;
   2626	int allocated;
   2627	u32 counter_index;
   2628	struct counter_index *new_counter_index = NULL;
   2629
   2630	pr_info_once("%s", mlx4_ib_version);
   2631
   2632	num_ports = 0;
   2633	mlx4_foreach_ib_transport_port(i, dev)
   2634		num_ports++;
   2635
   2636	/* No point in registering a device with no ports... */
   2637	if (num_ports == 0)
   2638		return NULL;
   2639
   2640	ibdev = ib_alloc_device(mlx4_ib_dev, ib_dev);
   2641	if (!ibdev) {
   2642		dev_err(&dev->persist->pdev->dev,
   2643			"Device struct alloc failed\n");
   2644		return NULL;
   2645	}
   2646
   2647	iboe = &ibdev->iboe;
   2648
   2649	if (mlx4_pd_alloc(dev, &ibdev->priv_pdn))
   2650		goto err_dealloc;
   2651
   2652	if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
   2653		goto err_pd;
   2654
   2655	ibdev->uar_map = ioremap((phys_addr_t) ibdev->priv_uar.pfn << PAGE_SHIFT,
   2656				 PAGE_SIZE);
   2657	if (!ibdev->uar_map)
   2658		goto err_uar;
   2659	MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
   2660
   2661	ibdev->dev = dev;
   2662	ibdev->bond_next_port	= 0;
   2663
   2664	ibdev->ib_dev.node_type		= RDMA_NODE_IB_CA;
   2665	ibdev->ib_dev.local_dma_lkey	= dev->caps.reserved_lkey;
   2666	ibdev->num_ports		= num_ports;
   2667	ibdev->ib_dev.phys_port_cnt     = mlx4_is_bonded(dev) ?
   2668						1 : ibdev->num_ports;
   2669	ibdev->ib_dev.num_comp_vectors	= dev->caps.num_comp_vectors;
   2670	ibdev->ib_dev.dev.parent	= &dev->persist->pdev->dev;
   2671
   2672	ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_ops);
   2673
   2674	if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) &&
   2675	    ((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) ==
   2676	    IB_LINK_LAYER_ETHERNET) ||
   2677	    (mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) ==
   2678	    IB_LINK_LAYER_ETHERNET)))
   2679		ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_wq_ops);
   2680
   2681	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
   2682	    dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)
   2683		ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_mw_ops);
   2684
   2685	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
   2686		ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_xrc_ops);
   2687	}
   2688
   2689	if (check_flow_steering_support(dev)) {
   2690		ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED;
   2691		ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fs_ops);
   2692	}
   2693
   2694	if (!dev->caps.userspace_caps)
   2695		ibdev->ib_dev.ops.uverbs_abi_ver =
   2696			MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
   2697
   2698	mlx4_ib_alloc_eqs(dev, ibdev);
   2699
   2700	spin_lock_init(&iboe->lock);
   2701
   2702	if (init_node_data(ibdev))
   2703		goto err_map;
   2704	mlx4_init_sl2vl_tbl(ibdev);
   2705
   2706	for (i = 0; i < ibdev->num_ports; ++i) {
   2707		mutex_init(&ibdev->counters_table[i].mutex);
   2708		INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list);
   2709		iboe->last_port_state[i] = IB_PORT_DOWN;
   2710	}
   2711
   2712	num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports;
   2713	for (i = 0; i < num_req_counters; ++i) {
   2714		mutex_init(&ibdev->qp1_proxy_lock[i]);
   2715		allocated = 0;
   2716		if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
   2717						IB_LINK_LAYER_ETHERNET) {
   2718			err = mlx4_counter_alloc(ibdev->dev, &counter_index,
   2719						 MLX4_RES_USAGE_DRIVER);
    2720			/* if allocating a new counter fails, use the default */
   2721			if (err)
   2722				counter_index =
   2723					mlx4_get_default_counter_index(dev,
   2724								       i + 1);
   2725			else
   2726				allocated = 1;
    2727		} else { /* IB_LINK_LAYER_INFINIBAND uses the default counter */
   2728			counter_index = mlx4_get_default_counter_index(dev,
   2729								       i + 1);
   2730		}
   2731		new_counter_index = kmalloc(sizeof(*new_counter_index),
   2732					    GFP_KERNEL);
   2733		if (!new_counter_index) {
   2734			if (allocated)
   2735				mlx4_counter_free(ibdev->dev, counter_index);
   2736			goto err_counter;
   2737		}
   2738		new_counter_index->index = counter_index;
   2739		new_counter_index->allocated = allocated;
   2740		list_add_tail(&new_counter_index->list,
   2741			      &ibdev->counters_table[i].counters_list);
   2742		ibdev->counters_table[i].default_counter = counter_index;
   2743		pr_info("counter index %d for port %d allocated %d\n",
   2744			counter_index, i + 1, allocated);
   2745	}
   2746	if (mlx4_is_bonded(dev))
   2747		for (i = 1; i < ibdev->num_ports ; ++i) {
   2748			new_counter_index =
   2749					kmalloc(sizeof(struct counter_index),
   2750						GFP_KERNEL);
   2751			if (!new_counter_index)
   2752				goto err_counter;
   2753			new_counter_index->index = counter_index;
   2754			new_counter_index->allocated = 0;
   2755			list_add_tail(&new_counter_index->list,
   2756				      &ibdev->counters_table[i].counters_list);
   2757			ibdev->counters_table[i].default_counter =
   2758								counter_index;
   2759		}
   2760
   2761	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
   2762		ib_num_ports++;
   2763
   2764	spin_lock_init(&ibdev->sm_lock);
   2765	mutex_init(&ibdev->cap_mask_mutex);
   2766	INIT_LIST_HEAD(&ibdev->qp_list);
   2767	spin_lock_init(&ibdev->reset_flow_resource_lock);
   2768
   2769	if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED &&
   2770	    ib_num_ports) {
   2771		ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
   2772		err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
   2773					    MLX4_IB_UC_STEER_QPN_ALIGN,
   2774					    &ibdev->steer_qpn_base, 0,
   2775					    MLX4_RES_USAGE_DRIVER);
   2776		if (err)
   2777			goto err_counter;
   2778
   2779		ibdev->ib_uc_qpns_bitmap = bitmap_alloc(ibdev->steer_qpn_count,
   2780							GFP_KERNEL);
   2781		if (!ibdev->ib_uc_qpns_bitmap)
   2782			goto err_steer_qp_release;
   2783
   2784		if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB) {
   2785			bitmap_zero(ibdev->ib_uc_qpns_bitmap,
   2786				    ibdev->steer_qpn_count);
   2787			err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(
   2788					dev, ibdev->steer_qpn_base,
   2789					ibdev->steer_qpn_base +
   2790					ibdev->steer_qpn_count - 1);
   2791			if (err)
   2792				goto err_steer_free_bitmap;
   2793		} else {
   2794			bitmap_fill(ibdev->ib_uc_qpns_bitmap,
   2795				    ibdev->steer_qpn_count);
   2796		}
   2797	}
   2798
   2799	for (j = 1; j <= ibdev->dev->caps.num_ports; j++)
   2800		atomic64_set(&iboe->mac[j - 1], ibdev->dev->caps.def_mac[j]);
   2801
   2802	if (mlx4_ib_alloc_diag_counters(ibdev))
   2803		goto err_steer_free_bitmap;
   2804
   2805	if (ib_register_device(&ibdev->ib_dev, "mlx4_%d",
   2806			       &dev->persist->pdev->dev))
   2807		goto err_diag_counters;
   2808
   2809	if (mlx4_ib_mad_init(ibdev))
   2810		goto err_reg;
   2811
   2812	if (mlx4_ib_init_sriov(ibdev))
   2813		goto err_mad;
   2814
   2815	if (!iboe->nb.notifier_call) {
   2816		iboe->nb.notifier_call = mlx4_ib_netdev_event;
   2817		err = register_netdevice_notifier(&iboe->nb);
   2818		if (err) {
   2819			iboe->nb.notifier_call = NULL;
   2820			goto err_notif;
   2821		}
   2822	}
   2823	if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
   2824		err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
   2825		if (err)
   2826			goto err_notif;
   2827	}
   2828
   2829	ibdev->ib_active = true;
   2830	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
   2831		devlink_port_type_ib_set(mlx4_get_devlink_port(dev, i),
   2832					 &ibdev->ib_dev);
   2833
   2834	if (mlx4_is_mfunc(ibdev->dev))
   2835		init_pkeys(ibdev);
   2836
   2837	/* create paravirt contexts for any VFs which are active */
   2838	if (mlx4_is_master(ibdev->dev)) {
   2839		for (j = 0; j < MLX4_MFUNC_MAX; j++) {
   2840			if (j == mlx4_master_func_num(ibdev->dev))
   2841				continue;
   2842			if (mlx4_is_slave_active(ibdev->dev, j))
   2843				do_slave_init(ibdev, j, 1);
   2844		}
   2845	}
   2846	return ibdev;
   2847
   2848err_notif:
   2849	if (ibdev->iboe.nb.notifier_call) {
   2850		if (unregister_netdevice_notifier(&ibdev->iboe.nb))
   2851			pr_warn("failure unregistering notifier\n");
   2852		ibdev->iboe.nb.notifier_call = NULL;
   2853	}
   2854	flush_workqueue(wq);
   2855
   2856	mlx4_ib_close_sriov(ibdev);
   2857
   2858err_mad:
   2859	mlx4_ib_mad_cleanup(ibdev);
   2860
   2861err_reg:
   2862	ib_unregister_device(&ibdev->ib_dev);
   2863
   2864err_diag_counters:
   2865	mlx4_ib_diag_cleanup(ibdev);
   2866
   2867err_steer_free_bitmap:
   2868	bitmap_free(ibdev->ib_uc_qpns_bitmap);
   2869
   2870err_steer_qp_release:
   2871	mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
   2872			      ibdev->steer_qpn_count);
   2873err_counter:
   2874	for (i = 0; i < ibdev->num_ports; ++i)
   2875		mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]);
   2876
   2877err_map:
   2878	mlx4_ib_free_eqs(dev, ibdev);
   2879	iounmap(ibdev->uar_map);
   2880
   2881err_uar:
   2882	mlx4_uar_free(dev, &ibdev->priv_uar);
   2883
   2884err_pd:
   2885	mlx4_pd_free(dev, ibdev->priv_pdn);
   2886
   2887err_dealloc:
   2888	ib_dealloc_device(&ibdev->ib_dev);
   2889
   2890	return NULL;
   2891}
   2892
   2893int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn)
   2894{
   2895	int offset;
   2896
   2897	WARN_ON(!dev->ib_uc_qpns_bitmap);
   2898
   2899	offset = bitmap_find_free_region(dev->ib_uc_qpns_bitmap,
   2900					 dev->steer_qpn_count,
   2901					 get_count_order(count));
   2902	if (offset < 0)
   2903		return offset;
   2904
   2905	*qpn = dev->steer_qpn_base + offset;
   2906	return 0;
   2907}
   2908
   2909void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count)
   2910{
   2911	if (!qpn ||
   2912	    dev->steering_support != MLX4_STEERING_MODE_DEVICE_MANAGED)
   2913		return;
   2914
   2915	if (WARN(qpn < dev->steer_qpn_base, "qpn = %u, steer_qpn_base = %u\n",
   2916		 qpn, dev->steer_qpn_base))
   2917		/* not supposed to be here */
   2918		return;
   2919
   2920	bitmap_release_region(dev->ib_uc_qpns_bitmap,
   2921			      qpn - dev->steer_qpn_base,
   2922			      get_count_order(count));
   2923}
   2924
   2925int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
   2926			 int is_attach)
   2927{
   2928	int err;
   2929	size_t flow_size;
   2930	struct ib_flow_attr *flow = NULL;
   2931	struct ib_flow_spec_ib *ib_spec;
   2932
   2933	if (is_attach) {
   2934		flow_size = sizeof(struct ib_flow_attr) +
   2935			    sizeof(struct ib_flow_spec_ib);
   2936		flow = kzalloc(flow_size, GFP_KERNEL);
   2937		if (!flow)
   2938			return -ENOMEM;
   2939		flow->port = mqp->port;
   2940		flow->num_of_specs = 1;
   2941		flow->size = flow_size;
   2942		ib_spec = (struct ib_flow_spec_ib *)(flow + 1);
   2943		ib_spec->type = IB_FLOW_SPEC_IB;
   2944		ib_spec->size = sizeof(struct ib_flow_spec_ib);
   2945		/* Add an empty rule for IB L2 */
   2946		memset(&ib_spec->mask, 0, sizeof(ib_spec->mask));
   2947
   2948		err = __mlx4_ib_create_flow(&mqp->ibqp, flow, MLX4_DOMAIN_NIC,
   2949					    MLX4_FS_REGULAR, &mqp->reg_id);
   2950	} else {
   2951		err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
   2952	}
   2953	kfree(flow);
   2954	return err;
   2955}
   2956
   2957static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
   2958{
   2959	struct mlx4_ib_dev *ibdev = ibdev_ptr;
   2960	int p;
   2961	int i;
   2962
   2963	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
   2964		devlink_port_type_clear(mlx4_get_devlink_port(dev, i));
   2965	ibdev->ib_active = false;
   2966	flush_workqueue(wq);
   2967
   2968	if (ibdev->iboe.nb.notifier_call) {
   2969		if (unregister_netdevice_notifier(&ibdev->iboe.nb))
   2970			pr_warn("failure unregistering notifier\n");
   2971		ibdev->iboe.nb.notifier_call = NULL;
   2972	}
   2973
   2974	mlx4_ib_close_sriov(ibdev);
   2975	mlx4_ib_mad_cleanup(ibdev);
   2976	ib_unregister_device(&ibdev->ib_dev);
   2977	mlx4_ib_diag_cleanup(ibdev);
   2978
   2979	mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
   2980			      ibdev->steer_qpn_count);
   2981	bitmap_free(ibdev->ib_uc_qpns_bitmap);
   2982
   2983	iounmap(ibdev->uar_map);
   2984	for (p = 0; p < ibdev->num_ports; ++p)
   2985		mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[p]);
   2986
   2987	mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
   2988		mlx4_CLOSE_PORT(dev, p);
   2989
   2990	mlx4_ib_free_eqs(dev, ibdev);
   2991
   2992	mlx4_uar_free(dev, &ibdev->priv_uar);
   2993	mlx4_pd_free(dev, ibdev->priv_pdn);
   2994	ib_dealloc_device(&ibdev->ib_dev);
   2995}
   2996
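        /*
         * On the SR-IOV master, queue work that sets up (do_init != 0) or
         * tears down the tunnel QPs for each active port of the given slave.
         */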
   2997static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
   2998{
   2999	struct mlx4_ib_demux_work **dm = NULL;
   3000	struct mlx4_dev *dev = ibdev->dev;
   3001	int i;
   3002	unsigned long flags;
   3003	struct mlx4_active_ports actv_ports;
   3004	unsigned int ports;
   3005	unsigned int first_port;
   3006
   3007	if (!mlx4_is_master(dev))
   3008		return;
   3009
   3010	actv_ports = mlx4_get_active_ports(dev, slave);
   3011	ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports);
   3012	first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports);
   3013
   3014	dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC);
   3015	if (!dm)
   3016		return;
   3017
   3018	for (i = 0; i < ports; i++) {
   3019		dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
   3020		if (!dm[i]) {
   3021			while (--i >= 0)
   3022				kfree(dm[i]);
   3023			goto out;
   3024		}
   3025		INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work);
   3026		dm[i]->port = first_port + i + 1;
   3027		dm[i]->slave = slave;
   3028		dm[i]->do_init = do_init;
   3029		dm[i]->dev = ibdev;
   3030	}
   3031	/* initialize or tear down tunnel QPs for the slave */
   3032	spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags);
   3033	if (!ibdev->sriov.is_going_down) {
   3034		for (i = 0; i < ports; i++)
   3035			queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work);
   3036		spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
   3037	} else {
   3038		spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
   3039		for (i = 0; i < ports; i++)
   3040			kfree(dm[i]);
   3041	}
   3042out:
   3043	kfree(dm);
   3044	return;
   3045}
   3046
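        /*
         * On a catastrophic error, walk every QP on the device and fire a
         * completion notification on each CQ that still has outstanding work,
         * so consumers can observe the failure and start their reset flow.
         */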
   3047static void mlx4_ib_handle_catas_error(struct mlx4_ib_dev *ibdev)
   3048{
   3049	struct mlx4_ib_qp *mqp;
   3050	unsigned long flags_qp;
   3051	unsigned long flags_cq;
   3052	struct mlx4_ib_cq *send_mcq, *recv_mcq;
   3053	struct list_head    cq_notify_list;
   3054	struct mlx4_cq *mcq;
   3055	unsigned long flags;
   3056
   3057	pr_warn("mlx4_ib_handle_catas_error was started\n");
   3058	INIT_LIST_HEAD(&cq_notify_list);
   3059
    3060	/* Go over the qp list residing on this ibdev, synced with qp create/destroy. */
   3061	spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
   3062
   3063	list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
   3064		spin_lock_irqsave(&mqp->sq.lock, flags_qp);
   3065		if (mqp->sq.tail != mqp->sq.head) {
   3066			send_mcq = to_mcq(mqp->ibqp.send_cq);
   3067			spin_lock_irqsave(&send_mcq->lock, flags_cq);
   3068			if (send_mcq->mcq.comp &&
   3069			    mqp->ibqp.send_cq->comp_handler) {
   3070				if (!send_mcq->mcq.reset_notify_added) {
   3071					send_mcq->mcq.reset_notify_added = 1;
   3072					list_add_tail(&send_mcq->mcq.reset_notify,
   3073						      &cq_notify_list);
   3074				}
   3075			}
   3076			spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
   3077		}
   3078		spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
   3079		/* Now, handle the QP's receive queue */
   3080		spin_lock_irqsave(&mqp->rq.lock, flags_qp);
   3081		/* no handling is needed for SRQ */
   3082		if (!mqp->ibqp.srq) {
   3083			if (mqp->rq.tail != mqp->rq.head) {
   3084				recv_mcq = to_mcq(mqp->ibqp.recv_cq);
   3085				spin_lock_irqsave(&recv_mcq->lock, flags_cq);
   3086				if (recv_mcq->mcq.comp &&
   3087				    mqp->ibqp.recv_cq->comp_handler) {
   3088					if (!recv_mcq->mcq.reset_notify_added) {
   3089						recv_mcq->mcq.reset_notify_added = 1;
   3090						list_add_tail(&recv_mcq->mcq.reset_notify,
   3091							      &cq_notify_list);
   3092					}
   3093				}
   3094				spin_unlock_irqrestore(&recv_mcq->lock,
   3095						       flags_cq);
   3096			}
   3097		}
   3098		spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
   3099	}
   3100
   3101	list_for_each_entry(mcq, &cq_notify_list, reset_notify) {
   3102		mcq->comp(mcq);
   3103	}
   3104	spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
   3105	pr_warn("mlx4_ib_handle_catas_error ended\n");
   3106}
   3107
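        /*
         * With bonded ports the IB device exposes a single port: derive its
         * state from the member netdevs (ACTIVE if any member is running with
         * carrier) and dispatch the corresponding port event on port 1.
         */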
   3108static void handle_bonded_port_state_event(struct work_struct *work)
   3109{
   3110	struct ib_event_work *ew =
   3111		container_of(work, struct ib_event_work, work);
   3112	struct mlx4_ib_dev *ibdev = ew->ib_dev;
   3113	enum ib_port_state bonded_port_state = IB_PORT_NOP;
   3114	int i;
   3115	struct ib_event ibev;
   3116
   3117	kfree(ew);
   3118	spin_lock_bh(&ibdev->iboe.lock);
   3119	for (i = 0; i < MLX4_MAX_PORTS; ++i) {
   3120		struct net_device *curr_netdev = ibdev->iboe.netdevs[i];
   3121		enum ib_port_state curr_port_state;
   3122
   3123		if (!curr_netdev)
   3124			continue;
   3125
   3126		curr_port_state =
   3127			(netif_running(curr_netdev) &&
   3128			 netif_carrier_ok(curr_netdev)) ?
   3129			IB_PORT_ACTIVE : IB_PORT_DOWN;
   3130
   3131		bonded_port_state = (bonded_port_state != IB_PORT_ACTIVE) ?
   3132			curr_port_state : IB_PORT_ACTIVE;
   3133	}
   3134	spin_unlock_bh(&ibdev->iboe.lock);
   3135
   3136	ibev.device = &ibdev->ib_dev;
   3137	ibev.element.port_num = 1;
   3138	ibev.event = (bonded_port_state == IB_PORT_ACTIVE) ?
   3139		IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
   3140
   3141	ib_dispatch_event(&ibev);
   3142}
   3143
   3144void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port)
   3145{
   3146	u64 sl2vl;
   3147	int err;
   3148
   3149	err = mlx4_ib_query_sl2vl(&mdev->ib_dev, port, &sl2vl);
   3150	if (err) {
   3151		pr_err("Unable to get current sl to vl mapping for port %d.  Using all zeroes (%d)\n",
   3152		       port, err);
   3153		sl2vl = 0;
   3154	}
   3155	atomic64_set(&mdev->sl2vl[port - 1], sl2vl);
   3156}
   3157
   3158static void ib_sl2vl_update_work(struct work_struct *work)
   3159{
   3160	struct ib_event_work *ew = container_of(work, struct ib_event_work, work);
   3161	struct mlx4_ib_dev *mdev = ew->ib_dev;
   3162	int port = ew->port;
   3163
   3164	mlx4_ib_sl2vl_update(mdev, port);
   3165
   3166	kfree(ew);
   3167}
   3168
   3169void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
   3170				     int port)
   3171{
   3172	struct ib_event_work *ew;
   3173
   3174	ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
   3175	if (ew) {
   3176		INIT_WORK(&ew->work, ib_sl2vl_update_work);
   3177		ew->port = port;
   3178		ew->ib_dev = ibdev;
   3179		queue_work(wq, &ew->work);
   3180	}
   3181}
   3182
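        /*
         * mlx4 core event handler: translate low-level device events into IB
         * events (port up/down, device fatal, port management changes) and
         * handle slave init/shutdown bookkeeping.
         */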
   3183static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
   3184			  enum mlx4_dev_event event, unsigned long param)
   3185{
   3186	struct ib_event ibev;
   3187	struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
   3188	struct mlx4_eqe *eqe = NULL;
   3189	struct ib_event_work *ew;
   3190	int p = 0;
   3191
   3192	if (mlx4_is_bonded(dev) &&
   3193	    ((event == MLX4_DEV_EVENT_PORT_UP) ||
   3194	    (event == MLX4_DEV_EVENT_PORT_DOWN))) {
   3195		ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
   3196		if (!ew)
   3197			return;
   3198		INIT_WORK(&ew->work, handle_bonded_port_state_event);
   3199		ew->ib_dev = ibdev;
   3200		queue_work(wq, &ew->work);
   3201		return;
   3202	}
   3203
   3204	if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
   3205		eqe = (struct mlx4_eqe *)param;
   3206	else
   3207		p = (int) param;
   3208
   3209	switch (event) {
   3210	case MLX4_DEV_EVENT_PORT_UP:
   3211		if (p > ibdev->num_ports)
   3212			return;
   3213		if (!mlx4_is_slave(dev) &&
   3214		    rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
   3215			IB_LINK_LAYER_INFINIBAND) {
   3216			if (mlx4_is_master(dev))
   3217				mlx4_ib_invalidate_all_guid_record(ibdev, p);
   3218			if (ibdev->dev->flags & MLX4_FLAG_SECURE_HOST &&
   3219			    !(ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT))
   3220				mlx4_sched_ib_sl2vl_update_work(ibdev, p);
   3221		}
   3222		ibev.event = IB_EVENT_PORT_ACTIVE;
   3223		break;
   3224
   3225	case MLX4_DEV_EVENT_PORT_DOWN:
   3226		if (p > ibdev->num_ports)
   3227			return;
   3228		ibev.event = IB_EVENT_PORT_ERR;
   3229		break;
   3230
   3231	case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
   3232		ibdev->ib_active = false;
   3233		ibev.event = IB_EVENT_DEVICE_FATAL;
   3234		mlx4_ib_handle_catas_error(ibdev);
   3235		break;
   3236
   3237	case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
   3238		ew = kmalloc(sizeof *ew, GFP_ATOMIC);
   3239		if (!ew)
   3240			return;
   3241
   3242		INIT_WORK(&ew->work, handle_port_mgmt_change_event);
   3243		memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
   3244		ew->ib_dev = ibdev;
   3245		/* need to queue only for port owner, which uses GEN_EQE */
   3246		if (mlx4_is_master(dev))
   3247			queue_work(wq, &ew->work);
   3248		else
   3249			handle_port_mgmt_change_event(&ew->work);
   3250		return;
   3251
   3252	case MLX4_DEV_EVENT_SLAVE_INIT:
   3253		/* here, p is the slave id */
   3254		do_slave_init(ibdev, p, 1);
   3255		if (mlx4_is_master(dev)) {
   3256			int i;
   3257
   3258			for (i = 1; i <= ibdev->num_ports; i++) {
   3259				if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
   3260					== IB_LINK_LAYER_INFINIBAND)
   3261					mlx4_ib_slave_alias_guid_event(ibdev,
   3262								       p, i,
   3263								       1);
   3264			}
   3265		}
   3266		return;
   3267
   3268	case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
   3269		if (mlx4_is_master(dev)) {
   3270			int i;
   3271
   3272			for (i = 1; i <= ibdev->num_ports; i++) {
   3273				if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
   3274					== IB_LINK_LAYER_INFINIBAND)
   3275					mlx4_ib_slave_alias_guid_event(ibdev,
   3276								       p, i,
   3277								       0);
   3278			}
   3279		}
   3280		/* here, p is the slave id */
   3281		do_slave_init(ibdev, p, 0);
   3282		return;
   3283
   3284	default:
   3285		return;
   3286	}
   3287
   3288	ibev.device	      = ibdev_ptr;
   3289	ibev.element.port_num = mlx4_is_bonded(ibdev->dev) ? 1 : (u8)p;
   3290
   3291	ib_dispatch_event(&ibev);
   3292}
   3293
   3294static struct mlx4_interface mlx4_ib_interface = {
   3295	.add		= mlx4_ib_add,
   3296	.remove		= mlx4_ib_remove,
   3297	.event		= mlx4_ib_event,
   3298	.protocol	= MLX4_PROT_IB_IPV6,
   3299	.flags		= MLX4_INTFF_BONDING
   3300};
   3301
   3302static int __init mlx4_ib_init(void)
   3303{
   3304	int err;
   3305
   3306	wq = alloc_ordered_workqueue("mlx4_ib", WQ_MEM_RECLAIM);
   3307	if (!wq)
   3308		return -ENOMEM;
   3309
   3310	err = mlx4_ib_cm_init();
   3311	if (err)
   3312		goto clean_wq;
   3313
   3314	err = mlx4_ib_mcg_init();
   3315	if (err)
   3316		goto clean_cm;
   3317
   3318	err = mlx4_register_interface(&mlx4_ib_interface);
   3319	if (err)
   3320		goto clean_mcg;
   3321
   3322	return 0;
   3323
   3324clean_mcg:
   3325	mlx4_ib_mcg_destroy();
   3326
   3327clean_cm:
   3328	mlx4_ib_cm_destroy();
   3329
   3330clean_wq:
   3331	destroy_workqueue(wq);
   3332	return err;
   3333}
   3334
   3335static void __exit mlx4_ib_cleanup(void)
   3336{
   3337	mlx4_unregister_interface(&mlx4_ib_interface);
   3338	mlx4_ib_mcg_destroy();
   3339	mlx4_ib_cm_destroy();
   3340	destroy_workqueue(wq);
   3341}
   3342
   3343module_init(mlx4_ib_init);
   3344module_exit(mlx4_ib_cleanup);