cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

main.c (48156B)


      1/*
      2 * Broadcom NetXtreme-E RoCE driver.
      3 *
      4 * Copyright (c) 2016 - 2017, Broadcom. All rights reserved.  The term
      5 * Broadcom refers to Broadcom Limited and/or its subsidiaries.
      6 *
      7 * This software is available to you under a choice of one of two
      8 * licenses.  You may choose to be licensed under the terms of the GNU
      9 * General Public License (GPL) Version 2, available from the file
     10 * COPYING in the main directory of this source tree, or the
     11 * BSD license below:
     12 *
     13 * Redistribution and use in source and binary forms, with or without
     14 * modification, are permitted provided that the following conditions
     15 * are met:
     16 *
     17 * 1. Redistributions of source code must retain the above copyright
     18 *    notice, this list of conditions and the following disclaimer.
     19 * 2. Redistributions in binary form must reproduce the above copyright
     20 *    notice, this list of conditions and the following disclaimer in
     21 *    the documentation and/or other materials provided with the
     22 *    distribution.
     23 *
     24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
     25 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
     26 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     27 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
     28 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
     31 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
     32 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
     33 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
     34 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     35 *
     36 * Description: Main component of the bnxt_re driver
     37 */
     38
     39#include <linux/module.h>
     40#include <linux/netdevice.h>
     41#include <linux/ethtool.h>
     42#include <linux/mutex.h>
     43#include <linux/list.h>
     44#include <linux/rculist.h>
     45#include <linux/spinlock.h>
     46#include <linux/pci.h>
     47#include <net/dcbnl.h>
     48#include <net/ipv6.h>
     49#include <net/addrconf.h>
     50#include <linux/if_ether.h>
     51
     52#include <rdma/ib_verbs.h>
     53#include <rdma/ib_user_verbs.h>
     54#include <rdma/ib_umem.h>
     55#include <rdma/ib_addr.h>
     56
     57#include "bnxt_ulp.h"
     58#include "roce_hsi.h"
     59#include "qplib_res.h"
     60#include "qplib_sp.h"
     61#include "qplib_fp.h"
     62#include "qplib_rcfw.h"
     63#include "bnxt_re.h"
     64#include "ib_verbs.h"
     65#include <rdma/bnxt_re-abi.h>
     66#include "bnxt.h"
     67#include "hw_counters.h"
     68
     69static char version[] =
     70		BNXT_RE_DESC "\n";
     71
     72MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>");
     73MODULE_DESCRIPTION(BNXT_RE_DESC " Driver");
     74MODULE_LICENSE("Dual BSD/GPL");
     75
     76/* globals */
     77static struct list_head bnxt_re_dev_list = LIST_HEAD_INIT(bnxt_re_dev_list);
     78/* Mutex to protect the list of bnxt_re devices added */
     79static DEFINE_MUTEX(bnxt_re_dev_lock);
     80static struct workqueue_struct *bnxt_re_wq;
     81static void bnxt_re_remove_device(struct bnxt_re_dev *rdev);
     82static void bnxt_re_dealloc_driver(struct ib_device *ib_dev);
     83static void bnxt_re_stop_irq(void *handle);
     84static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev);
     85
     86static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev, u8 mode)
     87{
     88	struct bnxt_qplib_chip_ctx *cctx;
     89
     90	cctx = rdev->chip_ctx;
     91	cctx->modes.wqe_mode = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ?
     92			       mode : BNXT_QPLIB_WQE_MODE_STATIC;
     93}
     94
     95static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev)
     96{
     97	struct bnxt_qplib_chip_ctx *chip_ctx;
     98
     99	if (!rdev->chip_ctx)
    100		return;
    101	chip_ctx = rdev->chip_ctx;
    102	rdev->chip_ctx = NULL;
    103	rdev->rcfw.res = NULL;
    104	rdev->qplib_res.cctx = NULL;
    105	rdev->qplib_res.pdev = NULL;
    106	rdev->qplib_res.netdev = NULL;
    107	kfree(chip_ctx);
    108}
    109
    110static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode)
    111{
    112	struct bnxt_qplib_chip_ctx *chip_ctx;
    113	struct bnxt_en_dev *en_dev;
    114	struct bnxt *bp;
    115
    116	en_dev = rdev->en_dev;
    117	bp = netdev_priv(en_dev->net);
    118
    119	chip_ctx = kzalloc(sizeof(*chip_ctx), GFP_KERNEL);
    120	if (!chip_ctx)
    121		return -ENOMEM;
    122	chip_ctx->chip_num = bp->chip_num;
    123	chip_ctx->hw_stats_size = bp->hw_ring_stats_size;
    124
    125	rdev->chip_ctx = chip_ctx;
    126	/* rest of the members to follow eventually */
    127
    128	rdev->qplib_res.cctx = rdev->chip_ctx;
    129	rdev->rcfw.res = &rdev->qplib_res;
    130	rdev->qplib_res.dattr = &rdev->dev_attr;
    131	rdev->qplib_res.is_vf = BNXT_VF(bp);
    132
    133	bnxt_re_set_drv_mode(rdev, wqe_mode);
    134	if (bnxt_qplib_determine_atomics(en_dev->pdev))
    135		ibdev_info(&rdev->ibdev,
    136			   "platform doesn't support global atomics.");
    137	return 0;
    138}
    139
    140/* SR-IOV helper functions */
    141
    142static void bnxt_re_get_sriov_func_type(struct bnxt_re_dev *rdev)
    143{
    144	struct bnxt *bp;
    145
    146	bp = netdev_priv(rdev->en_dev->net);
    147	if (BNXT_VF(bp))
    148		rdev->is_virtfn = 1;
    149}
    150
    151/* Set the maximum number of each resource that the driver actually wants
    152 * to allocate. This may be up to the maximum number the firmware has
    153 * reserved for the function. The driver may choose to allocate fewer
    154 * resources than the firmware maximum.
    155 */
    156static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev)
    157{
    158	struct bnxt_qplib_dev_attr *attr;
    159	struct bnxt_qplib_ctx *ctx;
    160	int i;
    161
    162	attr = &rdev->dev_attr;
    163	ctx = &rdev->qplib_ctx;
    164
    165	ctx->qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT,
    166			       attr->max_qp);
    167	ctx->mrw_count = BNXT_RE_MAX_MRW_COUNT_256K;
    168	/* Use max_mr from fw since max_mrw does not get set */
    169	ctx->mrw_count = min_t(u32, ctx->mrw_count, attr->max_mr);
    170	ctx->srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT,
    171				attr->max_srq);
    172	ctx->cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT, attr->max_cq);
    173	if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx))
    174		for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
    175			rdev->qplib_ctx.tqm_ctx.qcount[i] =
    176			rdev->dev_attr.tqm_alloc_reqs[i];
    177}
    178
    179static void bnxt_re_limit_vf_res(struct bnxt_qplib_ctx *qplib_ctx, u32 num_vf)
    180{
    181	struct bnxt_qplib_vf_res *vf_res;
    182	u32 mrws = 0;
    183	u32 vf_pct;
    184	u32 nvfs;
    185
    186	vf_res = &qplib_ctx->vf_res;
    187	/*
    188	 * Reserve a set of resources for the PF. Divide the remaining
    189	 * resources among the VFs
    190	 */
    191	vf_pct = 100 - BNXT_RE_PCT_RSVD_FOR_PF;
    192	nvfs = num_vf;
    193	num_vf = 100 * num_vf;
    194	vf_res->max_qp_per_vf = (qplib_ctx->qpc_count * vf_pct) / num_vf;
    195	vf_res->max_srq_per_vf = (qplib_ctx->srqc_count * vf_pct) / num_vf;
    196	vf_res->max_cq_per_vf = (qplib_ctx->cq_count * vf_pct) / num_vf;
    197	/*
    198	 * The driver allows many more MRs than other resources. If the
    199	 * firmware does also, then reserve a fixed amount for the PF and
    200	 * divide the rest among VFs. VFs may use many MRs for NFS
    201	 * mounts, ISER, NVME applications, etc. If the firmware severely
    202	 * restricts the number of MRs, then let PF have half and divide
    203	 * the rest among VFs, as for the other resource types.
    204	 */
    205	if (qplib_ctx->mrw_count < BNXT_RE_MAX_MRW_COUNT_64K) {
    206		mrws = qplib_ctx->mrw_count * vf_pct;
    207		nvfs = num_vf;
    208	} else {
    209		mrws = qplib_ctx->mrw_count - BNXT_RE_RESVD_MR_FOR_PF;
    210	}
    211	vf_res->max_mrw_per_vf = (mrws / nvfs);
    212	vf_res->max_gid_per_vf = BNXT_RE_MAX_GID_PER_VF;
    213}
    214
    215static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
    216{
    217	u32 num_vfs;
    218
    219	memset(&rdev->qplib_ctx.vf_res, 0, sizeof(struct bnxt_qplib_vf_res));
    220	bnxt_re_limit_pf_res(rdev);
    221
    222	num_vfs =  bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ?
    223			BNXT_RE_GEN_P5_MAX_VF : rdev->num_vfs;
    224	if (num_vfs)
    225		bnxt_re_limit_vf_res(&rdev->qplib_ctx, num_vfs);
    226}
    227
    228/* for handling bnxt_en callbacks later */
    229static void bnxt_re_stop(void *p)
    230{
    231	struct bnxt_re_dev *rdev = p;
    232	struct bnxt *bp;
    233
    234	if (!rdev)
    235		return;
    236	ASSERT_RTNL();
    237
    238	/* The L2 driver invokes this callback during a device error/crash or
    239	 * device reset. The current RoCE driver doesn't recover the device in
    240	 * case of error. Handle the error by dispatching fatal events to all
    241	 * QPs, i.e. by calling bnxt_re_dev_stop, and release the MSI-X vectors
    242	 * as the L2 driver wants to modify the MSI-X table.
    243	 */
    244	bp = netdev_priv(rdev->netdev);
    245
    246	ibdev_info(&rdev->ibdev, "Handle device stop call from L2 driver");
    247	/* Check the current device state from L2 structure and move the
    248	 * device to detached state if FW_FATAL_COND is set.
    249	 * This prevents more commands to HW during clean-up,
    250	 * in case the device is already in error.
    251	 */
    252	if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))
    253		set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);
    254
    255	bnxt_re_dev_stop(rdev);
    256	bnxt_re_stop_irq(rdev);
    257	/* Move the device state to detached and avoid sending any more
    258	 * commands to HW
    259	 */
    260	set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
    261	set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);
    262}
    263
    264static void bnxt_re_start(void *p)
    265{
    266}
    267
    268static void bnxt_re_sriov_config(void *p, int num_vfs)
    269{
    270	struct bnxt_re_dev *rdev = p;
    271
    272	if (!rdev)
    273		return;
    274
    275	if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
    276		return;
    277	rdev->num_vfs = num_vfs;
    278	if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) {
    279		bnxt_re_set_resource_limits(rdev);
    280		bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw,
    281					      &rdev->qplib_ctx);
    282	}
    283}
    284
    285static void bnxt_re_shutdown(void *p)
    286{
    287	struct bnxt_re_dev *rdev = p;
    288
    289	if (!rdev)
    290		return;
    291	ASSERT_RTNL();
    292	/* Release the MSIx vectors before queuing unregister */
    293	bnxt_re_stop_irq(rdev);
    294	ib_unregister_device_queued(&rdev->ibdev);
    295}
    296
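       /* Quiesce RoCE interrupts: stop each NQ completion IRQ, then the
        * RCFW (CREQ) IRQ. Called via the bnxt_en ulp_irq_stop hook and on
        * device stop/shutdown.
        */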
    297static void bnxt_re_stop_irq(void *handle)
    298{
    299	struct bnxt_re_dev *rdev = (struct bnxt_re_dev *)handle;
    300	struct bnxt_qplib_rcfw *rcfw = &rdev->rcfw;
    301	struct bnxt_qplib_nq *nq;
    302	int indx;
    303
    304	for (indx = BNXT_RE_NQ_IDX; indx < rdev->num_msix; indx++) {
    305		nq = &rdev->nq[indx - 1];
    306		bnxt_qplib_nq_stop_irq(nq, false);
    307	}
    308
    309	bnxt_qplib_rcfw_stop_irq(rcfw, false);
    310}
    311
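       /* Re-arm RoCE interrupts after the L2 driver reconfigures MSI-X:
        * record the new vectors, restart the RCFW (AEQ) IRQ, then each
        * NQ IRQ. Called via the bnxt_en ulp_irq_restart hook.
        */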
    312static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent)
    313{
    314	struct bnxt_re_dev *rdev = (struct bnxt_re_dev *)handle;
    315	struct bnxt_msix_entry *msix_ent = rdev->msix_entries;
    316	struct bnxt_qplib_rcfw *rcfw = &rdev->rcfw;
    317	struct bnxt_qplib_nq *nq;
    318	int indx, rc;
    319
    320	if (!ent) {
    321		/* Not setting the f/w timeout bit in rcfw.
    322		 * During driver unload, the first command
    323		 * to f/w will time out and that will set the
    324		 * timeout bit.
    325		 */
    326		ibdev_err(&rdev->ibdev, "Failed to re-start IRQs\n");
    327		return;
    328	}
    329
    330	/* Vectors may change after restart, so update with new vectors
    331		 * in the device structure.
    332	 */
    333	for (indx = 0; indx < rdev->num_msix; indx++)
    334		rdev->msix_entries[indx].vector = ent[indx].vector;
    335
    336	bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector,
    337				  false);
    338	for (indx = BNXT_RE_NQ_IDX ; indx < rdev->num_msix; indx++) {
    339		nq = &rdev->nq[indx - 1];
    340		rc = bnxt_qplib_nq_start_irq(nq, indx - 1,
    341					     msix_ent[indx].vector, false);
    342		if (rc)
    343			ibdev_warn(&rdev->ibdev, "Failed to reinit NQ index %d\n",
    344				   indx - 1);
    345	}
    346}
    347
    348static struct bnxt_ulp_ops bnxt_re_ulp_ops = {
    349	.ulp_async_notifier = NULL,
    350	.ulp_stop = bnxt_re_stop,
    351	.ulp_start = bnxt_re_start,
    352	.ulp_sriov_config = bnxt_re_sriov_config,
    353	.ulp_shutdown = bnxt_re_shutdown,
    354	.ulp_irq_stop = bnxt_re_stop_irq,
    355	.ulp_irq_restart = bnxt_re_start_irq
    356};
    357
    358/* RoCE -> Net driver */
    359
    360/* Driver registration routines used to let the networking driver (bnxt_en)
    361 * know that the RoCE driver is now installed
    362 */
    363static int bnxt_re_unregister_netdev(struct bnxt_re_dev *rdev)
    364{
    365	struct bnxt_en_dev *en_dev;
    366	int rc;
    367
    368	if (!rdev)
    369		return -EINVAL;
    370
    371	en_dev = rdev->en_dev;
    372
    373	rc = en_dev->en_ops->bnxt_unregister_device(rdev->en_dev,
    374						    BNXT_ROCE_ULP);
    375	return rc;
    376}
    377
    378static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev)
    379{
    380	struct bnxt_en_dev *en_dev;
    381	int rc = 0;
    382
    383	if (!rdev)
    384		return -EINVAL;
    385
    386	en_dev = rdev->en_dev;
    387
    388	rc = en_dev->en_ops->bnxt_register_device(en_dev, BNXT_ROCE_ULP,
    389						  &bnxt_re_ulp_ops, rdev);
    390	rdev->qplib_res.pdev = rdev->en_dev->pdev;
    391	return rc;
    392}
    393
    394static int bnxt_re_free_msix(struct bnxt_re_dev *rdev)
    395{
    396	struct bnxt_en_dev *en_dev;
    397	int rc;
    398
    399	if (!rdev)
    400		return -EINVAL;
    401
    402	en_dev = rdev->en_dev;
    403
    404
    405	rc = en_dev->en_ops->bnxt_free_msix(rdev->en_dev, BNXT_ROCE_ULP);
    406
    407	return rc;
    408}
    409
    410static int bnxt_re_request_msix(struct bnxt_re_dev *rdev)
    411{
    412	int rc = 0, num_msix_want = BNXT_RE_MAX_MSIX, num_msix_got;
    413	struct bnxt_en_dev *en_dev;
    414
    415	if (!rdev)
    416		return -EINVAL;
    417
    418	en_dev = rdev->en_dev;
    419
    420	num_msix_want = min_t(u32, BNXT_RE_MAX_MSIX, num_online_cpus());
    421
    422	num_msix_got = en_dev->en_ops->bnxt_request_msix(en_dev, BNXT_ROCE_ULP,
    423							 rdev->msix_entries,
    424							 num_msix_want);
    425	if (num_msix_got < BNXT_RE_MIN_MSIX) {
    426		rc = -EINVAL;
    427		goto done;
    428	}
    429	if (num_msix_got != num_msix_want) {
    430		ibdev_warn(&rdev->ibdev,
    431			   "Requested %d MSI-X vectors, got %d\n",
    432			   num_msix_want, num_msix_got);
    433	}
    434	rdev->num_msix = num_msix_got;
    435done:
    436	return rc;
    437}
    438
    439static void bnxt_re_init_hwrm_hdr(struct bnxt_re_dev *rdev, struct input *hdr,
    440				  u16 opcd, u16 crid, u16 trid)
    441{
    442	hdr->req_type = cpu_to_le16(opcd);
    443	hdr->cmpl_ring = cpu_to_le16(crid);
    444	hdr->target_id = cpu_to_le16(trid);
    445}
    446
    447static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg,
    448				int msg_len, void *resp, int resp_max_len,
    449				int timeout)
    450{
    451	fw_msg->msg = msg;
    452	fw_msg->msg_len = msg_len;
    453	fw_msg->resp = resp;
    454	fw_msg->resp_max_len = resp_max_len;
    455	fw_msg->timeout = timeout;
    456}
    457
    458static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev,
    459				 u16 fw_ring_id, int type)
    460{
    461	struct bnxt_en_dev *en_dev = rdev->en_dev;
    462	struct hwrm_ring_free_input req = {0};
    463	struct hwrm_ring_free_output resp;
    464	struct bnxt_fw_msg fw_msg;
    465	int rc = -EINVAL;
    466
    467	if (!en_dev)
    468		return rc;
    469
    470	if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
    471		return 0;
    472
    473	memset(&fw_msg, 0, sizeof(fw_msg));
    474
    475	bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_FREE, -1, -1);
    476	req.ring_type = type;
    477	req.ring_id = cpu_to_le16(fw_ring_id);
    478	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
    479			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
    480	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
    481	if (rc)
    482		ibdev_err(&rdev->ibdev, "Failed to free HW ring:%d :%#x",
    483			  req.ring_id, rc);
    484	return rc;
    485}
    486
    487static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev,
    488				  struct bnxt_re_ring_attr *ring_attr,
    489				  u16 *fw_ring_id)
    490{
    491	struct bnxt_en_dev *en_dev = rdev->en_dev;
    492	struct hwrm_ring_alloc_input req = {0};
    493	struct hwrm_ring_alloc_output resp;
    494	struct bnxt_fw_msg fw_msg;
    495	int rc = -EINVAL;
    496
    497	if (!en_dev)
    498		return rc;
    499
    500	memset(&fw_msg, 0, sizeof(fw_msg));
    501	bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_ALLOC, -1, -1);
    502	req.enables = 0;
    503	req.page_tbl_addr =  cpu_to_le64(ring_attr->dma_arr[0]);
    504	if (ring_attr->pages > 1) {
    505		/* Page size is in log2 units */
    506		req.page_size = BNXT_PAGE_SHIFT;
    507		req.page_tbl_depth = 1;
    508	}
    509	req.fbo = 0;
    510	/* Association of ring index with doorbell index and MSIX number */
    511	req.logical_id = cpu_to_le16(ring_attr->lrid);
    512	req.length = cpu_to_le32(ring_attr->depth + 1);
    513	req.ring_type = ring_attr->type;
    514	req.int_mode = ring_attr->mode;
    515	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
    516			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
    517	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
    518	if (!rc)
    519		*fw_ring_id = le16_to_cpu(resp.ring_id);
    520
    521	return rc;
    522}
    523
    524static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev,
    525				      u32 fw_stats_ctx_id)
    526{
    527	struct bnxt_en_dev *en_dev = rdev->en_dev;
    528	struct hwrm_stat_ctx_free_input req = {};
    529	struct hwrm_stat_ctx_free_output resp = {};
    530	struct bnxt_fw_msg fw_msg;
    531	int rc = -EINVAL;
    532
    533	if (!en_dev)
    534		return rc;
    535
    536	if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
    537		return 0;
    538
    539	memset(&fw_msg, 0, sizeof(fw_msg));
    540
    541	bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_FREE, -1, -1);
    542	req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id);
    543	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
    544			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
    545	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
    546	if (rc)
    547		ibdev_err(&rdev->ibdev, "Failed to free HW stats context %#x",
    548			  rc);
    549
    550	return rc;
    551}
    552
    553static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
    554				       dma_addr_t dma_map,
    555				       u32 *fw_stats_ctx_id)
    556{
    557	struct bnxt_qplib_chip_ctx *chip_ctx = rdev->chip_ctx;
    558	struct hwrm_stat_ctx_alloc_output resp = {0};
    559	struct hwrm_stat_ctx_alloc_input req = {0};
    560	struct bnxt_en_dev *en_dev = rdev->en_dev;
    561	struct bnxt_fw_msg fw_msg;
    562	int rc = -EINVAL;
    563
    564	*fw_stats_ctx_id = INVALID_STATS_CTX_ID;
    565
    566	if (!en_dev)
    567		return rc;
    568
    569	memset(&fw_msg, 0, sizeof(fw_msg));
    570
    571	bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1);
    572	req.update_period_ms = cpu_to_le32(1000);
    573	req.stats_dma_addr = cpu_to_le64(dma_map);
    574	req.stats_dma_length = cpu_to_le16(chip_ctx->hw_stats_size);
    575	req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE;
    576	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
    577			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
    578	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
    579	if (!rc)
    580		*fw_stats_ctx_id = le32_to_cpu(resp.stat_ctx_id);
    581
    582	return rc;
    583}
    584
    585/* Device */
    586
    587static bool is_bnxt_re_dev(struct net_device *netdev)
    588{
    589	struct ethtool_drvinfo drvinfo;
    590
    591	if (netdev->ethtool_ops && netdev->ethtool_ops->get_drvinfo) {
    592		memset(&drvinfo, 0, sizeof(drvinfo));
    593		netdev->ethtool_ops->get_drvinfo(netdev, &drvinfo);
    594
    595		if (strcmp(drvinfo.driver, "bnxt_en"))
    596			return false;
    597		return true;
    598	}
    599	return false;
    600}
    601
    602static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev)
    603{
    604	struct ib_device *ibdev =
    605		ib_device_get_by_netdev(netdev, RDMA_DRIVER_BNXT_RE);
    606	if (!ibdev)
    607		return NULL;
    608
    609	return container_of(ibdev, struct bnxt_re_dev, ibdev);
    610}
    611
    612static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev)
    613{
    614	struct bnxt_en_dev *en_dev;
    615	struct pci_dev *pdev;
    616
    617	en_dev = bnxt_ulp_probe(netdev);
    618	if (IS_ERR(en_dev))
    619		return en_dev;
    620
    621	pdev = en_dev->pdev;
    622	if (!pdev)
    623		return ERR_PTR(-EINVAL);
    624
    625	if (!(en_dev->flags & BNXT_EN_FLAG_ROCE_CAP)) {
    626		dev_info(&pdev->dev,
    627			"%s: probe error: RoCE is not supported on this device",
    628			ROCE_DRV_MODULE_NAME);
    629		return ERR_PTR(-ENODEV);
    630	}
    631
    632	dev_hold(netdev);
    633
    634	return en_dev;
    635}
    636
    637static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
    638			   char *buf)
    639{
    640	struct bnxt_re_dev *rdev =
    641		rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev);
    642
    643	return sysfs_emit(buf, "0x%x\n", rdev->en_dev->pdev->vendor);
    644}
    645static DEVICE_ATTR_RO(hw_rev);
    646
    647static ssize_t hca_type_show(struct device *device,
    648			     struct device_attribute *attr, char *buf)
    649{
    650	struct bnxt_re_dev *rdev =
    651		rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev);
    652
    653	return sysfs_emit(buf, "%s\n", rdev->ibdev.node_desc);
    654}
    655static DEVICE_ATTR_RO(hca_type);
    656
    657static struct attribute *bnxt_re_attributes[] = {
    658	&dev_attr_hw_rev.attr,
    659	&dev_attr_hca_type.attr,
    660	NULL
    661};
    662
    663static const struct attribute_group bnxt_re_dev_attr_group = {
    664	.attrs = bnxt_re_attributes,
    665};
    666
    667static const struct ib_device_ops bnxt_re_dev_ops = {
    668	.owner = THIS_MODULE,
    669	.driver_id = RDMA_DRIVER_BNXT_RE,
    670	.uverbs_abi_ver = BNXT_RE_ABI_VERSION,
    671
    672	.add_gid = bnxt_re_add_gid,
    673	.alloc_hw_port_stats = bnxt_re_ib_alloc_hw_port_stats,
    674	.alloc_mr = bnxt_re_alloc_mr,
    675	.alloc_pd = bnxt_re_alloc_pd,
    676	.alloc_ucontext = bnxt_re_alloc_ucontext,
    677	.create_ah = bnxt_re_create_ah,
    678	.create_cq = bnxt_re_create_cq,
    679	.create_qp = bnxt_re_create_qp,
    680	.create_srq = bnxt_re_create_srq,
    681	.create_user_ah = bnxt_re_create_ah,
    682	.dealloc_driver = bnxt_re_dealloc_driver,
    683	.dealloc_pd = bnxt_re_dealloc_pd,
    684	.dealloc_ucontext = bnxt_re_dealloc_ucontext,
    685	.del_gid = bnxt_re_del_gid,
    686	.dereg_mr = bnxt_re_dereg_mr,
    687	.destroy_ah = bnxt_re_destroy_ah,
    688	.destroy_cq = bnxt_re_destroy_cq,
    689	.destroy_qp = bnxt_re_destroy_qp,
    690	.destroy_srq = bnxt_re_destroy_srq,
    691	.device_group = &bnxt_re_dev_attr_group,
    692	.get_dev_fw_str = bnxt_re_query_fw_str,
    693	.get_dma_mr = bnxt_re_get_dma_mr,
    694	.get_hw_stats = bnxt_re_ib_get_hw_stats,
    695	.get_link_layer = bnxt_re_get_link_layer,
    696	.get_port_immutable = bnxt_re_get_port_immutable,
    697	.map_mr_sg = bnxt_re_map_mr_sg,
    698	.mmap = bnxt_re_mmap,
    699	.modify_qp = bnxt_re_modify_qp,
    700	.modify_srq = bnxt_re_modify_srq,
    701	.poll_cq = bnxt_re_poll_cq,
    702	.post_recv = bnxt_re_post_recv,
    703	.post_send = bnxt_re_post_send,
    704	.post_srq_recv = bnxt_re_post_srq_recv,
    705	.query_ah = bnxt_re_query_ah,
    706	.query_device = bnxt_re_query_device,
    707	.query_pkey = bnxt_re_query_pkey,
    708	.query_port = bnxt_re_query_port,
    709	.query_qp = bnxt_re_query_qp,
    710	.query_srq = bnxt_re_query_srq,
    711	.reg_user_mr = bnxt_re_reg_user_mr,
    712	.req_notify_cq = bnxt_re_req_notify_cq,
    713	INIT_RDMA_OBJ_SIZE(ib_ah, bnxt_re_ah, ib_ah),
    714	INIT_RDMA_OBJ_SIZE(ib_cq, bnxt_re_cq, ib_cq),
    715	INIT_RDMA_OBJ_SIZE(ib_pd, bnxt_re_pd, ib_pd),
    716	INIT_RDMA_OBJ_SIZE(ib_qp, bnxt_re_qp, ib_qp),
    717	INIT_RDMA_OBJ_SIZE(ib_srq, bnxt_re_srq, ib_srq),
    718	INIT_RDMA_OBJ_SIZE(ib_ucontext, bnxt_re_ucontext, ib_uctx),
    719};
    720
    721static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
    722{
    723	struct ib_device *ibdev = &rdev->ibdev;
    724	int ret;
    725
    726	/* ib device init */
    727	ibdev->node_type = RDMA_NODE_IB_CA;
    728	strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA",
    729		strlen(BNXT_RE_DESC) + 5);
    730	ibdev->phys_port_cnt = 1;
    731
    732	addrconf_addr_eui48((u8 *)&ibdev->node_guid, rdev->netdev->dev_addr);
    733
    734	ibdev->num_comp_vectors	= rdev->num_msix - 1;
    735	ibdev->dev.parent = &rdev->en_dev->pdev->dev;
    736	ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY;
    737
    738	ib_set_device_ops(ibdev, &bnxt_re_dev_ops);
    739	ret = ib_device_set_netdev(&rdev->ibdev, rdev->netdev, 1);
    740	if (ret)
    741		return ret;
    742
    743	dma_set_max_seg_size(&rdev->en_dev->pdev->dev, UINT_MAX);
    744	return ib_register_device(ibdev, "bnxt_re%d", &rdev->en_dev->pdev->dev);
    745}
    746
    747static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev)
    748{
    749	dev_put(rdev->netdev);
    750	rdev->netdev = NULL;
    751	mutex_lock(&bnxt_re_dev_lock);
    752	list_del_rcu(&rdev->list);
    753	mutex_unlock(&bnxt_re_dev_lock);
    754
    755	synchronize_rcu();
    756}
    757
    758static struct bnxt_re_dev *bnxt_re_dev_add(struct net_device *netdev,
    759					   struct bnxt_en_dev *en_dev)
    760{
    761	struct bnxt_re_dev *rdev;
    762
    763	/* Allocate bnxt_re_dev instance here */
    764	rdev = ib_alloc_device(bnxt_re_dev, ibdev);
    765	if (!rdev) {
    766		ibdev_err(NULL, "%s: bnxt_re_dev allocation failure!",
    767			  ROCE_DRV_MODULE_NAME);
    768		return NULL;
    769	}
    770	/* Default values */
    771	rdev->netdev = netdev;
    772	dev_hold(rdev->netdev);
    773	rdev->en_dev = en_dev;
    774	rdev->id = rdev->en_dev->pdev->devfn;
    775	INIT_LIST_HEAD(&rdev->qp_list);
    776	mutex_init(&rdev->qp_lock);
    777	atomic_set(&rdev->qp_count, 0);
    778	atomic_set(&rdev->cq_count, 0);
    779	atomic_set(&rdev->srq_count, 0);
    780	atomic_set(&rdev->mr_count, 0);
    781	atomic_set(&rdev->mw_count, 0);
    782	atomic_set(&rdev->ah_count, 0);
    783	atomic_set(&rdev->pd_count, 0);
    784	rdev->cosq[0] = 0xFFFF;
    785	rdev->cosq[1] = 0xFFFF;
    786
    787	mutex_lock(&bnxt_re_dev_lock);
    788	list_add_tail_rcu(&rdev->list, &bnxt_re_dev_list);
    789	mutex_unlock(&bnxt_re_dev_lock);
    790	return rdev;
    791}
    792
    793static int bnxt_re_handle_unaffi_async_event(struct creq_func_event
    794					     *unaffi_async)
    795{
    796	switch (unaffi_async->event) {
    797	case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR:
    798		break;
    799	case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR:
    800		break;
    801	case CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR:
    802		break;
    803	case CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR:
    804		break;
    805	case CREQ_FUNC_EVENT_EVENT_CQ_ERROR:
    806		break;
    807	case CREQ_FUNC_EVENT_EVENT_TQM_ERROR:
    808		break;
    809	case CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR:
    810		break;
    811	case CREQ_FUNC_EVENT_EVENT_CFCS_ERROR:
    812		break;
    813	case CREQ_FUNC_EVENT_EVENT_CFCC_ERROR:
    814		break;
    815	case CREQ_FUNC_EVENT_EVENT_CFCM_ERROR:
    816		break;
    817	case CREQ_FUNC_EVENT_EVENT_TIM_ERROR:
    818		break;
    819	default:
    820		return -EINVAL;
    821	}
    822	return 0;
    823}
    824
    825static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event,
    826					 struct bnxt_re_qp *qp)
    827{
    828	struct ib_event event;
    829	unsigned int flags;
    830
    831	if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR &&
    832	    rdma_is_kernel_res(&qp->ib_qp.res)) {
    833		flags = bnxt_re_lock_cqs(qp);
    834		bnxt_qplib_add_flush_qp(&qp->qplib_qp);
    835		bnxt_re_unlock_cqs(qp, flags);
    836	}
    837
    838	memset(&event, 0, sizeof(event));
    839	if (qp->qplib_qp.srq) {
    840		event.device = &qp->rdev->ibdev;
    841		event.element.qp = &qp->ib_qp;
    842		event.event = IB_EVENT_QP_LAST_WQE_REACHED;
    843	}
    844
    845	if (event.device && qp->ib_qp.event_handler)
    846		qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
    847
    848	return 0;
    849}
    850
    851static int bnxt_re_handle_affi_async_event(struct creq_qp_event *affi_async,
    852					   void *obj)
    853{
    854	int rc = 0;
    855	u8 event;
    856
    857	if (!obj)
    858		return rc; /* QP was already dead, still return success */
    859
    860	event = affi_async->event;
    861	if (event == CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION) {
    862		struct bnxt_qplib_qp *lib_qp = obj;
    863		struct bnxt_re_qp *qp = container_of(lib_qp, struct bnxt_re_qp,
    864						     qplib_qp);
    865		rc = bnxt_re_handle_qp_async_event(affi_async, qp);
    866	}
    867	return rc;
    868}
    869
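       /* Async event handler registered with the RCFW channel: dispatch
        * CREQ events to the function-level (unaffiliated) or QP-level
        * (affiliated) handler based on the event type.
        */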
    870static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw,
    871			       void *aeqe, void *obj)
    872{
    873	struct creq_qp_event *affi_async;
    874	struct creq_func_event *unaffi_async;
    875	u8 type;
    876	int rc;
    877
    878	type = ((struct creq_base *)aeqe)->type;
    879	if (type == CREQ_BASE_TYPE_FUNC_EVENT) {
    880		unaffi_async = aeqe;
    881		rc = bnxt_re_handle_unaffi_async_event(unaffi_async);
    882	} else {
    883		affi_async = aeqe;
    884		rc = bnxt_re_handle_affi_async_event(affi_async, obj);
    885	}
    886
    887	return rc;
    888}
    889
    890static int bnxt_re_srqn_handler(struct bnxt_qplib_nq *nq,
    891				struct bnxt_qplib_srq *handle, u8 event)
    892{
    893	struct bnxt_re_srq *srq = container_of(handle, struct bnxt_re_srq,
    894					       qplib_srq);
    895	struct ib_event ib_event;
    896
    897	ib_event.device = &srq->rdev->ibdev;
    898	ib_event.element.srq = &srq->ib_srq;
    899	if (event == NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT)
    900		ib_event.event = IB_EVENT_SRQ_LIMIT_REACHED;
    901	else
    902		ib_event.event = IB_EVENT_SRQ_ERR;
    903
    904	if (srq->ib_srq.event_handler) {
    905		/* Lock event_handler? */
    906		(*srq->ib_srq.event_handler)(&ib_event,
    907					     srq->ib_srq.srq_context);
    908	}
    909	return 0;
    910}
    911
    912static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq,
    913			       struct bnxt_qplib_cq *handle)
    914{
    915	struct bnxt_re_cq *cq = container_of(handle, struct bnxt_re_cq,
    916					     qplib_cq);
    917
    918	if (cq->ib_cq.comp_handler) {
    919		/* Lock comp_handler? */
    920		(*cq->ib_cq.comp_handler)(&cq->ib_cq, cq->ib_cq.cq_context);
    921	}
    922
    923	return 0;
    924}
    925
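       /* NQ doorbell offset: Gen P5 chips use a fixed offset (different
        * for PF and VF); earlier chips use the per-vector doorbell offset
        * reported by the L2 driver.
        */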
    926#define BNXT_RE_GEN_P5_PF_NQ_DB		0x10000
    927#define BNXT_RE_GEN_P5_VF_NQ_DB		0x4000
    928static u32 bnxt_re_get_nqdb_offset(struct bnxt_re_dev *rdev, u16 indx)
    929{
    930	return bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ?
    931		(rdev->is_virtfn ? BNXT_RE_GEN_P5_VF_NQ_DB :
    932				   BNXT_RE_GEN_P5_PF_NQ_DB) :
    933				   rdev->msix_entries[indx].db_offset;
    934}
    935
    936static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
    937{
    938	int i;
    939
    940	for (i = 1; i < rdev->num_msix; i++)
    941		bnxt_qplib_disable_nq(&rdev->nq[i - 1]);
    942
    943	if (rdev->qplib_res.rcfw)
    944		bnxt_qplib_cleanup_res(&rdev->qplib_res);
    945}
    946
    947static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
    948{
    949	int num_vec_enabled = 0;
    950	int rc = 0, i;
    951	u32 db_offt;
    952
    953	bnxt_qplib_init_res(&rdev->qplib_res);
    954
    955	for (i = 1; i < rdev->num_msix ; i++) {
    956		db_offt = bnxt_re_get_nqdb_offset(rdev, i);
    957		rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq[i - 1],
    958					  i - 1, rdev->msix_entries[i].vector,
    959					  db_offt, &bnxt_re_cqn_handler,
    960					  &bnxt_re_srqn_handler);
    961		if (rc) {
    962			ibdev_err(&rdev->ibdev,
    963				  "Failed to enable NQ with rc = 0x%x", rc);
    964			goto fail;
    965		}
    966		num_vec_enabled++;
    967	}
    968	return 0;
    969fail:
    970	for (i = num_vec_enabled; i >= 0; i--)
    971		bnxt_qplib_disable_nq(&rdev->nq[i]);
    972	return rc;
    973}
    974
    975static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev)
    976{
    977	u8 type;
    978	int i;
    979
    980	for (i = 0; i < rdev->num_msix - 1; i++) {
    981		type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
    982		bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type);
    983		bnxt_qplib_free_nq(&rdev->nq[i]);
    984		rdev->nq[i].res = NULL;
    985	}
    986}
    987
    988static void bnxt_re_free_res(struct bnxt_re_dev *rdev)
    989{
    990	bnxt_re_free_nq_res(rdev);
    991
    992	if (rdev->qplib_res.dpi_tbl.max) {
    993		bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
    994				       &rdev->qplib_res.dpi_tbl,
    995				       &rdev->dpi_privileged);
    996	}
    997	if (rdev->qplib_res.rcfw) {
    998		bnxt_qplib_free_res(&rdev->qplib_res);
    999		rdev->qplib_res.rcfw = NULL;
   1000	}
   1001}
   1002
   1003static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
   1004{
   1005	struct bnxt_re_ring_attr rattr = {};
   1006	int num_vec_created = 0;
   1007	int rc = 0, i;
   1008	u8 type;
   1009
   1010	/* Configure and allocate resources for qplib */
   1011	rdev->qplib_res.rcfw = &rdev->rcfw;
   1012	rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr,
   1013				     rdev->is_virtfn);
   1014	if (rc)
   1015		goto fail;
   1016
   1017	rc = bnxt_qplib_alloc_res(&rdev->qplib_res, rdev->en_dev->pdev,
   1018				  rdev->netdev, &rdev->dev_attr);
   1019	if (rc)
   1020		goto fail;
   1021
   1022	rc = bnxt_qplib_alloc_dpi(&rdev->qplib_res.dpi_tbl,
   1023				  &rdev->dpi_privileged,
   1024				  rdev);
   1025	if (rc)
   1026		goto dealloc_res;
   1027
   1028	for (i = 0; i < rdev->num_msix - 1; i++) {
   1029		struct bnxt_qplib_nq *nq;
   1030
   1031		nq = &rdev->nq[i];
   1032		nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT;
   1033		rc = bnxt_qplib_alloc_nq(&rdev->qplib_res, &rdev->nq[i]);
   1034		if (rc) {
   1035			ibdev_err(&rdev->ibdev, "Alloc Failed NQ%d rc:%#x",
   1036				  i, rc);
   1037			goto free_nq;
   1038		}
   1039		type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
   1040		rattr.dma_arr = nq->hwq.pbl[PBL_LVL_0].pg_map_arr;
   1041		rattr.pages = nq->hwq.pbl[rdev->nq[i].hwq.level].pg_count;
   1042		rattr.type = type;
   1043		rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX;
   1044		rattr.depth = BNXT_QPLIB_NQE_MAX_CNT - 1;
   1045		rattr.lrid = rdev->msix_entries[i + 1].ring_idx;
   1046		rc = bnxt_re_net_ring_alloc(rdev, &rattr, &nq->ring_id);
   1047		if (rc) {
   1048			ibdev_err(&rdev->ibdev,
   1049				  "Failed to allocate NQ fw id with rc = 0x%x",
   1050				  rc);
   1051			bnxt_qplib_free_nq(&rdev->nq[i]);
   1052			goto free_nq;
   1053		}
   1054		num_vec_created++;
   1055	}
   1056	return 0;
   1057free_nq:
   1058	for (i = num_vec_created - 1; i >= 0; i--) {
   1059		type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
   1060		bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type);
   1061		bnxt_qplib_free_nq(&rdev->nq[i]);
   1062	}
   1063	bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
   1064			       &rdev->qplib_res.dpi_tbl,
   1065			       &rdev->dpi_privileged);
   1066dealloc_res:
   1067	bnxt_qplib_free_res(&rdev->qplib_res);
   1068
   1069fail:
   1070	rdev->qplib_res.rcfw = NULL;
   1071	return rc;
   1072}
   1073
   1074static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp,
   1075				   u8 port_num, enum ib_event_type event)
   1076{
   1077	struct ib_event ib_event;
   1078
   1079	ib_event.device = ibdev;
   1080	if (qp) {
   1081		ib_event.element.qp = qp;
   1082		ib_event.event = event;
   1083		if (qp->event_handler)
   1084			qp->event_handler(&ib_event, qp->qp_context);
   1085
   1086	} else {
   1087		ib_event.element.port_num = port_num;
   1088		ib_event.event = event;
   1089		ib_dispatch_event(&ib_event);
   1090	}
   1091}
   1092
   1093#define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN      0x02
   1094static int bnxt_re_query_hwrm_pri2cos(struct bnxt_re_dev *rdev, u8 dir,
   1095				      u64 *cid_map)
   1096{
   1097	struct hwrm_queue_pri2cos_qcfg_input req = {0};
   1098	struct bnxt *bp = netdev_priv(rdev->netdev);
   1099	struct hwrm_queue_pri2cos_qcfg_output resp;
   1100	struct bnxt_en_dev *en_dev = rdev->en_dev;
   1101	struct bnxt_fw_msg fw_msg;
   1102	u32 flags = 0;
   1103	u8 *qcfgmap, *tmp_map;
   1104	int rc = 0, i;
   1105
   1106	if (!cid_map)
   1107		return -EINVAL;
   1108
   1109	memset(&fw_msg, 0, sizeof(fw_msg));
   1110	bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
   1111			      HWRM_QUEUE_PRI2COS_QCFG, -1, -1);
   1112	flags |= (dir & 0x01);
   1113	flags |= HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN;
   1114	req.flags = cpu_to_le32(flags);
   1115	req.port_id = bp->pf.port_id;
   1116
   1117	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
   1118			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
   1119	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
   1120	if (rc)
   1121		return rc;
   1122
   1123	if (resp.queue_cfg_info) {
   1124		ibdev_warn(&rdev->ibdev,
   1125			   "Asymmetric cos queue configuration detected");
   1126		ibdev_warn(&rdev->ibdev,
   1127			   " on device, QoS may not be fully functional\n");
   1128	}
   1129	qcfgmap = &resp.pri0_cos_queue_id;
   1130	tmp_map = (u8 *)cid_map;
   1131	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
   1132		tmp_map[i] = qcfgmap[i];
   1133
   1134	return rc;
   1135}
   1136
   1137static bool bnxt_re_is_qp1_or_shadow_qp(struct bnxt_re_dev *rdev,
   1138					struct bnxt_re_qp *qp)
   1139{
   1140	return (qp->ib_qp.qp_type == IB_QPT_GSI) ||
   1141	       (qp == rdev->gsi_ctx.gsi_sqp);
   1142}
   1143
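       /* Move all QPs except QP1/shadow QP to the error state and dispatch
        * IB_EVENT_QP_FATAL; used when the device is stopped or the netdev
        * loses carrier.
        */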
   1144static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev)
   1145{
   1146	int mask = IB_QP_STATE;
   1147	struct ib_qp_attr qp_attr;
   1148	struct bnxt_re_qp *qp;
   1149
   1150	qp_attr.qp_state = IB_QPS_ERR;
   1151	mutex_lock(&rdev->qp_lock);
   1152	list_for_each_entry(qp, &rdev->qp_list, list) {
   1153		/* Modify the state of all QPs except QP1/Shadow QP */
   1154		if (!bnxt_re_is_qp1_or_shadow_qp(rdev, qp)) {
   1155			if (qp->qplib_qp.state !=
   1156			    CMDQ_MODIFY_QP_NEW_STATE_RESET &&
   1157			    qp->qplib_qp.state !=
   1158			    CMDQ_MODIFY_QP_NEW_STATE_ERR) {
   1159				bnxt_re_dispatch_event(&rdev->ibdev, &qp->ib_qp,
   1160						       1, IB_EVENT_QP_FATAL);
   1161				bnxt_re_modify_qp(&qp->ib_qp, &qp_attr, mask,
   1162						  NULL);
   1163			}
   1164		}
   1165	}
   1166	mutex_unlock(&rdev->qp_lock);
   1167}
   1168
   1169static int bnxt_re_update_gid(struct bnxt_re_dev *rdev)
   1170{
   1171	struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
   1172	struct bnxt_qplib_gid gid;
   1173	u16 gid_idx, index;
   1174	int rc = 0;
   1175
   1176	if (!ib_device_try_get(&rdev->ibdev))
   1177		return 0;
   1178
   1179	if (!sgid_tbl) {
   1180		ibdev_err(&rdev->ibdev, "QPLIB: SGID table not allocated");
   1181		rc = -EINVAL;
   1182		goto out;
   1183	}
   1184
   1185	for (index = 0; index < sgid_tbl->active; index++) {
   1186		gid_idx = sgid_tbl->hw_id[index];
   1187
   1188		if (!memcmp(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero,
   1189			    sizeof(bnxt_qplib_gid_zero)))
   1190			continue;
   1191		/* Need to modify the VLAN enable setting only for non-VLAN GIDs,
   1192		 * as the setting is done for VLAN GIDs while adding the GID
   1193		 */
   1194		if (sgid_tbl->vlan[index])
   1195			continue;
   1196
   1197		memcpy(&gid, &sgid_tbl->tbl[index], sizeof(gid));
   1198
   1199		rc = bnxt_qplib_update_sgid(sgid_tbl, &gid, gid_idx,
   1200					    rdev->qplib_res.netdev->dev_addr);
   1201	}
   1202out:
   1203	ib_device_put(&rdev->ibdev);
   1204	return rc;
   1205}
   1206
   1207static u32 bnxt_re_get_priority_mask(struct bnxt_re_dev *rdev)
   1208{
   1209	u32 prio_map = 0, tmp_map = 0;
   1210	struct net_device *netdev;
   1211	struct dcb_app app;
   1212
   1213	netdev = rdev->netdev;
   1214
   1215	memset(&app, 0, sizeof(app));
   1216	app.selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE;
   1217	app.protocol = ETH_P_IBOE;
   1218	tmp_map = dcb_ieee_getapp_mask(netdev, &app);
   1219	prio_map = tmp_map;
   1220
   1221	app.selector = IEEE_8021QAZ_APP_SEL_DGRAM;
   1222	app.protocol = ROCE_V2_UDP_DPORT;
   1223	tmp_map = dcb_ieee_getapp_mask(netdev, &app);
   1224	prio_map |= tmp_map;
   1225
   1226	return prio_map;
   1227}
   1228
   1229static void bnxt_re_parse_cid_map(u8 prio_map, u8 *cid_map, u16 *cosq)
   1230{
   1231	u16 prio;
   1232	u8 id;
   1233
   1234	for (prio = 0, id = 0; prio < 8; prio++) {
   1235		if (prio_map & (1 << prio)) {
   1236			cosq[id] = cid_map[prio];
   1237			id++;
   1238			if (id == 2) /* Max 2 tcs supported */
   1239				break;
   1240		}
   1241	}
   1242}
   1243
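       /* Derive the RoCE priority mask from the DCB application table,
        * query the priority-to-CoS mapping from firmware, program it, and
        * toggle priority VLAN tagging to match.
        */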
   1244static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
   1245{
   1246	u8 prio_map = 0;
   1247	u64 cid_map;
   1248	int rc;
   1249
   1250	/* Get priority for roce */
   1251	prio_map = bnxt_re_get_priority_mask(rdev);
   1252
   1253	if (prio_map == rdev->cur_prio_map)
   1254		return 0;
   1255	rdev->cur_prio_map = prio_map;
   1256	/* Get cosq id for this priority */
   1257	rc = bnxt_re_query_hwrm_pri2cos(rdev, 0, &cid_map);
   1258	if (rc) {
   1259		ibdev_warn(&rdev->ibdev, "no cos for p_mask %x\n", prio_map);
   1260		return rc;
   1261	}
   1262	/* Parse CoS IDs for app priority */
   1263	bnxt_re_parse_cid_map(prio_map, (u8 *)&cid_map, rdev->cosq);
   1264
   1265	/* Config BONO. */
   1266	rc = bnxt_qplib_map_tc2cos(&rdev->qplib_res, rdev->cosq);
   1267	if (rc) {
   1268		ibdev_warn(&rdev->ibdev, "no tc for cos{%x, %x}\n",
   1269			   rdev->cosq[0], rdev->cosq[1]);
   1270		return rc;
   1271	}
   1272
   1273	/* Actual priorities are not programmed as that is already
   1274	 * done by the L2 driver; just enable or disable priority VLAN tagging
   1275	 */
   1276	if ((prio_map == 0 && rdev->qplib_res.prio) ||
   1277	    (prio_map != 0 && !rdev->qplib_res.prio)) {
   1278		rdev->qplib_res.prio = prio_map ? true : false;
   1279
   1280		bnxt_re_update_gid(rdev);
   1281	}
   1282
   1283	return 0;
   1284}
   1285
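       /* Query the firmware HWRM interface version and cache it in
        * qplib_ctx as a packed 64-bit major/minor/build/patch value.
        */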
   1286static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
   1287{
   1288	struct bnxt_en_dev *en_dev = rdev->en_dev;
   1289	struct hwrm_ver_get_output resp = {0};
   1290	struct hwrm_ver_get_input req = {0};
   1291	struct bnxt_fw_msg fw_msg;
   1292	int rc = 0;
   1293
   1294	memset(&fw_msg, 0, sizeof(fw_msg));
   1295	bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
   1296			      HWRM_VER_GET, -1, -1);
   1297	req.hwrm_intf_maj = HWRM_VERSION_MAJOR;
   1298	req.hwrm_intf_min = HWRM_VERSION_MINOR;
   1299	req.hwrm_intf_upd = HWRM_VERSION_UPDATE;
   1300	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
   1301			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
   1302	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
   1303	if (rc) {
   1304		ibdev_err(&rdev->ibdev, "Failed to query HW version, rc = 0x%x",
   1305			  rc);
   1306		return;
   1307	}
   1308	rdev->qplib_ctx.hwrm_intf_ver =
   1309		(u64)le16_to_cpu(resp.hwrm_intf_major) << 48 |
   1310		(u64)le16_to_cpu(resp.hwrm_intf_minor) << 32 |
   1311		(u64)le16_to_cpu(resp.hwrm_intf_build) << 16 |
   1312		le16_to_cpu(resp.hwrm_intf_patch);
   1313}
   1314
   1315static int bnxt_re_ib_init(struct bnxt_re_dev *rdev)
   1316{
   1317	int rc = 0;
   1318	u32 event;
   1319
   1320	/* Register ib dev */
   1321	rc = bnxt_re_register_ib(rdev);
   1322	if (rc) {
   1323		pr_err("Failed to register with IB: %#x\n", rc);
   1324		return rc;
   1325	}
   1326	dev_info(rdev_to_dev(rdev), "Device registered successfully");
   1327	ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
   1328			 &rdev->active_width);
   1329	set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
   1330
   1331	event = netif_running(rdev->netdev) && netif_carrier_ok(rdev->netdev) ?
   1332		IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
   1333
   1334	bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, event);
   1335
   1336	return rc;
   1337}
   1338
   1339static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev)
   1340{
   1341	u8 type;
   1342	int rc;
   1343
   1344	if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags))
   1345		cancel_delayed_work_sync(&rdev->worker);
   1346
   1347	if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED,
   1348			       &rdev->flags))
   1349		bnxt_re_cleanup_res(rdev);
   1350	if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags))
   1351		bnxt_re_free_res(rdev);
   1352
   1353	if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) {
   1354		rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw);
   1355		if (rc)
   1356			ibdev_warn(&rdev->ibdev,
   1357				   "Failed to deinitialize RCFW: %#x", rc);
   1358		bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id);
   1359		bnxt_qplib_free_ctx(&rdev->qplib_res, &rdev->qplib_ctx);
   1360		bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
   1361		type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
   1362		bnxt_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type);
   1363		bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
   1364	}
   1365	if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags)) {
   1366		rc = bnxt_re_free_msix(rdev);
   1367		if (rc)
   1368			ibdev_warn(&rdev->ibdev,
   1369				   "Failed to free MSI-X vectors: %#x", rc);
   1370	}
   1371
   1372	bnxt_re_destroy_chip_ctx(rdev);
   1373	if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) {
   1374		rc = bnxt_re_unregister_netdev(rdev);
   1375		if (rc)
   1376			ibdev_warn(&rdev->ibdev,
   1377				   "Failed to unregister with netdev: %#x", rc);
   1378	}
   1379}
   1380
   1381/* Worker thread for polling periodic events. Currently used for QoS programming. */
   1382static void bnxt_re_worker(struct work_struct *work)
   1383{
   1384	struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
   1385						worker.work);
   1386
   1387	bnxt_re_setup_qos(rdev);
   1388	schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
   1389}
   1390
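       /* Bring up one RoCE device: register with bnxt_en, set up the chip
        * context and MSI-X, create the RCFW (CREQ) channel, allocate the
        * firmware and stats contexts, then allocate/initialize the NQ
        * resources and, for a PF, start the QoS worker.
        */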
   1391static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode)
   1392{
   1393	struct bnxt_qplib_creq_ctx *creq;
   1394	struct bnxt_re_ring_attr rattr;
   1395	u32 db_offt;
   1396	int vid;
   1397	u8 type;
   1398	int rc;
   1399
   1400	/* Register a new RoCE device instance with the netdev */
   1401	memset(&rattr, 0, sizeof(rattr));
   1402	rc = bnxt_re_register_netdev(rdev);
   1403	if (rc) {
   1404		ibdev_err(&rdev->ibdev,
   1405			  "Failed to register with netdev: %#x\n", rc);
   1406		return -EINVAL;
   1407	}
   1408	set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
   1409
   1410	rc = bnxt_re_setup_chip_ctx(rdev, wqe_mode);
   1411	if (rc) {
   1412		ibdev_err(&rdev->ibdev, "Failed to get chip context\n");
   1413		return -EINVAL;
   1414	}
   1415
   1416	/* Check whether VF or PF */
   1417	bnxt_re_get_sriov_func_type(rdev);
   1418
   1419	rc = bnxt_re_request_msix(rdev);
   1420	if (rc) {
   1421		ibdev_err(&rdev->ibdev,
   1422			  "Failed to get MSI-X vectors: %#x\n", rc);
   1423		rc = -EINVAL;
   1424		goto fail;
   1425	}
   1426	set_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags);
   1427
   1428	bnxt_re_query_hwrm_intf_version(rdev);
   1429
   1430	/* Establish RCFW Communication Channel to initialize the context
   1431	 * memory for the function and all child VFs
   1432	 */
   1433	rc = bnxt_qplib_alloc_rcfw_channel(&rdev->qplib_res, &rdev->rcfw,
   1434					   &rdev->qplib_ctx,
   1435					   BNXT_RE_MAX_QPC_COUNT);
   1436	if (rc) {
   1437		ibdev_err(&rdev->ibdev,
   1438			  "Failed to allocate RCFW Channel: %#x\n", rc);
   1439		goto fail;
   1440	}
   1441
   1442	type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
   1443	creq = &rdev->rcfw.creq;
   1444	rattr.dma_arr = creq->hwq.pbl[PBL_LVL_0].pg_map_arr;
   1445	rattr.pages = creq->hwq.pbl[creq->hwq.level].pg_count;
   1446	rattr.type = type;
   1447	rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX;
   1448	rattr.depth = BNXT_QPLIB_CREQE_MAX_CNT - 1;
   1449	rattr.lrid = rdev->msix_entries[BNXT_RE_AEQ_IDX].ring_idx;
   1450	rc = bnxt_re_net_ring_alloc(rdev, &rattr, &creq->ring_id);
   1451	if (rc) {
   1452		ibdev_err(&rdev->ibdev, "Failed to allocate CREQ: %#x\n", rc);
   1453		goto free_rcfw;
   1454	}
   1455	db_offt = bnxt_re_get_nqdb_offset(rdev, BNXT_RE_AEQ_IDX);
   1456	vid = rdev->msix_entries[BNXT_RE_AEQ_IDX].vector;
   1457	rc = bnxt_qplib_enable_rcfw_channel(&rdev->rcfw,
   1458					    vid, db_offt, rdev->is_virtfn,
   1459					    &bnxt_re_aeq_handler);
   1460	if (rc) {
   1461		ibdev_err(&rdev->ibdev, "Failed to enable RCFW channel: %#x\n",
   1462			  rc);
   1463		goto free_ring;
   1464	}
   1465
   1466	rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr,
   1467				     rdev->is_virtfn);
   1468	if (rc)
   1469		goto disable_rcfw;
   1470
   1471	bnxt_re_set_resource_limits(rdev);
   1472
   1473	rc = bnxt_qplib_alloc_ctx(&rdev->qplib_res, &rdev->qplib_ctx, 0,
   1474				  bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx));
   1475	if (rc) {
   1476		ibdev_err(&rdev->ibdev,
   1477			  "Failed to allocate QPLIB context: %#x\n", rc);
   1478		goto disable_rcfw;
   1479	}
   1480	rc = bnxt_re_net_stats_ctx_alloc(rdev,
   1481					 rdev->qplib_ctx.stats.dma_map,
   1482					 &rdev->qplib_ctx.stats.fw_id);
   1483	if (rc) {
   1484		ibdev_err(&rdev->ibdev,
   1485			  "Failed to allocate stats context: %#x\n", rc);
   1486		goto free_ctx;
   1487	}
   1488
   1489	rc = bnxt_qplib_init_rcfw(&rdev->rcfw, &rdev->qplib_ctx,
   1490				  rdev->is_virtfn);
   1491	if (rc) {
   1492		ibdev_err(&rdev->ibdev,
   1493			  "Failed to initialize RCFW: %#x\n", rc);
   1494		goto free_sctx;
   1495	}
   1496	set_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags);
   1497
   1498	/* Resources based on the 'new' device caps */
   1499	rc = bnxt_re_alloc_res(rdev);
   1500	if (rc) {
   1501		ibdev_err(&rdev->ibdev,
   1502			  "Failed to allocate resources: %#x\n", rc);
   1503		goto fail;
   1504	}
   1505	set_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags);
   1506	rc = bnxt_re_init_res(rdev);
   1507	if (rc) {
   1508		ibdev_err(&rdev->ibdev,
   1509			  "Failed to initialize resources: %#x\n", rc);
   1510		goto fail;
   1511	}
   1512
   1513	set_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED, &rdev->flags);
   1514
   1515	if (!rdev->is_virtfn) {
   1516		rc = bnxt_re_setup_qos(rdev);
   1517		if (rc)
   1518			ibdev_info(&rdev->ibdev,
   1519				   "RoCE priority not yet configured\n");
   1520
   1521		INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker);
   1522		set_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags);
   1523		schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
   1524	}
   1525
   1526	return 0;
   1527free_sctx:
   1528	bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id);
   1529free_ctx:
   1530	bnxt_qplib_free_ctx(&rdev->qplib_res, &rdev->qplib_ctx);
   1531disable_rcfw:
   1532	bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
   1533free_ring:
   1534	type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
   1535	bnxt_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type);
   1536free_rcfw:
   1537	bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
   1538fail:
   1539	bnxt_re_dev_uninit(rdev);
   1540
   1541	return rc;
   1542}
   1543
   1544static void bnxt_re_dev_unreg(struct bnxt_re_dev *rdev)
   1545{
   1546	struct net_device *netdev = rdev->netdev;
   1547
   1548	bnxt_re_dev_remove(rdev);
   1549
   1550	if (netdev)
   1551		dev_put(netdev);
   1552}
   1553
   1554static int bnxt_re_dev_reg(struct bnxt_re_dev **rdev, struct net_device *netdev)
   1555{
   1556	struct bnxt_en_dev *en_dev;
   1557	int rc = 0;
   1558
   1559	if (!is_bnxt_re_dev(netdev))
   1560		return -ENODEV;
   1561
   1562	en_dev = bnxt_re_dev_probe(netdev);
   1563	if (IS_ERR(en_dev)) {
   1564		if (en_dev != ERR_PTR(-ENODEV))
   1565			ibdev_err(&(*rdev)->ibdev, "%s: Failed to probe\n",
   1566				  ROCE_DRV_MODULE_NAME);
   1567		rc = PTR_ERR(en_dev);
   1568		goto exit;
   1569	}
   1570	*rdev = bnxt_re_dev_add(netdev, en_dev);
   1571	if (!*rdev) {
   1572		rc = -ENOMEM;
   1573		dev_put(netdev);
   1574		goto exit;
   1575	}
   1576exit:
   1577	return rc;
   1578}
   1579
   1580static void bnxt_re_remove_device(struct bnxt_re_dev *rdev)
   1581{
   1582	bnxt_re_dev_uninit(rdev);
   1583	pci_dev_put(rdev->en_dev->pdev);
   1584	bnxt_re_dev_unreg(rdev);
   1585}
   1586
   1587static int bnxt_re_add_device(struct bnxt_re_dev **rdev,
   1588			      struct net_device *netdev, u8 wqe_mode)
   1589{
   1590	int rc;
   1591
   1592	rc = bnxt_re_dev_reg(rdev, netdev);
   1593	if (rc == -ENODEV)
   1594		return rc;
   1595	if (rc) {
   1596		pr_err("Failed to register with the device %s: %#x\n",
   1597		       netdev->name, rc);
   1598		return rc;
   1599	}
   1600
   1601	pci_dev_get((*rdev)->en_dev->pdev);
   1602	rc = bnxt_re_dev_init(*rdev, wqe_mode);
   1603	if (rc) {
   1604		pci_dev_put((*rdev)->en_dev->pdev);
   1605		bnxt_re_dev_unreg(*rdev);
   1606	}
   1607
   1608	return rc;
   1609}
   1610
   1611static void bnxt_re_dealloc_driver(struct ib_device *ib_dev)
   1612{
   1613	struct bnxt_re_dev *rdev =
   1614		container_of(ib_dev, struct bnxt_re_dev, ibdev);
   1615
   1616	dev_info(rdev_to_dev(rdev), "Unregistering Device");
   1617
   1618	rtnl_lock();
   1619	bnxt_re_remove_device(rdev);
   1620	rtnl_unlock();
   1621}
   1622
   1623/* Handle all deferred netevents tasks */
   1624static void bnxt_re_task(struct work_struct *work)
   1625{
   1626	struct bnxt_re_work *re_work;
   1627	struct bnxt_re_dev *rdev;
   1628	int rc = 0;
   1629
   1630	re_work = container_of(work, struct bnxt_re_work, work);
   1631	rdev = re_work->rdev;
   1632
   1633	if (re_work->event == NETDEV_REGISTER) {
   1634		rc = bnxt_re_ib_init(rdev);
   1635		if (rc) {
   1636			ibdev_err(&rdev->ibdev,
   1637				  "Failed to register with IB: %#x", rc);
   1638			rtnl_lock();
   1639			bnxt_re_remove_device(rdev);
   1640			rtnl_unlock();
   1641			goto exit;
   1642		}
   1643		goto exit;
   1644	}
   1645
   1646	if (!ib_device_try_get(&rdev->ibdev))
   1647		goto exit;
   1648
   1649	switch (re_work->event) {
   1650	case NETDEV_UP:
   1651		bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
   1652				       IB_EVENT_PORT_ACTIVE);
   1653		break;
   1654	case NETDEV_DOWN:
   1655		bnxt_re_dev_stop(rdev);
   1656		break;
   1657	case NETDEV_CHANGE:
   1658		if (!netif_carrier_ok(rdev->netdev))
   1659			bnxt_re_dev_stop(rdev);
   1660		else if (netif_carrier_ok(rdev->netdev))
   1661			bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
   1662					       IB_EVENT_PORT_ACTIVE);
   1663		ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
   1664				 &rdev->active_width);
   1665		break;
   1666	default:
   1667		break;
   1668	}
   1669	ib_device_put(&rdev->ibdev);
   1670exit:
   1671	put_device(&rdev->ibdev.dev);
   1672	kfree(re_work);
   1673}
   1674
   1675/*
   1676 * "Notifier chain callback can be invoked for the same chain from
   1677 * different CPUs at the same time".
   1678 *
   1679 * For cases when the netdev is already present, our call to the
   1680 * register_netdevice_notifier() will actually get the rtnl_lock()
   1681 * before sending NETDEV_REGISTER and (if up) NETDEV_UP
   1682 * events.
   1683 *
   1684 * But for cases when the netdev is not already present, the notifier
   1685 * chain is subjected to be invoked from different CPUs simultaneously.
   1686 *
   1687 * This is protected by the netdev_mutex.
   1688 */
   1689static int bnxt_re_netdev_event(struct notifier_block *notifier,
   1690				unsigned long event, void *ptr)
   1691{
   1692	struct net_device *real_dev, *netdev = netdev_notifier_info_to_dev(ptr);
   1693	struct bnxt_re_work *re_work;
   1694	struct bnxt_re_dev *rdev;
   1695	int rc = 0;
   1696	bool sch_work = false;
   1697	bool release = true;
   1698
   1699	real_dev = rdma_vlan_dev_real_dev(netdev);
   1700	if (!real_dev)
   1701		real_dev = netdev;
   1702
   1703	rdev = bnxt_re_from_netdev(real_dev);
   1704	if (!rdev && event != NETDEV_REGISTER)
   1705		return NOTIFY_OK;
   1706
   1707	if (real_dev != netdev)
   1708		goto exit;
   1709
   1710	switch (event) {
   1711	case NETDEV_REGISTER:
   1712		if (rdev)
   1713			break;
   1714		rc = bnxt_re_add_device(&rdev, real_dev,
   1715					BNXT_QPLIB_WQE_MODE_STATIC);
   1716		if (!rc)
   1717			sch_work = true;
   1718		release = false;
   1719		break;
   1720
   1721	case NETDEV_UNREGISTER:
   1722		ib_unregister_device_queued(&rdev->ibdev);
   1723		break;
   1724
   1725	default:
   1726		sch_work = true;
   1727		break;
   1728	}
   1729	if (sch_work) {
   1730		/* Allocate for the deferred task */
   1731		re_work = kzalloc(sizeof(*re_work), GFP_KERNEL);
   1732		if (re_work) {
   1733			get_device(&rdev->ibdev.dev);
   1734			re_work->rdev = rdev;
   1735			re_work->event = event;
   1736			re_work->vlan_dev = (real_dev == netdev ?
   1737					     NULL : netdev);
   1738			INIT_WORK(&re_work->work, bnxt_re_task);
   1739			queue_work(bnxt_re_wq, &re_work->work);
   1740		}
   1741	}
   1742
   1743exit:
   1744	if (rdev && release)
   1745		ib_device_put(&rdev->ibdev);
   1746	return NOTIFY_DONE;
   1747}
   1748
   1749static struct notifier_block bnxt_re_netdev_notifier = {
   1750	.notifier_call = bnxt_re_netdev_event
   1751};
   1752
   1753static int __init bnxt_re_mod_init(void)
   1754{
   1755	int rc = 0;
   1756
   1757	pr_info("%s: %s", ROCE_DRV_MODULE_NAME, version);
   1758
   1759	bnxt_re_wq = create_singlethread_workqueue("bnxt_re");
   1760	if (!bnxt_re_wq)
   1761		return -ENOMEM;
   1762
   1763	INIT_LIST_HEAD(&bnxt_re_dev_list);
   1764
   1765	rc = register_netdevice_notifier(&bnxt_re_netdev_notifier);
   1766	if (rc) {
   1767		pr_err("%s: Cannot register to netdevice_notifier",
   1768		       ROCE_DRV_MODULE_NAME);
   1769		goto err_netdev;
   1770	}
   1771	return 0;
   1772
   1773err_netdev:
   1774	destroy_workqueue(bnxt_re_wq);
   1775
   1776	return rc;
   1777}
   1778
   1779static void __exit bnxt_re_mod_exit(void)
   1780{
   1781	struct bnxt_re_dev *rdev;
   1782
   1783	unregister_netdevice_notifier(&bnxt_re_netdev_notifier);
   1784	if (bnxt_re_wq)
   1785		destroy_workqueue(bnxt_re_wq);
   1786	list_for_each_entry(rdev, &bnxt_re_dev_list, list) {
   1787		/* VF device removal must happen before removal of the PF
   1788		 * device. Queue VF unregistration first, so that the VFs
   1789		 * are removed before the PF during the call of
   1790		 * ib_unregister_driver.
   1791		 */
   1792		if (rdev->is_virtfn)
   1793			ib_unregister_device(&rdev->ibdev);
   1794	}
   1795	ib_unregister_driver(RDMA_DRIVER_BNXT_RE);
   1796}
   1797
   1798module_init(bnxt_re_mod_init);
   1799module_exit(bnxt_re_mod_exit);