cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

ocrdma_verbs.c (80355B)


      1/* This file is part of the Emulex RoCE Device Driver for
      2 * RoCE (RDMA over Converged Ethernet) adapters.
      3 * Copyright (C) 2012-2015 Emulex. All rights reserved.
      4 * EMULEX and SLI are trademarks of Emulex.
      5 * www.emulex.com
      6 *
      7 * This software is available to you under a choice of one of two licenses.
      8 * You may choose to be licensed under the terms of the GNU General Public
      9 * License (GPL) Version 2, available from the file COPYING in the main
     10 * directory of this source tree, or the BSD license below:
     11 *
     12 * Redistribution and use in source and binary forms, with or without
     13 * modification, are permitted provided that the following conditions
     14 * are met:
     15 *
     16 * - Redistributions of source code must retain the above copyright notice,
     17 *   this list of conditions and the following disclaimer.
     18 *
     19 * - Redistributions in binary form must reproduce the above copyright
     20 *   notice, this list of conditions and the following disclaimer in
     21 *   the documentation and/or other materials provided with the distribution.
     22 *
     23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     24 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     26 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
     27 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
     30 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
     31 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
     32 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
     33 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     34 *
     35 * Contact Information:
     36 * linux-drivers@emulex.com
     37 *
     38 * Emulex
     39 * 3333 Susan Street
     40 * Costa Mesa, CA 92626
     41 */
     42
     43#include <linux/dma-mapping.h>
     44#include <net/addrconf.h>
     45#include <rdma/ib_verbs.h>
     46#include <rdma/ib_user_verbs.h>
     47#include <rdma/iw_cm.h>
     48#include <rdma/ib_umem.h>
     49#include <rdma/ib_addr.h>
     50#include <rdma/ib_cache.h>
     51#include <rdma/uverbs_ioctl.h>
     52
     53#include "ocrdma.h"
     54#include "ocrdma_hw.h"
     55#include "ocrdma_verbs.h"
     56#include <rdma/ocrdma-abi.h>
     57
     58int ocrdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey)
     59{
     60	if (index > 0)
     61		return -EINVAL;
     62
     63	*pkey = 0xffff;
     64	return 0;
     65}
     66
     67int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
     68			struct ib_udata *uhw)
     69{
     70	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
     71
     72	if (uhw->inlen || uhw->outlen)
     73		return -EINVAL;
     74
     75	memset(attr, 0, sizeof *attr);
     76	memcpy(&attr->fw_ver, &dev->attr.fw_ver[0],
     77	       min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver)));
     78	addrconf_addr_eui48((u8 *)&attr->sys_image_guid,
     79			    dev->nic_info.mac_addr);
     80	attr->max_mr_size = dev->attr.max_mr_size;
     81	attr->page_size_cap = 0xffff000;
     82	attr->vendor_id = dev->nic_info.pdev->vendor;
     83	attr->vendor_part_id = dev->nic_info.pdev->device;
     84	attr->hw_ver = dev->asic_id;
     85	attr->max_qp = dev->attr.max_qp;
     86	attr->max_ah = OCRDMA_MAX_AH;
     87	attr->max_qp_wr = dev->attr.max_wqe;
     88
     89	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
     90					IB_DEVICE_RC_RNR_NAK_GEN |
     91					IB_DEVICE_SHUTDOWN_PORT |
     92					IB_DEVICE_SYS_IMAGE_GUID |
     93					IB_DEVICE_MEM_MGT_EXTENSIONS;
     94	attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
     95	attr->max_send_sge = dev->attr.max_send_sge;
     96	attr->max_recv_sge = dev->attr.max_recv_sge;
     97	attr->max_sge_rd = dev->attr.max_rdma_sge;
     98	attr->max_cq = dev->attr.max_cq;
     99	attr->max_cqe = dev->attr.max_cqe;
    100	attr->max_mr = dev->attr.max_mr;
    101	attr->max_mw = dev->attr.max_mw;
    102	attr->max_pd = dev->attr.max_pd;
    103	attr->atomic_cap = 0;
    104	attr->max_qp_rd_atom =
    105	    min(dev->attr.max_ord_per_qp, dev->attr.max_ird_per_qp);
    106	attr->max_qp_init_rd_atom = dev->attr.max_ord_per_qp;
    107	attr->max_srq = dev->attr.max_srq;
    108	attr->max_srq_sge = dev->attr.max_srq_sge;
    109	attr->max_srq_wr = dev->attr.max_rqe;
    110	attr->local_ca_ack_delay = dev->attr.local_ca_ack_delay;
    111	attr->max_fast_reg_page_list_len = dev->attr.max_pages_per_frmr;
    112	attr->max_pkeys = 1;
    113	return 0;
    114}
    115
    116static inline void get_link_speed_and_width(struct ocrdma_dev *dev,
    117					    u16 *ib_speed, u8 *ib_width)
    118{
    119	int status;
    120	u8 speed;
    121
    122	status = ocrdma_mbx_get_link_speed(dev, &speed, NULL);
    123	if (status)
    124		speed = OCRDMA_PHYS_LINK_SPEED_ZERO;
    125
    126	switch (speed) {
    127	case OCRDMA_PHYS_LINK_SPEED_1GBPS:
    128		*ib_speed = IB_SPEED_SDR;
    129		*ib_width = IB_WIDTH_1X;
    130		break;
    131
    132	case OCRDMA_PHYS_LINK_SPEED_10GBPS:
    133		*ib_speed = IB_SPEED_QDR;
    134		*ib_width = IB_WIDTH_1X;
    135		break;
    136
    137	case OCRDMA_PHYS_LINK_SPEED_20GBPS:
    138		*ib_speed = IB_SPEED_DDR;
    139		*ib_width = IB_WIDTH_4X;
    140		break;
    141
    142	case OCRDMA_PHYS_LINK_SPEED_40GBPS:
    143		*ib_speed = IB_SPEED_QDR;
    144		*ib_width = IB_WIDTH_4X;
    145		break;
    146
    147	default:
    148		/* Unsupported */
    149		*ib_speed = IB_SPEED_SDR;
    150		*ib_width = IB_WIDTH_1X;
    151	}
    152}
    153
    154int ocrdma_query_port(struct ib_device *ibdev,
    155		      u32 port, struct ib_port_attr *props)
    156{
    157	enum ib_port_state port_state;
    158	struct ocrdma_dev *dev;
    159	struct net_device *netdev;
    160
    161	/* props is zeroed by the caller; avoid zeroing it here */
    162	dev = get_ocrdma_dev(ibdev);
    163	netdev = dev->nic_info.netdev;
    164	if (netif_running(netdev) && netif_oper_up(netdev)) {
    165		port_state = IB_PORT_ACTIVE;
    166		props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
    167	} else {
    168		port_state = IB_PORT_DOWN;
    169		props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
    170	}
    171	props->max_mtu = IB_MTU_4096;
    172	props->active_mtu = iboe_get_mtu(netdev->mtu);
    173	props->lid = 0;
    174	props->lmc = 0;
    175	props->sm_lid = 0;
    176	props->sm_sl = 0;
    177	props->state = port_state;
    178	props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
    179				IB_PORT_DEVICE_MGMT_SUP |
    180				IB_PORT_VENDOR_CLASS_SUP;
    181	props->ip_gids = true;
    182	props->gid_tbl_len = OCRDMA_MAX_SGID;
    183	props->pkey_tbl_len = 1;
    184	props->bad_pkey_cntr = 0;
    185	props->qkey_viol_cntr = 0;
    186	get_link_speed_and_width(dev, &props->active_speed,
    187				 &props->active_width);
    188	props->max_msg_sz = 0x80000000;
    189	props->max_vl_num = 4;
    190	return 0;
    191}
    192
    193static int ocrdma_add_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
    194			   unsigned long len)
    195{
    196	struct ocrdma_mm *mm;
    197
    198	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
    199	if (mm == NULL)
    200		return -ENOMEM;
    201	mm->key.phy_addr = phy_addr;
    202	mm->key.len = len;
    203	INIT_LIST_HEAD(&mm->entry);
    204
    205	mutex_lock(&uctx->mm_list_lock);
    206	list_add_tail(&mm->entry, &uctx->mm_head);
    207	mutex_unlock(&uctx->mm_list_lock);
    208	return 0;
    209}
    210
    211static void ocrdma_del_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
    212			    unsigned long len)
    213{
    214	struct ocrdma_mm *mm, *tmp;
    215
    216	mutex_lock(&uctx->mm_list_lock);
    217	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
    218		if (len != mm->key.len && phy_addr != mm->key.phy_addr)
    219			continue;
    220
    221		list_del(&mm->entry);
    222		kfree(mm);
    223		break;
    224	}
    225	mutex_unlock(&uctx->mm_list_lock);
    226}
    227
    228static bool ocrdma_search_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
    229			      unsigned long len)
    230{
    231	bool found = false;
    232	struct ocrdma_mm *mm;
    233
    234	mutex_lock(&uctx->mm_list_lock);
    235	list_for_each_entry(mm, &uctx->mm_head, entry) {
    236		if (len != mm->key.len && phy_addr != mm->key.phy_addr)
    237			continue;
    238
    239		found = true;
    240		break;
    241	}
    242	mutex_unlock(&uctx->mm_list_lock);
    243	return found;
    244}
    245
    246
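       /* Reserve a slot in the preallocated PD pool: find the first free bit
        * in the DPP or normal PD bitmap, mark it in use, and track the
        * high-water mark of PDs handed out. Returns the bitmap index of
        * the reserved slot.
        */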
    247static u16 _ocrdma_pd_mgr_get_bitmap(struct ocrdma_dev *dev, bool dpp_pool)
    248{
    249	u16 pd_bitmap_idx = 0;
    250	unsigned long *pd_bitmap;
    251
    252	if (dpp_pool) {
    253		pd_bitmap = dev->pd_mgr->pd_dpp_bitmap;
    254		pd_bitmap_idx = find_first_zero_bit(pd_bitmap,
    255						    dev->pd_mgr->max_dpp_pd);
    256		__set_bit(pd_bitmap_idx, pd_bitmap);
    257		dev->pd_mgr->pd_dpp_count++;
    258		if (dev->pd_mgr->pd_dpp_count > dev->pd_mgr->pd_dpp_thrsh)
    259			dev->pd_mgr->pd_dpp_thrsh = dev->pd_mgr->pd_dpp_count;
    260	} else {
    261		pd_bitmap = dev->pd_mgr->pd_norm_bitmap;
    262		pd_bitmap_idx = find_first_zero_bit(pd_bitmap,
    263						    dev->pd_mgr->max_normal_pd);
    264		__set_bit(pd_bitmap_idx, pd_bitmap);
    265		dev->pd_mgr->pd_norm_count++;
    266		if (dev->pd_mgr->pd_norm_count > dev->pd_mgr->pd_norm_thrsh)
    267			dev->pd_mgr->pd_norm_thrsh = dev->pd_mgr->pd_norm_count;
    268	}
    269	return pd_bitmap_idx;
    270}
    271
    272static int _ocrdma_pd_mgr_put_bitmap(struct ocrdma_dev *dev, u16 pd_id,
    273					bool dpp_pool)
    274{
    275	u16 pd_count;
    276	u16 pd_bit_index;
    277
    278	pd_count = dpp_pool ? dev->pd_mgr->pd_dpp_count :
    279			      dev->pd_mgr->pd_norm_count;
    280	if (pd_count == 0)
    281		return -EINVAL;
    282
    283	if (dpp_pool) {
    284		pd_bit_index = pd_id - dev->pd_mgr->pd_dpp_start;
    285		if (pd_bit_index >= dev->pd_mgr->max_dpp_pd) {
    286			return -EINVAL;
    287		} else {
    288			__clear_bit(pd_bit_index, dev->pd_mgr->pd_dpp_bitmap);
    289			dev->pd_mgr->pd_dpp_count--;
    290		}
    291	} else {
    292		pd_bit_index = pd_id - dev->pd_mgr->pd_norm_start;
    293		if (pd_bit_index >= dev->pd_mgr->max_normal_pd) {
    294			return -EINVAL;
    295		} else {
    296			__clear_bit(pd_bit_index, dev->pd_mgr->pd_norm_bitmap);
    297			dev->pd_mgr->pd_norm_count--;
    298		}
    299	}
    300
    301	return 0;
    302}
    303
    304static int ocrdma_put_pd_num(struct ocrdma_dev *dev, u16 pd_id,
    305				   bool dpp_pool)
    306{
    307	int status;
    308
    309	mutex_lock(&dev->dev_lock);
    310	status = _ocrdma_pd_mgr_put_bitmap(dev, pd_id, dpp_pool);
    311	mutex_unlock(&dev->dev_lock);
    312	return status;
    313}
    314
    315static int ocrdma_get_pd_num(struct ocrdma_dev *dev, struct ocrdma_pd *pd)
    316{
    317	u16 pd_idx = 0;
    318	int status = 0;
    319
    320	mutex_lock(&dev->dev_lock);
    321	if (pd->dpp_enabled) {
    322		/* try allocating DPP PD, if not available then normal PD */
    323		if (dev->pd_mgr->pd_dpp_count < dev->pd_mgr->max_dpp_pd) {
    324			pd_idx = _ocrdma_pd_mgr_get_bitmap(dev, true);
    325			pd->id = dev->pd_mgr->pd_dpp_start + pd_idx;
    326			pd->dpp_page = dev->pd_mgr->dpp_page_index + pd_idx;
    327		} else if (dev->pd_mgr->pd_norm_count <
    328			   dev->pd_mgr->max_normal_pd) {
    329			pd_idx = _ocrdma_pd_mgr_get_bitmap(dev, false);
    330			pd->id = dev->pd_mgr->pd_norm_start + pd_idx;
    331			pd->dpp_enabled = false;
    332		} else {
    333			status = -EINVAL;
    334		}
    335	} else {
    336		if (dev->pd_mgr->pd_norm_count < dev->pd_mgr->max_normal_pd) {
    337			pd_idx = _ocrdma_pd_mgr_get_bitmap(dev, false);
    338			pd->id = dev->pd_mgr->pd_norm_start + pd_idx;
    339		} else {
    340			status = -EINVAL;
    341		}
    342	}
    343	mutex_unlock(&dev->dev_lock);
    344	return status;
    345}
    346
    347/*
    348 * NOTE:
    349 *
    350 * ocrdma_ucontext must be used here because this function is also
    351 * called from ocrdma_alloc_ucontext, where ib_udata does not have a
    352 * valid ib_ucontext pointer. ib_uverbs_get_context does not call the
    353 * uobj_{alloc|get_xxx} helpers, which are used to store the
    354 * ib_ucontext in the uverbs_attr_bundle wrapping the ib_udata, so
    355 * ib_udata does NOT imply a valid ib_ucontext here!
    356 */
    357static int _ocrdma_alloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd,
    358			    struct ocrdma_ucontext *uctx,
    359			    struct ib_udata *udata)
    360{
    361	int status;
    362
    363	if (udata && uctx && dev->attr.max_dpp_pds) {
    364		pd->dpp_enabled =
    365			ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R;
    366		pd->num_dpp_qp =
    367			pd->dpp_enabled ? (dev->nic_info.db_page_size /
    368					   dev->attr.wqe_size) : 0;
    369	}
    370
    371	if (dev->pd_mgr->pd_prealloc_valid)
    372		return ocrdma_get_pd_num(dev, pd);
    373
    374retry:
    375	status = ocrdma_mbx_alloc_pd(dev, pd);
    376	if (status) {
    377		if (pd->dpp_enabled) {
    378			pd->dpp_enabled = false;
    379			pd->num_dpp_qp = 0;
    380			goto retry;
    381		}
    382		return status;
    383	}
    384
    385	return 0;
    386}
    387
    388static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx,
    389				 struct ocrdma_pd *pd)
    390{
    391	return (uctx->cntxt_pd == pd);
    392}
    393
    394static void _ocrdma_dealloc_pd(struct ocrdma_dev *dev,
    395			      struct ocrdma_pd *pd)
    396{
    397	if (dev->pd_mgr->pd_prealloc_valid)
    398		ocrdma_put_pd_num(dev, pd->id, pd->dpp_enabled);
    399	else
    400		ocrdma_mbx_dealloc_pd(dev, pd);
    401}
    402
    403static int ocrdma_alloc_ucontext_pd(struct ocrdma_dev *dev,
    404				    struct ocrdma_ucontext *uctx,
    405				    struct ib_udata *udata)
    406{
    407	struct ib_device *ibdev = &dev->ibdev;
    408	struct ib_pd *pd;
    409	int status;
    410
    411	pd = rdma_zalloc_drv_obj(ibdev, ib_pd);
    412	if (!pd)
    413		return -ENOMEM;
    414
    415	pd->device  = ibdev;
    416	uctx->cntxt_pd = get_ocrdma_pd(pd);
    417
    418	status = _ocrdma_alloc_pd(dev, uctx->cntxt_pd, uctx, udata);
    419	if (status) {
    420		kfree(uctx->cntxt_pd);
    421		goto err;
    422	}
    423
    424	uctx->cntxt_pd->uctx = uctx;
    425	uctx->cntxt_pd->ibpd.device = &dev->ibdev;
    426err:
    427	return status;
    428}
    429
    430static void ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
    431{
    432	struct ocrdma_pd *pd = uctx->cntxt_pd;
    433	struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
    434
    435	if (uctx->pd_in_use) {
    436		pr_err("%s(%d) Freeing in use pdid=0x%x.\n",
    437		       __func__, dev->id, pd->id);
    438	}
    439	uctx->cntxt_pd = NULL;
    440	_ocrdma_dealloc_pd(dev, pd);
    441	kfree(pd);
    442}
    443
    444static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx)
    445{
    446	struct ocrdma_pd *pd = NULL;
    447
    448	mutex_lock(&uctx->mm_list_lock);
    449	if (!uctx->pd_in_use) {
    450		uctx->pd_in_use = true;
    451		pd = uctx->cntxt_pd;
    452	}
    453	mutex_unlock(&uctx->mm_list_lock);
    454
    455	return pd;
    456}
    457
    458static void ocrdma_release_ucontext_pd(struct ocrdma_ucontext *uctx)
    459{
    460	mutex_lock(&uctx->mm_list_lock);
    461	uctx->pd_in_use = false;
    462	mutex_unlock(&uctx->mm_list_lock);
    463}
    464
    465int ocrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
    466{
    467	struct ib_device *ibdev = uctx->device;
    468	int status;
    469	struct ocrdma_ucontext *ctx = get_ocrdma_ucontext(uctx);
    470	struct ocrdma_alloc_ucontext_resp resp = {};
    471	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
    472	struct pci_dev *pdev = dev->nic_info.pdev;
    473	u32 map_len = roundup(sizeof(u32) * 2048, PAGE_SIZE);
    474
    475	if (!udata)
    476		return -EFAULT;
    477	INIT_LIST_HEAD(&ctx->mm_head);
    478	mutex_init(&ctx->mm_list_lock);
    479
    480	ctx->ah_tbl.va = dma_alloc_coherent(&pdev->dev, map_len,
    481					    &ctx->ah_tbl.pa, GFP_KERNEL);
    482	if (!ctx->ah_tbl.va)
    483		return -ENOMEM;
    484
    485	ctx->ah_tbl.len = map_len;
    486
    487	resp.ah_tbl_len = ctx->ah_tbl.len;
    488	resp.ah_tbl_page = virt_to_phys(ctx->ah_tbl.va);
    489
    490	status = ocrdma_add_mmap(ctx, resp.ah_tbl_page, resp.ah_tbl_len);
    491	if (status)
    492		goto map_err;
    493
    494	status = ocrdma_alloc_ucontext_pd(dev, ctx, udata);
    495	if (status)
    496		goto pd_err;
    497
    498	resp.dev_id = dev->id;
    499	resp.max_inline_data = dev->attr.max_inline_data;
    500	resp.wqe_size = dev->attr.wqe_size;
    501	resp.rqe_size = dev->attr.rqe_size;
    502	resp.dpp_wqe_size = dev->attr.wqe_size;
    503
    504	memcpy(resp.fw_ver, dev->attr.fw_ver, sizeof(resp.fw_ver));
    505	status = ib_copy_to_udata(udata, &resp, sizeof(resp));
    506	if (status)
    507		goto cpy_err;
    508	return 0;
    509
    510cpy_err:
    511	ocrdma_dealloc_ucontext_pd(ctx);
    512pd_err:
    513	ocrdma_del_mmap(ctx, ctx->ah_tbl.pa, ctx->ah_tbl.len);
    514map_err:
    515	dma_free_coherent(&pdev->dev, ctx->ah_tbl.len, ctx->ah_tbl.va,
    516			  ctx->ah_tbl.pa);
    517	return status;
    518}
    519
    520void ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx)
    521{
    522	struct ocrdma_mm *mm, *tmp;
    523	struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx);
    524	struct ocrdma_dev *dev = get_ocrdma_dev(ibctx->device);
    525	struct pci_dev *pdev = dev->nic_info.pdev;
    526
    527	ocrdma_dealloc_ucontext_pd(uctx);
    528
    529	ocrdma_del_mmap(uctx, uctx->ah_tbl.pa, uctx->ah_tbl.len);
    530	dma_free_coherent(&pdev->dev, uctx->ah_tbl.len, uctx->ah_tbl.va,
    531			  uctx->ah_tbl.pa);
    532
    533	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
    534		list_del(&mm->entry);
    535		kfree(mm);
    536	}
    537}
    538
    539int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
    540{
    541	struct ocrdma_ucontext *ucontext = get_ocrdma_ucontext(context);
    542	struct ocrdma_dev *dev = get_ocrdma_dev(context->device);
    543	unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
    544	u64 unmapped_db = (u64) dev->nic_info.unmapped_db;
    545	unsigned long len = (vma->vm_end - vma->vm_start);
    546	int status;
    547	bool found;
    548
    549	if (vma->vm_start & (PAGE_SIZE - 1))
    550		return -EINVAL;
    551	found = ocrdma_search_mmap(ucontext, vma->vm_pgoff << PAGE_SHIFT, len);
    552	if (!found)
    553		return -EINVAL;
    554
    555	if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
    556		dev->nic_info.db_total_size)) &&
    557		(len <=	dev->nic_info.db_page_size)) {
    558		if (vma->vm_flags & VM_READ)
    559			return -EPERM;
    560
    561		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
    562		status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
    563					    len, vma->vm_page_prot);
    564	} else if (dev->nic_info.dpp_unmapped_len &&
    565		(vm_page >= (u64) dev->nic_info.dpp_unmapped_addr) &&
    566		(vm_page <= (u64) (dev->nic_info.dpp_unmapped_addr +
    567			dev->nic_info.dpp_unmapped_len)) &&
    568		(len <= dev->nic_info.dpp_unmapped_len)) {
    569		if (vma->vm_flags & VM_READ)
    570			return -EPERM;
    571
    572		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
    573		status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
    574					    len, vma->vm_page_prot);
    575	} else {
    576		status = remap_pfn_range(vma, vma->vm_start,
    577					 vma->vm_pgoff, len, vma->vm_page_prot);
    578	}
    579	return status;
    580}
    581
    582static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd,
    583				struct ib_udata *udata)
    584{
    585	int status;
    586	u64 db_page_addr;
    587	u64 dpp_page_addr = 0;
    588	u32 db_page_size;
    589	struct ocrdma_alloc_pd_uresp rsp;
    590	struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context(
    591		udata, struct ocrdma_ucontext, ibucontext);
    592
    593	memset(&rsp, 0, sizeof(rsp));
    594	rsp.id = pd->id;
    595	rsp.dpp_enabled = pd->dpp_enabled;
    596	db_page_addr = ocrdma_get_db_addr(dev, pd->id);
    597	db_page_size = dev->nic_info.db_page_size;
    598
    599	status = ocrdma_add_mmap(uctx, db_page_addr, db_page_size);
    600	if (status)
    601		return status;
    602
    603	if (pd->dpp_enabled) {
    604		dpp_page_addr = dev->nic_info.dpp_unmapped_addr +
    605				(pd->id * PAGE_SIZE);
    606		status = ocrdma_add_mmap(uctx, dpp_page_addr,
    607				 PAGE_SIZE);
    608		if (status)
    609			goto dpp_map_err;
    610		rsp.dpp_page_addr_hi = upper_32_bits(dpp_page_addr);
    611		rsp.dpp_page_addr_lo = dpp_page_addr;
    612	}
    613
    614	status = ib_copy_to_udata(udata, &rsp, sizeof(rsp));
    615	if (status)
    616		goto ucopy_err;
    617
    618	pd->uctx = uctx;
    619	return 0;
    620
    621ucopy_err:
    622	if (pd->dpp_enabled)
    623		ocrdma_del_mmap(pd->uctx, dpp_page_addr, PAGE_SIZE);
    624dpp_map_err:
    625	ocrdma_del_mmap(pd->uctx, db_page_addr, db_page_size);
    626	return status;
    627}
    628
    629int ocrdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
    630{
    631	struct ib_device *ibdev = ibpd->device;
    632	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
    633	struct ocrdma_pd *pd;
    634	int status;
    635	u8 is_uctx_pd = false;
    636	struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context(
    637		udata, struct ocrdma_ucontext, ibucontext);
    638
    639	if (udata) {
    640		pd = ocrdma_get_ucontext_pd(uctx);
    641		if (pd) {
    642			is_uctx_pd = true;
    643			goto pd_mapping;
    644		}
    645	}
    646
    647	pd = get_ocrdma_pd(ibpd);
    648	status = _ocrdma_alloc_pd(dev, pd, uctx, udata);
    649	if (status)
    650		goto exit;
    651
    652pd_mapping:
    653	if (udata) {
    654		status = ocrdma_copy_pd_uresp(dev, pd, udata);
    655		if (status)
    656			goto err;
    657	}
    658	return 0;
    659
    660err:
    661	if (is_uctx_pd)
    662		ocrdma_release_ucontext_pd(uctx);
    663	else
    664		_ocrdma_dealloc_pd(dev, pd);
    665exit:
    666	return status;
    667}
    668
    669int ocrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
    670{
    671	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
    672	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
    673	struct ocrdma_ucontext *uctx = NULL;
    674	u64 usr_db;
    675
    676	uctx = pd->uctx;
    677	if (uctx) {
    678		u64 dpp_db = dev->nic_info.dpp_unmapped_addr +
    679			(pd->id * PAGE_SIZE);
    680		if (pd->dpp_enabled)
    681			ocrdma_del_mmap(pd->uctx, dpp_db, PAGE_SIZE);
    682		usr_db = ocrdma_get_db_addr(dev, pd->id);
    683		ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size);
    684
    685		if (is_ucontext_pd(uctx, pd)) {
    686			ocrdma_release_ucontext_pd(uctx);
    687			return 0;
    688		}
    689	}
    690	_ocrdma_dealloc_pd(dev, pd);
    691	return 0;
    692}
    693
    694static int ocrdma_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
    695			    u32 pdid, int acc, u32 num_pbls, u32 addr_check)
    696{
    697	int status;
    698
    699	mr->hwmr.fr_mr = 0;
    700	mr->hwmr.local_rd = 1;
    701	mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
    702	mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
    703	mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
    704	mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0;
    705	mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
    706	mr->hwmr.num_pbls = num_pbls;
    707
    708	status = ocrdma_mbx_alloc_lkey(dev, &mr->hwmr, pdid, addr_check);
    709	if (status)
    710		return status;
    711
    712	mr->ibmr.lkey = mr->hwmr.lkey;
    713	if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
    714		mr->ibmr.rkey = mr->hwmr.lkey;
    715	return 0;
    716}
    717
    718struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *ibpd, int acc)
    719{
    720	int status;
    721	struct ocrdma_mr *mr;
    722	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
    723	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
    724
    725	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) {
    726		pr_err("%s err, invalid access rights\n", __func__);
    727		return ERR_PTR(-EINVAL);
    728	}
    729
    730	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
    731	if (!mr)
    732		return ERR_PTR(-ENOMEM);
    733
    734	status = ocrdma_alloc_lkey(dev, mr, pd->id, acc, 0,
    735				   OCRDMA_ADDR_CHECK_DISABLE);
    736	if (status) {
    737		kfree(mr);
    738		return ERR_PTR(status);
    739	}
    740
    741	return &mr->ibmr;
    742}
    743
    744static void ocrdma_free_mr_pbl_tbl(struct ocrdma_dev *dev,
    745				   struct ocrdma_hw_mr *mr)
    746{
    747	struct pci_dev *pdev = dev->nic_info.pdev;
    748	int i = 0;
    749
    750	if (mr->pbl_table) {
    751		for (i = 0; i < mr->num_pbls; i++) {
    752			if (!mr->pbl_table[i].va)
    753				continue;
    754			dma_free_coherent(&pdev->dev, mr->pbl_size,
    755					  mr->pbl_table[i].va,
    756					  mr->pbl_table[i].pa);
    757		}
    758		kfree(mr->pbl_table);
    759		mr->pbl_table = NULL;
    760	}
    761}
    762
    763static int ocrdma_get_pbl_info(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
    764			      u32 num_pbes)
    765{
    766	u32 num_pbls = 0;
    767	u32 idx = 0;
    768	int status = 0;
    769	u32 pbl_size;
    770
    771	do {
    772		pbl_size = OCRDMA_MIN_HPAGE_SIZE * (1 << idx);
    773		if (pbl_size > MAX_OCRDMA_PBL_SIZE) {
    774			status = -EFAULT;
    775			break;
    776		}
    777		num_pbls = roundup(num_pbes, (pbl_size / sizeof(u64)));
    778		num_pbls = num_pbls / (pbl_size / sizeof(u64));
    779		idx++;
    780	} while (num_pbls >= dev->attr.max_num_mr_pbl);
    781
    782	mr->hwmr.num_pbes = num_pbes;
    783	mr->hwmr.num_pbls = num_pbls;
    784	mr->hwmr.pbl_size = pbl_size;
    785	return status;
    786}
    787
    788static int ocrdma_build_pbl_tbl(struct ocrdma_dev *dev, struct ocrdma_hw_mr *mr)
    789{
    790	int status = 0;
    791	int i;
    792	u32 dma_len = mr->pbl_size;
    793	struct pci_dev *pdev = dev->nic_info.pdev;
    794	void *va;
    795	dma_addr_t pa;
    796
    797	mr->pbl_table = kcalloc(mr->num_pbls, sizeof(struct ocrdma_pbl),
    798				GFP_KERNEL);
    799
    800	if (!mr->pbl_table)
    801		return -ENOMEM;
    802
    803	for (i = 0; i < mr->num_pbls; i++) {
    804		va = dma_alloc_coherent(&pdev->dev, dma_len, &pa, GFP_KERNEL);
    805		if (!va) {
    806			ocrdma_free_mr_pbl_tbl(dev, mr);
    807			status = -ENOMEM;
    808			break;
    809		}
    810		mr->pbl_table[i].va = va;
    811		mr->pbl_table[i].pa = pa;
    812	}
    813	return status;
    814}
    815
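       /* Populate the PBL table for a user MR: walk the umem in PAGE_SIZE
        * DMA blocks and write each block address into consecutive PBEs,
        * moving to the next PBL once the current one is full.
        */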
    816static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr)
    817{
    818	struct ocrdma_pbe *pbe;
    819	struct ib_block_iter biter;
    820	struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
    821	int pbe_cnt;
    822	u64 pg_addr;
    823
    824	if (!mr->hwmr.num_pbes)
    825		return;
    826
    827	pbe = (struct ocrdma_pbe *)pbl_tbl->va;
    828	pbe_cnt = 0;
    829
    830	rdma_umem_for_each_dma_block (mr->umem, &biter, PAGE_SIZE) {
    831		/* store the page address in pbe */
    832		pg_addr = rdma_block_iter_dma_address(&biter);
    833		pbe->pa_lo = cpu_to_le32(pg_addr);
    834		pbe->pa_hi = cpu_to_le32(upper_32_bits(pg_addr));
    835		pbe_cnt += 1;
    836		pbe++;
    837
    838		/* if the given pbl is full of pbes,
    839		 * move to the next pbl.
    840		 */
    841		if (pbe_cnt == (mr->hwmr.pbl_size / sizeof(u64))) {
    842			pbl_tbl++;
    843			pbe = (struct ocrdma_pbe *)pbl_tbl->va;
    844			pbe_cnt = 0;
    845		}
    846	}
    847}
    848
    849struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
    850				 u64 usr_addr, int acc, struct ib_udata *udata)
    851{
    852	int status = -ENOMEM;
    853	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
    854	struct ocrdma_mr *mr;
    855	struct ocrdma_pd *pd;
    856
    857	pd = get_ocrdma_pd(ibpd);
    858
    859	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
    860		return ERR_PTR(-EINVAL);
    861
    862	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
    863	if (!mr)
    864		return ERR_PTR(status);
    865	mr->umem = ib_umem_get(ibpd->device, start, len, acc);
    866	if (IS_ERR(mr->umem)) {
    867		status = -EFAULT;
    868		goto umem_err;
    869	}
    870	status = ocrdma_get_pbl_info(
    871		dev, mr, ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE));
    872	if (status)
    873		goto umem_err;
    874
    875	mr->hwmr.pbe_size = PAGE_SIZE;
    876	mr->hwmr.va = usr_addr;
    877	mr->hwmr.len = len;
    878	mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
    879	mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
    880	mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
    881	mr->hwmr.local_rd = 1;
    882	mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
    883	status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
    884	if (status)
    885		goto umem_err;
    886	build_user_pbes(dev, mr);
    887	status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
    888	if (status)
    889		goto mbx_err;
    890	mr->ibmr.lkey = mr->hwmr.lkey;
    891	if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
    892		mr->ibmr.rkey = mr->hwmr.lkey;
    893
    894	return &mr->ibmr;
    895
    896mbx_err:
    897	ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
    898umem_err:
    899	kfree(mr);
    900	return ERR_PTR(status);
    901}
    902
    903int ocrdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
    904{
    905	struct ocrdma_mr *mr = get_ocrdma_mr(ib_mr);
    906	struct ocrdma_dev *dev = get_ocrdma_dev(ib_mr->device);
    907
    908	(void) ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey);
    909
    910	kfree(mr->pages);
    911	ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
    912
    913	/* it could be user registered memory. */
    914	ib_umem_release(mr->umem);
    915	kfree(mr);
    916
    917	/* Don't stop cleanup, in case FW is unresponsive */
    918	if (dev->mqe_ctx.fw_error_state) {
    919		pr_err("%s(%d) fw not responding.\n",
    920		       __func__, dev->id);
    921	}
    922	return 0;
    923}
    924
    925static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
    926				struct ib_udata *udata)
    927{
    928	int status;
    929	struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context(
    930		udata, struct ocrdma_ucontext, ibucontext);
    931	struct ocrdma_create_cq_uresp uresp;
    932
    933	/* this must be user flow! */
    934	if (!udata)
    935		return -EINVAL;
    936
    937	memset(&uresp, 0, sizeof(uresp));
    938	uresp.cq_id = cq->id;
    939	uresp.page_size = PAGE_ALIGN(cq->len);
    940	uresp.num_pages = 1;
    941	uresp.max_hw_cqe = cq->max_hw_cqe;
    942	uresp.page_addr[0] = virt_to_phys(cq->va);
    943	uresp.db_page_addr =  ocrdma_get_db_addr(dev, uctx->cntxt_pd->id);
    944	uresp.db_page_size = dev->nic_info.db_page_size;
    945	uresp.phase_change = cq->phase_change ? 1 : 0;
    946	status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
    947	if (status) {
    948		pr_err("%s(%d) copy error cqid=0x%x.\n",
    949		       __func__, dev->id, cq->id);
    950		goto err;
    951	}
    952	status = ocrdma_add_mmap(uctx, uresp.db_page_addr, uresp.db_page_size);
    953	if (status)
    954		goto err;
    955	status = ocrdma_add_mmap(uctx, uresp.page_addr[0], uresp.page_size);
    956	if (status) {
    957		ocrdma_del_mmap(uctx, uresp.db_page_addr, uresp.db_page_size);
    958		goto err;
    959	}
    960	cq->ucontext = uctx;
    961err:
    962	return status;
    963}
    964
    965int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
    966		     struct ib_udata *udata)
    967{
    968	struct ib_device *ibdev = ibcq->device;
    969	int entries = attr->cqe;
    970	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
    971	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
    972	struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context(
    973		udata, struct ocrdma_ucontext, ibucontext);
    974	u16 pd_id = 0;
    975	int status;
    976	struct ocrdma_create_cq_ureq ureq;
    977
    978	if (attr->flags)
    979		return -EOPNOTSUPP;
    980
    981	if (udata) {
    982		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
    983			return -EFAULT;
    984	} else
    985		ureq.dpp_cq = 0;
    986
    987	spin_lock_init(&cq->cq_lock);
    988	spin_lock_init(&cq->comp_handler_lock);
    989	INIT_LIST_HEAD(&cq->sq_head);
    990	INIT_LIST_HEAD(&cq->rq_head);
    991
    992	if (udata)
    993		pd_id = uctx->cntxt_pd->id;
    994
    995	status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq, pd_id);
    996	if (status)
    997		return status;
    998
    999	if (udata) {
   1000		status = ocrdma_copy_cq_uresp(dev, cq, udata);
   1001		if (status)
   1002			goto ctx_err;
   1003	}
   1004	cq->phase = OCRDMA_CQE_VALID;
   1005	dev->cq_tbl[cq->id] = cq;
   1006	return 0;
   1007
   1008ctx_err:
   1009	ocrdma_mbx_destroy_cq(dev, cq);
   1010	return status;
   1011}
   1012
   1013int ocrdma_resize_cq(struct ib_cq *ibcq, int new_cnt,
   1014		     struct ib_udata *udata)
   1015{
   1016	int status = 0;
   1017	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
   1018
   1019	if (new_cnt < 1 || new_cnt > cq->max_hw_cqe) {
   1020		status = -EINVAL;
   1021		return status;
   1022	}
   1023	ibcq->cqe = new_cnt;
   1024	return status;
   1025}
   1026
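       /* Acknowledge any CQEs still pending in the hw CQ: count the valid
        * entries under cq_lock (to sync with a concurrent poll) and ring
        * the CQ doorbell for them before the CQ is torn down.
        */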
   1027static void ocrdma_flush_cq(struct ocrdma_cq *cq)
   1028{
   1029	int cqe_cnt;
   1030	int valid_count = 0;
   1031	unsigned long flags;
   1032
   1033	struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device);
   1034	struct ocrdma_cqe *cqe = NULL;
   1035
   1036	cqe = cq->va;
   1037	cqe_cnt = cq->cqe_cnt;
   1038
   1039	/* The last irq might have scheduled a polling thread;
   1040	 * sync up with it before hard flushing.
   1041	 */
   1042	spin_lock_irqsave(&cq->cq_lock, flags);
   1043	while (cqe_cnt) {
   1044		if (is_cqe_valid(cq, cqe))
   1045			valid_count++;
   1046		cqe++;
   1047		cqe_cnt--;
   1048	}
   1049	ocrdma_ring_cq_db(dev, cq->id, false, false, valid_count);
   1050	spin_unlock_irqrestore(&cq->cq_lock, flags);
   1051}
   1052
   1053int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
   1054{
   1055	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
   1056	struct ocrdma_eq *eq = NULL;
   1057	struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
   1058	int pdid = 0;
   1059	u32 irq, indx;
   1060
   1061	dev->cq_tbl[cq->id] = NULL;
   1062	indx = ocrdma_get_eq_table_index(dev, cq->eqn);
   1063
   1064	eq = &dev->eq_tbl[indx];
   1065	irq = ocrdma_get_irq(dev, eq);
   1066	synchronize_irq(irq);
   1067	ocrdma_flush_cq(cq);
   1068
   1069	ocrdma_mbx_destroy_cq(dev, cq);
   1070	if (cq->ucontext) {
   1071		pdid = cq->ucontext->cntxt_pd->id;
   1072		ocrdma_del_mmap(cq->ucontext, (u64) cq->pa,
   1073				PAGE_ALIGN(cq->len));
   1074		ocrdma_del_mmap(cq->ucontext,
   1075				ocrdma_get_db_addr(dev, pdid),
   1076				dev->nic_info.db_page_size);
   1077	}
   1078	return 0;
   1079}
   1080
   1081static int ocrdma_add_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
   1082{
   1083	int status = -EINVAL;
   1084
   1085	if (qp->id < OCRDMA_MAX_QP && dev->qp_tbl[qp->id] == NULL) {
   1086		dev->qp_tbl[qp->id] = qp;
   1087		status = 0;
   1088	}
   1089	return status;
   1090}
   1091
   1092static void ocrdma_del_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
   1093{
   1094	dev->qp_tbl[qp->id] = NULL;
   1095}
   1096
   1097static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
   1098				  struct ib_qp_init_attr *attrs,
   1099				  struct ib_udata *udata)
   1100{
   1101	if ((attrs->qp_type != IB_QPT_GSI) &&
   1102	    (attrs->qp_type != IB_QPT_RC) &&
   1103	    (attrs->qp_type != IB_QPT_UC) &&
   1104	    (attrs->qp_type != IB_QPT_UD)) {
   1105		pr_err("%s(%d) unsupported qp type=0x%x requested\n",
   1106		       __func__, dev->id, attrs->qp_type);
   1107		return -EOPNOTSUPP;
   1108	}
   1109	/* Skip the check for QP1 to support CM size of 128 */
   1110	if ((attrs->qp_type != IB_QPT_GSI) &&
   1111	    (attrs->cap.max_send_wr > dev->attr.max_wqe)) {
   1112		pr_err("%s(%d) unsupported send_wr=0x%x requested\n",
   1113		       __func__, dev->id, attrs->cap.max_send_wr);
   1114		pr_err("%s(%d) supported send_wr=0x%x\n",
   1115		       __func__, dev->id, dev->attr.max_wqe);
   1116		return -EINVAL;
   1117	}
   1118	if (!attrs->srq && (attrs->cap.max_recv_wr > dev->attr.max_rqe)) {
   1119		pr_err("%s(%d) unsupported recv_wr=0x%x requested\n",
   1120		       __func__, dev->id, attrs->cap.max_recv_wr);
   1121		pr_err("%s(%d) supported recv_wr=0x%x\n",
   1122		       __func__, dev->id, dev->attr.max_rqe);
   1123		return -EINVAL;
   1124	}
   1125	if (attrs->cap.max_inline_data > dev->attr.max_inline_data) {
   1126		pr_err("%s(%d) unsupported inline data size=0x%x requested\n",
   1127		       __func__, dev->id, attrs->cap.max_inline_data);
   1128		pr_err("%s(%d) supported inline data size=0x%x\n",
   1129		       __func__, dev->id, dev->attr.max_inline_data);
   1130		return -EINVAL;
   1131	}
   1132	if (attrs->cap.max_send_sge > dev->attr.max_send_sge) {
   1133		pr_err("%s(%d) unsupported send_sge=0x%x requested\n",
   1134		       __func__, dev->id, attrs->cap.max_send_sge);
   1135		pr_err("%s(%d) supported send_sge=0x%x\n",
   1136		       __func__, dev->id, dev->attr.max_send_sge);
   1137		return -EINVAL;
   1138	}
   1139	if (attrs->cap.max_recv_sge > dev->attr.max_recv_sge) {
   1140		pr_err("%s(%d) unsupported recv_sge=0x%x requested\n",
   1141		       __func__, dev->id, attrs->cap.max_recv_sge);
   1142		pr_err("%s(%d) supported recv_sge=0x%x\n",
   1143		       __func__, dev->id, dev->attr.max_recv_sge);
   1144		return -EINVAL;
   1145	}
   1146	/* unprivileged user space cannot create special QP */
   1147	if (udata && attrs->qp_type == IB_QPT_GSI) {
   1148		pr_err
   1149		    ("%s(%d) Userspace can't create special QPs of type=0x%x\n",
   1150		     __func__, dev->id, attrs->qp_type);
   1151		return -EINVAL;
   1152	}
   1153	/* allow creating only one GSI type of QP */
   1154	if (attrs->qp_type == IB_QPT_GSI && dev->gsi_qp_created) {
   1155		pr_err("%s(%d) GSI special QPs already created.\n",
   1156		       __func__, dev->id);
   1157		return -EINVAL;
   1158	}
   1159	/* verify consumer QPs are not trying to use GSI QP's CQ */
   1160	if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created)) {
   1161		if ((dev->gsi_sqcq == get_ocrdma_cq(attrs->send_cq)) ||
   1162			(dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) {
   1163			pr_err("%s(%d) Consumer QP cannot use GSI CQs.\n",
   1164				__func__, dev->id);
   1165			return -EINVAL;
   1166		}
   1167	}
   1168	return 0;
   1169}
   1170
   1171static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
   1172				struct ib_udata *udata, int dpp_offset,
   1173				int dpp_credit_lmt, int srq)
   1174{
   1175	int status;
   1176	u64 usr_db;
   1177	struct ocrdma_create_qp_uresp uresp;
   1178	struct ocrdma_pd *pd = qp->pd;
   1179	struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
   1180
   1181	memset(&uresp, 0, sizeof(uresp));
   1182	usr_db = dev->nic_info.unmapped_db +
   1183			(pd->id * dev->nic_info.db_page_size);
   1184	uresp.qp_id = qp->id;
   1185	uresp.sq_dbid = qp->sq.dbid;
   1186	uresp.num_sq_pages = 1;
   1187	uresp.sq_page_size = PAGE_ALIGN(qp->sq.len);
   1188	uresp.sq_page_addr[0] = virt_to_phys(qp->sq.va);
   1189	uresp.num_wqe_allocated = qp->sq.max_cnt;
   1190	if (!srq) {
   1191		uresp.rq_dbid = qp->rq.dbid;
   1192		uresp.num_rq_pages = 1;
   1193		uresp.rq_page_size = PAGE_ALIGN(qp->rq.len);
   1194		uresp.rq_page_addr[0] = virt_to_phys(qp->rq.va);
   1195		uresp.num_rqe_allocated = qp->rq.max_cnt;
   1196	}
   1197	uresp.db_page_addr = usr_db;
   1198	uresp.db_page_size = dev->nic_info.db_page_size;
   1199	uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET;
   1200	uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
   1201	uresp.db_shift = OCRDMA_DB_RQ_SHIFT;
   1202
   1203	if (qp->dpp_enabled) {
   1204		uresp.dpp_credit = dpp_credit_lmt;
   1205		uresp.dpp_offset = dpp_offset;
   1206	}
   1207	status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
   1208	if (status) {
   1209		pr_err("%s(%d) user copy error.\n", __func__, dev->id);
   1210		goto err;
   1211	}
   1212	status = ocrdma_add_mmap(pd->uctx, uresp.sq_page_addr[0],
   1213				 uresp.sq_page_size);
   1214	if (status)
   1215		goto err;
   1216
   1217	if (!srq) {
   1218		status = ocrdma_add_mmap(pd->uctx, uresp.rq_page_addr[0],
   1219					 uresp.rq_page_size);
   1220		if (status)
   1221			goto rq_map_err;
   1222	}
   1223	return status;
   1224rq_map_err:
   1225	ocrdma_del_mmap(pd->uctx, uresp.sq_page_addr[0], uresp.sq_page_size);
   1226err:
   1227	return status;
   1228}
   1229
   1230static void ocrdma_set_qp_db(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
   1231			     struct ocrdma_pd *pd)
   1232{
   1233	if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
   1234		qp->sq_db = dev->nic_info.db +
   1235			(pd->id * dev->nic_info.db_page_size) +
   1236			OCRDMA_DB_GEN2_SQ_OFFSET;
   1237		qp->rq_db = dev->nic_info.db +
   1238			(pd->id * dev->nic_info.db_page_size) +
   1239			OCRDMA_DB_GEN2_RQ_OFFSET;
   1240	} else {
   1241		qp->sq_db = dev->nic_info.db +
   1242			(pd->id * dev->nic_info.db_page_size) +
   1243			OCRDMA_DB_SQ_OFFSET;
   1244		qp->rq_db = dev->nic_info.db +
   1245			(pd->id * dev->nic_info.db_page_size) +
   1246			OCRDMA_DB_RQ_OFFSET;
   1247	}
   1248}
   1249
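       /* Allocate the wr_id tracking tables for the SQ and RQ, sized to
        * their maximum entry counts. Only kernel QPs need these; user-space
        * QPs manage wr_ids in the user library.
        */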
   1250static int ocrdma_alloc_wr_id_tbl(struct ocrdma_qp *qp)
   1251{
   1252	qp->wqe_wr_id_tbl =
   1253	    kcalloc(qp->sq.max_cnt, sizeof(*(qp->wqe_wr_id_tbl)),
   1254		    GFP_KERNEL);
   1255	if (qp->wqe_wr_id_tbl == NULL)
   1256		return -ENOMEM;
   1257	qp->rqe_wr_id_tbl =
   1258	    kcalloc(qp->rq.max_cnt, sizeof(u64), GFP_KERNEL);
   1259	if (qp->rqe_wr_id_tbl == NULL)
   1260		return -ENOMEM;
   1261
   1262	return 0;
   1263}
   1264
   1265static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp,
   1266				      struct ocrdma_pd *pd,
   1267				      struct ib_qp_init_attr *attrs)
   1268{
   1269	qp->pd = pd;
   1270	spin_lock_init(&qp->q_lock);
   1271	INIT_LIST_HEAD(&qp->sq_entry);
   1272	INIT_LIST_HEAD(&qp->rq_entry);
   1273
   1274	qp->qp_type = attrs->qp_type;
   1275	qp->cap_flags = OCRDMA_QP_INB_RD | OCRDMA_QP_INB_WR;
   1276	qp->max_inline_data = attrs->cap.max_inline_data;
   1277	qp->sq.max_sges = attrs->cap.max_send_sge;
   1278	qp->rq.max_sges = attrs->cap.max_recv_sge;
   1279	qp->state = OCRDMA_QPS_RST;
   1280	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
   1281}
   1282
   1283static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev,
   1284				   struct ib_qp_init_attr *attrs)
   1285{
   1286	if (attrs->qp_type == IB_QPT_GSI) {
   1287		dev->gsi_qp_created = 1;
   1288		dev->gsi_sqcq = get_ocrdma_cq(attrs->send_cq);
   1289		dev->gsi_rqcq = get_ocrdma_cq(attrs->recv_cq);
   1290	}
   1291}
   1292
   1293int ocrdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
   1294		     struct ib_udata *udata)
   1295{
   1296	int status;
   1297	struct ib_pd *ibpd = ibqp->pd;
   1298	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
   1299	struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
   1300	struct ocrdma_dev *dev = get_ocrdma_dev(ibqp->device);
   1301	struct ocrdma_create_qp_ureq ureq;
   1302	u16 dpp_credit_lmt, dpp_offset;
   1303
   1304	if (attrs->create_flags)
   1305		return -EOPNOTSUPP;
   1306
   1307	status = ocrdma_check_qp_params(ibpd, dev, attrs, udata);
   1308	if (status)
   1309		goto gen_err;
   1310
   1311	memset(&ureq, 0, sizeof(ureq));
   1312	if (udata) {
   1313		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
   1314			return -EFAULT;
   1315	}
   1316	ocrdma_set_qp_init_params(qp, pd, attrs);
   1317	if (udata == NULL)
   1318		qp->cap_flags |= (OCRDMA_QP_MW_BIND | OCRDMA_QP_LKEY0 |
   1319					OCRDMA_QP_FAST_REG);
   1320
   1321	mutex_lock(&dev->dev_lock);
   1322	status = ocrdma_mbx_create_qp(qp, attrs, ureq.enable_dpp_cq,
   1323					ureq.dpp_cq_id,
   1324					&dpp_offset, &dpp_credit_lmt);
   1325	if (status)
   1326		goto mbx_err;
   1327
   1328	/* user-space QPs' wr_id tables are managed in the library */
   1329	if (udata == NULL) {
   1330		status = ocrdma_alloc_wr_id_tbl(qp);
   1331		if (status)
   1332			goto map_err;
   1333	}
   1334
   1335	status = ocrdma_add_qpn_map(dev, qp);
   1336	if (status)
   1337		goto map_err;
   1338	ocrdma_set_qp_db(dev, qp, pd);
   1339	if (udata) {
   1340		status = ocrdma_copy_qp_uresp(qp, udata, dpp_offset,
   1341					      dpp_credit_lmt,
   1342					      (attrs->srq != NULL));
   1343		if (status)
   1344			goto cpy_err;
   1345	}
   1346	ocrdma_store_gsi_qp_cq(dev, attrs);
   1347	qp->ibqp.qp_num = qp->id;
   1348	mutex_unlock(&dev->dev_lock);
   1349	return 0;
   1350
   1351cpy_err:
   1352	ocrdma_del_qpn_map(dev, qp);
   1353map_err:
   1354	ocrdma_mbx_destroy_qp(dev, qp);
   1355mbx_err:
   1356	mutex_unlock(&dev->dev_lock);
   1357	kfree(qp->wqe_wr_id_tbl);
   1358	kfree(qp->rqe_wr_id_tbl);
   1359	pr_err("%s(%d) error=%d\n", __func__, dev->id, status);
   1360gen_err:
   1361	return status;
   1362}
   1363
   1364int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
   1365		      int attr_mask)
   1366{
   1367	int status = 0;
   1368	struct ocrdma_qp *qp;
   1369	struct ocrdma_dev *dev;
   1370	enum ib_qp_state old_qps;
   1371
   1372	qp = get_ocrdma_qp(ibqp);
   1373	dev = get_ocrdma_dev(ibqp->device);
   1374	if (attr_mask & IB_QP_STATE)
   1375		status = ocrdma_qp_state_change(qp, attr->qp_state, &old_qps);
   1376	/* if the new and previous states are the same, the hw doesn't
   1377	 * need to know about it.
   1378	 */
   1379	if (status < 0)
   1380		return status;
   1381	return ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask);
   1382}
   1383
   1384int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
   1385		     int attr_mask, struct ib_udata *udata)
   1386{
   1387	unsigned long flags;
   1388	int status = -EINVAL;
   1389	struct ocrdma_qp *qp;
   1390	struct ocrdma_dev *dev;
   1391	enum ib_qp_state old_qps, new_qps;
   1392
   1393	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
   1394		return -EOPNOTSUPP;
   1395
   1396	qp = get_ocrdma_qp(ibqp);
   1397	dev = get_ocrdma_dev(ibqp->device);
   1398
   1399	/* synchronize with multiple contexts trying to change or retrieve qp state */
   1400	mutex_lock(&dev->dev_lock);
   1401	/* synchronize with wqe/rqe posting and cqe processing contexts */
   1402	spin_lock_irqsave(&qp->q_lock, flags);
   1403	old_qps = get_ibqp_state(qp->state);
   1404	if (attr_mask & IB_QP_STATE)
   1405		new_qps = attr->qp_state;
   1406	else
   1407		new_qps = old_qps;
   1408	spin_unlock_irqrestore(&qp->q_lock, flags);
   1409
   1410	if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) {
   1411		pr_err("%s(%d) invalid attribute mask=0x%x specified for\n"
   1412		       "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n",
   1413		       __func__, dev->id, attr_mask, qp->id, ibqp->qp_type,
   1414		       old_qps, new_qps);
   1415		goto param_err;
   1416	}
   1417
   1418	status = _ocrdma_modify_qp(ibqp, attr, attr_mask);
   1419	if (status > 0)
   1420		status = 0;
   1421param_err:
   1422	mutex_unlock(&dev->dev_lock);
   1423	return status;
   1424}
   1425
   1426static enum ib_mtu ocrdma_mtu_int_to_enum(u16 mtu)
   1427{
   1428	switch (mtu) {
   1429	case 256:
   1430		return IB_MTU_256;
   1431	case 512:
   1432		return IB_MTU_512;
   1433	case 1024:
   1434		return IB_MTU_1024;
   1435	case 2048:
   1436		return IB_MTU_2048;
   1437	case 4096:
   1438		return IB_MTU_4096;
   1439	default:
   1440		return IB_MTU_1024;
   1441	}
   1442}
   1443
   1444static int ocrdma_to_ib_qp_acc_flags(int qp_cap_flags)
   1445{
   1446	int ib_qp_acc_flags = 0;
   1447
   1448	if (qp_cap_flags & OCRDMA_QP_INB_WR)
   1449		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
   1450	if (qp_cap_flags & OCRDMA_QP_INB_RD)
   1451		ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
   1452	return ib_qp_acc_flags;
   1453}
   1454
   1455int ocrdma_query_qp(struct ib_qp *ibqp,
   1456		    struct ib_qp_attr *qp_attr,
   1457		    int attr_mask, struct ib_qp_init_attr *qp_init_attr)
   1458{
   1459	int status;
   1460	u32 qp_state;
   1461	struct ocrdma_qp_params params;
   1462	struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
   1463	struct ocrdma_dev *dev = get_ocrdma_dev(ibqp->device);
   1464
   1465	memset(&params, 0, sizeof(params));
   1466	mutex_lock(&dev->dev_lock);
   1467	status = ocrdma_mbx_query_qp(dev, qp, &params);
   1468	mutex_unlock(&dev->dev_lock);
   1469	if (status)
   1470		goto mbx_err;
   1471	if (qp->qp_type == IB_QPT_UD)
   1472		qp_attr->qkey = params.qkey;
   1473	qp_attr->path_mtu =
   1474		ocrdma_mtu_int_to_enum(params.path_mtu_pkey_indx &
   1475				OCRDMA_QP_PARAMS_PATH_MTU_MASK) >>
   1476				OCRDMA_QP_PARAMS_PATH_MTU_SHIFT;
   1477	qp_attr->path_mig_state = IB_MIG_MIGRATED;
   1478	qp_attr->rq_psn = params.hop_lmt_rq_psn & OCRDMA_QP_PARAMS_RQ_PSN_MASK;
   1479	qp_attr->sq_psn = params.tclass_sq_psn & OCRDMA_QP_PARAMS_SQ_PSN_MASK;
   1480	qp_attr->dest_qp_num =
   1481	    params.ack_to_rnr_rtc_dest_qpn & OCRDMA_QP_PARAMS_DEST_QPN_MASK;
   1482
   1483	qp_attr->qp_access_flags = ocrdma_to_ib_qp_acc_flags(qp->cap_flags);
   1484	qp_attr->cap.max_send_wr = qp->sq.max_cnt - 1;
   1485	qp_attr->cap.max_recv_wr = qp->rq.max_cnt - 1;
   1486	qp_attr->cap.max_send_sge = qp->sq.max_sges;
   1487	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
   1488	qp_attr->cap.max_inline_data = qp->max_inline_data;
   1489	qp_init_attr->cap = qp_attr->cap;
   1490	qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
   1491
   1492	rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
   1493			params.rnt_rc_sl_fl &
   1494			  OCRDMA_QP_PARAMS_FLOW_LABEL_MASK,
   1495			qp->sgid_idx,
   1496			(params.hop_lmt_rq_psn &
   1497			 OCRDMA_QP_PARAMS_HOP_LMT_MASK) >>
   1498			 OCRDMA_QP_PARAMS_HOP_LMT_SHIFT,
   1499			(params.tclass_sq_psn &
   1500			 OCRDMA_QP_PARAMS_TCLASS_MASK) >>
   1501			 OCRDMA_QP_PARAMS_TCLASS_SHIFT);
   1502	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid[0]);
   1503
   1504	rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
   1505	rdma_ah_set_sl(&qp_attr->ah_attr, (params.rnt_rc_sl_fl &
   1506					   OCRDMA_QP_PARAMS_SL_MASK) >>
   1507					   OCRDMA_QP_PARAMS_SL_SHIFT);
   1508	qp_attr->timeout = (params.ack_to_rnr_rtc_dest_qpn &
   1509			    OCRDMA_QP_PARAMS_ACK_TIMEOUT_MASK) >>
   1510				OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT;
   1511	qp_attr->rnr_retry = (params.ack_to_rnr_rtc_dest_qpn &
   1512			      OCRDMA_QP_PARAMS_RNR_RETRY_CNT_MASK) >>
   1513				OCRDMA_QP_PARAMS_RNR_RETRY_CNT_SHIFT;
   1514	qp_attr->retry_cnt =
   1515	    (params.rnt_rc_sl_fl & OCRDMA_QP_PARAMS_RETRY_CNT_MASK) >>
   1516		OCRDMA_QP_PARAMS_RETRY_CNT_SHIFT;
   1517	qp_attr->min_rnr_timer = 0;
   1518	qp_attr->pkey_index = 0;
   1519	qp_attr->port_num = 1;
   1520	rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
   1521	rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
   1522	qp_attr->alt_pkey_index = 0;
   1523	qp_attr->alt_port_num = 0;
   1524	qp_attr->alt_timeout = 0;
   1525	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
   1526	qp_state = (params.max_sge_recv_flags & OCRDMA_QP_PARAMS_STATE_MASK) >>
   1527		    OCRDMA_QP_PARAMS_STATE_SHIFT;
   1528	qp_attr->qp_state = get_ibqp_state(qp_state);
   1529	qp_attr->cur_qp_state = qp_attr->qp_state;
   1530	qp_attr->sq_draining = (qp_state == OCRDMA_QPS_SQ_DRAINING) ? 1 : 0;
   1531	qp_attr->max_dest_rd_atomic =
   1532	    params.max_ord_ird >> OCRDMA_QP_PARAMS_MAX_ORD_SHIFT;
   1533	qp_attr->max_rd_atomic =
   1534	    params.max_ord_ird & OCRDMA_QP_PARAMS_MAX_IRD_MASK;
   1535	qp_attr->en_sqd_async_notify = (params.max_sge_recv_flags &
   1536				OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC) ? 1 : 0;
   1537	/* Sync driver QP state with FW */
   1538	ocrdma_qp_state_change(qp, qp_attr->qp_state, NULL);
   1539mbx_err:
   1540	return status;
   1541}
   1542
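       /* Toggle the bit tracking RQE slot 'idx' in the SRQ's index bitmap. */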
   1543static void ocrdma_srq_toggle_bit(struct ocrdma_srq *srq, unsigned int idx)
   1544{
   1545	unsigned int i = idx / 32;
   1546	u32 mask = (1U << (idx % 32));
   1547
   1548	srq->idx_bit_fields[i] ^= mask;
   1549}
   1550
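       /* Number of free entries in the circular hw queue, derived from the
        * current head and tail indices.
        */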
   1551static int ocrdma_hwq_free_cnt(struct ocrdma_qp_hwq_info *q)
   1552{
   1553	return ((q->max_wqe_idx - q->head) + q->tail) % q->max_cnt;
   1554}
   1555
   1556static int is_hw_sq_empty(struct ocrdma_qp *qp)
   1557{
   1558	return (qp->sq.tail == qp->sq.head);
   1559}
   1560
   1561static int is_hw_rq_empty(struct ocrdma_qp *qp)
   1562{
   1563	return (qp->rq.tail == qp->rq.head);
   1564}
   1565
   1566static void *ocrdma_hwq_head(struct ocrdma_qp_hwq_info *q)
   1567{
   1568	return q->va + (q->head * q->entry_size);
   1569}
   1570
   1571static void *ocrdma_hwq_head_from_idx(struct ocrdma_qp_hwq_info *q,
   1572				      u32 idx)
   1573{
   1574	return q->va + (idx * q->entry_size);
   1575}
   1576
   1577static void ocrdma_hwq_inc_head(struct ocrdma_qp_hwq_info *q)
   1578{
   1579	q->head = (q->head + 1) & q->max_wqe_idx;
   1580}
   1581
   1582static void ocrdma_hwq_inc_tail(struct ocrdma_qp_hwq_info *q)
   1583{
   1584	q->tail = (q->tail + 1) & q->max_wqe_idx;
   1585}
   1586
   1587/* discard the cqe for a given QP */
   1588static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq)
   1589{
   1590	unsigned long cq_flags;
   1591	unsigned long flags;
   1592	int discard_cnt = 0;
   1593	u32 cur_getp, stop_getp;
   1594	struct ocrdma_cqe *cqe;
   1595	u32 qpn = 0, wqe_idx = 0;
   1596
   1597	spin_lock_irqsave(&cq->cq_lock, cq_flags);
   1598
   1599	/* traverse the CQEs in the hw CQ,
   1600	 * find the CQEs matching the given qp, and
   1601	 * mark each match discarded by clearing its qpn.
   1602	 * the doorbell is rung in poll_cq() since
   1603	 * we don't complete cqes out of order.
   1604	 */
   1605
   1606	cur_getp = cq->getp;
   1607	/* find up to where we reap the cq. */
   1608	stop_getp = cur_getp;
   1609	do {
   1610		if (is_hw_sq_empty(qp) && (!qp->srq && is_hw_rq_empty(qp)))
   1611			break;
   1612
   1613		cqe = cq->va + cur_getp;
   1614		/* if (a) we are done reaping the whole hw cq, or
   1615		 *    (b) the qp's work queues become empty,
   1616		 * then exit.
   1617		 */
   1618		qpn = cqe->cmn.qpn & OCRDMA_CQE_QPN_MASK;
   1619		/* if a previously discarded cqe is found, skip it too. */
   1620		/* check for matching qp */
   1621		if (qpn == 0 || qpn != qp->id)
   1622			goto skip_cqe;
   1623
   1624		if (is_cqe_for_sq(cqe)) {
   1625			ocrdma_hwq_inc_tail(&qp->sq);
   1626		} else {
   1627			if (qp->srq) {
   1628				wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
   1629					OCRDMA_CQE_BUFTAG_SHIFT) &
   1630					qp->srq->rq.max_wqe_idx;
   1631				BUG_ON(wqe_idx < 1);
   1632				spin_lock_irqsave(&qp->srq->q_lock, flags);
   1633				ocrdma_hwq_inc_tail(&qp->srq->rq);
   1634				ocrdma_srq_toggle_bit(qp->srq, wqe_idx - 1);
   1635				spin_unlock_irqrestore(&qp->srq->q_lock, flags);
   1636
   1637			} else {
   1638				ocrdma_hwq_inc_tail(&qp->rq);
   1639			}
   1640		}
   1641		/* mark cqe discarded so that it is not picked up later
   1642		 * in the poll_cq().
   1643		 */
   1644		discard_cnt += 1;
   1645		cqe->cmn.qpn = 0;
   1646skip_cqe:
   1647		cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
   1648	} while (cur_getp != stop_getp);
   1649	spin_unlock_irqrestore(&cq->cq_lock, cq_flags);
   1650}
   1651
   1652void ocrdma_del_flush_qp(struct ocrdma_qp *qp)
   1653{
   1654	int found = false;
   1655	unsigned long flags;
   1656	struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
   1657	/* sync with any active CQ poll */
   1658
   1659	spin_lock_irqsave(&dev->flush_q_lock, flags);
   1660	found = ocrdma_is_qp_in_sq_flushlist(qp->sq_cq, qp);
   1661	if (found)
   1662		list_del(&qp->sq_entry);
   1663	if (!qp->srq) {
   1664		found = ocrdma_is_qp_in_rq_flushlist(qp->rq_cq, qp);
   1665		if (found)
   1666			list_del(&qp->rq_entry);
   1667	}
   1668	spin_unlock_irqrestore(&dev->flush_q_lock, flags);
   1669}
   1670
   1671int ocrdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
   1672{
   1673	struct ocrdma_pd *pd;
   1674	struct ocrdma_qp *qp;
   1675	struct ocrdma_dev *dev;
   1676	struct ib_qp_attr attrs;
   1677	int attr_mask;
   1678	unsigned long flags;
   1679
   1680	qp = get_ocrdma_qp(ibqp);
   1681	dev = get_ocrdma_dev(ibqp->device);
   1682
   1683	pd = qp->pd;
   1684
   1685	/* change the QP state to ERROR */
   1686	if (qp->state != OCRDMA_QPS_RST) {
   1687		attrs.qp_state = IB_QPS_ERR;
   1688		attr_mask = IB_QP_STATE;
   1689		_ocrdma_modify_qp(ibqp, &attrs, attr_mask);
   1690	}
    1691	/* ensure that CQEs for a newly created QP (whose id may be the
    1692	 * same as the one just being destroyed) don't get discarded
    1693	 * until the old QP's CQEs are discarded.
    1694	 */
   1695	mutex_lock(&dev->dev_lock);
   1696	(void) ocrdma_mbx_destroy_qp(dev, qp);
   1697
    1698	/*
    1699	 * acquire the CQ lock while destroy is in progress, in order to
    1700	 * protect against processing in-flight CQEs for this QP.
    1701	 */
   1702	spin_lock_irqsave(&qp->sq_cq->cq_lock, flags);
   1703	if (qp->rq_cq && (qp->rq_cq != qp->sq_cq)) {
   1704		spin_lock(&qp->rq_cq->cq_lock);
   1705		ocrdma_del_qpn_map(dev, qp);
   1706		spin_unlock(&qp->rq_cq->cq_lock);
   1707	} else {
   1708		ocrdma_del_qpn_map(dev, qp);
   1709	}
   1710	spin_unlock_irqrestore(&qp->sq_cq->cq_lock, flags);
   1711
   1712	if (!pd->uctx) {
   1713		ocrdma_discard_cqes(qp, qp->sq_cq);
   1714		ocrdma_discard_cqes(qp, qp->rq_cq);
   1715	}
   1716	mutex_unlock(&dev->dev_lock);
   1717
   1718	if (pd->uctx) {
   1719		ocrdma_del_mmap(pd->uctx, (u64) qp->sq.pa,
   1720				PAGE_ALIGN(qp->sq.len));
   1721		if (!qp->srq)
   1722			ocrdma_del_mmap(pd->uctx, (u64) qp->rq.pa,
   1723					PAGE_ALIGN(qp->rq.len));
   1724	}
   1725
   1726	ocrdma_del_flush_qp(qp);
   1727
   1728	kfree(qp->wqe_wr_id_tbl);
   1729	kfree(qp->rqe_wr_id_tbl);
   1730	return 0;
   1731}
   1732
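        /* Return SRQ details (RQ dbid, queue page, doorbell page/offset and
         * shift) to user space and register the RQ page for a later mmap()
         * by the user library.
         */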
   1733static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq,
   1734				struct ib_udata *udata)
   1735{
   1736	int status;
   1737	struct ocrdma_create_srq_uresp uresp;
   1738
   1739	memset(&uresp, 0, sizeof(uresp));
   1740	uresp.rq_dbid = srq->rq.dbid;
   1741	uresp.num_rq_pages = 1;
   1742	uresp.rq_page_addr[0] = virt_to_phys(srq->rq.va);
   1743	uresp.rq_page_size = srq->rq.len;
   1744	uresp.db_page_addr = dev->nic_info.unmapped_db +
   1745	    (srq->pd->id * dev->nic_info.db_page_size);
   1746	uresp.db_page_size = dev->nic_info.db_page_size;
   1747	uresp.num_rqe_allocated = srq->rq.max_cnt;
   1748	if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
   1749		uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
   1750		uresp.db_shift = 24;
   1751	} else {
   1752		uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET;
   1753		uresp.db_shift = 16;
   1754	}
   1755
   1756	status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
   1757	if (status)
   1758		return status;
   1759	status = ocrdma_add_mmap(srq->pd->uctx, uresp.rq_page_addr[0],
   1760				 uresp.rq_page_size);
    1761	if (status)
    1762		return status;
    1763	return 0;
   1764}
   1765
   1766int ocrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
   1767		      struct ib_udata *udata)
   1768{
   1769	int status;
   1770	struct ocrdma_pd *pd = get_ocrdma_pd(ibsrq->pd);
   1771	struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device);
   1772	struct ocrdma_srq *srq = get_ocrdma_srq(ibsrq);
   1773
   1774	if (init_attr->srq_type != IB_SRQT_BASIC)
   1775		return -EOPNOTSUPP;
   1776
   1777	if (init_attr->attr.max_sge > dev->attr.max_recv_sge)
   1778		return -EINVAL;
   1779	if (init_attr->attr.max_wr > dev->attr.max_rqe)
   1780		return -EINVAL;
   1781
   1782	spin_lock_init(&srq->q_lock);
   1783	srq->pd = pd;
   1784	srq->db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size);
   1785	status = ocrdma_mbx_create_srq(dev, srq, init_attr, pd);
   1786	if (status)
   1787		return status;
   1788
   1789	if (!udata) {
   1790		srq->rqe_wr_id_tbl = kcalloc(srq->rq.max_cnt, sizeof(u64),
   1791					     GFP_KERNEL);
   1792		if (!srq->rqe_wr_id_tbl) {
   1793			status = -ENOMEM;
   1794			goto arm_err;
   1795		}
   1796
   1797		srq->bit_fields_len = (srq->rq.max_cnt / 32) +
   1798		    (srq->rq.max_cnt % 32 ? 1 : 0);
   1799		srq->idx_bit_fields =
   1800		    kmalloc_array(srq->bit_fields_len, sizeof(u32),
   1801				  GFP_KERNEL);
   1802		if (!srq->idx_bit_fields) {
   1803			status = -ENOMEM;
   1804			goto arm_err;
   1805		}
   1806		memset(srq->idx_bit_fields, 0xff,
   1807		       srq->bit_fields_len * sizeof(u32));
   1808	}
   1809
   1810	if (init_attr->attr.srq_limit) {
   1811		status = ocrdma_mbx_modify_srq(srq, &init_attr->attr);
   1812		if (status)
   1813			goto arm_err;
   1814	}
   1815
   1816	if (udata) {
   1817		status = ocrdma_copy_srq_uresp(dev, srq, udata);
   1818		if (status)
   1819			goto arm_err;
   1820	}
   1821
   1822	return 0;
   1823
   1824arm_err:
   1825	ocrdma_mbx_destroy_srq(dev, srq);
   1826	kfree(srq->rqe_wr_id_tbl);
   1827	kfree(srq->idx_bit_fields);
   1828	return status;
   1829}
   1830
   1831int ocrdma_modify_srq(struct ib_srq *ibsrq,
   1832		      struct ib_srq_attr *srq_attr,
   1833		      enum ib_srq_attr_mask srq_attr_mask,
   1834		      struct ib_udata *udata)
   1835{
   1836	int status;
   1837	struct ocrdma_srq *srq;
   1838
   1839	srq = get_ocrdma_srq(ibsrq);
   1840	if (srq_attr_mask & IB_SRQ_MAX_WR)
   1841		status = -EINVAL;
   1842	else
   1843		status = ocrdma_mbx_modify_srq(srq, srq_attr);
   1844	return status;
   1845}
   1846
   1847int ocrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
   1848{
   1849	struct ocrdma_srq *srq;
   1850
   1851	srq = get_ocrdma_srq(ibsrq);
   1852	return ocrdma_mbx_query_srq(srq, srq_attr);
   1853}
   1854
   1855int ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
   1856{
   1857	struct ocrdma_srq *srq;
   1858	struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device);
   1859
   1860	srq = get_ocrdma_srq(ibsrq);
   1861
   1862	ocrdma_mbx_destroy_srq(dev, srq);
   1863
   1864	if (srq->pd->uctx)
   1865		ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa,
   1866				PAGE_ALIGN(srq->rq.len));
   1867
   1868	kfree(srq->idx_bit_fields);
   1869	kfree(srq->rqe_wr_id_tbl);
   1870	return 0;
   1871}
   1872
   1873/* unprivileged verbs and their support functions. */
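        /* Fill the extended UD header that follows the base WQE header:
         * destination QPN, qkey (the QP's own qkey for GSI), AH id and
         * L3 header type; flag VLAN insertion if the AH carries a VLAN.
         */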
   1874static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp,
   1875				struct ocrdma_hdr_wqe *hdr,
   1876				const struct ib_send_wr *wr)
   1877{
   1878	struct ocrdma_ewqe_ud_hdr *ud_hdr =
   1879		(struct ocrdma_ewqe_ud_hdr *)(hdr + 1);
   1880	struct ocrdma_ah *ah = get_ocrdma_ah(ud_wr(wr)->ah);
   1881
   1882	ud_hdr->rsvd_dest_qpn = ud_wr(wr)->remote_qpn;
   1883	if (qp->qp_type == IB_QPT_GSI)
   1884		ud_hdr->qkey = qp->qkey;
   1885	else
   1886		ud_hdr->qkey = ud_wr(wr)->remote_qkey;
   1887	ud_hdr->rsvd_ahid = ah->id;
   1888	ud_hdr->hdr_type = ah->hdr_type;
   1889	if (ah->av->valid & OCRDMA_AV_VLAN_VALID)
   1890		hdr->cw |= (OCRDMA_FLAG_AH_VLAN_PR << OCRDMA_WQE_FLAGS_SHIFT);
   1891}
   1892
   1893static void ocrdma_build_sges(struct ocrdma_hdr_wqe *hdr,
   1894			      struct ocrdma_sge *sge, int num_sge,
   1895			      struct ib_sge *sg_list)
   1896{
   1897	int i;
   1898
   1899	for (i = 0; i < num_sge; i++) {
   1900		sge[i].lrkey = sg_list[i].lkey;
   1901		sge[i].addr_lo = sg_list[i].addr;
   1902		sge[i].addr_hi = upper_32_bits(sg_list[i].addr);
   1903		sge[i].len = sg_list[i].length;
   1904		hdr->total_len += sg_list[i].length;
   1905	}
   1906	if (num_sge == 0)
   1907		memset(sge, 0, sizeof(*sge));
   1908}
   1909
   1910static inline uint32_t ocrdma_sglist_len(struct ib_sge *sg_list, int num_sge)
   1911{
   1912	uint32_t total_len = 0, i;
   1913
   1914	for (i = 0; i < num_sge; i++)
   1915		total_len += sg_list[i].length;
   1916	return total_len;
   1917}
   1918
   1919
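        /* For IB_SEND_INLINE on non-UD QPs, copy the payload directly into
         * the WQE instead of building SGEs; otherwise build a normal SGE
         * list.  In both cases the WQE size field in hdr->cw is updated.
         */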
   1920static int ocrdma_build_inline_sges(struct ocrdma_qp *qp,
   1921				    struct ocrdma_hdr_wqe *hdr,
   1922				    struct ocrdma_sge *sge,
   1923				    const struct ib_send_wr *wr, u32 wqe_size)
   1924{
   1925	int i;
   1926	char *dpp_addr;
   1927
   1928	if (wr->send_flags & IB_SEND_INLINE && qp->qp_type != IB_QPT_UD) {
   1929		hdr->total_len = ocrdma_sglist_len(wr->sg_list, wr->num_sge);
   1930		if (unlikely(hdr->total_len > qp->max_inline_data)) {
    1931			pr_err("%s() supported_len=0x%x, unsupported len req=0x%x\n",
    1932			       __func__, qp->max_inline_data,
    1933			       hdr->total_len);
   1934			return -EINVAL;
   1935		}
   1936		dpp_addr = (char *)sge;
   1937		for (i = 0; i < wr->num_sge; i++) {
   1938			memcpy(dpp_addr,
   1939			       (void *)(unsigned long)wr->sg_list[i].addr,
   1940			       wr->sg_list[i].length);
   1941			dpp_addr += wr->sg_list[i].length;
   1942		}
   1943
   1944		wqe_size += roundup(hdr->total_len, OCRDMA_WQE_ALIGN_BYTES);
    1945		if (hdr->total_len == 0)
   1946			wqe_size += sizeof(struct ocrdma_sge);
   1947		hdr->cw |= (OCRDMA_TYPE_INLINE << OCRDMA_WQE_TYPE_SHIFT);
   1948	} else {
   1949		ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list);
   1950		if (wr->num_sge)
   1951			wqe_size += (wr->num_sge * sizeof(struct ocrdma_sge));
   1952		else
   1953			wqe_size += sizeof(struct ocrdma_sge);
   1954		hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
   1955	}
   1956	hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
   1957	return 0;
   1958}
   1959
   1960static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
   1961			     const struct ib_send_wr *wr)
   1962{
   1963	struct ocrdma_sge *sge;
   1964	u32 wqe_size = sizeof(*hdr);
   1965
   1966	if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
   1967		ocrdma_build_ud_hdr(qp, hdr, wr);
   1968		sge = (struct ocrdma_sge *)(hdr + 2);
   1969		wqe_size += sizeof(struct ocrdma_ewqe_ud_hdr);
   1970	} else {
   1971		sge = (struct ocrdma_sge *)(hdr + 1);
   1972	}
   1973
   1974	return ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
   1975}
   1976
   1977static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
   1978			      const struct ib_send_wr *wr)
   1979{
   1980	int status;
   1981	struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
   1982	struct ocrdma_sge *sge = ext_rw + 1;
   1983	u32 wqe_size = sizeof(*hdr) + sizeof(*ext_rw);
   1984
   1985	status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
   1986	if (status)
   1987		return status;
   1988	ext_rw->addr_lo = rdma_wr(wr)->remote_addr;
   1989	ext_rw->addr_hi = upper_32_bits(rdma_wr(wr)->remote_addr);
   1990	ext_rw->lrkey = rdma_wr(wr)->rkey;
   1991	ext_rw->len = hdr->total_len;
   1992	return 0;
   1993}
   1994
   1995static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
   1996			      const struct ib_send_wr *wr)
   1997{
   1998	struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
   1999	struct ocrdma_sge *sge = ext_rw + 1;
   2000	u32 wqe_size = ((wr->num_sge + 1) * sizeof(struct ocrdma_sge)) +
   2001	    sizeof(struct ocrdma_hdr_wqe);
   2002
   2003	ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list);
   2004	hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
   2005	hdr->cw |= (OCRDMA_READ << OCRDMA_WQE_OPCODE_SHIFT);
   2006	hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
   2007
   2008	ext_rw->addr_lo = rdma_wr(wr)->remote_addr;
   2009	ext_rw->addr_hi = upper_32_bits(rdma_wr(wr)->remote_addr);
   2010	ext_rw->lrkey = rdma_wr(wr)->rkey;
   2011	ext_rw->len = hdr->total_len;
   2012}
   2013
   2014static int get_encoded_page_size(int pg_sz)
   2015{
    2016	/* Max size is 256M (4096 << 16) */
   2017	int i = 0;
   2018	for (; i < 17; i++)
   2019		if (pg_sz == (4096 << i))
   2020			break;
   2021	return i;
   2022}
   2023
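        /* Build a fast-register WQE for IB_WR_REG_MR: encode access rights,
         * key, length, iova and first-byte offset, then copy the MR's page
         * addresses into the PBL(s) backing the HW MR.
         */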
   2024static int ocrdma_build_reg(struct ocrdma_qp *qp,
   2025			    struct ocrdma_hdr_wqe *hdr,
   2026			    const struct ib_reg_wr *wr)
   2027{
   2028	u64 fbo;
   2029	struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1);
   2030	struct ocrdma_mr *mr = get_ocrdma_mr(wr->mr);
   2031	struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
   2032	struct ocrdma_pbe *pbe;
   2033	u32 wqe_size = sizeof(*fast_reg) + sizeof(*hdr);
   2034	int num_pbes = 0, i;
   2035
   2036	wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES);
   2037
   2038	hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT);
   2039	hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
   2040
   2041	if (wr->access & IB_ACCESS_LOCAL_WRITE)
   2042		hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_LOCAL_WR;
   2043	if (wr->access & IB_ACCESS_REMOTE_WRITE)
   2044		hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_WR;
   2045	if (wr->access & IB_ACCESS_REMOTE_READ)
   2046		hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_RD;
   2047	hdr->lkey = wr->key;
   2048	hdr->total_len = mr->ibmr.length;
   2049
   2050	fbo = mr->ibmr.iova - mr->pages[0];
   2051
   2052	fast_reg->va_hi = upper_32_bits(mr->ibmr.iova);
   2053	fast_reg->va_lo = (u32) (mr->ibmr.iova & 0xffffffff);
   2054	fast_reg->fbo_hi = upper_32_bits(fbo);
   2055	fast_reg->fbo_lo = (u32) fbo & 0xffffffff;
   2056	fast_reg->num_sges = mr->npages;
   2057	fast_reg->size_sge = get_encoded_page_size(mr->ibmr.page_size);
   2058
   2059	pbe = pbl_tbl->va;
   2060	for (i = 0; i < mr->npages; i++) {
   2061		u64 buf_addr = mr->pages[i];
   2062
   2063		pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK));
   2064		pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr));
   2065		num_pbes += 1;
   2066		pbe++;
   2067
    2068		/* if the current pbl is full,
    2069		 * move on to the next pbl.
    2070		 */
   2071		if (num_pbes == (mr->hwmr.pbl_size/sizeof(u64))) {
   2072			pbl_tbl++;
   2073			pbe = (struct ocrdma_pbe *)pbl_tbl->va;
   2074		}
   2075	}
   2076
   2077	return 0;
   2078}
   2079
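        /* Ring the SQ doorbell so the adapter processes the newly posted
         * WQE; dbid selects the queue, and the shifted bit appears to carry
         * the count of entries added (one per call).
         */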
   2080static void ocrdma_ring_sq_db(struct ocrdma_qp *qp)
   2081{
   2082	u32 val = qp->sq.dbid | (1 << OCRDMA_DB_SQ_SHIFT);
   2083
   2084	iowrite32(val, qp->sq_db);
   2085}
   2086
   2087int ocrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
   2088		     const struct ib_send_wr **bad_wr)
   2089{
   2090	int status = 0;
   2091	struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
   2092	struct ocrdma_hdr_wqe *hdr;
   2093	unsigned long flags;
   2094
   2095	spin_lock_irqsave(&qp->q_lock, flags);
   2096	if (qp->state != OCRDMA_QPS_RTS && qp->state != OCRDMA_QPS_SQD) {
   2097		spin_unlock_irqrestore(&qp->q_lock, flags);
   2098		*bad_wr = wr;
   2099		return -EINVAL;
   2100	}
   2101
   2102	while (wr) {
   2103		if (qp->qp_type == IB_QPT_UD &&
   2104		    (wr->opcode != IB_WR_SEND &&
   2105		     wr->opcode != IB_WR_SEND_WITH_IMM)) {
   2106			*bad_wr = wr;
   2107			status = -EINVAL;
   2108			break;
   2109		}
   2110		if (ocrdma_hwq_free_cnt(&qp->sq) == 0 ||
   2111		    wr->num_sge > qp->sq.max_sges) {
   2112			*bad_wr = wr;
   2113			status = -ENOMEM;
   2114			break;
   2115		}
   2116		hdr = ocrdma_hwq_head(&qp->sq);
   2117		hdr->cw = 0;
   2118		if (wr->send_flags & IB_SEND_SIGNALED || qp->signaled)
   2119			hdr->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT);
   2120		if (wr->send_flags & IB_SEND_FENCE)
   2121			hdr->cw |=
   2122			    (OCRDMA_FLAG_FENCE_L << OCRDMA_WQE_FLAGS_SHIFT);
   2123		if (wr->send_flags & IB_SEND_SOLICITED)
   2124			hdr->cw |=
   2125			    (OCRDMA_FLAG_SOLICIT << OCRDMA_WQE_FLAGS_SHIFT);
   2126		hdr->total_len = 0;
   2127		switch (wr->opcode) {
   2128		case IB_WR_SEND_WITH_IMM:
   2129			hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT);
   2130			hdr->immdt = ntohl(wr->ex.imm_data);
   2131			fallthrough;
   2132		case IB_WR_SEND:
   2133			hdr->cw |= (OCRDMA_SEND << OCRDMA_WQE_OPCODE_SHIFT);
    2134			status = ocrdma_build_send(qp, hdr, wr);
   2135			break;
   2136		case IB_WR_SEND_WITH_INV:
   2137			hdr->cw |= (OCRDMA_FLAG_INV << OCRDMA_WQE_FLAGS_SHIFT);
   2138			hdr->cw |= (OCRDMA_SEND << OCRDMA_WQE_OPCODE_SHIFT);
   2139			hdr->lkey = wr->ex.invalidate_rkey;
   2140			status = ocrdma_build_send(qp, hdr, wr);
   2141			break;
   2142		case IB_WR_RDMA_WRITE_WITH_IMM:
   2143			hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT);
   2144			hdr->immdt = ntohl(wr->ex.imm_data);
   2145			fallthrough;
   2146		case IB_WR_RDMA_WRITE:
   2147			hdr->cw |= (OCRDMA_WRITE << OCRDMA_WQE_OPCODE_SHIFT);
   2148			status = ocrdma_build_write(qp, hdr, wr);
   2149			break;
   2150		case IB_WR_RDMA_READ:
   2151			ocrdma_build_read(qp, hdr, wr);
   2152			break;
   2153		case IB_WR_LOCAL_INV:
   2154			hdr->cw |=
   2155			    (OCRDMA_LKEY_INV << OCRDMA_WQE_OPCODE_SHIFT);
   2156			hdr->cw |= ((sizeof(struct ocrdma_hdr_wqe) +
   2157					sizeof(struct ocrdma_sge)) /
   2158				OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT;
   2159			hdr->lkey = wr->ex.invalidate_rkey;
   2160			break;
   2161		case IB_WR_REG_MR:
   2162			status = ocrdma_build_reg(qp, hdr, reg_wr(wr));
   2163			break;
   2164		default:
   2165			status = -EINVAL;
   2166			break;
   2167		}
   2168		if (status) {
   2169			*bad_wr = wr;
   2170			break;
   2171		}
   2172		if (wr->send_flags & IB_SEND_SIGNALED || qp->signaled)
   2173			qp->wqe_wr_id_tbl[qp->sq.head].signaled = 1;
   2174		else
   2175			qp->wqe_wr_id_tbl[qp->sq.head].signaled = 0;
   2176		qp->wqe_wr_id_tbl[qp->sq.head].wrid = wr->wr_id;
   2177		ocrdma_cpu_to_le32(hdr, ((hdr->cw >> OCRDMA_WQE_SIZE_SHIFT) &
   2178				   OCRDMA_WQE_SIZE_MASK) * OCRDMA_WQE_STRIDE);
   2179		/* make sure wqe is written before adapter can access it */
   2180		wmb();
   2181		/* inform hw to start processing it */
   2182		ocrdma_ring_sq_db(qp);
   2183
   2184		/* update pointer, counter for next wr */
   2185		ocrdma_hwq_inc_head(&qp->sq);
   2186		wr = wr->next;
   2187	}
   2188	spin_unlock_irqrestore(&qp->q_lock, flags);
   2189	return status;
   2190}
   2191
   2192static void ocrdma_ring_rq_db(struct ocrdma_qp *qp)
   2193{
   2194	u32 val = qp->rq.dbid | (1 << OCRDMA_DB_RQ_SHIFT);
   2195
   2196	iowrite32(val, qp->rq_db);
   2197}
   2198
   2199static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe,
   2200			     const struct ib_recv_wr *wr, u16 tag)
   2201{
   2202	u32 wqe_size = 0;
   2203	struct ocrdma_sge *sge;
   2204	if (wr->num_sge)
   2205		wqe_size = (wr->num_sge * sizeof(*sge)) + sizeof(*rqe);
   2206	else
   2207		wqe_size = sizeof(*sge) + sizeof(*rqe);
   2208
   2209	rqe->cw = ((wqe_size / OCRDMA_WQE_STRIDE) <<
   2210				OCRDMA_WQE_SIZE_SHIFT);
   2211	rqe->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT);
   2212	rqe->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
   2213	rqe->total_len = 0;
   2214	rqe->rsvd_tag = tag;
   2215	sge = (struct ocrdma_sge *)(rqe + 1);
   2216	ocrdma_build_sges(rqe, sge, wr->num_sge, wr->sg_list);
   2217	ocrdma_cpu_to_le32(rqe, wqe_size);
   2218}
   2219
   2220int ocrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
   2221		     const struct ib_recv_wr **bad_wr)
   2222{
   2223	int status = 0;
   2224	unsigned long flags;
   2225	struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
   2226	struct ocrdma_hdr_wqe *rqe;
   2227
   2228	spin_lock_irqsave(&qp->q_lock, flags);
   2229	if (qp->state == OCRDMA_QPS_RST || qp->state == OCRDMA_QPS_ERR) {
   2230		spin_unlock_irqrestore(&qp->q_lock, flags);
   2231		*bad_wr = wr;
   2232		return -EINVAL;
   2233	}
   2234	while (wr) {
   2235		if (ocrdma_hwq_free_cnt(&qp->rq) == 0 ||
   2236		    wr->num_sge > qp->rq.max_sges) {
   2237			*bad_wr = wr;
   2238			status = -ENOMEM;
   2239			break;
   2240		}
   2241		rqe = ocrdma_hwq_head(&qp->rq);
   2242		ocrdma_build_rqe(rqe, wr, 0);
   2243
   2244		qp->rqe_wr_id_tbl[qp->rq.head] = wr->wr_id;
   2245		/* make sure rqe is written before adapter can access it */
   2246		wmb();
   2247
   2248		/* inform hw to start processing it */
   2249		ocrdma_ring_rq_db(qp);
   2250
   2251		/* update pointer, counter for next wr */
   2252		ocrdma_hwq_inc_head(&qp->rq);
   2253		wr = wr->next;
   2254	}
   2255	spin_unlock_irqrestore(&qp->q_lock, flags);
   2256	return status;
   2257}
   2258
    2259/* cqes for an srq's rqes can potentially arrive out of order.
    2260 * The index gives the entry in the shadow table where the wr_id
    2261 * is stored; the tag/index is returned in the cqe so a given rqe
    2262 * can be referenced back later.
    2263 */
   2264static int ocrdma_srq_get_idx(struct ocrdma_srq *srq)
   2265{
   2266	int row = 0;
   2267	int indx = 0;
   2268
   2269	for (row = 0; row < srq->bit_fields_len; row++) {
   2270		if (srq->idx_bit_fields[row]) {
   2271			indx = ffs(srq->idx_bit_fields[row]);
   2272			indx = (row * 32) + (indx - 1);
   2273			BUG_ON(indx >= srq->rq.max_cnt);
   2274			ocrdma_srq_toggle_bit(srq, indx);
   2275			break;
   2276		}
   2277	}
   2278
   2279	BUG_ON(row == srq->bit_fields_len);
   2280	return indx + 1; /* Use from index 1 */
   2281}
   2282
   2283static void ocrdma_ring_srq_db(struct ocrdma_srq *srq)
   2284{
   2285	u32 val = srq->rq.dbid | (1 << 16);
   2286
   2287	iowrite32(val, srq->db + OCRDMA_DB_GEN2_SRQ_OFFSET);
   2288}
   2289
   2290int ocrdma_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
   2291			 const struct ib_recv_wr **bad_wr)
   2292{
   2293	int status = 0;
   2294	unsigned long flags;
   2295	struct ocrdma_srq *srq;
   2296	struct ocrdma_hdr_wqe *rqe;
   2297	u16 tag;
   2298
   2299	srq = get_ocrdma_srq(ibsrq);
   2300
   2301	spin_lock_irqsave(&srq->q_lock, flags);
   2302	while (wr) {
   2303		if (ocrdma_hwq_free_cnt(&srq->rq) == 0 ||
   2304		    wr->num_sge > srq->rq.max_sges) {
   2305			status = -ENOMEM;
   2306			*bad_wr = wr;
   2307			break;
   2308		}
   2309		tag = ocrdma_srq_get_idx(srq);
   2310		rqe = ocrdma_hwq_head(&srq->rq);
   2311		ocrdma_build_rqe(rqe, wr, tag);
   2312
   2313		srq->rqe_wr_id_tbl[tag] = wr->wr_id;
   2314		/* make sure rqe is written before adapter can perform DMA */
   2315		wmb();
   2316		/* inform hw to start processing it */
   2317		ocrdma_ring_srq_db(srq);
   2318		/* update pointer, counter for next wr */
   2319		ocrdma_hwq_inc_head(&srq->rq);
   2320		wr = wr->next;
   2321	}
   2322	spin_unlock_irqrestore(&srq->q_lock, flags);
   2323	return status;
   2324}
   2325
   2326static enum ib_wc_status ocrdma_to_ibwc_err(u16 status)
   2327{
   2328	enum ib_wc_status ibwc_status;
   2329
   2330	switch (status) {
   2331	case OCRDMA_CQE_GENERAL_ERR:
   2332		ibwc_status = IB_WC_GENERAL_ERR;
   2333		break;
   2334	case OCRDMA_CQE_LOC_LEN_ERR:
   2335		ibwc_status = IB_WC_LOC_LEN_ERR;
   2336		break;
   2337	case OCRDMA_CQE_LOC_QP_OP_ERR:
   2338		ibwc_status = IB_WC_LOC_QP_OP_ERR;
   2339		break;
   2340	case OCRDMA_CQE_LOC_EEC_OP_ERR:
   2341		ibwc_status = IB_WC_LOC_EEC_OP_ERR;
   2342		break;
   2343	case OCRDMA_CQE_LOC_PROT_ERR:
   2344		ibwc_status = IB_WC_LOC_PROT_ERR;
   2345		break;
   2346	case OCRDMA_CQE_WR_FLUSH_ERR:
   2347		ibwc_status = IB_WC_WR_FLUSH_ERR;
   2348		break;
   2349	case OCRDMA_CQE_MW_BIND_ERR:
   2350		ibwc_status = IB_WC_MW_BIND_ERR;
   2351		break;
   2352	case OCRDMA_CQE_BAD_RESP_ERR:
   2353		ibwc_status = IB_WC_BAD_RESP_ERR;
   2354		break;
   2355	case OCRDMA_CQE_LOC_ACCESS_ERR:
   2356		ibwc_status = IB_WC_LOC_ACCESS_ERR;
   2357		break;
   2358	case OCRDMA_CQE_REM_INV_REQ_ERR:
   2359		ibwc_status = IB_WC_REM_INV_REQ_ERR;
   2360		break;
   2361	case OCRDMA_CQE_REM_ACCESS_ERR:
   2362		ibwc_status = IB_WC_REM_ACCESS_ERR;
   2363		break;
   2364	case OCRDMA_CQE_REM_OP_ERR:
   2365		ibwc_status = IB_WC_REM_OP_ERR;
   2366		break;
   2367	case OCRDMA_CQE_RETRY_EXC_ERR:
   2368		ibwc_status = IB_WC_RETRY_EXC_ERR;
   2369		break;
   2370	case OCRDMA_CQE_RNR_RETRY_EXC_ERR:
   2371		ibwc_status = IB_WC_RNR_RETRY_EXC_ERR;
   2372		break;
   2373	case OCRDMA_CQE_LOC_RDD_VIOL_ERR:
   2374		ibwc_status = IB_WC_LOC_RDD_VIOL_ERR;
   2375		break;
   2376	case OCRDMA_CQE_REM_INV_RD_REQ_ERR:
   2377		ibwc_status = IB_WC_REM_INV_RD_REQ_ERR;
   2378		break;
   2379	case OCRDMA_CQE_REM_ABORT_ERR:
   2380		ibwc_status = IB_WC_REM_ABORT_ERR;
   2381		break;
   2382	case OCRDMA_CQE_INV_EECN_ERR:
   2383		ibwc_status = IB_WC_INV_EECN_ERR;
   2384		break;
   2385	case OCRDMA_CQE_INV_EEC_STATE_ERR:
   2386		ibwc_status = IB_WC_INV_EEC_STATE_ERR;
   2387		break;
   2388	case OCRDMA_CQE_FATAL_ERR:
   2389		ibwc_status = IB_WC_FATAL_ERR;
   2390		break;
   2391	case OCRDMA_CQE_RESP_TIMEOUT_ERR:
   2392		ibwc_status = IB_WC_RESP_TIMEOUT_ERR;
   2393		break;
   2394	default:
   2395		ibwc_status = IB_WC_GENERAL_ERR;
   2396		break;
   2397	}
   2398	return ibwc_status;
   2399}
   2400
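        /* Fill a send-side ib_wc from the original WQE at wqe_idx: recover
         * the wr_id from the shadow table and map the (LE-swapped) WQE
         * opcode back to the corresponding IB completion opcode.
         */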
   2401static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc,
   2402		      u32 wqe_idx)
   2403{
   2404	struct ocrdma_hdr_wqe *hdr;
   2405	struct ocrdma_sge *rw;
   2406	int opcode;
   2407
   2408	hdr = ocrdma_hwq_head_from_idx(&qp->sq, wqe_idx);
   2409
   2410	ibwc->wr_id = qp->wqe_wr_id_tbl[wqe_idx].wrid;
   2411	/* Undo the hdr->cw swap */
   2412	opcode = le32_to_cpu(hdr->cw) & OCRDMA_WQE_OPCODE_MASK;
   2413	switch (opcode) {
   2414	case OCRDMA_WRITE:
   2415		ibwc->opcode = IB_WC_RDMA_WRITE;
   2416		break;
   2417	case OCRDMA_READ:
   2418		rw = (struct ocrdma_sge *)(hdr + 1);
   2419		ibwc->opcode = IB_WC_RDMA_READ;
   2420		ibwc->byte_len = rw->len;
   2421		break;
   2422	case OCRDMA_SEND:
   2423		ibwc->opcode = IB_WC_SEND;
   2424		break;
   2425	case OCRDMA_FR_MR:
   2426		ibwc->opcode = IB_WC_REG_MR;
   2427		break;
   2428	case OCRDMA_LKEY_INV:
   2429		ibwc->opcode = IB_WC_LOCAL_INV;
   2430		break;
   2431	default:
   2432		ibwc->status = IB_WC_GENERAL_ERR;
   2433		pr_err("%s() invalid opcode received = 0x%x\n",
    2434		       __func__, opcode);
   2435		break;
   2436	}
   2437}
   2438
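        /* Rewrite a CQE's status field in place to WR_FLUSH_ERR, using the
         * UD status bits for UD/GSI receive CQEs and the common status bits
         * otherwise.
         */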
   2439static void ocrdma_set_cqe_status_flushed(struct ocrdma_qp *qp,
   2440						struct ocrdma_cqe *cqe)
   2441{
   2442	if (is_cqe_for_sq(cqe)) {
   2443		cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
   2444				cqe->flags_status_srcqpn) &
   2445					~OCRDMA_CQE_STATUS_MASK);
   2446		cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
   2447				cqe->flags_status_srcqpn) |
   2448				(OCRDMA_CQE_WR_FLUSH_ERR <<
   2449					OCRDMA_CQE_STATUS_SHIFT));
   2450	} else {
   2451		if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
   2452			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
   2453					cqe->flags_status_srcqpn) &
   2454						~OCRDMA_CQE_UD_STATUS_MASK);
   2455			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
   2456					cqe->flags_status_srcqpn) |
   2457					(OCRDMA_CQE_WR_FLUSH_ERR <<
   2458						OCRDMA_CQE_UD_STATUS_SHIFT));
   2459		} else {
   2460			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
   2461					cqe->flags_status_srcqpn) &
   2462						~OCRDMA_CQE_STATUS_MASK);
   2463			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
   2464					cqe->flags_status_srcqpn) |
   2465					(OCRDMA_CQE_WR_FLUSH_ERR <<
   2466						OCRDMA_CQE_STATUS_SHIFT));
   2467		}
   2468	}
   2469}
   2470
   2471static bool ocrdma_update_err_cqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
   2472				  struct ocrdma_qp *qp, int status)
   2473{
   2474	bool expand = false;
   2475
   2476	ibwc->byte_len = 0;
   2477	ibwc->qp = &qp->ibqp;
   2478	ibwc->status = ocrdma_to_ibwc_err(status);
   2479
   2480	ocrdma_flush_qp(qp);
   2481	ocrdma_qp_state_change(qp, IB_QPS_ERR, NULL);
   2482
    2483	/* if wqes/rqes are still pending for which cqes must be returned,
    2484	 * trigger expanding ("inflating") this cqe into flush completions.
    2485	 */
   2486	if (!is_hw_rq_empty(qp) || !is_hw_sq_empty(qp)) {
   2487		expand = true;
   2488		ocrdma_set_cqe_status_flushed(qp, cqe);
   2489	}
   2490	return expand;
   2491}
   2492
   2493static int ocrdma_update_err_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
   2494				  struct ocrdma_qp *qp, int status)
   2495{
   2496	ibwc->opcode = IB_WC_RECV;
   2497	ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
   2498	ocrdma_hwq_inc_tail(&qp->rq);
   2499
   2500	return ocrdma_update_err_cqe(ibwc, cqe, qp, status);
   2501}
   2502
   2503static int ocrdma_update_err_scqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
   2504				  struct ocrdma_qp *qp, int status)
   2505{
   2506	ocrdma_update_wc(qp, ibwc, qp->sq.tail);
   2507	ocrdma_hwq_inc_tail(&qp->sq);
   2508
   2509	return ocrdma_update_err_cqe(ibwc, cqe, qp, status);
   2510}
   2511
   2512
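        /* Handle an error CQE on the send queue.  Returns true ("expand")
         * when further flush completions must be synthesized from this CQE;
         * *polled reports whether ibwc was filled and *stop whether CQ
         * processing should halt here.
         */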
   2513static bool ocrdma_poll_err_scqe(struct ocrdma_qp *qp,
   2514				 struct ocrdma_cqe *cqe, struct ib_wc *ibwc,
   2515				 bool *polled, bool *stop)
   2516{
   2517	bool expand;
   2518	struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
   2519	int status = (le32_to_cpu(cqe->flags_status_srcqpn) &
   2520		OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
   2521	if (status < OCRDMA_MAX_CQE_ERR)
   2522		atomic_inc(&dev->cqe_err_stats[status]);
   2523
    2524	/* when the hw sq is empty but the rq is not, keep the cqe so
    2525	 * that the cq event is raised again.
    2526	 */
   2527	if (is_hw_sq_empty(qp) && !is_hw_rq_empty(qp)) {
    2528		/* when the rq and sq share the same cq, it is safe to
    2529		 * return flush cqes for the RQEs.
    2530		 */
   2531		if (!qp->srq && (qp->sq_cq == qp->rq_cq)) {
   2532			*polled = true;
   2533			status = OCRDMA_CQE_WR_FLUSH_ERR;
   2534			expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
   2535		} else {
    2536			/* stop processing further cqes, as this cqe is used to
    2537			 * trigger the cq event on the RQ's buddy cq.
    2538			 * When the QP is destroyed, this cqe will be removed
    2539			 * from the cq's hardware queue.
    2540			 */
   2541			*polled = false;
   2542			*stop = true;
   2543			expand = false;
   2544		}
   2545	} else if (is_hw_sq_empty(qp)) {
   2546		/* Do nothing */
   2547		expand = false;
   2548		*polled = false;
   2549		*stop = false;
   2550	} else {
   2551		*polled = true;
   2552		expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
   2553	}
   2554	return expand;
   2555}
   2556
   2557static bool ocrdma_poll_success_scqe(struct ocrdma_qp *qp,
   2558				     struct ocrdma_cqe *cqe,
   2559				     struct ib_wc *ibwc, bool *polled)
   2560{
   2561	bool expand = false;
   2562	int tail = qp->sq.tail;
   2563	u32 wqe_idx;
   2564
   2565	if (!qp->wqe_wr_id_tbl[tail].signaled) {
   2566		*polled = false;    /* WC cannot be consumed yet */
   2567	} else {
   2568		ibwc->status = IB_WC_SUCCESS;
   2569		ibwc->wc_flags = 0;
   2570		ibwc->qp = &qp->ibqp;
   2571		ocrdma_update_wc(qp, ibwc, tail);
   2572		*polled = true;
   2573	}
   2574	wqe_idx = (le32_to_cpu(cqe->wq.wqeidx) &
   2575			OCRDMA_CQE_WQEIDX_MASK) & qp->sq.max_wqe_idx;
   2576	if (tail != wqe_idx)
   2577		expand = true; /* Coalesced CQE can't be consumed yet */
   2578
   2579	ocrdma_hwq_inc_tail(&qp->sq);
   2580	return expand;
   2581}
   2582
   2583static bool ocrdma_poll_scqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
   2584			     struct ib_wc *ibwc, bool *polled, bool *stop)
   2585{
   2586	int status;
   2587	bool expand;
   2588
   2589	status = (le32_to_cpu(cqe->flags_status_srcqpn) &
   2590		OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
   2591
   2592	if (status == OCRDMA_CQE_SUCCESS)
   2593		expand = ocrdma_poll_success_scqe(qp, cqe, ibwc, polled);
   2594	else
   2595		expand = ocrdma_poll_err_scqe(qp, cqe, ibwc, polled, stop);
   2596	return expand;
   2597}
   2598
   2599static int ocrdma_update_ud_rcqe(struct ocrdma_dev *dev, struct ib_wc *ibwc,
   2600				 struct ocrdma_cqe *cqe)
   2601{
   2602	int status;
   2603	u16 hdr_type = 0;
   2604
   2605	status = (le32_to_cpu(cqe->flags_status_srcqpn) &
   2606		OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT;
   2607	ibwc->src_qp = le32_to_cpu(cqe->flags_status_srcqpn) &
   2608						OCRDMA_CQE_SRCQP_MASK;
   2609	ibwc->pkey_index = 0;
   2610	ibwc->wc_flags = IB_WC_GRH;
   2611	ibwc->byte_len = (le32_to_cpu(cqe->ud.rxlen_pkey) >>
   2612			  OCRDMA_CQE_UD_XFER_LEN_SHIFT) &
   2613			  OCRDMA_CQE_UD_XFER_LEN_MASK;
   2614
   2615	if (ocrdma_is_udp_encap_supported(dev)) {
   2616		hdr_type = (le32_to_cpu(cqe->ud.rxlen_pkey) >>
   2617			    OCRDMA_CQE_UD_L3TYPE_SHIFT) &
   2618			    OCRDMA_CQE_UD_L3TYPE_MASK;
   2619		ibwc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
   2620		ibwc->network_hdr_type = hdr_type;
   2621	}
   2622
   2623	return status;
   2624}
   2625
   2626static void ocrdma_update_free_srq_cqe(struct ib_wc *ibwc,
   2627				       struct ocrdma_cqe *cqe,
   2628				       struct ocrdma_qp *qp)
   2629{
   2630	unsigned long flags;
   2631	struct ocrdma_srq *srq;
   2632	u32 wqe_idx;
   2633
   2634	srq = get_ocrdma_srq(qp->ibqp.srq);
   2635	wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
   2636		OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx;
   2637	BUG_ON(wqe_idx < 1);
   2638
   2639	ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx];
   2640	spin_lock_irqsave(&srq->q_lock, flags);
   2641	ocrdma_srq_toggle_bit(srq, wqe_idx - 1);
   2642	spin_unlock_irqrestore(&srq->q_lock, flags);
   2643	ocrdma_hwq_inc_tail(&srq->rq);
   2644}
   2645
   2646static bool ocrdma_poll_err_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
   2647				struct ib_wc *ibwc, bool *polled, bool *stop,
   2648				int status)
   2649{
   2650	bool expand;
   2651	struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
   2652
   2653	if (status < OCRDMA_MAX_CQE_ERR)
   2654		atomic_inc(&dev->cqe_err_stats[status]);
   2655
    2656	/* when the hw rq is empty but the sq is not, keep the cqe so
    2657	 * that the cq event is raised again.
    2658	 */
   2659	if (is_hw_rq_empty(qp) && !is_hw_sq_empty(qp)) {
   2660		if (!qp->srq && (qp->sq_cq == qp->rq_cq)) {
   2661			*polled = true;
   2662			status = OCRDMA_CQE_WR_FLUSH_ERR;
   2663			expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
   2664		} else {
   2665			*polled = false;
   2666			*stop = true;
   2667			expand = false;
   2668		}
   2669	} else if (is_hw_rq_empty(qp)) {
   2670		/* Do nothing */
   2671		expand = false;
   2672		*polled = false;
   2673		*stop = false;
   2674	} else {
   2675		*polled = true;
   2676		expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
   2677	}
   2678	return expand;
   2679}
   2680
   2681static void ocrdma_poll_success_rcqe(struct ocrdma_qp *qp,
   2682				     struct ocrdma_cqe *cqe, struct ib_wc *ibwc)
   2683{
   2684	struct ocrdma_dev *dev;
   2685
   2686	dev = get_ocrdma_dev(qp->ibqp.device);
   2687	ibwc->opcode = IB_WC_RECV;
   2688	ibwc->qp = &qp->ibqp;
   2689	ibwc->status = IB_WC_SUCCESS;
   2690
   2691	if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI)
   2692		ocrdma_update_ud_rcqe(dev, ibwc, cqe);
   2693	else
   2694		ibwc->byte_len = le32_to_cpu(cqe->rq.rxlen);
   2695
   2696	if (is_cqe_imm(cqe)) {
   2697		ibwc->ex.imm_data = htonl(le32_to_cpu(cqe->rq.lkey_immdt));
   2698		ibwc->wc_flags |= IB_WC_WITH_IMM;
   2699	} else if (is_cqe_wr_imm(cqe)) {
   2700		ibwc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
   2701		ibwc->ex.imm_data = htonl(le32_to_cpu(cqe->rq.lkey_immdt));
   2702		ibwc->wc_flags |= IB_WC_WITH_IMM;
   2703	} else if (is_cqe_invalidated(cqe)) {
   2704		ibwc->ex.invalidate_rkey = le32_to_cpu(cqe->rq.lkey_immdt);
   2705		ibwc->wc_flags |= IB_WC_WITH_INVALIDATE;
   2706	}
   2707	if (qp->ibqp.srq) {
   2708		ocrdma_update_free_srq_cqe(ibwc, cqe, qp);
   2709	} else {
   2710		ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
   2711		ocrdma_hwq_inc_tail(&qp->rq);
   2712	}
   2713}
   2714
   2715static bool ocrdma_poll_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
   2716			     struct ib_wc *ibwc, bool *polled, bool *stop)
   2717{
   2718	int status;
   2719	bool expand = false;
   2720
   2721	ibwc->wc_flags = 0;
   2722	if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
   2723		status = (le32_to_cpu(cqe->flags_status_srcqpn) &
   2724					OCRDMA_CQE_UD_STATUS_MASK) >>
   2725					OCRDMA_CQE_UD_STATUS_SHIFT;
   2726	} else {
   2727		status = (le32_to_cpu(cqe->flags_status_srcqpn) &
   2728			     OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
   2729	}
   2730
   2731	if (status == OCRDMA_CQE_SUCCESS) {
   2732		*polled = true;
   2733		ocrdma_poll_success_rcqe(qp, cqe, ibwc);
   2734	} else {
   2735		expand = ocrdma_poll_err_rcqe(qp, cqe, ibwc, polled, stop,
   2736					      status);
   2737	}
   2738	return expand;
   2739}
   2740
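        /* Move past a consumed CQE: in phase-change mode flip the expected
         * valid-phase bit when the ring wraps, otherwise clear the CQE's
         * valid bit directly.
         */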
   2741static void ocrdma_change_cq_phase(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe,
   2742				   u16 cur_getp)
   2743{
   2744	if (cq->phase_change) {
   2745		if (cur_getp == 0)
   2746			cq->phase = (~cq->phase & OCRDMA_CQE_VALID);
   2747	} else {
   2748		/* clear valid bit */
   2749		cqe->flags_status_srcqpn = 0;
   2750	}
   2751}
   2752
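        /* Core CQ poll loop: walk valid CQEs starting at cq->getp, dispatch
         * each one to the SQ or RQ handler, expand error CQEs into flush
         * completions where required, and finally ring the CQ doorbell with
         * the number of hardware CQEs consumed.
         */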
   2753static int ocrdma_poll_hwcq(struct ocrdma_cq *cq, int num_entries,
   2754			    struct ib_wc *ibwc)
   2755{
   2756	u16 qpn = 0;
   2757	int i = 0;
   2758	bool expand = false;
   2759	int polled_hw_cqes = 0;
   2760	struct ocrdma_qp *qp = NULL;
   2761	struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device);
   2762	struct ocrdma_cqe *cqe;
    2763	u16 cur_getp; bool polled = false, stop = false;
   2764
   2765	cur_getp = cq->getp;
   2766	while (num_entries) {
   2767		cqe = cq->va + cur_getp;
    2768		/* check whether the cqe is valid */
   2769		if (!is_cqe_valid(cq, cqe))
   2770			break;
   2771		qpn = (le32_to_cpu(cqe->cmn.qpn) & OCRDMA_CQE_QPN_MASK);
   2772		/* ignore discarded cqe */
   2773		if (qpn == 0)
   2774			goto skip_cqe;
   2775		qp = dev->qp_tbl[qpn];
   2776		BUG_ON(qp == NULL);
   2777
   2778		if (is_cqe_for_sq(cqe)) {
   2779			expand = ocrdma_poll_scqe(qp, cqe, ibwc, &polled,
   2780						  &stop);
   2781		} else {
   2782			expand = ocrdma_poll_rcqe(qp, cqe, ibwc, &polled,
   2783						  &stop);
   2784		}
   2785		if (expand)
   2786			goto expand_cqe;
   2787		if (stop)
   2788			goto stop_cqe;
    2789		/* clear qpn to avoid duplicate processing by ocrdma_discard_cqes() */
   2790		cqe->cmn.qpn = 0;
   2791skip_cqe:
   2792		polled_hw_cqes += 1;
   2793		cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
   2794		ocrdma_change_cq_phase(cq, cqe, cur_getp);
   2795expand_cqe:
   2796		if (polled) {
   2797			num_entries -= 1;
   2798			i += 1;
   2799			ibwc = ibwc + 1;
   2800			polled = false;
   2801		}
   2802	}
   2803stop_cqe:
   2804	cq->getp = cur_getp;
   2805
   2806	if (polled_hw_cqes)
   2807		ocrdma_ring_cq_db(dev, cq->id, false, false, polled_hw_cqes);
   2808
   2809	return i;
   2810}
   2811
   2812/* insert error cqe if the QP's SQ or RQ's CQ matches the CQ under poll. */
   2813static int ocrdma_add_err_cqe(struct ocrdma_cq *cq, int num_entries,
   2814			      struct ocrdma_qp *qp, struct ib_wc *ibwc)
   2815{
   2816	int err_cqes = 0;
   2817
   2818	while (num_entries) {
   2819		if (is_hw_sq_empty(qp) && is_hw_rq_empty(qp))
   2820			break;
   2821		if (!is_hw_sq_empty(qp) && qp->sq_cq == cq) {
   2822			ocrdma_update_wc(qp, ibwc, qp->sq.tail);
   2823			ocrdma_hwq_inc_tail(&qp->sq);
   2824		} else if (!is_hw_rq_empty(qp) && qp->rq_cq == cq) {
   2825			ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
   2826			ocrdma_hwq_inc_tail(&qp->rq);
   2827		} else {
   2828			return err_cqes;
   2829		}
   2830		ibwc->byte_len = 0;
   2831		ibwc->status = IB_WC_WR_FLUSH_ERR;
   2832		ibwc = ibwc + 1;
   2833		err_cqes += 1;
   2834		num_entries -= 1;
   2835	}
   2836	return err_cqes;
   2837}
   2838
   2839int ocrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
   2840{
   2841	int cqes_to_poll = num_entries;
   2842	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
   2843	struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
   2844	int num_os_cqe = 0, err_cqes = 0;
   2845	struct ocrdma_qp *qp;
   2846	unsigned long flags;
   2847
   2848	/* poll cqes from adapter CQ */
   2849	spin_lock_irqsave(&cq->cq_lock, flags);
   2850	num_os_cqe = ocrdma_poll_hwcq(cq, cqes_to_poll, wc);
   2851	spin_unlock_irqrestore(&cq->cq_lock, flags);
   2852	cqes_to_poll -= num_os_cqe;
   2853
   2854	if (cqes_to_poll) {
   2855		wc = wc + num_os_cqe;
    2856		/* the adapter returns a single error cqe when a qp moves to
    2857		 * the error state, so insert error cqes with wc_status set to
    2858		 * FLUSHED for the pending WQEs and RQEs of each QP whose SQ
    2859		 * or RQ uses this CQ.
    2860		 */
   2861		spin_lock_irqsave(&dev->flush_q_lock, flags);
   2862		list_for_each_entry(qp, &cq->sq_head, sq_entry) {
   2863			if (cqes_to_poll == 0)
   2864				break;
   2865			err_cqes = ocrdma_add_err_cqe(cq, cqes_to_poll, qp, wc);
   2866			cqes_to_poll -= err_cqes;
   2867			num_os_cqe += err_cqes;
   2868			wc = wc + err_cqes;
   2869		}
   2870		spin_unlock_irqrestore(&dev->flush_q_lock, flags);
   2871	}
   2872	return num_os_cqe;
   2873}
   2874
   2875int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags)
   2876{
   2877	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
   2878	struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
   2879	u16 cq_id;
   2880	unsigned long flags;
   2881	bool arm_needed = false, sol_needed = false;
   2882
   2883	cq_id = cq->id;
   2884
   2885	spin_lock_irqsave(&cq->cq_lock, flags);
   2886	if (cq_flags & IB_CQ_NEXT_COMP || cq_flags & IB_CQ_SOLICITED)
   2887		arm_needed = true;
   2888	if (cq_flags & IB_CQ_SOLICITED)
   2889		sol_needed = true;
   2890
   2891	ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0);
   2892	spin_unlock_irqrestore(&cq->cq_lock, flags);
   2893
   2894	return 0;
   2895}
   2896
   2897struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
   2898			      u32 max_num_sg)
   2899{
   2900	int status;
   2901	struct ocrdma_mr *mr;
   2902	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
   2903	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
   2904
   2905	if (mr_type != IB_MR_TYPE_MEM_REG)
   2906		return ERR_PTR(-EINVAL);
   2907
   2908	if (max_num_sg > dev->attr.max_pages_per_frmr)
   2909		return ERR_PTR(-EINVAL);
   2910
   2911	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
   2912	if (!mr)
   2913		return ERR_PTR(-ENOMEM);
   2914
   2915	mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
   2916	if (!mr->pages) {
   2917		status = -ENOMEM;
   2918		goto pl_err;
   2919	}
   2920
   2921	status = ocrdma_get_pbl_info(dev, mr, max_num_sg);
   2922	if (status)
   2923		goto pbl_err;
   2924	mr->hwmr.fr_mr = 1;
   2925	mr->hwmr.remote_rd = 0;
   2926	mr->hwmr.remote_wr = 0;
   2927	mr->hwmr.local_rd = 0;
   2928	mr->hwmr.local_wr = 0;
   2929	mr->hwmr.mw_bind = 0;
   2930	status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
   2931	if (status)
   2932		goto pbl_err;
   2933	status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, 0);
   2934	if (status)
   2935		goto mbx_err;
   2936	mr->ibmr.rkey = mr->hwmr.lkey;
   2937	mr->ibmr.lkey = mr->hwmr.lkey;
   2938	dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] =
   2939		(unsigned long) mr;
   2940	return &mr->ibmr;
   2941mbx_err:
   2942	ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
   2943pbl_err:
   2944	kfree(mr->pages);
   2945pl_err:
   2946	kfree(mr);
    2947	return ERR_PTR(status);
   2948}
   2949
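        /* ib_sg_to_pages() callback: record one page address for the MR
         * being mapped, bounded by the number of PBEs reserved for it.
         */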
   2950static int ocrdma_set_page(struct ib_mr *ibmr, u64 addr)
   2951{
   2952	struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
   2953
   2954	if (unlikely(mr->npages == mr->hwmr.num_pbes))
   2955		return -ENOMEM;
   2956
   2957	mr->pages[mr->npages++] = addr;
   2958
   2959	return 0;
   2960}
   2961
   2962int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
   2963		     unsigned int *sg_offset)
   2964{
   2965	struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
   2966
   2967	mr->npages = 0;
   2968
   2969	return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, ocrdma_set_page);
   2970}