cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack.
git clone https://git.sinitax.com/sinitax/cachepc-linux

efa_verbs.c (55785B)


      1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
      2/*
      3 * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
      4 */
      5
      6#include <linux/dma-buf.h>
      7#include <linux/dma-resv.h>
      8#include <linux/vmalloc.h>
      9#include <linux/log2.h>
     10
     11#include <rdma/ib_addr.h>
     12#include <rdma/ib_umem.h>
     13#include <rdma/ib_user_verbs.h>
     14#include <rdma/ib_verbs.h>
     15#include <rdma/uverbs_ioctl.h>
     16
     17#include "efa.h"
     18
     19enum {
     20	EFA_MMAP_DMA_PAGE = 0,
     21	EFA_MMAP_IO_WC,
     22	EFA_MMAP_IO_NC,
     23};
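/*
 * [Annotation added for this listing; not part of the upstream driver.]
 * These flags record how an entry must be mapped into userspace by the
 * driver's mmap handler (implemented later in this file):
 * EFA_MMAP_DMA_PAGE covers kernel-allocated queue memory (RQ ring, CQ
 * buffer), EFA_MMAP_IO_WC covers write-combined BAR space (the LLQ
 * descriptors) and EFA_MMAP_IO_NC covers uncached BAR space (the SQ/RQ
 * and CQ doorbells), matching their use in qp_mmap_entries_setup() and
 * cq_mmap_entries_setup() below.
 */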
     24
     25#define EFA_AENQ_ENABLED_GROUPS \
     26	(BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
     27	 BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))
     28
     29struct efa_user_mmap_entry {
     30	struct rdma_user_mmap_entry rdma_entry;
     31	u64 address;
     32	u8 mmap_flag;
     33};
     34
     35#define EFA_DEFINE_DEVICE_STATS(op) \
     36	op(EFA_SUBMITTED_CMDS, "submitted_cmds") \
     37	op(EFA_COMPLETED_CMDS, "completed_cmds") \
     38	op(EFA_CMDS_ERR, "cmds_err") \
     39	op(EFA_NO_COMPLETION_CMDS, "no_completion_cmds") \
     40	op(EFA_KEEP_ALIVE_RCVD, "keep_alive_rcvd") \
     41	op(EFA_ALLOC_PD_ERR, "alloc_pd_err") \
     42	op(EFA_CREATE_QP_ERR, "create_qp_err") \
     43	op(EFA_CREATE_CQ_ERR, "create_cq_err") \
     44	op(EFA_REG_MR_ERR, "reg_mr_err") \
     45	op(EFA_ALLOC_UCONTEXT_ERR, "alloc_ucontext_err") \
     46	op(EFA_CREATE_AH_ERR, "create_ah_err") \
     47	op(EFA_MMAP_ERR, "mmap_err")
     48
     49#define EFA_DEFINE_PORT_STATS(op) \
     50	op(EFA_TX_BYTES, "tx_bytes") \
     51	op(EFA_TX_PKTS, "tx_pkts") \
     52	op(EFA_RX_BYTES, "rx_bytes") \
     53	op(EFA_RX_PKTS, "rx_pkts") \
     54	op(EFA_RX_DROPS, "rx_drops") \
     55	op(EFA_SEND_BYTES, "send_bytes") \
     56	op(EFA_SEND_WRS, "send_wrs") \
     57	op(EFA_RECV_BYTES, "recv_bytes") \
     58	op(EFA_RECV_WRS, "recv_wrs") \
     59	op(EFA_RDMA_READ_WRS, "rdma_read_wrs") \
     60	op(EFA_RDMA_READ_BYTES, "rdma_read_bytes") \
     61	op(EFA_RDMA_READ_WR_ERR, "rdma_read_wr_err") \
     62	op(EFA_RDMA_READ_RESP_BYTES, "rdma_read_resp_bytes") \
     63
     64#define EFA_STATS_ENUM(ename, name) ename,
     65#define EFA_STATS_STR(ename, nam) \
     66	[ename].name = nam,
     67
     68enum efa_hw_device_stats {
     69	EFA_DEFINE_DEVICE_STATS(EFA_STATS_ENUM)
     70};
     71
     72static const struct rdma_stat_desc efa_device_stats_descs[] = {
     73	EFA_DEFINE_DEVICE_STATS(EFA_STATS_STR)
     74};
     75
     76enum efa_hw_port_stats {
     77	EFA_DEFINE_PORT_STATS(EFA_STATS_ENUM)
     78};
     79
     80static const struct rdma_stat_desc efa_port_stats_descs[] = {
     81	EFA_DEFINE_PORT_STATS(EFA_STATS_STR)
     82};
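/*
 * [Annotation added for this listing; not part of the upstream driver.]
 * The EFA_DEFINE_*_STATS() lists above are X-macros expanded twice:
 * once through EFA_STATS_ENUM to build the enum, roughly
 *
 *	enum efa_hw_device_stats { EFA_SUBMITTED_CMDS, EFA_COMPLETED_CMDS, ... };
 *
 * and once through EFA_STATS_STR to build the matching descriptor
 * table, roughly
 *
 *	[EFA_SUBMITTED_CMDS].name = "submitted_cmds",
 *
 * so each enum value indexes its own rdma_stat_desc entry.
 */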
     83
     84#define EFA_CHUNK_PAYLOAD_SHIFT       12
     85#define EFA_CHUNK_PAYLOAD_SIZE        BIT(EFA_CHUNK_PAYLOAD_SHIFT)
     86#define EFA_CHUNK_PAYLOAD_PTR_SIZE    8
     87
     88#define EFA_CHUNK_SHIFT               12
     89#define EFA_CHUNK_SIZE                BIT(EFA_CHUNK_SHIFT)
     90#define EFA_CHUNK_PTR_SIZE            sizeof(struct efa_com_ctrl_buff_info)
     91
     92#define EFA_PTRS_PER_CHUNK \
     93	((EFA_CHUNK_SIZE - EFA_CHUNK_PTR_SIZE) / EFA_CHUNK_PAYLOAD_PTR_SIZE)
     94
     95#define EFA_CHUNK_USED_SIZE \
     96	((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE)
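/*
 * [Annotation added for this listing; not part of the upstream driver.]
 * Worked example of the chunk geometry, assuming
 * sizeof(struct efa_com_ctrl_buff_info) == 12 (a u32 length plus a
 * 64-bit address split into two u32 halves):
 *
 *	EFA_CHUNK_SIZE      = 4096
 *	EFA_PTRS_PER_CHUNK  = (4096 - 12) / 8 = 510 page pointers
 *	EFA_CHUNK_USED_SIZE = 510 * 8 + 12    = 4092 bytes
 *
 * leaving 4 unused bytes per 4 KiB chunk; the trailing ctrl_buff_info
 * slot of each chunk holds the length and DMA address of the next one
 * (see pbl_chunk_list_create()).
 */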
     97
     98struct pbl_chunk {
     99	dma_addr_t dma_addr;
    100	u64 *buf;
    101	u32 length;
    102};
    103
    104struct pbl_chunk_list {
    105	struct pbl_chunk *chunks;
    106	unsigned int size;
    107};
    108
    109struct pbl_context {
    110	union {
    111		struct {
    112			dma_addr_t dma_addr;
    113		} continuous;
    114		struct {
    115			u32 pbl_buf_size_in_pages;
    116			struct scatterlist *sgl;
    117			int sg_dma_cnt;
    118			struct pbl_chunk_list chunk_list;
    119		} indirect;
    120	} phys;
    121	u64 *pbl_buf;
    122	u32 pbl_buf_size_in_bytes;
    123	u8 physically_continuous;
    124};
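/*
 * [Annotation added for this listing; not part of the upstream driver.]
 * A PBL is kept in one of two forms, chosen in pbl_create() below: if
 * kvzalloc() returned physically contiguous memory, the buffer is
 * DMA-mapped as a whole and phys.continuous.dma_addr is handed to the
 * device; if it landed in vmalloc space, its pages are gathered into a
 * scatterlist and linked into 4 KiB chunks (phys.indirect.chunk_list),
 * and only the first chunk's address is passed to the device.
 */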
    125
    126static inline struct efa_dev *to_edev(struct ib_device *ibdev)
    127{
    128	return container_of(ibdev, struct efa_dev, ibdev);
    129}
    130
    131static inline struct efa_ucontext *to_eucontext(struct ib_ucontext *ibucontext)
    132{
    133	return container_of(ibucontext, struct efa_ucontext, ibucontext);
    134}
    135
    136static inline struct efa_pd *to_epd(struct ib_pd *ibpd)
    137{
    138	return container_of(ibpd, struct efa_pd, ibpd);
    139}
    140
    141static inline struct efa_mr *to_emr(struct ib_mr *ibmr)
    142{
    143	return container_of(ibmr, struct efa_mr, ibmr);
    144}
    145
    146static inline struct efa_qp *to_eqp(struct ib_qp *ibqp)
    147{
    148	return container_of(ibqp, struct efa_qp, ibqp);
    149}
    150
    151static inline struct efa_cq *to_ecq(struct ib_cq *ibcq)
    152{
    153	return container_of(ibcq, struct efa_cq, ibcq);
    154}
    155
    156static inline struct efa_ah *to_eah(struct ib_ah *ibah)
    157{
    158	return container_of(ibah, struct efa_ah, ibah);
    159}
    160
    161static inline struct efa_user_mmap_entry *
    162to_emmap(struct rdma_user_mmap_entry *rdma_entry)
    163{
    164	return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry);
    165}
    166
    167#define EFA_DEV_CAP(dev, cap) \
    168	((dev)->dev_attr.device_caps & \
    169	 EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_##cap##_MASK)
    170
    171#define is_reserved_cleared(reserved) \
    172	!memchr_inv(reserved, 0, sizeof(reserved))
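/*
 * [Annotation added for this listing; not part of the upstream driver.]
 * EFA_DEV_CAP() token-pastes the capability name into the matching
 * admin-queue mask, e.g. EFA_DEV_CAP(dev, RDMA_READ) expands to
 *
 *	((dev)->dev_attr.device_caps &
 *	 EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK)
 *
 * and is_reserved_cleared() is true only when memchr_inv() finds no
 * non-zero byte in the reserved field.
 */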
    173
    174static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr,
    175			       size_t size, enum dma_data_direction dir)
    176{
    177	void *addr;
    178
    179	addr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
    180	if (!addr)
    181		return NULL;
    182
    183	*dma_addr = dma_map_single(&dev->pdev->dev, addr, size, dir);
    184	if (dma_mapping_error(&dev->pdev->dev, *dma_addr)) {
    185		ibdev_err(&dev->ibdev, "Failed to map DMA address\n");
    186		free_pages_exact(addr, size);
    187		return NULL;
    188	}
    189
    190	return addr;
    191}
    192
    193static void efa_free_mapped(struct efa_dev *dev, void *cpu_addr,
    194			    dma_addr_t dma_addr,
    195			    size_t size, enum dma_data_direction dir)
    196{
    197	dma_unmap_single(&dev->pdev->dev, dma_addr, size, dir);
    198	free_pages_exact(cpu_addr, size);
    199}
    200
    201int efa_query_device(struct ib_device *ibdev,
    202		     struct ib_device_attr *props,
    203		     struct ib_udata *udata)
    204{
    205	struct efa_com_get_device_attr_result *dev_attr;
    206	struct efa_ibv_ex_query_device_resp resp = {};
    207	struct efa_dev *dev = to_edev(ibdev);
    208	int err;
    209
    210	if (udata && udata->inlen &&
    211	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
    212		ibdev_dbg(ibdev,
    213			  "Incompatible ABI params, udata not cleared\n");
    214		return -EINVAL;
    215	}
    216
    217	dev_attr = &dev->dev_attr;
    218
    219	memset(props, 0, sizeof(*props));
    220	props->max_mr_size = dev_attr->max_mr_pages * PAGE_SIZE;
    221	props->page_size_cap = dev_attr->page_size_cap;
    222	props->vendor_id = dev->pdev->vendor;
    223	props->vendor_part_id = dev->pdev->device;
    224	props->hw_ver = dev->pdev->subsystem_device;
    225	props->max_qp = dev_attr->max_qp;
    226	props->max_cq = dev_attr->max_cq;
    227	props->max_pd = dev_attr->max_pd;
    228	props->max_mr = dev_attr->max_mr;
    229	props->max_ah = dev_attr->max_ah;
    230	props->max_cqe = dev_attr->max_cq_depth;
    231	props->max_qp_wr = min_t(u32, dev_attr->max_sq_depth,
    232				 dev_attr->max_rq_depth);
    233	props->max_send_sge = dev_attr->max_sq_sge;
    234	props->max_recv_sge = dev_attr->max_rq_sge;
    235	props->max_sge_rd = dev_attr->max_wr_rdma_sge;
    236	props->max_pkeys = 1;
    237
    238	if (udata && udata->outlen) {
    239		resp.max_sq_sge = dev_attr->max_sq_sge;
    240		resp.max_rq_sge = dev_attr->max_rq_sge;
    241		resp.max_sq_wr = dev_attr->max_sq_depth;
    242		resp.max_rq_wr = dev_attr->max_rq_depth;
    243		resp.max_rdma_size = dev_attr->max_rdma_size;
    244
    245		if (EFA_DEV_CAP(dev, RDMA_READ))
    246			resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ;
    247
    248		if (EFA_DEV_CAP(dev, RNR_RETRY))
    249			resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RNR_RETRY;
    250
    251		if (dev->neqs)
    252			resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS;
    253
    254		err = ib_copy_to_udata(udata, &resp,
    255				       min(sizeof(resp), udata->outlen));
    256		if (err) {
    257			ibdev_dbg(ibdev,
    258				  "Failed to copy udata for query_device\n");
    259			return err;
    260		}
    261	}
    262
    263	return 0;
    264}
    265
    266int efa_query_port(struct ib_device *ibdev, u32 port,
    267		   struct ib_port_attr *props)
    268{
    269	struct efa_dev *dev = to_edev(ibdev);
    270
    271	props->lmc = 1;
    272
    273	props->state = IB_PORT_ACTIVE;
    274	props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
    275	props->gid_tbl_len = 1;
    276	props->pkey_tbl_len = 1;
    277	props->active_speed = IB_SPEED_EDR;
    278	props->active_width = IB_WIDTH_4X;
    279	props->max_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
    280	props->active_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
    281	props->max_msg_sz = dev->dev_attr.mtu;
    282	props->max_vl_num = 1;
    283
    284	return 0;
    285}
    286
    287int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
    288		 int qp_attr_mask,
    289		 struct ib_qp_init_attr *qp_init_attr)
    290{
    291	struct efa_dev *dev = to_edev(ibqp->device);
    292	struct efa_com_query_qp_params params = {};
    293	struct efa_com_query_qp_result result;
    294	struct efa_qp *qp = to_eqp(ibqp);
    295	int err;
    296
    297#define EFA_QUERY_QP_SUPP_MASK \
    298	(IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \
    299	 IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP | IB_QP_RNR_RETRY)
    300
    301	if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) {
    302		ibdev_dbg(&dev->ibdev,
    303			  "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
    304			  qp_attr_mask, EFA_QUERY_QP_SUPP_MASK);
    305		return -EOPNOTSUPP;
    306	}
    307
    308	memset(qp_attr, 0, sizeof(*qp_attr));
    309	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
    310
    311	params.qp_handle = qp->qp_handle;
    312	err = efa_com_query_qp(&dev->edev, &params, &result);
    313	if (err)
    314		return err;
    315
    316	qp_attr->qp_state = result.qp_state;
    317	qp_attr->qkey = result.qkey;
    318	qp_attr->sq_psn = result.sq_psn;
    319	qp_attr->sq_draining = result.sq_draining;
    320	qp_attr->port_num = 1;
    321	qp_attr->rnr_retry = result.rnr_retry;
    322
    323	qp_attr->cap.max_send_wr = qp->max_send_wr;
    324	qp_attr->cap.max_recv_wr = qp->max_recv_wr;
    325	qp_attr->cap.max_send_sge = qp->max_send_sge;
    326	qp_attr->cap.max_recv_sge = qp->max_recv_sge;
    327	qp_attr->cap.max_inline_data = qp->max_inline_data;
    328
    329	qp_init_attr->qp_type = ibqp->qp_type;
    330	qp_init_attr->recv_cq = ibqp->recv_cq;
    331	qp_init_attr->send_cq = ibqp->send_cq;
    332	qp_init_attr->qp_context = ibqp->qp_context;
    333	qp_init_attr->cap = qp_attr->cap;
    334
    335	return 0;
    336}
    337
    338int efa_query_gid(struct ib_device *ibdev, u32 port, int index,
    339		  union ib_gid *gid)
    340{
    341	struct efa_dev *dev = to_edev(ibdev);
    342
    343	memcpy(gid->raw, dev->dev_attr.addr, sizeof(dev->dev_attr.addr));
    344
    345	return 0;
    346}
    347
    348int efa_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
    349		   u16 *pkey)
    350{
    351	if (index > 0)
    352		return -EINVAL;
    353
    354	*pkey = 0xffff;
    355	return 0;
    356}
    357
    358static int efa_pd_dealloc(struct efa_dev *dev, u16 pdn)
    359{
    360	struct efa_com_dealloc_pd_params params = {
    361		.pdn = pdn,
    362	};
    363
    364	return efa_com_dealloc_pd(&dev->edev, &params);
    365}
    366
    367int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
    368{
    369	struct efa_dev *dev = to_edev(ibpd->device);
    370	struct efa_ibv_alloc_pd_resp resp = {};
    371	struct efa_com_alloc_pd_result result;
    372	struct efa_pd *pd = to_epd(ibpd);
    373	int err;
    374
    375	if (udata->inlen &&
    376	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
    377		ibdev_dbg(&dev->ibdev,
    378			  "Incompatible ABI params, udata not cleared\n");
    379		err = -EINVAL;
    380		goto err_out;
    381	}
    382
    383	err = efa_com_alloc_pd(&dev->edev, &result);
    384	if (err)
    385		goto err_out;
    386
    387	pd->pdn = result.pdn;
    388	resp.pdn = result.pdn;
    389
    390	if (udata->outlen) {
    391		err = ib_copy_to_udata(udata, &resp,
    392				       min(sizeof(resp), udata->outlen));
    393		if (err) {
    394			ibdev_dbg(&dev->ibdev,
    395				  "Failed to copy udata for alloc_pd\n");
    396			goto err_dealloc_pd;
    397		}
    398	}
    399
    400	ibdev_dbg(&dev->ibdev, "Allocated pd[%d]\n", pd->pdn);
    401
    402	return 0;
    403
    404err_dealloc_pd:
    405	efa_pd_dealloc(dev, result.pdn);
    406err_out:
    407	atomic64_inc(&dev->stats.alloc_pd_err);
    408	return err;
    409}
    410
    411int efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
    412{
    413	struct efa_dev *dev = to_edev(ibpd->device);
    414	struct efa_pd *pd = to_epd(ibpd);
    415
    416	ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn);
    417	efa_pd_dealloc(dev, pd->pdn);
    418	return 0;
    419}
    420
    421static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle)
    422{
    423	struct efa_com_destroy_qp_params params = { .qp_handle = qp_handle };
    424
    425	return efa_com_destroy_qp(&dev->edev, &params);
    426}
    427
    428static void efa_qp_user_mmap_entries_remove(struct efa_qp *qp)
    429{
    430	rdma_user_mmap_entry_remove(qp->rq_mmap_entry);
    431	rdma_user_mmap_entry_remove(qp->rq_db_mmap_entry);
    432	rdma_user_mmap_entry_remove(qp->llq_desc_mmap_entry);
    433	rdma_user_mmap_entry_remove(qp->sq_db_mmap_entry);
    434}
    435
    436int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
    437{
    438	struct efa_dev *dev = to_edev(ibqp->pd->device);
    439	struct efa_qp *qp = to_eqp(ibqp);
    440	int err;
    441
    442	ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num);
    443
    444	efa_qp_user_mmap_entries_remove(qp);
    445
    446	err = efa_destroy_qp_handle(dev, qp->qp_handle);
    447	if (err)
    448		return err;
    449
    450	if (qp->rq_cpu_addr) {
    451		ibdev_dbg(&dev->ibdev,
    452			  "qp->cpu_addr[0x%p] freed: size[%lu], dma[%pad]\n",
    453			  qp->rq_cpu_addr, qp->rq_size,
    454			  &qp->rq_dma_addr);
    455		efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr,
    456				qp->rq_size, DMA_TO_DEVICE);
    457	}
    458
    459	return 0;
    460}
    461
    462static struct rdma_user_mmap_entry*
    463efa_user_mmap_entry_insert(struct ib_ucontext *ucontext,
    464			   u64 address, size_t length,
    465			   u8 mmap_flag, u64 *offset)
    466{
    467	struct efa_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
    468	int err;
    469
    470	if (!entry)
    471		return NULL;
    472
    473	entry->address = address;
    474	entry->mmap_flag = mmap_flag;
    475
    476	err = rdma_user_mmap_entry_insert(ucontext, &entry->rdma_entry,
    477					  length);
    478	if (err) {
    479		kfree(entry);
    480		return NULL;
    481	}
    482	*offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
    483
    484	return &entry->rdma_entry;
    485}
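/*
 * [Annotation added for this listing; not part of the upstream driver.]
 * The key returned through *offset travels back to userspace in the
 * create_qp/create_cq response and is later used as the mmap() offset
 * on the uverbs command fd; a rough userspace sketch (names are
 * illustrative, not taken from libibverbs):
 *
 *	sq_db = mmap(NULL, PAGE_SIZE, PROT_WRITE, MAP_SHARED,
 *		     cmd_fd, resp.sq_db_mmap_key);
 *
 * The driver's mmap handler then looks the entry up by that offset and
 * maps entry->address according to entry->mmap_flag.
 */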
    486
    487static int qp_mmap_entries_setup(struct efa_qp *qp,
    488				 struct efa_dev *dev,
    489				 struct efa_ucontext *ucontext,
    490				 struct efa_com_create_qp_params *params,
    491				 struct efa_ibv_create_qp_resp *resp)
    492{
    493	size_t length;
    494	u64 address;
    495
    496	address = dev->db_bar_addr + resp->sq_db_offset;
    497	qp->sq_db_mmap_entry =
    498		efa_user_mmap_entry_insert(&ucontext->ibucontext,
    499					   address,
    500					   PAGE_SIZE, EFA_MMAP_IO_NC,
    501					   &resp->sq_db_mmap_key);
    502	if (!qp->sq_db_mmap_entry)
    503		return -ENOMEM;
    504
    505	resp->sq_db_offset &= ~PAGE_MASK;
    506
    507	address = dev->mem_bar_addr + resp->llq_desc_offset;
    508	length = PAGE_ALIGN(params->sq_ring_size_in_bytes +
    509			    (resp->llq_desc_offset & ~PAGE_MASK));
    510
    511	qp->llq_desc_mmap_entry =
    512		efa_user_mmap_entry_insert(&ucontext->ibucontext,
    513					   address, length,
    514					   EFA_MMAP_IO_WC,
    515					   &resp->llq_desc_mmap_key);
    516	if (!qp->llq_desc_mmap_entry)
    517		goto err_remove_mmap;
    518
    519	resp->llq_desc_offset &= ~PAGE_MASK;
    520
    521	if (qp->rq_size) {
    522		address = dev->db_bar_addr + resp->rq_db_offset;
    523
    524		qp->rq_db_mmap_entry =
    525			efa_user_mmap_entry_insert(&ucontext->ibucontext,
    526						   address, PAGE_SIZE,
    527						   EFA_MMAP_IO_NC,
    528						   &resp->rq_db_mmap_key);
    529		if (!qp->rq_db_mmap_entry)
    530			goto err_remove_mmap;
    531
    532		resp->rq_db_offset &= ~PAGE_MASK;
    533
    534		address = virt_to_phys(qp->rq_cpu_addr);
    535		qp->rq_mmap_entry =
    536			efa_user_mmap_entry_insert(&ucontext->ibucontext,
    537						   address, qp->rq_size,
    538						   EFA_MMAP_DMA_PAGE,
    539						   &resp->rq_mmap_key);
    540		if (!qp->rq_mmap_entry)
    541			goto err_remove_mmap;
    542
    543		resp->rq_mmap_size = qp->rq_size;
    544	}
    545
    546	return 0;
    547
    548err_remove_mmap:
    549	efa_qp_user_mmap_entries_remove(qp);
    550
    551	return -ENOMEM;
    552}
    553
    554static int efa_qp_validate_cap(struct efa_dev *dev,
    555			       struct ib_qp_init_attr *init_attr)
    556{
    557	if (init_attr->cap.max_send_wr > dev->dev_attr.max_sq_depth) {
    558		ibdev_dbg(&dev->ibdev,
    559			  "qp: requested send wr[%u] exceeds the max[%u]\n",
    560			  init_attr->cap.max_send_wr,
    561			  dev->dev_attr.max_sq_depth);
    562		return -EINVAL;
    563	}
    564	if (init_attr->cap.max_recv_wr > dev->dev_attr.max_rq_depth) {
    565		ibdev_dbg(&dev->ibdev,
    566			  "qp: requested receive wr[%u] exceeds the max[%u]\n",
    567			  init_attr->cap.max_recv_wr,
    568			  dev->dev_attr.max_rq_depth);
    569		return -EINVAL;
    570	}
    571	if (init_attr->cap.max_send_sge > dev->dev_attr.max_sq_sge) {
    572		ibdev_dbg(&dev->ibdev,
    573			  "qp: requested sge send[%u] exceeds the max[%u]\n",
    574			  init_attr->cap.max_send_sge, dev->dev_attr.max_sq_sge);
    575		return -EINVAL;
    576	}
    577	if (init_attr->cap.max_recv_sge > dev->dev_attr.max_rq_sge) {
    578		ibdev_dbg(&dev->ibdev,
    579			  "qp: requested sge recv[%u] exceeds the max[%u]\n",
    580			  init_attr->cap.max_recv_sge, dev->dev_attr.max_rq_sge);
    581		return -EINVAL;
    582	}
    583	if (init_attr->cap.max_inline_data > dev->dev_attr.inline_buf_size) {
    584		ibdev_dbg(&dev->ibdev,
    585			  "qp: requested inline data[%u] exceeds the max[%u]\n",
    586			  init_attr->cap.max_inline_data,
    587			  dev->dev_attr.inline_buf_size);
    588		return -EINVAL;
    589	}
    590
    591	return 0;
    592}
    593
    594static int efa_qp_validate_attr(struct efa_dev *dev,
    595				struct ib_qp_init_attr *init_attr)
    596{
    597	if (init_attr->qp_type != IB_QPT_DRIVER &&
    598	    init_attr->qp_type != IB_QPT_UD) {
    599		ibdev_dbg(&dev->ibdev,
    600			  "Unsupported qp type %d\n", init_attr->qp_type);
    601		return -EOPNOTSUPP;
    602	}
    603
    604	if (init_attr->srq) {
    605		ibdev_dbg(&dev->ibdev, "SRQ is not supported\n");
    606		return -EOPNOTSUPP;
    607	}
    608
    609	if (init_attr->create_flags) {
    610		ibdev_dbg(&dev->ibdev, "Unsupported create flags\n");
    611		return -EOPNOTSUPP;
    612	}
    613
    614	return 0;
    615}
    616
    617int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
    618		  struct ib_udata *udata)
    619{
    620	struct efa_com_create_qp_params create_qp_params = {};
    621	struct efa_com_create_qp_result create_qp_resp;
    622	struct efa_dev *dev = to_edev(ibqp->device);
    623	struct efa_ibv_create_qp_resp resp = {};
    624	struct efa_ibv_create_qp cmd = {};
    625	struct efa_qp *qp = to_eqp(ibqp);
    626	struct efa_ucontext *ucontext;
    627	int err;
    628
    629	ucontext = rdma_udata_to_drv_context(udata, struct efa_ucontext,
    630					     ibucontext);
    631
    632	err = efa_qp_validate_cap(dev, init_attr);
    633	if (err)
    634		goto err_out;
    635
    636	err = efa_qp_validate_attr(dev, init_attr);
    637	if (err)
    638		goto err_out;
    639
    640	if (offsetofend(typeof(cmd), driver_qp_type) > udata->inlen) {
    641		ibdev_dbg(&dev->ibdev,
    642			  "Incompatible ABI params, no input udata\n");
    643		err = -EINVAL;
    644		goto err_out;
    645	}
    646
    647	if (udata->inlen > sizeof(cmd) &&
    648	    !ib_is_udata_cleared(udata, sizeof(cmd),
    649				 udata->inlen - sizeof(cmd))) {
    650		ibdev_dbg(&dev->ibdev,
    651			  "Incompatible ABI params, unknown fields in udata\n");
    652		err = -EINVAL;
    653		goto err_out;
    654	}
    655
    656	err = ib_copy_from_udata(&cmd, udata,
    657				 min(sizeof(cmd), udata->inlen));
    658	if (err) {
    659		ibdev_dbg(&dev->ibdev,
    660			  "Cannot copy udata for create_qp\n");
    661		goto err_out;
    662	}
    663
    664	if (cmd.comp_mask) {
    665		ibdev_dbg(&dev->ibdev,
    666			  "Incompatible ABI params, unknown fields in udata\n");
    667		err = -EINVAL;
    668		goto err_out;
    669	}
    670
    671	create_qp_params.uarn = ucontext->uarn;
    672	create_qp_params.pd = to_epd(ibqp->pd)->pdn;
    673
    674	if (init_attr->qp_type == IB_QPT_UD) {
    675		create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_UD;
    676	} else if (cmd.driver_qp_type == EFA_QP_DRIVER_TYPE_SRD) {
    677		create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_SRD;
    678	} else {
    679		ibdev_dbg(&dev->ibdev,
    680			  "Unsupported qp type %d driver qp type %d\n",
    681			  init_attr->qp_type, cmd.driver_qp_type);
    682		err = -EOPNOTSUPP;
    683		goto err_out;
    684	}
    685
    686	ibdev_dbg(&dev->ibdev, "Create QP: qp type %d driver qp type %#x\n",
    687		  init_attr->qp_type, cmd.driver_qp_type);
    688	create_qp_params.send_cq_idx = to_ecq(init_attr->send_cq)->cq_idx;
    689	create_qp_params.recv_cq_idx = to_ecq(init_attr->recv_cq)->cq_idx;
    690	create_qp_params.sq_depth = init_attr->cap.max_send_wr;
    691	create_qp_params.sq_ring_size_in_bytes = cmd.sq_ring_size;
    692
    693	create_qp_params.rq_depth = init_attr->cap.max_recv_wr;
    694	create_qp_params.rq_ring_size_in_bytes = cmd.rq_ring_size;
    695	qp->rq_size = PAGE_ALIGN(create_qp_params.rq_ring_size_in_bytes);
    696	if (qp->rq_size) {
    697		qp->rq_cpu_addr = efa_zalloc_mapped(dev, &qp->rq_dma_addr,
    698						    qp->rq_size, DMA_TO_DEVICE);
    699		if (!qp->rq_cpu_addr) {
    700			err = -ENOMEM;
    701			goto err_out;
    702		}
    703
    704		ibdev_dbg(&dev->ibdev,
    705			  "qp->cpu_addr[0x%p] allocated: size[%lu], dma[%pad]\n",
    706			  qp->rq_cpu_addr, qp->rq_size, &qp->rq_dma_addr);
    707		create_qp_params.rq_base_addr = qp->rq_dma_addr;
    708	}
    709
    710	err = efa_com_create_qp(&dev->edev, &create_qp_params,
    711				&create_qp_resp);
    712	if (err)
    713		goto err_free_mapped;
    714
    715	resp.sq_db_offset = create_qp_resp.sq_db_offset;
    716	resp.rq_db_offset = create_qp_resp.rq_db_offset;
    717	resp.llq_desc_offset = create_qp_resp.llq_descriptors_offset;
    718	resp.send_sub_cq_idx = create_qp_resp.send_sub_cq_idx;
    719	resp.recv_sub_cq_idx = create_qp_resp.recv_sub_cq_idx;
    720
    721	err = qp_mmap_entries_setup(qp, dev, ucontext, &create_qp_params,
    722				    &resp);
    723	if (err)
    724		goto err_destroy_qp;
    725
    726	qp->qp_handle = create_qp_resp.qp_handle;
    727	qp->ibqp.qp_num = create_qp_resp.qp_num;
    728	qp->max_send_wr = init_attr->cap.max_send_wr;
    729	qp->max_recv_wr = init_attr->cap.max_recv_wr;
    730	qp->max_send_sge = init_attr->cap.max_send_sge;
    731	qp->max_recv_sge = init_attr->cap.max_recv_sge;
    732	qp->max_inline_data = init_attr->cap.max_inline_data;
    733
    734	if (udata->outlen) {
    735		err = ib_copy_to_udata(udata, &resp,
    736				       min(sizeof(resp), udata->outlen));
    737		if (err) {
    738			ibdev_dbg(&dev->ibdev,
    739				  "Failed to copy udata for qp[%u]\n",
    740				  create_qp_resp.qp_num);
    741			goto err_remove_mmap_entries;
    742		}
    743	}
    744
    745	ibdev_dbg(&dev->ibdev, "Created qp[%d]\n", qp->ibqp.qp_num);
    746
    747	return 0;
    748
    749err_remove_mmap_entries:
    750	efa_qp_user_mmap_entries_remove(qp);
    751err_destroy_qp:
    752	efa_destroy_qp_handle(dev, create_qp_resp.qp_handle);
    753err_free_mapped:
    754	if (qp->rq_size)
    755		efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr,
    756				qp->rq_size, DMA_TO_DEVICE);
    757err_out:
    758	atomic64_inc(&dev->stats.create_qp_err);
    759	return err;
    760}
    761
    762static const struct {
    763	int			valid;
    764	enum ib_qp_attr_mask	req_param;
    765	enum ib_qp_attr_mask	opt_param;
    766} srd_qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
    767	[IB_QPS_RESET] = {
    768		[IB_QPS_RESET] = { .valid = 1 },
    769		[IB_QPS_INIT]  = {
    770			.valid = 1,
    771			.req_param = IB_QP_PKEY_INDEX |
    772				     IB_QP_PORT |
    773				     IB_QP_QKEY,
    774		},
    775	},
    776	[IB_QPS_INIT] = {
    777		[IB_QPS_RESET] = { .valid = 1 },
    778		[IB_QPS_ERR]   = { .valid = 1 },
    779		[IB_QPS_INIT]  = {
    780			.valid = 1,
    781			.opt_param = IB_QP_PKEY_INDEX |
    782				     IB_QP_PORT |
    783				     IB_QP_QKEY,
    784		},
    785		[IB_QPS_RTR]   = {
    786			.valid = 1,
    787			.opt_param = IB_QP_PKEY_INDEX |
    788				     IB_QP_QKEY,
    789		},
    790	},
    791	[IB_QPS_RTR] = {
    792		[IB_QPS_RESET] = { .valid = 1 },
    793		[IB_QPS_ERR]   = { .valid = 1 },
    794		[IB_QPS_RTS]   = {
    795			.valid = 1,
    796			.req_param = IB_QP_SQ_PSN,
    797			.opt_param = IB_QP_CUR_STATE |
    798				     IB_QP_QKEY |
    799				     IB_QP_RNR_RETRY,
    800
    801		}
    802	},
    803	[IB_QPS_RTS] = {
    804		[IB_QPS_RESET] = { .valid = 1 },
    805		[IB_QPS_ERR]   = { .valid = 1 },
    806		[IB_QPS_RTS]   = {
    807			.valid = 1,
    808			.opt_param = IB_QP_CUR_STATE |
    809				     IB_QP_QKEY,
    810		},
    811		[IB_QPS_SQD] = {
    812			.valid = 1,
    813			.opt_param = IB_QP_EN_SQD_ASYNC_NOTIFY,
    814		},
    815	},
    816	[IB_QPS_SQD] = {
    817		[IB_QPS_RESET] = { .valid = 1 },
    818		[IB_QPS_ERR]   = { .valid = 1 },
    819		[IB_QPS_RTS]   = {
    820			.valid = 1,
    821			.opt_param = IB_QP_CUR_STATE |
    822				     IB_QP_QKEY,
    823		},
    824		[IB_QPS_SQD] = {
    825			.valid = 1,
    826			.opt_param = IB_QP_PKEY_INDEX |
    827				     IB_QP_QKEY,
    828		}
    829	},
    830	[IB_QPS_SQE] = {
    831		[IB_QPS_RESET] = { .valid = 1 },
    832		[IB_QPS_ERR]   = { .valid = 1 },
    833		[IB_QPS_RTS]   = {
    834			.valid = 1,
    835			.opt_param = IB_QP_CUR_STATE |
    836				     IB_QP_QKEY,
    837		}
    838	},
    839	[IB_QPS_ERR] = {
    840		[IB_QPS_RESET] = { .valid = 1 },
    841		[IB_QPS_ERR]   = { .valid = 1 },
    842	}
    843};
    844
    845static bool efa_modify_srd_qp_is_ok(enum ib_qp_state cur_state,
    846				    enum ib_qp_state next_state,
    847				    enum ib_qp_attr_mask mask)
    848{
    849	enum ib_qp_attr_mask req_param, opt_param;
    850
    851	if (mask & IB_QP_CUR_STATE  &&
    852	    cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
    853	    cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE)
    854		return false;
    855
    856	if (!srd_qp_state_table[cur_state][next_state].valid)
    857		return false;
    858
    859	req_param = srd_qp_state_table[cur_state][next_state].req_param;
    860	opt_param = srd_qp_state_table[cur_state][next_state].opt_param;
    861
    862	if ((mask & req_param) != req_param)
    863		return false;
    864
    865	if (mask & ~(req_param | opt_param | IB_QP_STATE))
    866		return false;
    867
    868	return true;
    869}
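/*
 * [Annotation added for this listing; not part of the upstream driver.]
 * Example of consulting srd_qp_state_table: for a RESET -> INIT
 * transition, req_param is IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY,
 * so a mask missing any of those bits fails the
 * (mask & req_param) != req_param check, and any bit outside
 * req_param | opt_param | IB_QP_STATE is rejected as well.
 */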
    870
    871static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp,
    872				  struct ib_qp_attr *qp_attr, int qp_attr_mask,
    873				  enum ib_qp_state cur_state,
    874				  enum ib_qp_state new_state)
    875{
    876	int err;
    877
    878#define EFA_MODIFY_QP_SUPP_MASK \
    879	(IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \
    880	 IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN | \
    881	 IB_QP_RNR_RETRY)
    882
    883	if (qp_attr_mask & ~EFA_MODIFY_QP_SUPP_MASK) {
    884		ibdev_dbg(&dev->ibdev,
    885			  "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
    886			  qp_attr_mask, EFA_MODIFY_QP_SUPP_MASK);
    887		return -EOPNOTSUPP;
    888	}
    889
    890	if (qp->ibqp.qp_type == IB_QPT_DRIVER)
    891		err = !efa_modify_srd_qp_is_ok(cur_state, new_state,
    892					       qp_attr_mask);
    893	else
    894		err = !ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD,
    895					  qp_attr_mask);
    896
    897	if (err) {
    898		ibdev_dbg(&dev->ibdev, "Invalid modify QP parameters\n");
    899		return -EINVAL;
    900	}
    901
    902	if ((qp_attr_mask & IB_QP_PORT) && qp_attr->port_num != 1) {
    903		ibdev_dbg(&dev->ibdev, "Can't change port num\n");
    904		return -EOPNOTSUPP;
    905	}
    906
    907	if ((qp_attr_mask & IB_QP_PKEY_INDEX) && qp_attr->pkey_index) {
    908		ibdev_dbg(&dev->ibdev, "Can't change pkey index\n");
    909		return -EOPNOTSUPP;
    910	}
    911
    912	return 0;
    913}
    914
    915int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
    916		  int qp_attr_mask, struct ib_udata *udata)
    917{
    918	struct efa_dev *dev = to_edev(ibqp->device);
    919	struct efa_com_modify_qp_params params = {};
    920	struct efa_qp *qp = to_eqp(ibqp);
    921	enum ib_qp_state cur_state;
    922	enum ib_qp_state new_state;
    923	int err;
    924
    925	if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
    926		return -EOPNOTSUPP;
    927
    928	if (udata->inlen &&
    929	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
    930		ibdev_dbg(&dev->ibdev,
    931			  "Incompatible ABI params, udata not cleared\n");
    932		return -EINVAL;
    933	}
    934
    935	cur_state = qp_attr_mask & IB_QP_CUR_STATE ? qp_attr->cur_qp_state :
    936						     qp->state;
    937	new_state = qp_attr_mask & IB_QP_STATE ? qp_attr->qp_state : cur_state;
    938
    939	err = efa_modify_qp_validate(dev, qp, qp_attr, qp_attr_mask, cur_state,
    940				     new_state);
    941	if (err)
    942		return err;
    943
    944	params.qp_handle = qp->qp_handle;
    945
    946	if (qp_attr_mask & IB_QP_STATE) {
    947		EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QP_STATE,
    948			1);
    949		EFA_SET(&params.modify_mask,
    950			EFA_ADMIN_MODIFY_QP_CMD_CUR_QP_STATE, 1);
    951		params.cur_qp_state = cur_state;
    952		params.qp_state = new_state;
    953	}
    954
    955	if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
    956		EFA_SET(&params.modify_mask,
    957			EFA_ADMIN_MODIFY_QP_CMD_SQ_DRAINED_ASYNC_NOTIFY, 1);
    958		params.sq_drained_async_notify = qp_attr->en_sqd_async_notify;
    959	}
    960
    961	if (qp_attr_mask & IB_QP_QKEY) {
    962		EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QKEY, 1);
    963		params.qkey = qp_attr->qkey;
    964	}
    965
    966	if (qp_attr_mask & IB_QP_SQ_PSN) {
    967		EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_SQ_PSN, 1);
    968		params.sq_psn = qp_attr->sq_psn;
    969	}
    970
    971	if (qp_attr_mask & IB_QP_RNR_RETRY) {
    972		EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY,
    973			1);
    974		params.rnr_retry = qp_attr->rnr_retry;
    975	}
    976
    977	err = efa_com_modify_qp(&dev->edev, &params);
    978	if (err)
    979		return err;
    980
    981	qp->state = new_state;
    982
    983	return 0;
    984}
    985
    986static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx)
    987{
    988	struct efa_com_destroy_cq_params params = { .cq_idx = cq_idx };
    989
    990	return efa_com_destroy_cq(&dev->edev, &params);
    991}
    992
    993static void efa_cq_user_mmap_entries_remove(struct efa_cq *cq)
    994{
    995	rdma_user_mmap_entry_remove(cq->db_mmap_entry);
    996	rdma_user_mmap_entry_remove(cq->mmap_entry);
    997}
    998
    999int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
   1000{
   1001	struct efa_dev *dev = to_edev(ibcq->device);
   1002	struct efa_cq *cq = to_ecq(ibcq);
   1003
   1004	ibdev_dbg(&dev->ibdev,
   1005		  "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n",
   1006		  cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr);
   1007
   1008	efa_cq_user_mmap_entries_remove(cq);
   1009	efa_destroy_cq_idx(dev, cq->cq_idx);
   1010	if (cq->eq) {
   1011		xa_erase(&dev->cqs_xa, cq->cq_idx);
   1012		synchronize_irq(cq->eq->irq.irqn);
   1013	}
   1014	efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size,
   1015			DMA_FROM_DEVICE);
   1016	return 0;
   1017}
   1018
   1019static struct efa_eq *efa_vec2eq(struct efa_dev *dev, int vec)
   1020{
   1021	return &dev->eqs[vec];
   1022}
   1023
   1024static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
   1025				 struct efa_ibv_create_cq_resp *resp,
   1026				 bool db_valid)
   1027{
   1028	resp->q_mmap_size = cq->size;
   1029	cq->mmap_entry = efa_user_mmap_entry_insert(&cq->ucontext->ibucontext,
   1030						    virt_to_phys(cq->cpu_addr),
   1031						    cq->size, EFA_MMAP_DMA_PAGE,
   1032						    &resp->q_mmap_key);
   1033	if (!cq->mmap_entry)
   1034		return -ENOMEM;
   1035
   1036	if (db_valid) {
   1037		cq->db_mmap_entry =
   1038			efa_user_mmap_entry_insert(&cq->ucontext->ibucontext,
   1039						   dev->db_bar_addr + resp->db_off,
   1040						   PAGE_SIZE, EFA_MMAP_IO_NC,
   1041						   &resp->db_mmap_key);
   1042		if (!cq->db_mmap_entry) {
   1043			rdma_user_mmap_entry_remove(cq->mmap_entry);
   1044			return -ENOMEM;
   1045		}
   1046
   1047		resp->db_off &= ~PAGE_MASK;
   1048		resp->comp_mask |= EFA_CREATE_CQ_RESP_DB_OFF;
   1049	}
   1050
   1051	return 0;
   1052}
   1053
   1054int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
   1055		  struct ib_udata *udata)
   1056{
   1057	struct efa_ucontext *ucontext = rdma_udata_to_drv_context(
   1058		udata, struct efa_ucontext, ibucontext);
   1059	struct efa_com_create_cq_params params = {};
   1060	struct efa_ibv_create_cq_resp resp = {};
   1061	struct efa_com_create_cq_result result;
   1062	struct ib_device *ibdev = ibcq->device;
   1063	struct efa_dev *dev = to_edev(ibdev);
   1064	struct efa_ibv_create_cq cmd = {};
   1065	struct efa_cq *cq = to_ecq(ibcq);
   1066	int entries = attr->cqe;
   1067	int err;
   1068
   1069	ibdev_dbg(ibdev, "create_cq entries %d\n", entries);
   1070
   1071	if (attr->flags)
   1072		return -EOPNOTSUPP;
   1073
   1074	if (entries < 1 || entries > dev->dev_attr.max_cq_depth) {
   1075		ibdev_dbg(ibdev,
   1076			  "cq: requested entries[%u] non-positive or greater than max[%u]\n",
   1077			  entries, dev->dev_attr.max_cq_depth);
   1078		err = -EINVAL;
   1079		goto err_out;
   1080	}
   1081
   1082	if (offsetofend(typeof(cmd), num_sub_cqs) > udata->inlen) {
   1083		ibdev_dbg(ibdev,
   1084			  "Incompatible ABI params, no input udata\n");
   1085		err = -EINVAL;
   1086		goto err_out;
   1087	}
   1088
   1089	if (udata->inlen > sizeof(cmd) &&
   1090	    !ib_is_udata_cleared(udata, sizeof(cmd),
   1091				 udata->inlen - sizeof(cmd))) {
   1092		ibdev_dbg(ibdev,
   1093			  "Incompatible ABI params, unknown fields in udata\n");
   1094		err = -EINVAL;
   1095		goto err_out;
   1096	}
   1097
   1098	err = ib_copy_from_udata(&cmd, udata,
   1099				 min(sizeof(cmd), udata->inlen));
   1100	if (err) {
   1101		ibdev_dbg(ibdev, "Cannot copy udata for create_cq\n");
   1102		goto err_out;
   1103	}
   1104
   1105	if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_58)) {
   1106		ibdev_dbg(ibdev,
   1107			  "Incompatible ABI params, unknown fields in udata\n");
   1108		err = -EINVAL;
   1109		goto err_out;
   1110	}
   1111
   1112	if (!cmd.cq_entry_size) {
   1113		ibdev_dbg(ibdev,
   1114			  "Invalid entry size [%u]\n", cmd.cq_entry_size);
   1115		err = -EINVAL;
   1116		goto err_out;
   1117	}
   1118
   1119	if (cmd.num_sub_cqs != dev->dev_attr.sub_cqs_per_cq) {
   1120		ibdev_dbg(ibdev,
   1121			  "Invalid number of sub cqs[%u] expected[%u]\n",
   1122			  cmd.num_sub_cqs, dev->dev_attr.sub_cqs_per_cq);
   1123		err = -EINVAL;
   1124		goto err_out;
   1125	}
   1126
   1127	cq->ucontext = ucontext;
   1128	cq->size = PAGE_ALIGN(cmd.cq_entry_size * entries * cmd.num_sub_cqs);
   1129	cq->cpu_addr = efa_zalloc_mapped(dev, &cq->dma_addr, cq->size,
   1130					 DMA_FROM_DEVICE);
   1131	if (!cq->cpu_addr) {
   1132		err = -ENOMEM;
   1133		goto err_out;
   1134	}
   1135
   1136	params.uarn = cq->ucontext->uarn;
   1137	params.cq_depth = entries;
   1138	params.dma_addr = cq->dma_addr;
   1139	params.entry_size_in_bytes = cmd.cq_entry_size;
   1140	params.num_sub_cqs = cmd.num_sub_cqs;
   1141	if (cmd.flags & EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL) {
   1142		cq->eq = efa_vec2eq(dev, attr->comp_vector);
   1143		params.eqn = cq->eq->eeq.eqn;
   1144		params.interrupt_mode_enabled = true;
   1145	}
   1146
   1147	err = efa_com_create_cq(&dev->edev, &params, &result);
   1148	if (err)
   1149		goto err_free_mapped;
   1150
   1151	resp.db_off = result.db_off;
   1152	resp.cq_idx = result.cq_idx;
   1153	cq->cq_idx = result.cq_idx;
   1154	cq->ibcq.cqe = result.actual_depth;
   1155	WARN_ON_ONCE(entries != result.actual_depth);
   1156
   1157	err = cq_mmap_entries_setup(dev, cq, &resp, result.db_valid);
   1158	if (err) {
   1159		ibdev_dbg(ibdev, "Could not setup cq[%u] mmap entries\n",
   1160			  cq->cq_idx);
   1161		goto err_destroy_cq;
   1162	}
   1163
   1164	if (cq->eq) {
   1165		err = xa_err(xa_store(&dev->cqs_xa, cq->cq_idx, cq, GFP_KERNEL));
   1166		if (err) {
   1167			ibdev_dbg(ibdev, "Failed to store cq[%u] in xarray\n",
   1168				  cq->cq_idx);
   1169			goto err_remove_mmap;
   1170		}
   1171	}
   1172
   1173	if (udata->outlen) {
   1174		err = ib_copy_to_udata(udata, &resp,
   1175				       min(sizeof(resp), udata->outlen));
   1176		if (err) {
   1177			ibdev_dbg(ibdev,
   1178				  "Failed to copy udata for create_cq\n");
   1179			goto err_xa_erase;
   1180		}
   1181	}
   1182
   1183	ibdev_dbg(ibdev, "Created cq[%d], cq depth[%u]. dma[%pad] virt[0x%p]\n",
   1184		  cq->cq_idx, result.actual_depth, &cq->dma_addr, cq->cpu_addr);
   1185
   1186	return 0;
   1187
   1188err_xa_erase:
   1189	if (cq->eq)
   1190		xa_erase(&dev->cqs_xa, cq->cq_idx);
   1191err_remove_mmap:
   1192	efa_cq_user_mmap_entries_remove(cq);
   1193err_destroy_cq:
   1194	efa_destroy_cq_idx(dev, cq->cq_idx);
   1195err_free_mapped:
   1196	efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size,
   1197			DMA_FROM_DEVICE);
   1198
   1199err_out:
   1200	atomic64_inc(&dev->stats.create_cq_err);
   1201	return err;
   1202}
   1203
   1204static int umem_to_page_list(struct efa_dev *dev,
   1205			     struct ib_umem *umem,
   1206			     u64 *page_list,
   1207			     u32 hp_cnt,
   1208			     u8 hp_shift)
   1209{
   1210	u32 pages_in_hp = BIT(hp_shift - PAGE_SHIFT);
   1211	struct ib_block_iter biter;
   1212	unsigned int hp_idx = 0;
   1213
   1214	ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n",
   1215		  hp_cnt, pages_in_hp);
   1216
   1217	rdma_umem_for_each_dma_block(umem, &biter, BIT(hp_shift))
   1218		page_list[hp_idx++] = rdma_block_iter_dma_address(&biter);
   1219
   1220	return 0;
   1221}
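/*
 * [Annotation added for this listing; not part of the upstream driver.]
 * rdma_umem_for_each_dma_block() walks the umem in BIT(hp_shift)-byte
 * blocks, so with hp_shift == 21 (2 MiB blocks) an 8 MiB region yields
 * four page_list entries, one DMA address per 2 MiB block.
 */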
   1222
   1223static struct scatterlist *efa_vmalloc_buf_to_sg(u64 *buf, int page_cnt)
   1224{
   1225	struct scatterlist *sglist;
   1226	struct page *pg;
   1227	int i;
   1228
   1229	sglist = kmalloc_array(page_cnt, sizeof(*sglist), GFP_KERNEL);
   1230	if (!sglist)
   1231		return NULL;
   1232	sg_init_table(sglist, page_cnt);
   1233	for (i = 0; i < page_cnt; i++) {
   1234		pg = vmalloc_to_page(buf);
   1235		if (!pg)
   1236			goto err;
   1237		sg_set_page(&sglist[i], pg, PAGE_SIZE, 0);
   1238		buf += PAGE_SIZE / sizeof(*buf);
   1239	}
   1240	return sglist;
   1241
   1242err:
   1243	kfree(sglist);
   1244	return NULL;
   1245}
   1246
   1247/*
   1248 * create a chunk list of physical pages dma addresses from the supplied
   1249 * scatter gather list
   1250 */
   1251static int pbl_chunk_list_create(struct efa_dev *dev, struct pbl_context *pbl)
   1252{
   1253	struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
   1254	int page_cnt = pbl->phys.indirect.pbl_buf_size_in_pages;
   1255	struct scatterlist *pages_sgl = pbl->phys.indirect.sgl;
   1256	unsigned int chunk_list_size, chunk_idx, payload_idx;
   1257	int sg_dma_cnt = pbl->phys.indirect.sg_dma_cnt;
   1258	struct efa_com_ctrl_buff_info *ctrl_buf;
   1259	u64 *cur_chunk_buf, *prev_chunk_buf;
   1260	struct ib_block_iter biter;
   1261	dma_addr_t dma_addr;
   1262	int i;
   1263
   1264	/* allocate a chunk list that consists of 4KB chunks */
   1265	chunk_list_size = DIV_ROUND_UP(page_cnt, EFA_PTRS_PER_CHUNK);
   1266
   1267	chunk_list->size = chunk_list_size;
   1268	chunk_list->chunks = kcalloc(chunk_list_size,
   1269				     sizeof(*chunk_list->chunks),
   1270				     GFP_KERNEL);
   1271	if (!chunk_list->chunks)
   1272		return -ENOMEM;
   1273
   1274	ibdev_dbg(&dev->ibdev,
   1275		  "chunk_list_size[%u] - pages[%u]\n", chunk_list_size,
   1276		  page_cnt);
   1277
   1278	/* allocate chunk buffers: */
   1279	for (i = 0; i < chunk_list_size; i++) {
   1280		chunk_list->chunks[i].buf = kzalloc(EFA_CHUNK_SIZE, GFP_KERNEL);
   1281		if (!chunk_list->chunks[i].buf)
   1282			goto chunk_list_dealloc;
   1283
   1284		chunk_list->chunks[i].length = EFA_CHUNK_USED_SIZE;
   1285	}
   1286	chunk_list->chunks[chunk_list_size - 1].length =
   1287		((page_cnt % EFA_PTRS_PER_CHUNK) * EFA_CHUNK_PAYLOAD_PTR_SIZE) +
   1288			EFA_CHUNK_PTR_SIZE;
   1289
   1290	/* fill the dma addresses of sg list pages to chunks: */
   1291	chunk_idx = 0;
   1292	payload_idx = 0;
   1293	cur_chunk_buf = chunk_list->chunks[0].buf;
   1294	rdma_for_each_block(pages_sgl, &biter, sg_dma_cnt,
   1295			    EFA_CHUNK_PAYLOAD_SIZE) {
   1296		cur_chunk_buf[payload_idx++] =
   1297			rdma_block_iter_dma_address(&biter);
   1298
   1299		if (payload_idx == EFA_PTRS_PER_CHUNK) {
   1300			chunk_idx++;
   1301			cur_chunk_buf = chunk_list->chunks[chunk_idx].buf;
   1302			payload_idx = 0;
   1303		}
   1304	}
   1305
   1306	/* map chunks to dma and fill chunks next ptrs */
   1307	for (i = chunk_list_size - 1; i >= 0; i--) {
   1308		dma_addr = dma_map_single(&dev->pdev->dev,
   1309					  chunk_list->chunks[i].buf,
   1310					  chunk_list->chunks[i].length,
   1311					  DMA_TO_DEVICE);
   1312		if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
   1313			ibdev_err(&dev->ibdev,
   1314				  "chunk[%u] dma_map_failed\n", i);
   1315			goto chunk_list_unmap;
   1316		}
   1317
   1318		chunk_list->chunks[i].dma_addr = dma_addr;
   1319		ibdev_dbg(&dev->ibdev,
   1320			  "chunk[%u] mapped at [%pad]\n", i, &dma_addr);
   1321
   1322		if (!i)
   1323			break;
   1324
   1325		prev_chunk_buf = chunk_list->chunks[i - 1].buf;
   1326
   1327		ctrl_buf = (struct efa_com_ctrl_buff_info *)
   1328				&prev_chunk_buf[EFA_PTRS_PER_CHUNK];
   1329		ctrl_buf->length = chunk_list->chunks[i].length;
   1330
   1331		efa_com_set_dma_addr(dma_addr,
   1332				     &ctrl_buf->address.mem_addr_high,
   1333				     &ctrl_buf->address.mem_addr_low);
   1334	}
   1335
   1336	return 0;
   1337
   1338chunk_list_unmap:
   1339	for (; i < chunk_list_size; i++) {
   1340		dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
   1341				 chunk_list->chunks[i].length, DMA_TO_DEVICE);
   1342	}
   1343chunk_list_dealloc:
   1344	for (i = 0; i < chunk_list_size; i++)
   1345		kfree(chunk_list->chunks[i].buf);
   1346
   1347	kfree(chunk_list->chunks);
   1348	return -ENOMEM;
   1349}
   1350
   1351static void pbl_chunk_list_destroy(struct efa_dev *dev, struct pbl_context *pbl)
   1352{
   1353	struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
   1354	int i;
   1355
   1356	for (i = 0; i < chunk_list->size; i++) {
   1357		dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
   1358				 chunk_list->chunks[i].length, DMA_TO_DEVICE);
   1359		kfree(chunk_list->chunks[i].buf);
   1360	}
   1361
   1362	kfree(chunk_list->chunks);
   1363}
   1364
   1365/* initialize pbl continuous mode: map pbl buffer to a dma address. */
   1366static int pbl_continuous_initialize(struct efa_dev *dev,
   1367				     struct pbl_context *pbl)
   1368{
   1369	dma_addr_t dma_addr;
   1370
   1371	dma_addr = dma_map_single(&dev->pdev->dev, pbl->pbl_buf,
   1372				  pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
   1373	if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
   1374		ibdev_err(&dev->ibdev, "Unable to map pbl to DMA address\n");
   1375		return -ENOMEM;
   1376	}
   1377
   1378	pbl->phys.continuous.dma_addr = dma_addr;
   1379	ibdev_dbg(&dev->ibdev,
   1380		  "pbl continuous - dma_addr = %pad, size[%u]\n",
   1381		  &dma_addr, pbl->pbl_buf_size_in_bytes);
   1382
   1383	return 0;
   1384}
   1385
   1386/*
   1387 * initialize pbl indirect mode:
   1388 * create a chunk list out of the dma addresses of the physical pages of
   1389 * pbl buffer.
   1390 */
   1391static int pbl_indirect_initialize(struct efa_dev *dev, struct pbl_context *pbl)
   1392{
   1393	u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, PAGE_SIZE);
   1394	struct scatterlist *sgl;
   1395	int sg_dma_cnt, err;
   1396
   1397	BUILD_BUG_ON(EFA_CHUNK_PAYLOAD_SIZE > PAGE_SIZE);
   1398	sgl = efa_vmalloc_buf_to_sg(pbl->pbl_buf, size_in_pages);
   1399	if (!sgl)
   1400		return -ENOMEM;
   1401
   1402	sg_dma_cnt = dma_map_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
   1403	if (!sg_dma_cnt) {
   1404		err = -EINVAL;
   1405		goto err_map;
   1406	}
   1407
   1408	pbl->phys.indirect.pbl_buf_size_in_pages = size_in_pages;
   1409	pbl->phys.indirect.sgl = sgl;
   1410	pbl->phys.indirect.sg_dma_cnt = sg_dma_cnt;
   1411	err = pbl_chunk_list_create(dev, pbl);
   1412	if (err) {
   1413		ibdev_dbg(&dev->ibdev,
   1414			  "chunk_list creation failed[%d]\n", err);
   1415		goto err_chunk;
   1416	}
   1417
   1418	ibdev_dbg(&dev->ibdev,
   1419		  "pbl indirect - size[%u], chunks[%u]\n",
   1420		  pbl->pbl_buf_size_in_bytes,
   1421		  pbl->phys.indirect.chunk_list.size);
   1422
   1423	return 0;
   1424
   1425err_chunk:
   1426	dma_unmap_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
   1427err_map:
   1428	kfree(sgl);
   1429	return err;
   1430}
   1431
   1432static void pbl_indirect_terminate(struct efa_dev *dev, struct pbl_context *pbl)
   1433{
   1434	pbl_chunk_list_destroy(dev, pbl);
   1435	dma_unmap_sg(&dev->pdev->dev, pbl->phys.indirect.sgl,
   1436		     pbl->phys.indirect.pbl_buf_size_in_pages, DMA_TO_DEVICE);
   1437	kfree(pbl->phys.indirect.sgl);
   1438}
   1439
   1440/* create a page buffer list from a mapped user memory region */
   1441static int pbl_create(struct efa_dev *dev,
   1442		      struct pbl_context *pbl,
   1443		      struct ib_umem *umem,
   1444		      int hp_cnt,
   1445		      u8 hp_shift)
   1446{
   1447	int err;
   1448
   1449	pbl->pbl_buf_size_in_bytes = hp_cnt * EFA_CHUNK_PAYLOAD_PTR_SIZE;
   1450	pbl->pbl_buf = kvzalloc(pbl->pbl_buf_size_in_bytes, GFP_KERNEL);
   1451	if (!pbl->pbl_buf)
   1452		return -ENOMEM;
   1453
   1454	if (is_vmalloc_addr(pbl->pbl_buf)) {
   1455		pbl->physically_continuous = 0;
   1456		err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
   1457					hp_shift);
   1458		if (err)
   1459			goto err_free;
   1460
   1461		err = pbl_indirect_initialize(dev, pbl);
   1462		if (err)
   1463			goto err_free;
   1464	} else {
   1465		pbl->physically_continuous = 1;
   1466		err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
   1467					hp_shift);
   1468		if (err)
   1469			goto err_free;
   1470
   1471		err = pbl_continuous_initialize(dev, pbl);
   1472		if (err)
   1473			goto err_free;
   1474	}
   1475
   1476	ibdev_dbg(&dev->ibdev,
   1477		  "user_pbl_created: user_pages[%u], continuous[%u]\n",
   1478		  hp_cnt, pbl->physically_continuous);
   1479
   1480	return 0;
   1481
   1482err_free:
   1483	kvfree(pbl->pbl_buf);
   1484	return err;
   1485}
   1486
   1487static void pbl_destroy(struct efa_dev *dev, struct pbl_context *pbl)
   1488{
   1489	if (pbl->physically_continuous)
   1490		dma_unmap_single(&dev->pdev->dev, pbl->phys.continuous.dma_addr,
   1491				 pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
   1492	else
   1493		pbl_indirect_terminate(dev, pbl);
   1494
   1495	kvfree(pbl->pbl_buf);
   1496}
   1497
   1498static int efa_create_inline_pbl(struct efa_dev *dev, struct efa_mr *mr,
   1499				 struct efa_com_reg_mr_params *params)
   1500{
   1501	int err;
   1502
   1503	params->inline_pbl = 1;
   1504	err = umem_to_page_list(dev, mr->umem, params->pbl.inline_pbl_array,
   1505				params->page_num, params->page_shift);
   1506	if (err)
   1507		return err;
   1508
   1509	ibdev_dbg(&dev->ibdev,
   1510		  "inline_pbl_array - pages[%u]\n", params->page_num);
   1511
   1512	return 0;
   1513}
   1514
   1515static int efa_create_pbl(struct efa_dev *dev,
   1516			  struct pbl_context *pbl,
   1517			  struct efa_mr *mr,
   1518			  struct efa_com_reg_mr_params *params)
   1519{
   1520	int err;
   1521
   1522	err = pbl_create(dev, pbl, mr->umem, params->page_num,
   1523			 params->page_shift);
   1524	if (err) {
   1525		ibdev_dbg(&dev->ibdev, "Failed to create pbl[%d]\n", err);
   1526		return err;
   1527	}
   1528
   1529	params->inline_pbl = 0;
   1530	params->indirect = !pbl->physically_continuous;
   1531	if (pbl->physically_continuous) {
   1532		params->pbl.pbl.length = pbl->pbl_buf_size_in_bytes;
   1533
   1534		efa_com_set_dma_addr(pbl->phys.continuous.dma_addr,
   1535				     &params->pbl.pbl.address.mem_addr_high,
   1536				     &params->pbl.pbl.address.mem_addr_low);
   1537	} else {
   1538		params->pbl.pbl.length =
   1539			pbl->phys.indirect.chunk_list.chunks[0].length;
   1540
   1541		efa_com_set_dma_addr(pbl->phys.indirect.chunk_list.chunks[0].dma_addr,
   1542				     &params->pbl.pbl.address.mem_addr_high,
   1543				     &params->pbl.pbl.address.mem_addr_low);
   1544	}
   1545
   1546	return 0;
   1547}
   1548
   1549static struct efa_mr *efa_alloc_mr(struct ib_pd *ibpd, int access_flags,
   1550				   struct ib_udata *udata)
   1551{
   1552	struct efa_dev *dev = to_edev(ibpd->device);
   1553	int supp_access_flags;
   1554	struct efa_mr *mr;
   1555
   1556	if (udata && udata->inlen &&
   1557	    !ib_is_udata_cleared(udata, 0, sizeof(udata->inlen))) {
   1558		ibdev_dbg(&dev->ibdev,
   1559			  "Incompatible ABI params, udata not cleared\n");
   1560		return ERR_PTR(-EINVAL);
   1561	}
   1562
   1563	supp_access_flags =
   1564		IB_ACCESS_LOCAL_WRITE |
   1565		(EFA_DEV_CAP(dev, RDMA_READ) ? IB_ACCESS_REMOTE_READ : 0);
   1566
   1567	access_flags &= ~IB_ACCESS_OPTIONAL;
   1568	if (access_flags & ~supp_access_flags) {
   1569		ibdev_dbg(&dev->ibdev,
   1570			  "Unsupported access flags[%#x], supported[%#x]\n",
   1571			  access_flags, supp_access_flags);
   1572		return ERR_PTR(-EOPNOTSUPP);
   1573	}
   1574
   1575	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
   1576	if (!mr)
   1577		return ERR_PTR(-ENOMEM);
   1578
   1579	return mr;
   1580}
   1581
   1582static int efa_register_mr(struct ib_pd *ibpd, struct efa_mr *mr, u64 start,
   1583			   u64 length, u64 virt_addr, int access_flags)
   1584{
   1585	struct efa_dev *dev = to_edev(ibpd->device);
   1586	struct efa_com_reg_mr_params params = {};
   1587	struct efa_com_reg_mr_result result = {};
   1588	struct pbl_context pbl;
   1589	unsigned int pg_sz;
   1590	int inline_size;
   1591	int err;
   1592
   1593	params.pd = to_epd(ibpd)->pdn;
   1594	params.iova = virt_addr;
   1595	params.mr_length_in_bytes = length;
   1596	params.permissions = access_flags;
   1597
   1598	pg_sz = ib_umem_find_best_pgsz(mr->umem,
   1599				       dev->dev_attr.page_size_cap,
   1600				       virt_addr);
   1601	if (!pg_sz) {
   1602		ibdev_dbg(&dev->ibdev, "Failed to find a suitable page size in page_size_cap %#llx\n",
   1603			  dev->dev_attr.page_size_cap);
   1604		return -EOPNOTSUPP;
   1605	}
   1606
   1607	params.page_shift = order_base_2(pg_sz);
   1608	params.page_num = ib_umem_num_dma_blocks(mr->umem, pg_sz);
   1609
   1610	ibdev_dbg(&dev->ibdev,
   1611		  "start %#llx length %#llx params.page_shift %u params.page_num %u\n",
   1612		  start, length, params.page_shift, params.page_num);
   1613
   1614	inline_size = ARRAY_SIZE(params.pbl.inline_pbl_array);
   1615	if (params.page_num <= inline_size) {
   1616		err = efa_create_inline_pbl(dev, mr, &params);
   1617		if (err)
   1618			return err;
   1619
   1620		err = efa_com_register_mr(&dev->edev, &params, &result);
   1621		if (err)
   1622			return err;
   1623	} else {
   1624		err = efa_create_pbl(dev, &pbl, mr, &params);
   1625		if (err)
   1626			return err;
   1627
   1628		err = efa_com_register_mr(&dev->edev, &params, &result);
   1629		pbl_destroy(dev, &pbl);
   1630
   1631		if (err)
   1632			return err;
   1633	}
   1634
   1635	mr->ibmr.lkey = result.l_key;
   1636	mr->ibmr.rkey = result.r_key;
   1637	mr->ibmr.length = length;
   1638	ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey);
   1639
   1640	return 0;
   1641}
   1642
   1643struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start,
   1644				     u64 length, u64 virt_addr,
   1645				     int fd, int access_flags,
   1646				     struct ib_udata *udata)
   1647{
   1648	struct efa_dev *dev = to_edev(ibpd->device);
   1649	struct ib_umem_dmabuf *umem_dmabuf;
   1650	struct efa_mr *mr;
   1651	int err;
   1652
   1653	mr = efa_alloc_mr(ibpd, access_flags, udata);
   1654	if (IS_ERR(mr)) {
   1655		err = PTR_ERR(mr);
   1656		goto err_out;
   1657	}
   1658
   1659	umem_dmabuf = ib_umem_dmabuf_get_pinned(ibpd->device, start, length, fd,
   1660						access_flags);
   1661	if (IS_ERR(umem_dmabuf)) {
   1662		err = PTR_ERR(umem_dmabuf);
   1663		ibdev_dbg(&dev->ibdev, "Failed to get dmabuf umem[%d]\n", err);
   1664		goto err_free;
   1665	}
   1666
   1667	mr->umem = &umem_dmabuf->umem;
   1668	err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags);
   1669	if (err)
   1670		goto err_release;
   1671
   1672	return &mr->ibmr;
   1673
   1674err_release:
   1675	ib_umem_release(mr->umem);
   1676err_free:
   1677	kfree(mr);
   1678err_out:
   1679	atomic64_inc(&dev->stats.reg_mr_err);
   1680	return ERR_PTR(err);
   1681}
   1682
   1683struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
   1684			 u64 virt_addr, int access_flags,
   1685			 struct ib_udata *udata)
   1686{
   1687	struct efa_dev *dev = to_edev(ibpd->device);
   1688	struct efa_mr *mr;
   1689	int err;
   1690
   1691	mr = efa_alloc_mr(ibpd, access_flags, udata);
   1692	if (IS_ERR(mr)) {
   1693		err = PTR_ERR(mr);
   1694		goto err_out;
   1695	}
   1696
   1697	mr->umem = ib_umem_get(ibpd->device, start, length, access_flags);
   1698	if (IS_ERR(mr->umem)) {
   1699		err = PTR_ERR(mr->umem);
   1700		ibdev_dbg(&dev->ibdev,
   1701			  "Failed to pin and map user space memory[%d]\n", err);
   1702		goto err_free;
   1703	}
   1704
   1705	err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags);
   1706	if (err)
   1707		goto err_release;
   1708
   1709	return &mr->ibmr;
   1710
   1711err_release:
   1712	ib_umem_release(mr->umem);
   1713err_free:
   1714	kfree(mr);
   1715err_out:
   1716	atomic64_inc(&dev->stats.reg_mr_err);
   1717	return ERR_PTR(err);
   1718}
   1719
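        /* Deregister the MR with the device, then release the pinned umem. */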
   1720int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
   1721{
   1722	struct efa_dev *dev = to_edev(ibmr->device);
   1723	struct efa_com_dereg_mr_params params;
   1724	struct efa_mr *mr = to_emr(ibmr);
   1725	int err;
   1726
   1727	ibdev_dbg(&dev->ibdev, "Deregister mr[%d]\n", ibmr->lkey);
   1728
   1729	params.l_key = mr->ibmr.lkey;
   1730	err = efa_com_dereg_mr(&dev->edev, &params);
   1731	if (err)
   1732		return err;
   1733
   1734	ib_umem_release(mr->umem);
   1735	kfree(mr);
   1736
   1737	return 0;
   1738}
   1739
   1740int efa_get_port_immutable(struct ib_device *ibdev, u32 port_num,
   1741			   struct ib_port_immutable *immutable)
   1742{
   1743	struct ib_port_attr attr;
   1744	int err;
   1745
   1746	err = ib_query_port(ibdev, port_num, &attr);
   1747	if (err) {
   1748		ibdev_dbg(ibdev, "Couldn't query port err[%d]\n", err);
   1749		return err;
   1750	}
   1751
   1752	immutable->pkey_tbl_len = attr.pkey_tbl_len;
   1753	immutable->gid_tbl_len = attr.gid_tbl_len;
   1754
   1755	return 0;
   1756}
   1757
   1758static int efa_dealloc_uar(struct efa_dev *dev, u16 uarn)
   1759{
   1760	struct efa_com_dealloc_uar_params params = {
   1761		.uarn = uarn,
   1762	};
   1763
   1764	return efa_com_dealloc_uar(&dev->edev, &params);
   1765}
   1766
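        /*
         * Sets _attr_str to the attribute name (non-NULL, i.e. handshake
         * failure) when the device reports a non-zero _attr but userspace did
         * not set _mask in its comp_mask; sets it to NULL otherwise.
         */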
   1767#define EFA_CHECK_USER_COMP(_dev, _comp_mask, _attr, _mask, _attr_str) \
   1768	(_attr_str = (!(_dev)->dev_attr._attr || ((_comp_mask) & (_mask))) ? \
   1769		     NULL : #_attr)
   1770
   1771static int efa_user_comp_handshake(const struct ib_ucontext *ibucontext,
   1772				   const struct efa_ibv_alloc_ucontext_cmd *cmd)
   1773{
   1774	struct efa_dev *dev = to_edev(ibucontext->device);
   1775	char *attr_str;
   1776
   1777	if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, max_tx_batch,
   1778				EFA_ALLOC_UCONTEXT_CMD_COMP_TX_BATCH, attr_str))
   1779		goto err;
   1780
   1781	if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, min_sq_depth,
   1782				EFA_ALLOC_UCONTEXT_CMD_COMP_MIN_SQ_WR,
   1783				attr_str))
   1784		goto err;
   1785
   1786	return 0;
   1787
   1788err:
   1789	ibdev_dbg(&dev->ibdev, "Userspace handshake failed for %s attribute\n",
   1790		  attr_str);
   1791	return -EOPNOTSUPP;
   1792}
   1793
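        /*
         * Allocate a user context: validate the userspace handshake, allocate
         * a UAR for the context, and report device capabilities (sub-CQs,
         * inline buffer size, LLQ size, TX batch, minimum SQ depth) back to
         * userspace.
         */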
   1794int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
   1795{
   1796	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
   1797	struct efa_dev *dev = to_edev(ibucontext->device);
   1798	struct efa_ibv_alloc_ucontext_resp resp = {};
   1799	struct efa_ibv_alloc_ucontext_cmd cmd = {};
   1800	struct efa_com_alloc_uar_result result;
   1801	int err;
   1802
    1803	/*
    1804	 * It's fine if the driver does not know all request fields;
    1805	 * we will ack the input fields in our response.
    1806	 */
   1807
   1808	err = ib_copy_from_udata(&cmd, udata,
   1809				 min(sizeof(cmd), udata->inlen));
   1810	if (err) {
   1811		ibdev_dbg(&dev->ibdev,
   1812			  "Cannot copy udata for alloc_ucontext\n");
   1813		goto err_out;
   1814	}
   1815
   1816	err = efa_user_comp_handshake(ibucontext, &cmd);
   1817	if (err)
   1818		goto err_out;
   1819
   1820	err = efa_com_alloc_uar(&dev->edev, &result);
   1821	if (err)
   1822		goto err_out;
   1823
   1824	ucontext->uarn = result.uarn;
   1825
   1826	resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE;
   1827	resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH;
   1828	resp.sub_cqs_per_cq = dev->dev_attr.sub_cqs_per_cq;
   1829	resp.inline_buf_size = dev->dev_attr.inline_buf_size;
   1830	resp.max_llq_size = dev->dev_attr.max_llq_size;
   1831	resp.max_tx_batch = dev->dev_attr.max_tx_batch;
   1832	resp.min_sq_wr = dev->dev_attr.min_sq_depth;
   1833
   1834	err = ib_copy_to_udata(udata, &resp,
   1835			       min(sizeof(resp), udata->outlen));
   1836	if (err)
   1837		goto err_dealloc_uar;
   1838
   1839	return 0;
   1840
   1841err_dealloc_uar:
   1842	efa_dealloc_uar(dev, result.uarn);
   1843err_out:
   1844	atomic64_inc(&dev->stats.alloc_ucontext_err);
   1845	return err;
   1846}
   1847
   1848void efa_dealloc_ucontext(struct ib_ucontext *ibucontext)
   1849{
   1850	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
   1851	struct efa_dev *dev = to_edev(ibucontext->device);
   1852
   1853	efa_dealloc_uar(dev, ucontext->uarn);
   1854}
   1855
   1856void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
   1857{
   1858	struct efa_user_mmap_entry *entry = to_emmap(rdma_entry);
   1859
   1860	kfree(entry);
   1861}
   1862
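        /*
         * Map a previously inserted rdma_user_mmap_entry into the caller's
         * VMA: I/O entries are mapped non-cached or write-combined via
         * rdma_user_mmap_io(), DMA pages are inserted page by page with
         * vm_insert_page().
         */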
   1863static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
   1864		      struct vm_area_struct *vma)
   1865{
   1866	struct rdma_user_mmap_entry *rdma_entry;
   1867	struct efa_user_mmap_entry *entry;
   1868	unsigned long va;
   1869	int err = 0;
   1870	u64 pfn;
   1871
   1872	rdma_entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, vma);
   1873	if (!rdma_entry) {
   1874		ibdev_dbg(&dev->ibdev,
    1875			  "pgoff[%#lx] does not have a valid entry\n",
   1876			  vma->vm_pgoff);
   1877		atomic64_inc(&dev->stats.mmap_err);
   1878		return -EINVAL;
   1879	}
   1880	entry = to_emmap(rdma_entry);
   1881
   1882	ibdev_dbg(&dev->ibdev,
   1883		  "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
   1884		  entry->address, rdma_entry->npages * PAGE_SIZE,
   1885		  entry->mmap_flag);
   1886
   1887	pfn = entry->address >> PAGE_SHIFT;
   1888	switch (entry->mmap_flag) {
   1889	case EFA_MMAP_IO_NC:
   1890		err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn,
   1891					entry->rdma_entry.npages * PAGE_SIZE,
   1892					pgprot_noncached(vma->vm_page_prot),
   1893					rdma_entry);
   1894		break;
   1895	case EFA_MMAP_IO_WC:
   1896		err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn,
   1897					entry->rdma_entry.npages * PAGE_SIZE,
   1898					pgprot_writecombine(vma->vm_page_prot),
   1899					rdma_entry);
   1900		break;
   1901	case EFA_MMAP_DMA_PAGE:
   1902		for (va = vma->vm_start; va < vma->vm_end;
   1903		     va += PAGE_SIZE, pfn++) {
   1904			err = vm_insert_page(vma, va, pfn_to_page(pfn));
   1905			if (err)
   1906				break;
   1907		}
   1908		break;
   1909	default:
   1910		err = -EINVAL;
   1911	}
   1912
   1913	if (err) {
   1914		ibdev_dbg(
   1915			&dev->ibdev,
   1916			"Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
   1917			entry->address, rdma_entry->npages * PAGE_SIZE,
   1918			entry->mmap_flag, err);
   1919		atomic64_inc(&dev->stats.mmap_err);
   1920	}
   1921
   1922	rdma_user_mmap_entry_put(rdma_entry);
   1923	return err;
   1924}
   1925
   1926int efa_mmap(struct ib_ucontext *ibucontext,
   1927	     struct vm_area_struct *vma)
   1928{
   1929	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
   1930	struct efa_dev *dev = to_edev(ibucontext->device);
   1931	size_t length = vma->vm_end - vma->vm_start;
   1932
   1933	ibdev_dbg(&dev->ibdev,
   1934		  "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n",
   1935		  vma->vm_start, vma->vm_end, length, vma->vm_pgoff);
   1936
   1937	return __efa_mmap(dev, ucontext, vma);
   1938}
   1939
   1940static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah)
   1941{
   1942	struct efa_com_destroy_ah_params params = {
   1943		.ah = ah->ah,
   1944		.pdn = to_epd(ah->ibah.pd)->pdn,
   1945	};
   1946
   1947	return efa_com_destroy_ah(&dev->edev, &params);
   1948}
   1949
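        /*
         * Create an address handle from the GRH destination GID and return
         * the device AH number to userspace when a response buffer is given.
         */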
   1950int efa_create_ah(struct ib_ah *ibah,
   1951		  struct rdma_ah_init_attr *init_attr,
   1952		  struct ib_udata *udata)
   1953{
   1954	struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
   1955	struct efa_dev *dev = to_edev(ibah->device);
   1956	struct efa_com_create_ah_params params = {};
   1957	struct efa_ibv_create_ah_resp resp = {};
   1958	struct efa_com_create_ah_result result;
   1959	struct efa_ah *ah = to_eah(ibah);
   1960	int err;
   1961
   1962	if (!(init_attr->flags & RDMA_CREATE_AH_SLEEPABLE)) {
   1963		ibdev_dbg(&dev->ibdev,
   1964			  "Create address handle is not supported in atomic context\n");
   1965		err = -EOPNOTSUPP;
   1966		goto err_out;
   1967	}
   1968
   1969	if (udata->inlen &&
   1970	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
   1971		ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
   1972		err = -EINVAL;
   1973		goto err_out;
   1974	}
   1975
   1976	memcpy(params.dest_addr, ah_attr->grh.dgid.raw,
   1977	       sizeof(params.dest_addr));
   1978	params.pdn = to_epd(ibah->pd)->pdn;
   1979	err = efa_com_create_ah(&dev->edev, &params, &result);
   1980	if (err)
   1981		goto err_out;
   1982
   1983	memcpy(ah->id, ah_attr->grh.dgid.raw, sizeof(ah->id));
   1984	ah->ah = result.ah;
   1985
   1986	resp.efa_address_handle = result.ah;
   1987
   1988	if (udata->outlen) {
   1989		err = ib_copy_to_udata(udata, &resp,
   1990				       min(sizeof(resp), udata->outlen));
   1991		if (err) {
   1992			ibdev_dbg(&dev->ibdev,
   1993				  "Failed to copy udata for create_ah response\n");
   1994			goto err_destroy_ah;
   1995		}
   1996	}
   1997	ibdev_dbg(&dev->ibdev, "Created ah[%d]\n", ah->ah);
   1998
   1999	return 0;
   2000
   2001err_destroy_ah:
   2002	efa_ah_destroy(dev, ah);
   2003err_out:
   2004	atomic64_inc(&dev->stats.create_ah_err);
   2005	return err;
   2006}
   2007
   2008int efa_destroy_ah(struct ib_ah *ibah, u32 flags)
   2009{
   2010	struct efa_dev *dev = to_edev(ibah->pd->device);
   2011	struct efa_ah *ah = to_eah(ibah);
   2012
   2013	ibdev_dbg(&dev->ibdev, "Destroy ah[%d]\n", ah->ah);
   2014
   2015	if (!(flags & RDMA_DESTROY_AH_SLEEPABLE)) {
   2016		ibdev_dbg(&dev->ibdev,
   2017			  "Destroy address handle is not supported in atomic context\n");
   2018		return -EOPNOTSUPP;
   2019	}
   2020
   2021	efa_ah_destroy(dev, ah);
   2022	return 0;
   2023}
   2024
   2025struct rdma_hw_stats *efa_alloc_hw_port_stats(struct ib_device *ibdev,
   2026					      u32 port_num)
   2027{
   2028	return rdma_alloc_hw_stats_struct(efa_port_stats_descs,
   2029					  ARRAY_SIZE(efa_port_stats_descs),
   2030					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
   2031}
   2032
   2033struct rdma_hw_stats *efa_alloc_hw_device_stats(struct ib_device *ibdev)
   2034{
   2035	return rdma_alloc_hw_stats_struct(efa_device_stats_descs,
   2036					  ARRAY_SIZE(efa_device_stats_descs),
   2037					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
   2038}
   2039
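        /* Fill device-wide stats from admin-queue and software error counters. */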
   2040static int efa_fill_device_stats(struct efa_dev *dev,
   2041				 struct rdma_hw_stats *stats)
   2042{
   2043	struct efa_com_stats_admin *as = &dev->edev.aq.stats;
   2044	struct efa_stats *s = &dev->stats;
   2045
   2046	stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd);
   2047	stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd);
   2048	stats->value[EFA_CMDS_ERR] = atomic64_read(&as->cmd_err);
   2049	stats->value[EFA_NO_COMPLETION_CMDS] = atomic64_read(&as->no_completion);
   2050
   2051	stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd);
   2052	stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->alloc_pd_err);
   2053	stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->create_qp_err);
   2054	stats->value[EFA_CREATE_CQ_ERR] = atomic64_read(&s->create_cq_err);
   2055	stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->reg_mr_err);
   2056	stats->value[EFA_ALLOC_UCONTEXT_ERR] =
   2057		atomic64_read(&s->alloc_ucontext_err);
   2058	stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->create_ah_err);
   2059	stats->value[EFA_MMAP_ERR] = atomic64_read(&s->mmap_err);
   2060
   2061	return ARRAY_SIZE(efa_device_stats_descs);
   2062}
   2063
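        /*
         * Fill per-port stats by issuing three admin GET_STATS queries:
         * basic, messages and RDMA-read counters.
         */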
   2064static int efa_fill_port_stats(struct efa_dev *dev, struct rdma_hw_stats *stats,
   2065			       u32 port_num)
   2066{
   2067	struct efa_com_get_stats_params params = {};
   2068	union efa_com_get_stats_result result;
   2069	struct efa_com_rdma_read_stats *rrs;
   2070	struct efa_com_messages_stats *ms;
   2071	struct efa_com_basic_stats *bs;
   2072	int err;
   2073
   2074	params.scope = EFA_ADMIN_GET_STATS_SCOPE_ALL;
   2075	params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC;
   2076
   2077	err = efa_com_get_stats(&dev->edev, &params, &result);
   2078	if (err)
   2079		return err;
   2080
   2081	bs = &result.basic_stats;
   2082	stats->value[EFA_TX_BYTES] = bs->tx_bytes;
   2083	stats->value[EFA_TX_PKTS] = bs->tx_pkts;
   2084	stats->value[EFA_RX_BYTES] = bs->rx_bytes;
   2085	stats->value[EFA_RX_PKTS] = bs->rx_pkts;
   2086	stats->value[EFA_RX_DROPS] = bs->rx_drops;
   2087
   2088	params.type = EFA_ADMIN_GET_STATS_TYPE_MESSAGES;
   2089	err = efa_com_get_stats(&dev->edev, &params, &result);
   2090	if (err)
   2091		return err;
   2092
   2093	ms = &result.messages_stats;
   2094	stats->value[EFA_SEND_BYTES] = ms->send_bytes;
   2095	stats->value[EFA_SEND_WRS] = ms->send_wrs;
   2096	stats->value[EFA_RECV_BYTES] = ms->recv_bytes;
   2097	stats->value[EFA_RECV_WRS] = ms->recv_wrs;
   2098
   2099	params.type = EFA_ADMIN_GET_STATS_TYPE_RDMA_READ;
   2100	err = efa_com_get_stats(&dev->edev, &params, &result);
   2101	if (err)
   2102		return err;
   2103
   2104	rrs = &result.rdma_read_stats;
   2105	stats->value[EFA_RDMA_READ_WRS] = rrs->read_wrs;
   2106	stats->value[EFA_RDMA_READ_BYTES] = rrs->read_bytes;
   2107	stats->value[EFA_RDMA_READ_WR_ERR] = rrs->read_wr_err;
   2108	stats->value[EFA_RDMA_READ_RESP_BYTES] = rrs->read_resp_bytes;
   2109
   2110	return ARRAY_SIZE(efa_port_stats_descs);
   2111}
   2112
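        /* A zero port_num selects device-wide stats, otherwise per-port stats. */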
   2113int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
   2114		     u32 port_num, int index)
   2115{
   2116	if (port_num)
   2117		return efa_fill_port_stats(to_edev(ibdev), stats, port_num);
   2118	else
   2119		return efa_fill_device_stats(to_edev(ibdev), stats);
   2120}
   2121
   2122enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
   2123					 u32 port_num)
   2124{
   2125	return IB_LINK_LAYER_UNSPECIFIED;
   2126}
   2127