cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

wr.c (34739B)


      1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
      2/*
      3 * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
      4 */
      5
      6#include <linux/gfp.h>
      7#include <linux/mlx5/qp.h>
      8#include <linux/mlx5/driver.h>
      9#include "wr.h"
     10#include "umr.h"
     11
     12static const u32 mlx5_ib_opcode[] = {
     13	[IB_WR_SEND]				= MLX5_OPCODE_SEND,
     14	[IB_WR_LSO]				= MLX5_OPCODE_LSO,
     15	[IB_WR_SEND_WITH_IMM]			= MLX5_OPCODE_SEND_IMM,
     16	[IB_WR_RDMA_WRITE]			= MLX5_OPCODE_RDMA_WRITE,
     17	[IB_WR_RDMA_WRITE_WITH_IMM]		= MLX5_OPCODE_RDMA_WRITE_IMM,
     18	[IB_WR_RDMA_READ]			= MLX5_OPCODE_RDMA_READ,
     19	[IB_WR_ATOMIC_CMP_AND_SWP]		= MLX5_OPCODE_ATOMIC_CS,
     20	[IB_WR_ATOMIC_FETCH_AND_ADD]		= MLX5_OPCODE_ATOMIC_FA,
     21	[IB_WR_SEND_WITH_INV]			= MLX5_OPCODE_SEND_INVAL,
     22	[IB_WR_LOCAL_INV]			= MLX5_OPCODE_UMR,
     23	[IB_WR_REG_MR]				= MLX5_OPCODE_UMR,
     24	[IB_WR_MASKED_ATOMIC_CMP_AND_SWP]	= MLX5_OPCODE_ATOMIC_MASKED_CS,
     25	[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD]	= MLX5_OPCODE_ATOMIC_MASKED_FA,
     26	[MLX5_IB_WR_UMR]			= MLX5_OPCODE_UMR,
     27};
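/*
 * mlx5_ib_opcode[] maps IB verbs work request opcodes (IB_WR_*) to the
 * hardware opcodes that are written into the WQE control segment.
 * mlx5_ib_post_send() below bounds-checks wr->opcode against
 * ARRAY_SIZE(mlx5_ib_opcode) and hands the looked-up opcode to
 * mlx5r_finish_wqe().  Note that IB_WR_LOCAL_INV, IB_WR_REG_MR and
 * MLX5_IB_WR_UMR all map to MLX5_OPCODE_UMR; the handlers below record
 * the verbs opcode in qp->sq.wr_data[] so they can be told apart later.
 */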
     28
     29int mlx5r_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
     30{
     31	struct mlx5_ib_cq *cq;
     32	unsigned int cur;
     33
     34	cur = wq->head - wq->tail;
     35	if (likely(cur + nreq < wq->max_post))
     36		return 0;
     37
     38	cq = to_mcq(ib_cq);
     39	spin_lock(&cq->lock);
     40	cur = wq->head - wq->tail;
     41	spin_unlock(&cq->lock);
     42
     43	return cur + nreq >= wq->max_post;
     44}
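/*
 * Queue occupancy above is wq->head - wq->tail in unsigned arithmetic, so
 * it stays correct when the counters wrap.  The first check is a lock-free
 * fast path; only when the queue looks full is cq->lock taken for a stable
 * re-read (the completion path advances wq->tail under that lock).
 * E.g. with max_post = 256, head = 300 and tail = 50, cur = 250, so
 * posting nreq = 8 more WQEs (258 >= 256) overflows and the function
 * returns nonzero.
 */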
     45
     46static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
     47					  u64 remote_addr, u32 rkey)
     48{
     49	rseg->raddr    = cpu_to_be64(remote_addr);
     50	rseg->rkey     = cpu_to_be32(rkey);
     51	rseg->reserved = 0;
     52}
     53
     54static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
     55			void **seg, int *size, void **cur_edge)
     56{
     57	struct mlx5_wqe_eth_seg *eseg = *seg;
     58
     59	memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg));
     60
     61	if (wr->send_flags & IB_SEND_IP_CSUM)
     62		eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM |
     63				 MLX5_ETH_WQE_L4_CSUM;
     64
     65	if (wr->opcode == IB_WR_LSO) {
     66		struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr);
     67		size_t left, copysz;
     68		void *pdata = ud_wr->header;
     69		size_t stride;
     70
     71		left = ud_wr->hlen;
     72		eseg->mss = cpu_to_be16(ud_wr->mss);
     73		eseg->inline_hdr.sz = cpu_to_be16(left);
     74
      75	/* mlx5r_memcpy_send_wqe should get a 16B-aligned address. Hence,
      76	 * we first copy up to the current edge and then, if needed,
      77	 * continue with mlx5r_memcpy_send_wqe.
      78	 */
     79		copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start,
     80			       left);
     81		memcpy(eseg->inline_hdr.start, pdata, copysz);
     82		stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) -
     83			       sizeof(eseg->inline_hdr.start) + copysz, 16);
     84		*size += stride / 16;
     85		*seg += stride;
     86
     87		if (copysz < left) {
     88			handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
     89			left -= copysz;
     90			pdata += copysz;
     91			mlx5r_memcpy_send_wqe(&qp->sq, cur_edge, seg, size,
     92					      pdata, left);
     93		}
     94
     95		return;
     96	}
     97
     98	*seg += sizeof(struct mlx5_wqe_eth_seg);
     99	*size += sizeof(struct mlx5_wqe_eth_seg) / 16;
    100}
    101
    102static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
    103			     const struct ib_send_wr *wr)
    104{
    105	memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av));
    106	dseg->av.dqp_dct =
    107		cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV);
    108	dseg->av.key.qkey.qkey = cpu_to_be32(ud_wr(wr)->remote_qkey);
    109}
    110
    111static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
    112{
    113	dseg->byte_count = cpu_to_be32(sg->length);
    114	dseg->lkey       = cpu_to_be32(sg->lkey);
    115	dseg->addr       = cpu_to_be64(sg->addr);
    116}
    117
    118static __be64 frwr_mkey_mask(bool atomic)
    119{
    120	u64 result;
    121
    122	result = MLX5_MKEY_MASK_LEN		|
    123		MLX5_MKEY_MASK_PAGE_SIZE	|
    124		MLX5_MKEY_MASK_START_ADDR	|
    125		MLX5_MKEY_MASK_EN_RINVAL	|
    126		MLX5_MKEY_MASK_KEY		|
    127		MLX5_MKEY_MASK_LR		|
    128		MLX5_MKEY_MASK_LW		|
    129		MLX5_MKEY_MASK_RR		|
    130		MLX5_MKEY_MASK_RW		|
    131		MLX5_MKEY_MASK_SMALL_FENCE	|
    132		MLX5_MKEY_MASK_FREE;
    133
    134	if (atomic)
    135		result |= MLX5_MKEY_MASK_A;
    136
    137	return cpu_to_be64(result);
    138}
    139
    140static __be64 sig_mkey_mask(void)
    141{
    142	u64 result;
    143
    144	result = MLX5_MKEY_MASK_LEN		|
    145		MLX5_MKEY_MASK_PAGE_SIZE	|
    146		MLX5_MKEY_MASK_START_ADDR	|
    147		MLX5_MKEY_MASK_EN_SIGERR	|
    148		MLX5_MKEY_MASK_EN_RINVAL	|
    149		MLX5_MKEY_MASK_KEY		|
    150		MLX5_MKEY_MASK_LR		|
    151		MLX5_MKEY_MASK_LW		|
    152		MLX5_MKEY_MASK_RR		|
    153		MLX5_MKEY_MASK_RW		|
    154		MLX5_MKEY_MASK_SMALL_FENCE	|
    155		MLX5_MKEY_MASK_FREE		|
    156		MLX5_MKEY_MASK_BSF_EN;
    157
    158	return cpu_to_be64(result);
    159}
    160
    161static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
    162			    struct mlx5_ib_mr *mr, u8 flags, bool atomic)
    163{
    164	int size = (mr->mmkey.ndescs + mr->meta_ndescs) * mr->desc_size;
    165
    166	memset(umr, 0, sizeof(*umr));
    167
    168	umr->flags = flags;
    169	umr->xlt_octowords = cpu_to_be16(mlx5r_umr_get_xlt_octo(size));
    170	umr->mkey_mask = frwr_mkey_mask(atomic);
    171}
    172
    173static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr)
    174{
    175	memset(umr, 0, sizeof(*umr));
    176	umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
    177	umr->flags = MLX5_UMR_INLINE;
    178}
    179
    180static u8 get_umr_flags(int acc)
    181{
    182	return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC       : 0) |
    183	       (acc & IB_ACCESS_REMOTE_WRITE  ? MLX5_PERM_REMOTE_WRITE : 0) |
    184	       (acc & IB_ACCESS_REMOTE_READ   ? MLX5_PERM_REMOTE_READ  : 0) |
    185	       (acc & IB_ACCESS_LOCAL_WRITE   ? MLX5_PERM_LOCAL_WRITE  : 0) |
    186		MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN;
    187}
    188
    189static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg,
    190			     struct mlx5_ib_mr *mr,
    191			     u32 key, int access)
    192{
    193	int ndescs = ALIGN(mr->mmkey.ndescs + mr->meta_ndescs, 8) >> 1;
    194
    195	memset(seg, 0, sizeof(*seg));
    196
    197	if (mr->access_mode == MLX5_MKC_ACCESS_MODE_MTT)
    198		seg->log2_page_size = ilog2(mr->ibmr.page_size);
    199	else if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
    200		/* KLMs take twice the size of MTTs */
    201		ndescs *= 2;
    202
    203	seg->flags = get_umr_flags(access) | mr->access_mode;
    204	seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00);
    205	seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
    206	seg->start_addr = cpu_to_be64(mr->ibmr.iova);
    207	seg->len = cpu_to_be64(mr->ibmr.length);
    208	seg->xlt_oct_size = cpu_to_be32(ndescs);
    209}
    210
    211static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
    212{
    213	memset(seg, 0, sizeof(*seg));
    214	seg->status = MLX5_MKEY_STATUS_FREE;
    215}
    216
    217static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
    218			     struct mlx5_ib_mr *mr,
    219			     struct mlx5_ib_pd *pd)
    220{
    221	int bcount = mr->desc_size * (mr->mmkey.ndescs + mr->meta_ndescs);
    222
    223	dseg->addr = cpu_to_be64(mr->desc_map);
    224	dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64));
    225	dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey);
    226}
    227
    228static __be32 send_ieth(const struct ib_send_wr *wr)
    229{
    230	switch (wr->opcode) {
    231	case IB_WR_SEND_WITH_IMM:
    232	case IB_WR_RDMA_WRITE_WITH_IMM:
    233		return wr->ex.imm_data;
    234
    235	case IB_WR_SEND_WITH_INV:
    236		return cpu_to_be32(wr->ex.invalidate_rkey);
    237
    238	default:
    239		return 0;
    240	}
    241}
    242
    243static u8 calc_sig(void *wqe, int size)
    244{
    245	u8 *p = wqe;
    246	u8 res = 0;
    247	int i;
    248
    249	for (i = 0; i < size; i++)
    250		res ^= p[i];
    251
    252	return ~res;
    253}
    254
    255static u8 wq_sig(void *wqe)
    256{
    257	return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4);
    258}
    259
    260static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
    261			    void **wqe, int *wqe_sz, void **cur_edge)
    262{
    263	struct mlx5_wqe_inline_seg *seg;
    264	size_t offset;
    265	int inl = 0;
    266	int i;
    267
    268	seg = *wqe;
    269	*wqe += sizeof(*seg);
    270	offset = sizeof(*seg);
    271
    272	for (i = 0; i < wr->num_sge; i++) {
    273		size_t len  = wr->sg_list[i].length;
    274		void *addr = (void *)(unsigned long)(wr->sg_list[i].addr);
    275
    276		inl += len;
    277
    278		if (unlikely(inl > qp->max_inline_data))
    279			return -ENOMEM;
    280
    281		while (likely(len)) {
    282			size_t leftlen;
    283			size_t copysz;
    284
    285			handle_post_send_edge(&qp->sq, wqe,
    286					      *wqe_sz + (offset >> 4),
    287					      cur_edge);
    288
    289			leftlen = *cur_edge - *wqe;
    290			copysz = min_t(size_t, leftlen, len);
    291
    292			memcpy(*wqe, addr, copysz);
    293			len -= copysz;
    294			addr += copysz;
    295			*wqe += copysz;
    296			offset += copysz;
    297		}
    298	}
    299
    300	seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
    301
    302	*wqe_sz +=  ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
    303
    304	return 0;
    305}
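/*
 * The inline segment built above is a 4-byte byte_count header
 * (seg->byte_count, tagged with MLX5_INLINE_SEG) followed by the
 * scatter/gather payload copied byte-for-byte, possibly wrapping across
 * the SQ edge via handle_post_send_edge().  The WQE size is advanced in
 * 16-byte octowords: e.g. for inl = 20 payload bytes,
 * ALIGN(20 + 4, 16) / 16 = 2 octowords are added to *wqe_sz.
 */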
    306
    307static u16 prot_field_size(enum ib_signature_type type)
    308{
    309	switch (type) {
    310	case IB_SIG_TYPE_T10_DIF:
    311		return MLX5_DIF_SIZE;
    312	default:
    313		return 0;
    314	}
    315}
    316
    317static u8 bs_selector(int block_size)
    318{
    319	switch (block_size) {
    320	case 512:	    return 0x1;
    321	case 520:	    return 0x2;
    322	case 4096:	    return 0x3;
    323	case 4160:	    return 0x4;
    324	case 1073741824:    return 0x5;
    325	default:	    return 0;
    326	}
    327}
    328
    329static void mlx5_fill_inl_bsf(struct ib_sig_domain *domain,
    330			      struct mlx5_bsf_inl *inl)
    331{
    332	/* Valid inline section and allow BSF refresh */
    333	inl->vld_refresh = cpu_to_be16(MLX5_BSF_INL_VALID |
    334				       MLX5_BSF_REFRESH_DIF);
    335	inl->dif_apptag = cpu_to_be16(domain->sig.dif.app_tag);
    336	inl->dif_reftag = cpu_to_be32(domain->sig.dif.ref_tag);
    337	/* repeating block */
    338	inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK;
    339	inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ?
    340			MLX5_DIF_CRC : MLX5_DIF_IPCS;
    341
    342	if (domain->sig.dif.ref_remap)
    343		inl->dif_inc_ref_guard_check |= MLX5_BSF_INC_REFTAG;
    344
    345	if (domain->sig.dif.app_escape) {
    346		if (domain->sig.dif.ref_escape)
    347			inl->dif_inc_ref_guard_check |= MLX5_BSF_APPREF_ESCAPE;
    348		else
    349			inl->dif_inc_ref_guard_check |= MLX5_BSF_APPTAG_ESCAPE;
    350	}
    351
    352	inl->dif_app_bitmask_check =
    353		cpu_to_be16(domain->sig.dif.apptag_check_mask);
    354}
    355
    356static int mlx5_set_bsf(struct ib_mr *sig_mr,
    357			struct ib_sig_attrs *sig_attrs,
    358			struct mlx5_bsf *bsf, u32 data_size)
    359{
    360	struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig;
    361	struct mlx5_bsf_basic *basic = &bsf->basic;
    362	struct ib_sig_domain *mem = &sig_attrs->mem;
    363	struct ib_sig_domain *wire = &sig_attrs->wire;
    364
    365	memset(bsf, 0, sizeof(*bsf));
    366
    367	/* Basic + Extended + Inline */
    368	basic->bsf_size_sbs = 1 << 7;
    369	/* Input domain check byte mask */
    370	basic->check_byte_mask = sig_attrs->check_mask;
    371	basic->raw_data_size = cpu_to_be32(data_size);
    372
    373	/* Memory domain */
    374	switch (sig_attrs->mem.sig_type) {
    375	case IB_SIG_TYPE_NONE:
    376		break;
    377	case IB_SIG_TYPE_T10_DIF:
    378		basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval);
    379		basic->m_bfs_psv = cpu_to_be32(msig->psv_memory.psv_idx);
    380		mlx5_fill_inl_bsf(mem, &bsf->m_inl);
    381		break;
    382	default:
    383		return -EINVAL;
    384	}
    385
    386	/* Wire domain */
    387	switch (sig_attrs->wire.sig_type) {
    388	case IB_SIG_TYPE_NONE:
    389		break;
    390	case IB_SIG_TYPE_T10_DIF:
    391		if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval &&
    392		    mem->sig_type == wire->sig_type) {
    393			/* Same block structure */
    394			basic->bsf_size_sbs |= 1 << 4;
    395			if (mem->sig.dif.bg_type == wire->sig.dif.bg_type)
    396				basic->wire.copy_byte_mask |= MLX5_CPY_GRD_MASK;
    397			if (mem->sig.dif.app_tag == wire->sig.dif.app_tag)
    398				basic->wire.copy_byte_mask |= MLX5_CPY_APP_MASK;
    399			if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag)
    400				basic->wire.copy_byte_mask |= MLX5_CPY_REF_MASK;
    401		} else
    402			basic->wire.bs_selector =
    403				bs_selector(wire->sig.dif.pi_interval);
    404
    405		basic->w_bfs_psv = cpu_to_be32(msig->psv_wire.psv_idx);
    406		mlx5_fill_inl_bsf(wire, &bsf->w_inl);
    407		break;
    408	default:
    409		return -EINVAL;
    410	}
    411
    412	return 0;
    413}
    414
    415
    416static int set_sig_data_segment(const struct ib_send_wr *send_wr,
    417				struct ib_mr *sig_mr,
    418				struct ib_sig_attrs *sig_attrs,
    419				struct mlx5_ib_qp *qp, void **seg, int *size,
    420				void **cur_edge)
    421{
    422	struct mlx5_bsf *bsf;
    423	u32 data_len;
    424	u32 data_key;
    425	u64 data_va;
    426	u32 prot_len = 0;
    427	u32 prot_key = 0;
    428	u64 prot_va = 0;
    429	bool prot = false;
    430	int ret;
    431	int wqe_size;
    432	struct mlx5_ib_mr *mr = to_mmr(sig_mr);
    433	struct mlx5_ib_mr *pi_mr = mr->pi_mr;
    434
    435	data_len = pi_mr->data_length;
    436	data_key = pi_mr->ibmr.lkey;
    437	data_va = pi_mr->data_iova;
    438	if (pi_mr->meta_ndescs) {
    439		prot_len = pi_mr->meta_length;
    440		prot_key = pi_mr->ibmr.lkey;
    441		prot_va = pi_mr->pi_iova;
    442		prot = true;
    443	}
    444
    445	if (!prot || (data_key == prot_key && data_va == prot_va &&
    446		      data_len == prot_len)) {
    447		/**
     448		 * The source domain doesn't contain signature information,
     449		 * or data and protection are interleaved in memory, so we
     450		 * only need to construct:
    451		 *                  ------------------
    452		 *                 |     data_klm     |
    453		 *                  ------------------
    454		 *                 |       BSF        |
    455		 *                  ------------------
    456		 **/
    457		struct mlx5_klm *data_klm = *seg;
    458
    459		data_klm->bcount = cpu_to_be32(data_len);
    460		data_klm->key = cpu_to_be32(data_key);
    461		data_klm->va = cpu_to_be64(data_va);
    462		wqe_size = ALIGN(sizeof(*data_klm), 64);
    463	} else {
    464		/**
     465		 * The source domain contains signature information,
     466		 * so we need to construct a strided block format:
    467		 *               ---------------------------
    468		 *              |     stride_block_ctrl     |
    469		 *               ---------------------------
    470		 *              |          data_klm         |
    471		 *               ---------------------------
    472		 *              |          prot_klm         |
    473		 *               ---------------------------
    474		 *              |             BSF           |
    475		 *               ---------------------------
    476		 **/
    477		struct mlx5_stride_block_ctrl_seg *sblock_ctrl;
    478		struct mlx5_stride_block_entry *data_sentry;
    479		struct mlx5_stride_block_entry *prot_sentry;
    480		u16 block_size = sig_attrs->mem.sig.dif.pi_interval;
    481		int prot_size;
    482
    483		sblock_ctrl = *seg;
    484		data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl);
    485		prot_sentry = (void *)data_sentry + sizeof(*data_sentry);
    486
    487		prot_size = prot_field_size(sig_attrs->mem.sig_type);
    488		if (!prot_size) {
    489			pr_err("Bad block size given: %u\n", block_size);
    490			return -EINVAL;
    491		}
    492		sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size +
    493							    prot_size);
    494		sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP);
    495		sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size);
    496		sblock_ctrl->num_entries = cpu_to_be16(2);
    497
    498		data_sentry->bcount = cpu_to_be16(block_size);
    499		data_sentry->key = cpu_to_be32(data_key);
    500		data_sentry->va = cpu_to_be64(data_va);
    501		data_sentry->stride = cpu_to_be16(block_size);
    502
    503		prot_sentry->bcount = cpu_to_be16(prot_size);
    504		prot_sentry->key = cpu_to_be32(prot_key);
    505		prot_sentry->va = cpu_to_be64(prot_va);
    506		prot_sentry->stride = cpu_to_be16(prot_size);
    507
    508		wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) +
    509				 sizeof(*prot_sentry), 64);
    510	}
    511
    512	*seg += wqe_size;
    513	*size += wqe_size / 16;
    514	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
    515
    516	bsf = *seg;
    517	ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len);
    518	if (ret)
    519		return -EINVAL;
    520
    521	*seg += sizeof(*bsf);
    522	*size += sizeof(*bsf) / 16;
    523	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
    524
    525	return 0;
    526}
    527
    528static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
    529				 struct ib_mr *sig_mr, int access_flags,
    530				 u32 size, u32 length, u32 pdn)
    531{
    532	u32 sig_key = sig_mr->rkey;
    533	u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1;
    534
    535	memset(seg, 0, sizeof(*seg));
    536
    537	seg->flags = get_umr_flags(access_flags) | MLX5_MKC_ACCESS_MODE_KLMS;
    538	seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
    539	seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
    540				    MLX5_MKEY_BSF_EN | pdn);
    541	seg->len = cpu_to_be64(length);
    542	seg->xlt_oct_size = cpu_to_be32(mlx5r_umr_get_xlt_octo(size));
    543	seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
    544}
    545
    546static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
    547				u32 size)
    548{
    549	memset(umr, 0, sizeof(*umr));
    550
    551	umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE;
    552	umr->xlt_octowords = cpu_to_be16(mlx5r_umr_get_xlt_octo(size));
    553	umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE);
    554	umr->mkey_mask = sig_mkey_mask();
    555}
    556
    557static int set_pi_umr_wr(const struct ib_send_wr *send_wr,
    558			 struct mlx5_ib_qp *qp, void **seg, int *size,
    559			 void **cur_edge)
    560{
    561	const struct ib_reg_wr *wr = reg_wr(send_wr);
    562	struct mlx5_ib_mr *sig_mr = to_mmr(wr->mr);
    563	struct mlx5_ib_mr *pi_mr = sig_mr->pi_mr;
    564	struct ib_sig_attrs *sig_attrs = sig_mr->ibmr.sig_attrs;
    565	u32 pdn = to_mpd(qp->ibqp.pd)->pdn;
    566	u32 xlt_size;
    567	int region_len, ret;
    568
    569	if (unlikely(send_wr->num_sge != 0) ||
    570	    unlikely(wr->access & IB_ACCESS_REMOTE_ATOMIC) ||
    571	    unlikely(!sig_mr->sig) || unlikely(!qp->ibqp.integrity_en) ||
    572	    unlikely(!sig_mr->sig->sig_status_checked))
    573		return -EINVAL;
    574
    575	/* length of the protected region, data + protection */
    576	region_len = pi_mr->ibmr.length;
    577
    578	/**
    579	 * KLM octoword size - if protection was provided
     580	 * then we use the strided block format (3 octowords),
     581	 * else we use a single KLM (1 octoword)
    582	 **/
    583	if (sig_attrs->mem.sig_type != IB_SIG_TYPE_NONE)
    584		xlt_size = 0x30;
    585	else
    586		xlt_size = sizeof(struct mlx5_klm);
    587
    588	set_sig_umr_segment(*seg, xlt_size);
    589	*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
    590	*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
    591	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
    592
    593	set_sig_mkey_segment(*seg, wr->mr, wr->access, xlt_size, region_len,
    594			     pdn);
    595	*seg += sizeof(struct mlx5_mkey_seg);
    596	*size += sizeof(struct mlx5_mkey_seg) / 16;
    597	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
    598
    599	ret = set_sig_data_segment(send_wr, wr->mr, sig_attrs, qp, seg, size,
    600				   cur_edge);
    601	if (ret)
    602		return ret;
    603
    604	sig_mr->sig->sig_status_checked = false;
    605	return 0;
    606}
    607
    608static int set_psv_wr(struct ib_sig_domain *domain,
    609		      u32 psv_idx, void **seg, int *size)
    610{
    611	struct mlx5_seg_set_psv *psv_seg = *seg;
    612
    613	memset(psv_seg, 0, sizeof(*psv_seg));
    614	psv_seg->psv_num = cpu_to_be32(psv_idx);
    615	switch (domain->sig_type) {
    616	case IB_SIG_TYPE_NONE:
    617		break;
    618	case IB_SIG_TYPE_T10_DIF:
    619		psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 |
    620						     domain->sig.dif.app_tag);
    621		psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag);
    622		break;
    623	default:
    624		pr_err("Bad signature type (%d) is given.\n",
    625		       domain->sig_type);
    626		return -EINVAL;
    627	}
    628
    629	*seg += sizeof(*psv_seg);
    630	*size += sizeof(*psv_seg) / 16;
    631
    632	return 0;
    633}
    634
    635static int set_reg_wr(struct mlx5_ib_qp *qp,
    636		      const struct ib_reg_wr *wr,
    637		      void **seg, int *size, void **cur_edge,
    638		      bool check_not_free)
    639{
    640	struct mlx5_ib_mr *mr = to_mmr(wr->mr);
    641	struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
    642	struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device);
    643	int mr_list_size = (mr->mmkey.ndescs + mr->meta_ndescs) * mr->desc_size;
    644	bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD;
    645	bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC;
    646	u8 flags = 0;
    647
    648	/* Matches access in mlx5_set_umr_free_mkey().
    649	 * Relaxed Ordering is set implicitly in mlx5_set_umr_free_mkey() and
    650	 * kernel ULPs are not aware of it, so we don't set it here.
    651	 */
    652	if (!mlx5r_umr_can_reconfig(dev, 0, wr->access)) {
    653		mlx5_ib_warn(
    654			to_mdev(qp->ibqp.device),
    655			"Fast update for MR access flags is not possible\n");
    656		return -EINVAL;
    657	}
    658
    659	if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
    660		mlx5_ib_warn(to_mdev(qp->ibqp.device),
    661			     "Invalid IB_SEND_INLINE send flag\n");
    662		return -EINVAL;
    663	}
    664
    665	if (check_not_free)
    666		flags |= MLX5_UMR_CHECK_NOT_FREE;
    667	if (umr_inline)
    668		flags |= MLX5_UMR_INLINE;
    669
    670	set_reg_umr_seg(*seg, mr, flags, atomic);
    671	*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
    672	*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
    673	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
    674
    675	set_reg_mkey_seg(*seg, mr, wr->key, wr->access);
    676	*seg += sizeof(struct mlx5_mkey_seg);
    677	*size += sizeof(struct mlx5_mkey_seg) / 16;
    678	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
    679
    680	if (umr_inline) {
    681		mlx5r_memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs,
    682				      mr_list_size);
    683		*size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4);
    684	} else {
    685		set_reg_data_seg(*seg, mr, pd);
    686		*seg += sizeof(struct mlx5_wqe_data_seg);
    687		*size += (sizeof(struct mlx5_wqe_data_seg) / 16);
    688	}
    689	return 0;
    690}
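/*
 * In set_reg_wr() above, small descriptor lists
 * (mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD) are copied inline into
 * the WQE with mlx5r_memcpy_send_wqe() and *size is then rounded up to a
 * whole send WQE basic block (MLX5_SEND_WQE_BB >> 4 octowords, i.e. 4
 * octowords for the usual 64-byte basic block).  Larger lists are
 * referenced indirectly through a data segment pointing at mr->desc_map.
 */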
    691
    692static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size,
    693			void **cur_edge)
    694{
    695	set_linv_umr_seg(*seg);
    696	*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
    697	*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
    698	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
    699	set_linv_mkey_seg(*seg);
    700	*seg += sizeof(struct mlx5_mkey_seg);
    701	*size += sizeof(struct mlx5_mkey_seg) / 16;
    702	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
    703}
    704
    705static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16)
    706{
    707	__be32 *p = NULL;
    708	int i, j;
    709
    710	pr_debug("dump WQE index %u:\n", idx);
    711	for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) {
    712		if ((i & 0xf) == 0) {
    713			p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx);
    714			pr_debug("WQBB at %p:\n", (void *)p);
    715			j = 0;
    716			idx = (idx + 1) & (qp->sq.wqe_cnt - 1);
    717		}
    718		pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]),
    719			 be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]),
    720			 be32_to_cpu(p[j + 3]));
    721	}
    722}
    723
    724int mlx5r_begin_wqe(struct mlx5_ib_qp *qp, void **seg,
    725		    struct mlx5_wqe_ctrl_seg **ctrl, unsigned int *idx,
    726		    int *size, void **cur_edge, int nreq, __be32 general_id,
    727		    bool send_signaled, bool solicited)
    728{
    729	if (unlikely(mlx5r_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
    730		return -ENOMEM;
    731
    732	*idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
    733	*seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx);
    734	*ctrl = *seg;
    735	*(uint32_t *)(*seg + 8) = 0;
    736	(*ctrl)->general_id = general_id;
    737	(*ctrl)->fm_ce_se = qp->sq_signal_bits |
    738			    (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) |
    739			    (solicited ? MLX5_WQE_CTRL_SOLICITED : 0);
    740
    741	*seg += sizeof(**ctrl);
    742	*size = sizeof(**ctrl) / 16;
    743	*cur_edge = qp->sq.cur_edge;
    744
    745	return 0;
    746}
    747
    748static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
    749		     struct mlx5_wqe_ctrl_seg **ctrl,
    750		     const struct ib_send_wr *wr, unsigned int *idx, int *size,
    751		     void **cur_edge, int nreq)
    752{
    753	return mlx5r_begin_wqe(qp, seg, ctrl, idx, size, cur_edge, nreq,
    754			       send_ieth(wr), wr->send_flags & IB_SEND_SIGNALED,
    755			       wr->send_flags & IB_SEND_SOLICITED);
    756}
    757
    758void mlx5r_finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl,
    759		      void *seg, u8 size, void *cur_edge, unsigned int idx,
    760		      u64 wr_id, int nreq, u8 fence, u32 mlx5_opcode)
    761{
    762	u8 opmod = 0;
    763
    764	ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
    765					     mlx5_opcode | ((u32)opmod << 24));
    766	ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8));
    767	ctrl->fm_ce_se |= fence;
    768	if (unlikely(qp->flags_en & MLX5_QP_FLAG_SIGNATURE))
    769		ctrl->signature = wq_sig(ctrl);
    770
    771	qp->sq.wrid[idx] = wr_id;
    772	qp->sq.w_list[idx].opcode = mlx5_opcode;
    773	qp->sq.wqe_head[idx] = qp->sq.head + nreq;
    774	qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
    775	qp->sq.w_list[idx].next = qp->sq.cur_post;
    776
    777	/* We save the edge which was possibly updated during the WQE
    778	 * construction, into SQ's cache.
    779	 */
    780	seg = PTR_ALIGN(seg, MLX5_SEND_WQE_BB);
    781	qp->sq.cur_edge = (unlikely(seg == cur_edge)) ?
    782			  get_sq_edge(&qp->sq, qp->sq.cur_post &
    783				      (qp->sq.wqe_cnt - 1)) :
    784			  cur_edge;
    785}
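/*
 * In mlx5r_finish_wqe() the WQE size is tracked in 16-byte octowords, so
 * DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB) converts it into 64-byte send
 * WQE basic blocks before advancing sq.cur_post: e.g. a 6-octoword WQE is
 * 96 bytes and consumes 2 basic blocks.  sq.cur_edge is re-fetched only
 * when WQE construction ended exactly on the cached edge.
 */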
    786
    787static void handle_rdma_op(const struct ib_send_wr *wr, void **seg, int *size)
    788{
    789	set_raddr_seg(*seg, rdma_wr(wr)->remote_addr, rdma_wr(wr)->rkey);
    790	*seg += sizeof(struct mlx5_wqe_raddr_seg);
    791	*size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
    792}
    793
    794static void handle_local_inv(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
    795			     struct mlx5_wqe_ctrl_seg **ctrl, void **seg,
    796			     int *size, void **cur_edge, unsigned int idx)
    797{
    798	qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
    799	(*ctrl)->imm = cpu_to_be32(wr->ex.invalidate_rkey);
    800	set_linv_wr(qp, seg, size, cur_edge);
    801}
    802
    803static int handle_reg_mr(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
    804			 struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size,
    805			 void **cur_edge, unsigned int idx)
    806{
    807	qp->sq.wr_data[idx] = IB_WR_REG_MR;
    808	(*ctrl)->imm = cpu_to_be32(reg_wr(wr)->key);
    809	return set_reg_wr(qp, reg_wr(wr), seg, size, cur_edge, true);
    810}
    811
    812static int handle_psv(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
    813		      const struct ib_send_wr *wr,
    814		      struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size,
    815		      void **cur_edge, unsigned int *idx, int nreq,
    816		      struct ib_sig_domain *domain, u32 psv_index,
    817		      u8 next_fence)
    818{
    819	int err;
    820
    821	/*
     822	 * SET_PSV WQEs are not signaled and are solicited on error.
    823	 */
    824	err = mlx5r_begin_wqe(qp, seg, ctrl, idx, size, cur_edge, nreq,
    825			      send_ieth(wr), false, true);
    826	if (unlikely(err)) {
    827		mlx5_ib_warn(dev, "\n");
    828		err = -ENOMEM;
    829		goto out;
    830	}
    831	err = set_psv_wr(domain, psv_index, seg, size);
    832	if (unlikely(err)) {
    833		mlx5_ib_warn(dev, "\n");
    834		goto out;
    835	}
    836	mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id,
    837			 nreq, next_fence, MLX5_OPCODE_SET_PSV);
    838
    839out:
    840	return err;
    841}
    842
    843static int handle_reg_mr_integrity(struct mlx5_ib_dev *dev,
    844				   struct mlx5_ib_qp *qp,
    845				   const struct ib_send_wr *wr,
    846				   struct mlx5_wqe_ctrl_seg **ctrl, void **seg,
    847				   int *size, void **cur_edge,
    848				   unsigned int *idx, int nreq, u8 fence,
    849				   u8 next_fence)
    850{
    851	struct mlx5_ib_mr *mr;
    852	struct mlx5_ib_mr *pi_mr;
    853	struct mlx5_ib_mr pa_pi_mr;
    854	struct ib_sig_attrs *sig_attrs;
    855	struct ib_reg_wr reg_pi_wr;
    856	int err;
    857
    858	qp->sq.wr_data[*idx] = IB_WR_REG_MR_INTEGRITY;
    859
    860	mr = to_mmr(reg_wr(wr)->mr);
    861	pi_mr = mr->pi_mr;
    862
    863	if (pi_mr) {
    864		memset(&reg_pi_wr, 0,
    865		       sizeof(struct ib_reg_wr));
    866
    867		reg_pi_wr.mr = &pi_mr->ibmr;
    868		reg_pi_wr.access = reg_wr(wr)->access;
    869		reg_pi_wr.key = pi_mr->ibmr.rkey;
    870
    871		(*ctrl)->imm = cpu_to_be32(reg_pi_wr.key);
    872		/* UMR for data + prot registration */
    873		err = set_reg_wr(qp, &reg_pi_wr, seg, size, cur_edge, false);
    874		if (unlikely(err))
    875			goto out;
    876
    877		mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx,
    878				 wr->wr_id, nreq, fence, MLX5_OPCODE_UMR);
    879
    880		err = begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq);
    881		if (unlikely(err)) {
    882			mlx5_ib_warn(dev, "\n");
    883			err = -ENOMEM;
    884			goto out;
    885		}
    886	} else {
    887		memset(&pa_pi_mr, 0, sizeof(struct mlx5_ib_mr));
    888		/* No UMR, use local_dma_lkey */
    889		pa_pi_mr.ibmr.lkey = mr->ibmr.pd->local_dma_lkey;
    890		pa_pi_mr.mmkey.ndescs = mr->mmkey.ndescs;
    891		pa_pi_mr.data_length = mr->data_length;
    892		pa_pi_mr.data_iova = mr->data_iova;
    893		if (mr->meta_ndescs) {
    894			pa_pi_mr.meta_ndescs = mr->meta_ndescs;
    895			pa_pi_mr.meta_length = mr->meta_length;
    896			pa_pi_mr.pi_iova = mr->pi_iova;
    897		}
    898
    899		pa_pi_mr.ibmr.length = mr->ibmr.length;
    900		mr->pi_mr = &pa_pi_mr;
    901	}
    902	(*ctrl)->imm = cpu_to_be32(mr->ibmr.rkey);
    903	/* UMR for sig MR */
    904	err = set_pi_umr_wr(wr, qp, seg, size, cur_edge);
    905	if (unlikely(err)) {
    906		mlx5_ib_warn(dev, "\n");
    907		goto out;
    908	}
    909	mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id,
    910			 nreq, fence, MLX5_OPCODE_UMR);
    911
    912	sig_attrs = mr->ibmr.sig_attrs;
    913	err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq,
    914			 &sig_attrs->mem, mr->sig->psv_memory.psv_idx,
    915			 next_fence);
    916	if (unlikely(err))
    917		goto out;
    918
    919	err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq,
    920			 &sig_attrs->wire, mr->sig->psv_wire.psv_idx,
    921			 next_fence);
    922	if (unlikely(err))
    923		goto out;
    924
    925	qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
    926
    927out:
    928	return err;
    929}
    930
    931static int handle_qpt_rc(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
    932			 const struct ib_send_wr *wr,
    933			 struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size,
    934			 void **cur_edge, unsigned int *idx, int nreq, u8 fence,
    935			 u8 next_fence, int *num_sge)
    936{
    937	int err = 0;
    938
    939	switch (wr->opcode) {
    940	case IB_WR_RDMA_READ:
    941	case IB_WR_RDMA_WRITE:
    942	case IB_WR_RDMA_WRITE_WITH_IMM:
    943		handle_rdma_op(wr, seg, size);
    944		break;
    945
    946	case IB_WR_ATOMIC_CMP_AND_SWP:
    947	case IB_WR_ATOMIC_FETCH_AND_ADD:
    948	case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
    949		mlx5_ib_warn(dev, "Atomic operations are not supported yet\n");
    950		err = -EOPNOTSUPP;
    951		goto out;
    952
    953	case IB_WR_LOCAL_INV:
    954		handle_local_inv(qp, wr, ctrl, seg, size, cur_edge, *idx);
    955		*num_sge = 0;
    956		break;
    957
    958	case IB_WR_REG_MR:
    959		err = handle_reg_mr(qp, wr, ctrl, seg, size, cur_edge, *idx);
    960		if (unlikely(err))
    961			goto out;
    962		*num_sge = 0;
    963		break;
    964
    965	case IB_WR_REG_MR_INTEGRITY:
    966		err = handle_reg_mr_integrity(dev, qp, wr, ctrl, seg, size,
    967					      cur_edge, idx, nreq, fence,
    968					      next_fence);
    969		if (unlikely(err))
    970			goto out;
    971		*num_sge = 0;
    972		break;
    973
    974	default:
    975		break;
    976	}
    977
    978out:
    979	return err;
    980}
    981
    982static void handle_qpt_uc(const struct ib_send_wr *wr, void **seg, int *size)
    983{
    984	switch (wr->opcode) {
    985	case IB_WR_RDMA_WRITE:
    986	case IB_WR_RDMA_WRITE_WITH_IMM:
    987		handle_rdma_op(wr, seg, size);
    988		break;
    989	default:
    990		break;
    991	}
    992}
    993
    994static void handle_qpt_hw_gsi(struct mlx5_ib_qp *qp,
    995			      const struct ib_send_wr *wr, void **seg,
    996			      int *size, void **cur_edge)
    997{
    998	set_datagram_seg(*seg, wr);
    999	*seg += sizeof(struct mlx5_wqe_datagram_seg);
   1000	*size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
   1001	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
   1002}
   1003
   1004static void handle_qpt_ud(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
   1005			  void **seg, int *size, void **cur_edge)
   1006{
   1007	set_datagram_seg(*seg, wr);
   1008	*seg += sizeof(struct mlx5_wqe_datagram_seg);
   1009	*size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
   1010	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
   1011
    1012	/* handle QPs that support UD LSO offload */
   1013	if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) {
   1014		struct mlx5_wqe_eth_pad *pad;
   1015
   1016		pad = *seg;
   1017		memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad));
   1018		*seg += sizeof(struct mlx5_wqe_eth_pad);
   1019		*size += sizeof(struct mlx5_wqe_eth_pad) / 16;
   1020		set_eth_seg(wr, qp, seg, size, cur_edge);
   1021		handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
   1022	}
   1023}
   1024
   1025void mlx5r_ring_db(struct mlx5_ib_qp *qp, unsigned int nreq,
   1026		   struct mlx5_wqe_ctrl_seg *ctrl)
   1027{
   1028	struct mlx5_bf *bf = &qp->bf;
   1029
   1030	qp->sq.head += nreq;
   1031
   1032	/* Make sure that descriptors are written before
   1033	 * updating doorbell record and ringing the doorbell
   1034	 */
   1035	wmb();
   1036
   1037	qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
   1038
   1039	/* Make sure doorbell record is visible to the HCA before
   1040	 * we hit doorbell.
   1041	 */
   1042	wmb();
   1043
   1044	mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset);
   1045	/* Make sure doorbells don't leak out of SQ spinlock
   1046	 * and reach the HCA out of order.
   1047	 */
   1048	bf->offset ^= bf->buf_size;
   1049}
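/*
 * Doorbell sequence above: sq.head is advanced, a barrier makes the WQEs
 * visible before the doorbell record is updated with sq.cur_post, a second
 * barrier orders the record before the MMIO write of the control segment,
 * and bf->offset is toggled so consecutive doorbells land in alternating
 * halves of the BlueFlame/doorbell buffer.
 */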
   1050
   1051int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
   1052		      const struct ib_send_wr **bad_wr, bool drain)
   1053{
   1054	struct mlx5_wqe_ctrl_seg *ctrl = NULL;  /* compiler warning */
   1055	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
   1056	struct mlx5_core_dev *mdev = dev->mdev;
   1057	struct mlx5_ib_qp *qp = to_mqp(ibqp);
   1058	struct mlx5_wqe_xrc_seg *xrc;
   1059	void *cur_edge;
   1060	int size;
   1061	unsigned long flags;
   1062	unsigned int idx;
   1063	int err = 0;
   1064	int num_sge;
   1065	void *seg;
   1066	int nreq;
   1067	int i;
   1068	u8 next_fence = 0;
   1069	u8 fence;
   1070
   1071	if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
   1072		     !drain)) {
   1073		*bad_wr = wr;
   1074		return -EIO;
   1075	}
   1076
   1077	if (qp->type == IB_QPT_GSI)
   1078		return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr);
   1079
   1080	spin_lock_irqsave(&qp->sq.lock, flags);
   1081
   1082	for (nreq = 0; wr; nreq++, wr = wr->next) {
   1083		if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
   1084			mlx5_ib_warn(dev, "\n");
   1085			err = -EINVAL;
   1086			*bad_wr = wr;
   1087			goto out;
   1088		}
   1089
   1090		num_sge = wr->num_sge;
   1091		if (unlikely(num_sge > qp->sq.max_gs)) {
   1092			mlx5_ib_warn(dev, "\n");
   1093			err = -EINVAL;
   1094			*bad_wr = wr;
   1095			goto out;
   1096		}
   1097
   1098		err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge,
   1099				nreq);
   1100		if (err) {
   1101			mlx5_ib_warn(dev, "\n");
   1102			err = -ENOMEM;
   1103			*bad_wr = wr;
   1104			goto out;
   1105		}
   1106
   1107		if (wr->opcode == IB_WR_REG_MR ||
   1108		    wr->opcode == IB_WR_REG_MR_INTEGRITY) {
   1109			fence = dev->umr_fence;
   1110			next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
   1111		} else  {
   1112			if (wr->send_flags & IB_SEND_FENCE) {
   1113				if (qp->next_fence)
   1114					fence = MLX5_FENCE_MODE_SMALL_AND_FENCE;
   1115				else
   1116					fence = MLX5_FENCE_MODE_FENCE;
   1117			} else {
   1118				fence = qp->next_fence;
   1119			}
   1120		}
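		/*
		 * Fence selection: registration WQEs (REG_MR and
		 * REG_MR_INTEGRITY) use the device's UMR fence and set
		 * next_fence so the following WQE is issued with an initiator
		 * small fence.  IB_SEND_FENCE requests an explicit fence
		 * (combined with the small fence if one is pending); otherwise
		 * the pending fence, if any, is used as-is.
		 */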
   1121
   1122		switch (qp->type) {
   1123		case IB_QPT_XRC_INI:
   1124			xrc = seg;
   1125			seg += sizeof(*xrc);
   1126			size += sizeof(*xrc) / 16;
   1127			fallthrough;
   1128		case IB_QPT_RC:
   1129			err = handle_qpt_rc(dev, qp, wr, &ctrl, &seg, &size,
   1130					    &cur_edge, &idx, nreq, fence,
   1131					    next_fence, &num_sge);
   1132			if (unlikely(err)) {
   1133				*bad_wr = wr;
   1134				goto out;
   1135			} else if (wr->opcode == IB_WR_REG_MR_INTEGRITY) {
   1136				goto skip_psv;
   1137			}
   1138			break;
   1139
   1140		case IB_QPT_UC:
   1141			handle_qpt_uc(wr, &seg, &size);
   1142			break;
   1143		case IB_QPT_SMI:
   1144			if (unlikely(!dev->port_caps[qp->port - 1].has_smi)) {
   1145				mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n");
   1146				err = -EPERM;
   1147				*bad_wr = wr;
   1148				goto out;
   1149			}
   1150			fallthrough;
   1151		case MLX5_IB_QPT_HW_GSI:
   1152			handle_qpt_hw_gsi(qp, wr, &seg, &size, &cur_edge);
   1153			break;
   1154		case IB_QPT_UD:
   1155			handle_qpt_ud(qp, wr, &seg, &size, &cur_edge);
   1156			break;
   1157
   1158		default:
   1159			break;
   1160		}
   1161
   1162		if (wr->send_flags & IB_SEND_INLINE && num_sge) {
   1163			err = set_data_inl_seg(qp, wr, &seg, &size, &cur_edge);
   1164			if (unlikely(err)) {
   1165				mlx5_ib_warn(dev, "\n");
   1166				*bad_wr = wr;
   1167				goto out;
   1168			}
   1169		} else {
   1170			for (i = 0; i < num_sge; i++) {
   1171				handle_post_send_edge(&qp->sq, &seg, size,
   1172						      &cur_edge);
   1173				if (unlikely(!wr->sg_list[i].length))
   1174					continue;
   1175
   1176				set_data_ptr_seg(
   1177					(struct mlx5_wqe_data_seg *)seg,
   1178					wr->sg_list + i);
   1179				size += sizeof(struct mlx5_wqe_data_seg) / 16;
   1180				seg += sizeof(struct mlx5_wqe_data_seg);
   1181			}
   1182		}
   1183
   1184		qp->next_fence = next_fence;
   1185		mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id,
   1186				 nreq, fence, mlx5_ib_opcode[wr->opcode]);
   1187skip_psv:
   1188		if (0)
   1189			dump_wqe(qp, idx, size);
   1190	}
   1191
   1192out:
   1193	if (likely(nreq))
   1194		mlx5r_ring_db(qp, nreq, ctrl);
   1195
   1196	spin_unlock_irqrestore(&qp->sq.lock, flags);
   1197
   1198	return err;
   1199}
   1200
   1201static void set_sig_seg(struct mlx5_rwqe_sig *sig, int max_gs)
   1202{
   1203	 sig->signature = calc_sig(sig, (max_gs + 1) << 2);
   1204}
   1205
   1206int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
   1207		      const struct ib_recv_wr **bad_wr, bool drain)
   1208{
   1209	struct mlx5_ib_qp *qp = to_mqp(ibqp);
   1210	struct mlx5_wqe_data_seg *scat;
   1211	struct mlx5_rwqe_sig *sig;
   1212	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
   1213	struct mlx5_core_dev *mdev = dev->mdev;
   1214	unsigned long flags;
   1215	int err = 0;
   1216	int nreq;
   1217	int ind;
   1218	int i;
   1219
   1220	if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
   1221		     !drain)) {
   1222		*bad_wr = wr;
   1223		return -EIO;
   1224	}
   1225
   1226	if (qp->type == IB_QPT_GSI)
   1227		return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr);
   1228
   1229	spin_lock_irqsave(&qp->rq.lock, flags);
   1230
   1231	ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
   1232
   1233	for (nreq = 0; wr; nreq++, wr = wr->next) {
   1234		if (mlx5r_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
   1235			err = -ENOMEM;
   1236			*bad_wr = wr;
   1237			goto out;
   1238		}
   1239
   1240		if (unlikely(wr->num_sge > qp->rq.max_gs)) {
   1241			err = -EINVAL;
   1242			*bad_wr = wr;
   1243			goto out;
   1244		}
   1245
   1246		scat = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ind);
   1247		if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE)
   1248			scat++;
   1249
   1250		for (i = 0; i < wr->num_sge; i++)
   1251			set_data_ptr_seg(scat + i, wr->sg_list + i);
   1252
   1253		if (i < qp->rq.max_gs) {
   1254			scat[i].byte_count = 0;
   1255			scat[i].lkey       = cpu_to_be32(MLX5_INVALID_LKEY);
   1256			scat[i].addr       = 0;
   1257		}
   1258
   1259		if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE) {
   1260			sig = (struct mlx5_rwqe_sig *)scat;
   1261			set_sig_seg(sig, qp->rq.max_gs);
   1262		}
   1263
   1264		qp->rq.wrid[ind] = wr->wr_id;
   1265
   1266		ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
   1267	}
   1268
   1269out:
   1270	if (likely(nreq)) {
   1271		qp->rq.head += nreq;
   1272
   1273		/* Make sure that descriptors are written before
   1274		 * doorbell record.
   1275		 */
   1276		wmb();
   1277
   1278		*qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
   1279	}
   1280
   1281	spin_unlock_irqrestore(&qp->rq.lock, flags);
   1282
   1283	return err;
   1284}