cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

rxe_req.c (18371B)


// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include <linux/skbuff.h>
#include <crypto/hash.h>

#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"

static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
		       u32 opcode);

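/*
 * Re-walk the first npsn packets of a partially acknowledged send/write
 * WQE during a retry, so the DMA state (resid, sge_offset) and the
 * current request opcode again match what has already been transmitted
 * and acknowledged.
 */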
static inline void retry_first_write_send(struct rxe_qp *qp,
					  struct rxe_send_wqe *wqe,
					  unsigned int mask, int npsn)
{
	int i;

	for (i = 0; i < npsn; i++) {
		int to_send = (wqe->dma.resid > qp->mtu) ?
				qp->mtu : wqe->dma.resid;

		qp->req.opcode = next_opcode(qp, wqe,
					     wqe->wr.opcode);

		if (wqe->wr.send_flags & IB_SEND_INLINE) {
			wqe->dma.resid -= to_send;
			wqe->dma.sge_offset += to_send;
		} else {
			advance_dma_data(&wqe->dma, to_send);
		}
	}
}

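/*
 * Rewind the send queue for a retry: reset the requester PSN to the
 * last completed PSN and restore the DMA state of every WQE that has
 * been started but not yet completed, skipping over the already
 * acknowledged portion of the first such WQE.
 */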
static void req_retry(struct rxe_qp *qp)
{
	struct rxe_send_wqe *wqe;
	unsigned int wqe_index;
	unsigned int mask;
	int npsn;
	int first = 1;
	struct rxe_queue *q = qp->sq.queue;
	unsigned int cons;
	unsigned int prod;

	cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT);
	prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT);

	qp->req.wqe_index	= cons;
	qp->req.psn		= qp->comp.psn;
	qp->req.opcode		= -1;

	for (wqe_index = cons; wqe_index != prod;
			wqe_index = queue_next_index(q, wqe_index)) {
		wqe = queue_addr_from_index(qp->sq.queue, wqe_index);
		mask = wr_opcode_mask(wqe->wr.opcode, qp);

		if (wqe->state == wqe_state_posted)
			break;

		if (wqe->state == wqe_state_done)
			continue;

		wqe->iova = (mask & WR_ATOMIC_MASK) ?
			     wqe->wr.wr.atomic.remote_addr :
			     (mask & WR_READ_OR_WRITE_MASK) ?
			     wqe->wr.wr.rdma.remote_addr :
			     0;

		if (!first || (mask & WR_READ_MASK) == 0) {
			wqe->dma.resid = wqe->dma.length;
			wqe->dma.cur_sge = 0;
			wqe->dma.sge_offset = 0;
		}

		if (first) {
			first = 0;

			if (mask & WR_WRITE_OR_SEND_MASK) {
				npsn = (qp->comp.psn - wqe->first_psn) &
					BTH_PSN_MASK;
				retry_first_write_send(qp, wqe, mask, npsn);
			}

			if (mask & WR_READ_MASK) {
				npsn = (wqe->dma.length - wqe->dma.resid) /
					qp->mtu;
				wqe->iova += npsn * qp->mtu;
			}
		}

		wqe->state = wqe_state_posted;
	}
}

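/* Runs when the RNR NAK back-off delay expires; kick the requester task. */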
void rnr_nak_timer(struct timer_list *t)
{
	struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer);

	pr_debug("qp#%d rnr nak timer fired\n", qp_num(qp));
	rxe_run_task(&qp->req.task, 1);
}

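/*
 * Return the next WQE eligible for processing, or NULL if the send
 * queue is empty, the QP is draining/drained, or a fenced WQE has to
 * wait for earlier work to complete. Also detects the transition to
 * SQ drained and raises IB_EVENT_SQ_DRAINED.
 */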
static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
{
	struct rxe_send_wqe *wqe;
	struct rxe_queue *q = qp->sq.queue;
	unsigned int index = qp->req.wqe_index;
	unsigned int cons;
	unsigned int prod;

	wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT);
	cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT);
	prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT);

	if (unlikely(qp->req.state == QP_STATE_DRAIN)) {
		/* check to see if we are drained;
		 * state_lock used by requester and completer
		 */
		spin_lock_bh(&qp->state_lock);
		do {
			if (qp->req.state != QP_STATE_DRAIN) {
				/* comp just finished */
				spin_unlock_bh(&qp->state_lock);
				break;
			}

			if (wqe && ((index != cons) ||
				(wqe->state != wqe_state_posted))) {
				/* comp not done yet */
				spin_unlock_bh(&qp->state_lock);
				break;
			}

			qp->req.state = QP_STATE_DRAINED;
			spin_unlock_bh(&qp->state_lock);

			if (qp->ibqp.event_handler) {
				struct ib_event ev;

				ev.device = qp->ibqp.device;
				ev.element.qp = &qp->ibqp;
				ev.event = IB_EVENT_SQ_DRAINED;
				qp->ibqp.event_handler(&ev,
					qp->ibqp.qp_context);
			}
		} while (0);
	}

	if (index == prod)
		return NULL;

	wqe = queue_addr_from_index(q, index);

	if (unlikely((qp->req.state == QP_STATE_DRAIN ||
		      qp->req.state == QP_STATE_DRAINED) &&
		     (wqe->state != wqe_state_processing)))
		return NULL;

	if (unlikely((wqe->wr.send_flags & IB_SEND_FENCE) &&
						     (index != cons))) {
		qp->req.wait_fence = 1;
		return NULL;
	}

	wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp);
	return wqe;
}

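/*
 * Select the wire opcode for the next packet of an RC work request:
 * continue a multi-packet message with MIDDLE/LAST based on the
 * previously sent opcode, or start a new message with FIRST/ONLY
 * depending on whether the remaining payload fits in one MTU ("fits").
 */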
static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits)
{
	switch (opcode) {
	case IB_WR_RDMA_WRITE:
		if (qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_MIDDLE)
			return fits ?
				IB_OPCODE_RC_RDMA_WRITE_LAST :
				IB_OPCODE_RC_RDMA_WRITE_MIDDLE;
		else
			return fits ?
				IB_OPCODE_RC_RDMA_WRITE_ONLY :
				IB_OPCODE_RC_RDMA_WRITE_FIRST;

	case IB_WR_RDMA_WRITE_WITH_IMM:
		if (qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_MIDDLE)
			return fits ?
				IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE :
				IB_OPCODE_RC_RDMA_WRITE_MIDDLE;
		else
			return fits ?
				IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE :
				IB_OPCODE_RC_RDMA_WRITE_FIRST;

	case IB_WR_SEND:
		if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE)
			return fits ?
				IB_OPCODE_RC_SEND_LAST :
				IB_OPCODE_RC_SEND_MIDDLE;
		else
			return fits ?
				IB_OPCODE_RC_SEND_ONLY :
				IB_OPCODE_RC_SEND_FIRST;

	case IB_WR_SEND_WITH_IMM:
		if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE)
			return fits ?
				IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE :
				IB_OPCODE_RC_SEND_MIDDLE;
		else
			return fits ?
				IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE :
				IB_OPCODE_RC_SEND_FIRST;

	case IB_WR_RDMA_READ:
		return IB_OPCODE_RC_RDMA_READ_REQUEST;

	case IB_WR_ATOMIC_CMP_AND_SWP:
		return IB_OPCODE_RC_COMPARE_SWAP;

	case IB_WR_ATOMIC_FETCH_AND_ADD:
		return IB_OPCODE_RC_FETCH_ADD;

	case IB_WR_SEND_WITH_INV:
		if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE)
			return fits ? IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE :
				IB_OPCODE_RC_SEND_MIDDLE;
		else
			return fits ? IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE :
				IB_OPCODE_RC_SEND_FIRST;
	case IB_WR_REG_MR:
	case IB_WR_LOCAL_INV:
		return opcode;
	}

	return -EINVAL;
}

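/*
 * Same opcode selection logic as next_opcode_rc(), restricted to the
 * UC send/write opcode set.
 */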
static int next_opcode_uc(struct rxe_qp *qp, u32 opcode, int fits)
{
	switch (opcode) {
	case IB_WR_RDMA_WRITE:
		if (qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_FIRST ||
		    qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_MIDDLE)
			return fits ?
				IB_OPCODE_UC_RDMA_WRITE_LAST :
				IB_OPCODE_UC_RDMA_WRITE_MIDDLE;
		else
			return fits ?
				IB_OPCODE_UC_RDMA_WRITE_ONLY :
				IB_OPCODE_UC_RDMA_WRITE_FIRST;

	case IB_WR_RDMA_WRITE_WITH_IMM:
		if (qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_FIRST ||
		    qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_MIDDLE)
			return fits ?
				IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE :
				IB_OPCODE_UC_RDMA_WRITE_MIDDLE;
		else
			return fits ?
				IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE :
				IB_OPCODE_UC_RDMA_WRITE_FIRST;

	case IB_WR_SEND:
		if (qp->req.opcode == IB_OPCODE_UC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_UC_SEND_MIDDLE)
			return fits ?
				IB_OPCODE_UC_SEND_LAST :
				IB_OPCODE_UC_SEND_MIDDLE;
		else
			return fits ?
				IB_OPCODE_UC_SEND_ONLY :
				IB_OPCODE_UC_SEND_FIRST;

	case IB_WR_SEND_WITH_IMM:
		if (qp->req.opcode == IB_OPCODE_UC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_UC_SEND_MIDDLE)
			return fits ?
				IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE :
				IB_OPCODE_UC_SEND_MIDDLE;
		else
			return fits ?
				IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE :
				IB_OPCODE_UC_SEND_FIRST;
	}

	return -EINVAL;
}

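/*
 * Map a work request opcode to the wire opcode of the next packet,
 * dispatching on QP type. UD and GSI QPs only ever carry single-packet
 * sends.
 */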
static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
		       u32 opcode)
{
	int fits = (wqe->dma.resid <= qp->mtu);

	switch (qp_type(qp)) {
	case IB_QPT_RC:
		return next_opcode_rc(qp, opcode, fits);

	case IB_QPT_UC:
		return next_opcode_uc(qp, opcode, fits);

	case IB_QPT_UD:
	case IB_QPT_GSI:
		switch (opcode) {
		case IB_WR_SEND:
			return IB_OPCODE_UD_SEND_ONLY;

		case IB_WR_SEND_WITH_IMM:
			return IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
		}
		break;

	default:
		break;
	}

	return -EINVAL;
}

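/*
 * Reserve one slot of the initiator depth (outstanding RDMA read /
 * atomic budget) for this WQE. If the budget is exhausted, restore the
 * counter, flag need_rd_atomic and return -EAGAIN so the request waits
 * for a completion to free a slot.
 */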
static inline int check_init_depth(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	int depth;

	if (wqe->has_rd_atomic)
		return 0;

	qp->req.need_rd_atomic = 1;
	depth = atomic_dec_return(&qp->req.rd_atomic);

	if (depth >= 0) {
		qp->req.need_rd_atomic = 0;
		wqe->has_rd_atomic = 1;
		return 0;
	}

	atomic_inc(&qp->req.rd_atomic);
	return -EAGAIN;
}

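/*
 * Connected QPs (RC/UC) use the path MTU negotiated for the QP;
 * datagram QPs use the port's MTU cap.
 */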
static inline int get_mtu(struct rxe_qp *qp)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);

	if ((qp_type(qp) == IB_QPT_RC) || (qp_type(qp) == IB_QPT_UC))
		return qp->mtu;

	return rxe->port.mtu_cap;
}

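/*
 * Allocate an skb for the next request packet and build its headers:
 * the BTH plus whichever of RETH/IMMDT/IETH/ATMETH/DETH the opcode
 * requires. The payload itself is copied in later by finish_packet().
 */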
static struct sk_buff *init_req_packet(struct rxe_qp *qp,
				       struct rxe_av *av,
				       struct rxe_send_wqe *wqe,
				       int opcode, u32 payload,
				       struct rxe_pkt_info *pkt)
{
	struct rxe_dev		*rxe = to_rdev(qp->ibqp.device);
	struct sk_buff		*skb;
	struct rxe_send_wr	*ibwr = &wqe->wr;
	int			pad = (-payload) & 0x3;
	int			paylen;
	int			solicited;
	u32			qp_num;
	int			ack_req;

	/* length from start of bth to end of icrc */
	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
	pkt->paylen = paylen;

	/* init skb */
	skb = rxe_init_packet(rxe, av, paylen, pkt);
	if (unlikely(!skb))
		return NULL;

	/* init bth */
	solicited = (ibwr->send_flags & IB_SEND_SOLICITED) &&
			(pkt->mask & RXE_END_MASK) &&
			((pkt->mask & (RXE_SEND_MASK)) ||
			(pkt->mask & (RXE_WRITE_MASK | RXE_IMMDT_MASK)) ==
			(RXE_WRITE_MASK | RXE_IMMDT_MASK));

	qp_num = (pkt->mask & RXE_DETH_MASK) ? ibwr->wr.ud.remote_qpn :
					 qp->attr.dest_qp_num;

	ack_req = ((pkt->mask & RXE_END_MASK) ||
		(qp->req.noack_pkts++ > RXE_MAX_PKT_PER_ACK));
	if (ack_req)
		qp->req.noack_pkts = 0;

	bth_init(pkt, pkt->opcode, solicited, 0, pad, IB_DEFAULT_PKEY_FULL, qp_num,
		 ack_req, pkt->psn);

	/* init optional headers */
	if (pkt->mask & RXE_RETH_MASK) {
		reth_set_rkey(pkt, ibwr->wr.rdma.rkey);
		reth_set_va(pkt, wqe->iova);
		reth_set_len(pkt, wqe->dma.resid);
	}

	if (pkt->mask & RXE_IMMDT_MASK)
		immdt_set_imm(pkt, ibwr->ex.imm_data);

	if (pkt->mask & RXE_IETH_MASK)
		ieth_set_rkey(pkt, ibwr->ex.invalidate_rkey);

	if (pkt->mask & RXE_ATMETH_MASK) {
		atmeth_set_va(pkt, wqe->iova);
		if (opcode == IB_OPCODE_RC_COMPARE_SWAP) {
			atmeth_set_swap_add(pkt, ibwr->wr.atomic.swap);
			atmeth_set_comp(pkt, ibwr->wr.atomic.compare_add);
		} else {
			atmeth_set_swap_add(pkt, ibwr->wr.atomic.compare_add);
		}
		atmeth_set_rkey(pkt, ibwr->wr.atomic.rkey);
	}

	if (pkt->mask & RXE_DETH_MASK) {
		if (qp->ibqp.qp_num == 1)
			deth_set_qkey(pkt, GSI_QKEY);
		else
			deth_set_qkey(pkt, ibwr->wr.ud.remote_qkey);
		deth_set_sqp(pkt, qp->ibqp.qp_num);
	}

	return skb;
}

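/*
 * Finish building the packet: prepare the lower network headers via
 * rxe_prepare(), copy the payload from inline data or from the SGE
 * list, and zero any pad bytes preceding the ICRC.
 */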
static int finish_packet(struct rxe_qp *qp, struct rxe_av *av,
			 struct rxe_send_wqe *wqe, struct rxe_pkt_info *pkt,
			 struct sk_buff *skb, u32 payload)
{
	int err;

	err = rxe_prepare(av, pkt, skb);
	if (err)
		return err;

	if (pkt->mask & RXE_WRITE_OR_SEND_MASK) {
		if (wqe->wr.send_flags & IB_SEND_INLINE) {
			u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset];

			memcpy(payload_addr(pkt), tmp, payload);

			wqe->dma.resid -= payload;
			wqe->dma.sge_offset += payload;
		} else {
			err = copy_data(qp->pd, 0, &wqe->dma,
					payload_addr(pkt), payload,
					RXE_FROM_MR_OBJ);
			if (err)
				return err;
		}
		if (bth_pad(pkt)) {
			u8 *pad = payload_addr(pkt) + payload;

			memset(pad, 0, bth_pad(pkt));
		}
	}

	return 0;
}

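/*
 * The last packet of an RC message leaves the WQE waiting for an ack
 * (wqe_state_pending); any non-final packet marks it
 * wqe_state_processing.
 */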
static void update_wqe_state(struct rxe_qp *qp,
		struct rxe_send_wqe *wqe,
		struct rxe_pkt_info *pkt)
{
	if (pkt->mask & RXE_END_MASK) {
		if (qp_type(qp) == IB_QPT_RC)
			wqe->state = wqe_state_pending;
	} else {
		wqe->state = wqe_state_processing;
	}
}

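/*
 * Record the PSN range covered by this WQE when its first packet goes
 * out, then advance the requester PSN: past the full expected response
 * for an RDMA read, by one for everything else.
 */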
static void update_wqe_psn(struct rxe_qp *qp,
			   struct rxe_send_wqe *wqe,
			   struct rxe_pkt_info *pkt,
			   u32 payload)
{
	/* number of packets left to send including current one */
	int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu;

	/* handle zero length packet case */
	if (num_pkt == 0)
		num_pkt = 1;

	if (pkt->mask & RXE_START_MASK) {
		wqe->first_psn = qp->req.psn;
		wqe->last_psn = (qp->req.psn + num_pkt - 1) & BTH_PSN_MASK;
	}

	if (pkt->mask & RXE_READ_MASK)
		qp->req.psn = (wqe->first_psn + num_pkt) & BTH_PSN_MASK;
	else
		qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK;
}

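/*
 * save_state()/rollback_state() snapshot and restore the WQE state and
 * requester PSN around rxe_xmit_packet(), so a failed transmit does not
 * leave the completer seeing state for a packet that was never sent.
 */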
static void save_state(struct rxe_send_wqe *wqe,
		       struct rxe_qp *qp,
		       struct rxe_send_wqe *rollback_wqe,
		       u32 *rollback_psn)
{
	rollback_wqe->state     = wqe->state;
	rollback_wqe->first_psn = wqe->first_psn;
	rollback_wqe->last_psn  = wqe->last_psn;
	*rollback_psn		= qp->req.psn;
}

static void rollback_state(struct rxe_send_wqe *wqe,
			   struct rxe_qp *qp,
			   struct rxe_send_wqe *rollback_wqe,
			   u32 rollback_psn)
{
	wqe->state     = rollback_wqe->state;
	wqe->first_psn = rollback_wqe->first_psn;
	wqe->last_psn  = rollback_wqe->last_psn;
	qp->req.psn    = rollback_psn;
}

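/*
 * Commit progress after a successful transmit: remember the opcode,
 * advance to the next WQE when the message is complete, and arm the
 * retransmit timer if one is configured and not already pending.
 */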
static void update_state(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
	qp->req.opcode = pkt->opcode;

	if (pkt->mask & RXE_END_MASK)
		qp->req.wqe_index = queue_next_index(qp->sq.queue,
						     qp->req.wqe_index);

	qp->need_req_skb = 0;

	if (qp->qp_timeout_jiffies && !timer_pending(&qp->retrans_timer))
		mod_timer(&qp->retrans_timer,
			  jiffies + qp->qp_timeout_jiffies);
}

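/*
 * Execute work requests that complete locally without emitting a
 * packet: local invalidate, fast-register MR and bind MW. On success
 * the WQE is marked done and the completer task is kicked if a
 * completion is expected.
 */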
static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	u8 opcode = wqe->wr.opcode;
	u32 rkey;
	int ret;

	switch (opcode) {
	case IB_WR_LOCAL_INV:
		rkey = wqe->wr.ex.invalidate_rkey;
		if (rkey_is_mw(rkey))
			ret = rxe_invalidate_mw(qp, rkey);
		else
			ret = rxe_invalidate_mr(qp, rkey);

		if (unlikely(ret)) {
			wqe->status = IB_WC_LOC_QP_OP_ERR;
			return ret;
		}
		break;
	case IB_WR_REG_MR:
		ret = rxe_reg_fast_mr(qp, wqe);
		if (unlikely(ret)) {
			wqe->status = IB_WC_LOC_QP_OP_ERR;
			return ret;
		}
		break;
	case IB_WR_BIND_MW:
		ret = rxe_bind_mw(qp, wqe);
		if (unlikely(ret)) {
			wqe->status = IB_WC_MW_BIND_ERR;
			return ret;
		}
		break;
	default:
		pr_err("Unexpected send wqe opcode %d\n", opcode);
		wqe->status = IB_WC_LOC_QP_OP_ERR;
		return -EINVAL;
	}

	wqe->state = wqe_state_done;
	wqe->status = IB_WC_SUCCESS;
	qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index);

	if ((wqe->wr.send_flags & IB_SEND_SIGNALED) ||
	    qp->sq_sig_type == IB_SIGNAL_ALL_WR)
		rxe_run_task(&qp->comp.task, 1);

	return 0;
}

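/*
 * Main requester work function, run from the QP's request task. Pulls
 * the next WQE from the send queue, handles local-only operations,
 * applies flow-control limits (unacked PSN window, in-flight skbs,
 * read/atomic depth), then builds and transmits the next packet for the
 * current WQE, rolling back or erroring out the WQE on failure.
 * Returns -EAGAIN once there is no more work so the task stops looping.
 */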
int rxe_requester(void *arg)
{
	struct rxe_qp *qp = (struct rxe_qp *)arg;
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_pkt_info pkt;
	struct sk_buff *skb;
	struct rxe_send_wqe *wqe;
	enum rxe_hdr_mask mask;
	u32 payload;
	int mtu;
	int opcode;
	int ret;
	struct rxe_send_wqe rollback_wqe;
	u32 rollback_psn;
	struct rxe_queue *q = qp->sq.queue;
	struct rxe_ah *ah;
	struct rxe_av *av;

	if (!rxe_get(qp))
		return -EAGAIN;

next_wqe:
	if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR))
		goto exit;

	if (unlikely(qp->req.state == QP_STATE_RESET)) {
		qp->req.wqe_index = queue_get_consumer(q,
						QUEUE_TYPE_FROM_CLIENT);
		qp->req.opcode = -1;
		qp->req.need_rd_atomic = 0;
		qp->req.wait_psn = 0;
		qp->req.need_retry = 0;
		goto exit;
	}

	if (unlikely(qp->req.need_retry)) {
		req_retry(qp);
		qp->req.need_retry = 0;
	}

	wqe = req_next_wqe(qp);
	if (unlikely(!wqe))
		goto exit;

	if (wqe->mask & WR_LOCAL_OP_MASK) {
		ret = rxe_do_local_ops(qp, wqe);
		if (unlikely(ret))
			goto err;
		else
			goto next_wqe;
	}

	if (unlikely(qp_type(qp) == IB_QPT_RC &&
		psn_compare(qp->req.psn, (qp->comp.psn +
				RXE_MAX_UNACKED_PSNS)) > 0)) {
		qp->req.wait_psn = 1;
		goto exit;
	}

	/* Limit the number of inflight SKBs per QP */
	if (unlikely(atomic_read(&qp->skb_out) >
		     RXE_INFLIGHT_SKBS_PER_QP_HIGH)) {
		qp->need_req_skb = 1;
		goto exit;
	}

	opcode = next_opcode(qp, wqe, wqe->wr.opcode);
	if (unlikely(opcode < 0)) {
		wqe->status = IB_WC_LOC_QP_OP_ERR;
		goto err;
	}

	mask = rxe_opcode[opcode].mask;
	if (unlikely(mask & RXE_READ_OR_ATOMIC_MASK)) {
		if (check_init_depth(qp, wqe))
			goto exit;
	}

	mtu = get_mtu(qp);
	payload = (mask & RXE_WRITE_OR_SEND_MASK) ? wqe->dma.resid : 0;
	if (payload > mtu) {
		if (qp_type(qp) == IB_QPT_UD) {
			/* C10-93.1.1: If the total sum of all the buffer lengths specified for a
			 * UD message exceeds the MTU of the port as returned by QueryHCA, the CI
			 * shall not emit any packets for this message. Further, the CI shall not
			 * generate an error due to this condition.
			 */

			/* fake a successful UD send */
			wqe->first_psn = qp->req.psn;
			wqe->last_psn = qp->req.psn;
			qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK;
			qp->req.opcode = IB_OPCODE_UD_SEND_ONLY;
			qp->req.wqe_index = queue_next_index(qp->sq.queue,
						       qp->req.wqe_index);
			wqe->state = wqe_state_done;
			wqe->status = IB_WC_SUCCESS;
			__rxe_do_task(&qp->comp.task);
			rxe_put(qp);
			return 0;
		}
		payload = mtu;
	}

	pkt.rxe = rxe;
	pkt.opcode = opcode;
	pkt.qp = qp;
	pkt.psn = qp->req.psn;
	pkt.mask = rxe_opcode[opcode].mask;
	pkt.wqe = wqe;

	av = rxe_get_av(&pkt, &ah);
	if (unlikely(!av)) {
		pr_err("qp#%d Failed no address vector\n", qp_num(qp));
		wqe->status = IB_WC_LOC_QP_OP_ERR;
		goto err_drop_ah;
	}

	skb = init_req_packet(qp, av, wqe, opcode, payload, &pkt);
	if (unlikely(!skb)) {
		pr_err("qp#%d Failed allocating skb\n", qp_num(qp));
		wqe->status = IB_WC_LOC_QP_OP_ERR;
		goto err_drop_ah;
	}

	ret = finish_packet(qp, av, wqe, &pkt, skb, payload);
	if (unlikely(ret)) {
		pr_debug("qp#%d Error during finish packet\n", qp_num(qp));
		if (ret == -EFAULT)
			wqe->status = IB_WC_LOC_PROT_ERR;
		else
			wqe->status = IB_WC_LOC_QP_OP_ERR;
		kfree_skb(skb);
		goto err_drop_ah;
	}

	if (ah)
		rxe_put(ah);

	/*
	 * To prevent a race on wqe access between requester and completer,
	 * wqe members state and psn need to be set before calling
	 * rxe_xmit_packet().
	 * Otherwise, completer might initiate an unjustified retry flow.
	 */
	save_state(wqe, qp, &rollback_wqe, &rollback_psn);
	update_wqe_state(qp, wqe, &pkt);
	update_wqe_psn(qp, wqe, &pkt, payload);
	ret = rxe_xmit_packet(qp, &pkt, skb);
	if (ret) {
		qp->need_req_skb = 1;

		rollback_state(wqe, qp, &rollback_wqe, rollback_psn);

		if (ret == -EAGAIN) {
			rxe_run_task(&qp->req.task, 1);
			goto exit;
		}

		wqe->status = IB_WC_LOC_QP_OP_ERR;
		goto err;
	}

	update_state(qp, &pkt);

	goto next_wqe;

err_drop_ah:
	if (ah)
		rxe_put(ah);
err:
	wqe->state = wqe_state_error;
	__rxe_do_task(&qp->comp.task);

exit:
	rxe_put(qp);
	return -EAGAIN;
}