cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

xdp.c (18637B)


/*
 * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/bpf_trace.h>
#include <net/xdp_sock_drv.h>
#include "en/xdp.h"
#include "en/params.h"

int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk)
{
	int hr = mlx5e_get_linear_rq_headroom(params, xsk);

	/* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
	 * The condition checked in mlx5e_rx_is_linear_skb is:
	 *   SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE         (1)
	 *   (Note that hw_mtu == sw_mtu + hard_mtu.)
	 * What is returned from this function is:
	 *   max_mtu = PAGE_SIZE - S - hr - hard_mtu                         (2)
	 * After assigning sw_mtu := max_mtu, the left side of (1) turns to
	 * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE,
	 * because both PAGE_SIZE and S are already aligned. Any number greater
	 * than max_mtu would make the left side of (1) greater than PAGE_SIZE,
	 * so max_mtu is the maximum MTU allowed.
	 */

	return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr));
}
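
/* Worked example with illustrative numbers (not taken from the code above):
 * with PAGE_SIZE = 4096, a headroom hr = 256, S = 320 and hard_mtu = 22
 * (Ethernet + VLAN + FCS overhead), (2) gives
 *   max_mtu = 4096 - 320 - 256 - 22 = 3498.
 * Plugging sw_mtu = 3498 back into (1):
 *   SKB_DATA_ALIGN(3498 + 22 + 256) + 320 = 3776 + 320 = 4096 <= PAGE_SIZE,
 * while any larger sw_mtu pushes the sum past one page.
 */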

static inline bool
mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
		    struct page *page, struct xdp_buff *xdp)
{
	struct skb_shared_info *sinfo = NULL;
	struct mlx5e_xmit_data xdptxd;
	struct mlx5e_xdp_info xdpi;
	struct xdp_frame *xdpf;
	dma_addr_t dma_addr;
	int i;

	xdpf = xdp_convert_buff_to_frame(xdp);
	if (unlikely(!xdpf))
		return false;

	xdptxd.data = xdpf->data;
	xdptxd.len  = xdpf->len;

	if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
		/* The xdp_buff was in the UMEM and was copied into a newly
		 * allocated page. The UMEM page was returned via the ZCA, and
		 * this new page has to be mapped at this point and has to be
		 * unmapped and returned via xdp_return_frame on completion.
		 */

		/* Prevent double recycling of the UMEM page. Even in case this
		 * function returns false, the xdp_buff shouldn't be recycled,
		 * as it was already done in xdp_convert_zc_to_xdp_frame.
		 */
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */

		xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;

		dma_addr = dma_map_single(sq->pdev, xdptxd.data, xdptxd.len,
					  DMA_TO_DEVICE);
		if (dma_mapping_error(sq->pdev, dma_addr)) {
			xdp_return_frame(xdpf);
			return false;
		}

		xdptxd.dma_addr     = dma_addr;
		xdpi.frame.xdpf     = xdpf;
		xdpi.frame.dma_addr = dma_addr;

		if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
					      mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0)))
			return false;

		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
		return true;
	}

	/* Driver assumes that xdp_convert_buff_to_frame returns an xdp_frame
	 * that points to the same memory region as the original xdp_buff. It
	 * allows to map the memory only once and to use the DMA_BIDIRECTIONAL
	 * mode.
	 */

	xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE;
	xdpi.page.rq = rq;

	dma_addr = page_pool_get_dma_addr(page) + (xdpf->data - (void *)xdpf);
	dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, DMA_TO_DEVICE);

	if (unlikely(xdp_frame_has_frags(xdpf))) {
		sinfo = xdp_get_shared_info_from_frame(xdpf);

		for (i = 0; i < sinfo->nr_frags; i++) {
			skb_frag_t *frag = &sinfo->frags[i];
			dma_addr_t addr;
			u32 len;

			addr = page_pool_get_dma_addr(skb_frag_page(frag)) +
				skb_frag_off(frag);
			len = skb_frag_size(frag);
			dma_sync_single_for_device(sq->pdev, addr, len,
						   DMA_TO_DEVICE);
		}
	}

	xdptxd.dma_addr = dma_addr;

	if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
				      mlx5e_xmit_xdp_frame, sq, &xdptxd, sinfo, 0)))
		return false;

	xdpi.page.page = page;
	mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);

	if (unlikely(xdp_frame_has_frags(xdpf))) {
		for (i = 0; i < sinfo->nr_frags; i++) {
			skb_frag_t *frag = &sinfo->frags[i];

			xdpi.page.page = skb_frag_page(frag);
			mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
		}
	}

	return true;
}
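
/* Completion bookkeeping for the two paths above: in FRAME mode a single
 * mlx5e_xdp_info entry carries the xdp_frame and its DMA mapping, which are
 * unmapped and returned in mlx5e_free_xdpsq_desc(); in PAGE mode one entry is
 * pushed for the head page and one more per fragment, so the completion path
 * can release every page that was handed over to the XDP SQ.
 */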

/* returns true if packet was consumed by xdp */
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct page *page,
		      struct bpf_prog *prog, struct xdp_buff *xdp)
{
	u32 act;
	int err;

	act = bpf_prog_run_xdp(prog, xdp);
	switch (act) {
	case XDP_PASS:
		return false;
	case XDP_TX:
		if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, page, xdp)))
			goto xdp_abort;
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
		return true;
	case XDP_REDIRECT:
		/* When XDP enabled then page-refcnt==1 here */
		err = xdp_do_redirect(rq->netdev, xdp, prog);
		if (unlikely(err))
			goto xdp_abort;
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
		__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
		if (xdp->rxq->mem.type != MEM_TYPE_XSK_BUFF_POOL)
			mlx5e_page_dma_unmap(rq, page);
		rq->stats->xdp_redirect++;
		return true;
	default:
		bpf_warn_invalid_xdp_action(rq->netdev, prog, act);
		fallthrough;
	case XDP_ABORTED:
xdp_abort:
		trace_xdp_exception(rq->netdev, prog, act);
		fallthrough;
	case XDP_DROP:
		rq->stats->xdp_drop++;
		return true;
	}
}
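
/* Sketch of the calling convention (illustrative only, not the driver's
 * actual RX code):
 *
 *	if (prog && mlx5e_xdp_handle(rq, page, prog, &xdp))
 *		return NULL; // consumed: transmitted, redirected or dropped
 *
 * A true return means no SKB should be built for this packet. The
 * MLX5E_RQ_FLAG_XDP_XMIT bit tells the RX path that page ownership moved to
 * the XDP SQ, and MLX5E_RQ_FLAG_XDP_REDIRECT defers xdp_do_flush_map() to
 * mlx5e_xdp_rx_poll_complete() below.
 */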

static u16 mlx5e_xdpsq_get_next_pi(struct mlx5e_xdpsq *sq, u16 size)
{
	struct mlx5_wq_cyc *wq = &sq->wq;
	u16 pi, contig_wqebbs;

	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
	if (unlikely(contig_wqebbs < size)) {
		struct mlx5e_xdp_wqe_info *wi, *edge_wi;

		wi = &sq->db.wqe_info[pi];
		edge_wi = wi + contig_wqebbs;

		/* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
		for (; wi < edge_wi; wi++) {
			*wi = (struct mlx5e_xdp_wqe_info) {
				.num_wqebbs = 1,
				.num_pkts = 0,
			};
			mlx5e_post_nop(wq, sq->sqn, &sq->pc);
		}
		sq->stats->nops += contig_wqebbs;

		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	}

	return pi;
}
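
/* Example with illustrative numbers: if the producer index is 2 WQEBBs before
 * the end of the current WQ fragment and a 4-WQEBB WQE is requested, the 2
 * remaining slots are padded with NOPs (accounted in stats->nops) and the
 * returned pi points to the start of the next fragment, so no WQE ever
 * straddles a fragment boundary.
 */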

static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
{
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
	struct mlx5e_xdpsq_stats *stats = sq->stats;
	struct mlx5e_tx_wqe *wqe;
	u16 pi;

	pi = mlx5e_xdpsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs);
	wqe = MLX5E_TX_FETCH_WQE(sq, pi);
	net_prefetchw(wqe->data);

	*session = (struct mlx5e_tx_mpwqe) {
		.wqe = wqe,
		.bytes_count = 0,
		.ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT,
		.pkt_count = 0,
		.inline_on = mlx5e_xdp_get_inline_state(sq, session->inline_on),
	};

	stats->mpwqe++;
}

void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
{
	struct mlx5_wq_cyc       *wq    = &sq->wq;
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
	struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl;
	u16 ds_count = session->ds_count;
	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];

	cseg->opmod_idx_opcode =
		cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
	cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);

	wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS);
	wi->num_pkts   = session->pkt_count;

	sq->pc += wi->num_wqebbs;

	sq->doorbell_cseg = cseg;

	session->wqe = NULL; /* Close session */
}

enum {
	MLX5E_XDP_CHECK_OK = 1,
	MLX5E_XDP_CHECK_START_MPWQE = 2,
};
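
/* Return convention shared by the check helpers below: a negative errno means
 * the SQ is out of space (the doorbell has already been rung) and the caller
 * treats the transmit as failed, MLX5E_XDP_CHECK_OK means a WQE can be posted
 * right away, and MLX5E_XDP_CHECK_START_MPWQE means a new multi-packet WQE
 * session must be opened before adding the first data segment.
 */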

INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
{
	if (unlikely(!sq->mpwqe.wqe)) {
		if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
						     sq->stop_room))) {
			/* SQ is full, ring doorbell */
			mlx5e_xmit_xdp_doorbell(sq);
			sq->stats->full++;
			return -EBUSY;
		}

		return MLX5E_XDP_CHECK_START_MPWQE;
	}

	return MLX5E_XDP_CHECK_OK;
}

INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
		     struct skb_shared_info *sinfo, int check_result);

INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
			   struct skb_shared_info *sinfo, int check_result)
{
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
	struct mlx5e_xdpsq_stats *stats = sq->stats;

	if (unlikely(sinfo)) {
		/* MPWQE is enabled, but a multi-buffer packet is queued for
		 * transmission. MPWQE can't send fragmented packets, so close
		 * the current session and fall back to a regular WQE.
		 */
		if (unlikely(sq->mpwqe.wqe))
			mlx5e_xdp_mpwqe_complete(sq);
		return mlx5e_xmit_xdp_frame(sq, xdptxd, sinfo, 0);
	}

	if (unlikely(xdptxd->len > sq->hw_mtu)) {
		stats->err++;
		return false;
	}

	if (!check_result)
		check_result = mlx5e_xmit_xdp_frame_check_mpwqe(sq);
	if (unlikely(check_result < 0))
		return false;

	if (check_result == MLX5E_XDP_CHECK_START_MPWQE) {
		/* Start the session when nothing can fail, so it's guaranteed
		 * that if there is an active session, it has at least one dseg,
		 * and it's safe to complete it at any time.
		 */
		mlx5e_xdp_mpwqe_session_start(sq);
	}

	mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);

	if (unlikely(mlx5e_xdp_mpqwe_is_full(session, sq->max_sq_mpw_wqebbs)))
		mlx5e_xdp_mpwqe_complete(sq);

	stats->xmit++;
	return true;
}
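
/* MPWQE fast-path summary: single-buffer frames are appended as data segments
 * to one Enhanced Multi-Packet Send WQE that stays open across calls. The
 * session is closed when it fills up (mlx5e_xdp_mpqwe_is_full), when a
 * multi-buffer frame forces the fallback to mlx5e_xmit_xdp_frame, or when a
 * caller flushes it via mlx5e_xdp_mpwqe_complete() before ringing the
 * doorbell.
 */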

static int mlx5e_xmit_xdp_frame_check_stop_room(struct mlx5e_xdpsq *sq, int stop_room)
{
	if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, stop_room))) {
		/* SQ is full, ring doorbell */
		mlx5e_xmit_xdp_doorbell(sq);
		sq->stats->full++;
		return -EBUSY;
	}

	return MLX5E_XDP_CHECK_OK;
}

INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
{
	return mlx5e_xmit_xdp_frame_check_stop_room(sq, 1);
}

INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
		     struct skb_shared_info *sinfo, int check_result)
{
	struct mlx5_wq_cyc       *wq   = &sq->wq;
	struct mlx5_wqe_ctrl_seg *cseg;
	struct mlx5_wqe_data_seg *dseg;
	struct mlx5_wqe_eth_seg *eseg;
	struct mlx5e_tx_wqe *wqe;

	dma_addr_t dma_addr = xdptxd->dma_addr;
	u32 dma_len = xdptxd->len;
	u16 ds_cnt, inline_hdr_sz;
	u8 num_wqebbs = 1;
	int num_frags = 0;
	u16 pi;

	struct mlx5e_xdpsq_stats *stats = sq->stats;

	if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
		stats->err++;
		return false;
	}

	ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1;
	if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE)
		ds_cnt++;

	/* check_result must be 0 if sinfo is passed. */
	if (!check_result) {
		int stop_room = 1;

		if (unlikely(sinfo)) {
			ds_cnt += sinfo->nr_frags;
			num_frags = sinfo->nr_frags;
			num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
			/* Assuming MLX5_CAP_GEN(mdev, max_wqe_sz_sq) is big
			 * enough to hold all fragments.
			 */
			stop_room = MLX5E_STOP_ROOM(num_wqebbs);
		}

		check_result = mlx5e_xmit_xdp_frame_check_stop_room(sq, stop_room);
	}
	if (unlikely(check_result < 0))
		return false;

	pi = mlx5e_xdpsq_get_next_pi(sq, num_wqebbs);
	wqe = mlx5_wq_cyc_get_wqe(wq, pi);
	net_prefetchw(wqe);

	cseg = &wqe->ctrl;
	eseg = &wqe->eth;
	dseg = wqe->data;

	inline_hdr_sz = 0;

	/* copy the inline part if required */
	if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
		memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start));
		memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start),
		       MLX5E_XDP_MIN_INLINE - sizeof(eseg->inline_hdr.start));
		dma_len  -= MLX5E_XDP_MIN_INLINE;
		dma_addr += MLX5E_XDP_MIN_INLINE;
		inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
		dseg++;
	}

	/* write the dma part */
	dseg->addr       = cpu_to_be64(dma_addr);
	dseg->byte_count = cpu_to_be32(dma_len);

	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);

	if (unlikely(test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state))) {
		u8 num_pkts = 1 + num_frags;
		int i;

		memset(&cseg->trailer, 0, sizeof(cseg->trailer));
		memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer));

		eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
		dseg->lkey = sq->mkey_be;

		for (i = 0; i < num_frags; i++) {
			skb_frag_t *frag = &sinfo->frags[i];
			dma_addr_t addr;

			addr = page_pool_get_dma_addr(skb_frag_page(frag)) +
				skb_frag_off(frag);

			dseg++;
			dseg->addr = cpu_to_be64(addr);
			dseg->byte_count = cpu_to_be32(skb_frag_size(frag));
			dseg->lkey = sq->mkey_be;
		}

		cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);

		sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) {
			.num_wqebbs = num_wqebbs,
			.num_pkts = num_pkts,
		};

		sq->pc += num_wqebbs;
	} else {
		cseg->fm_ce_se = 0;

		sq->pc++;
	}

	sq->doorbell_cseg = cseg;

	stats->xmit++;
	return true;
}
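
/* WQE layout built above: one control segment, one Ethernet segment that
 * inlines MLX5E_XDP_MIN_INLINE bytes of the packet when an inline mode is
 * required, and one data segment per contiguous buffer (head plus
 * sinfo->nr_frags fragments). ds_cnt counts data segments; for multi-buffer
 * frames DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS) converts that into the
 * number of 64-byte WQE building blocks reserved as num_wqebbs (in the mlx5
 * send format a data segment is 16 bytes, so MLX5_SEND_WQEBB_NUM_DS is 4).
 */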

static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
				  struct mlx5e_xdp_wqe_info *wi,
				  u32 *xsk_frames,
				  bool recycle,
				  struct xdp_frame_bulk *bq)
{
	struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
	u16 i;

	for (i = 0; i < wi->num_pkts; i++) {
		struct mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);

		switch (xdpi.mode) {
		case MLX5E_XDP_XMIT_MODE_FRAME:
			/* XDP_TX from the XSK RQ and XDP_REDIRECT */
			dma_unmap_single(sq->pdev, xdpi.frame.dma_addr,
					 xdpi.frame.xdpf->len, DMA_TO_DEVICE);
			xdp_return_frame_bulk(xdpi.frame.xdpf, bq);
			break;
		case MLX5E_XDP_XMIT_MODE_PAGE:
			/* XDP_TX from the regular RQ */
			mlx5e_page_release_dynamic(xdpi.page.rq, xdpi.page.page, recycle);
			break;
		case MLX5E_XDP_XMIT_MODE_XSK:
			/* AF_XDP send */
			(*xsk_frames)++;
			break;
		default:
			WARN_ON_ONCE(true);
		}
	}
}

bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
{
	struct xdp_frame_bulk bq;
	struct mlx5e_xdpsq *sq;
	struct mlx5_cqe64 *cqe;
	u32 xsk_frames = 0;
	u16 sqcc;
	int i;

	xdp_frame_bulk_init(&bq);

	sq = container_of(cq, struct mlx5e_xdpsq, cq);

	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
		return false;

	cqe = mlx5_cqwq_get_cqe(&cq->wq);
	if (!cqe)
		return false;

	/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur
	 */
	sqcc = sq->cc;

	i = 0;
	do {
		struct mlx5e_xdp_wqe_info *wi;
		u16 wqe_counter, ci;
		bool last_wqe;

		mlx5_cqwq_pop(&cq->wq);

		wqe_counter = be16_to_cpu(cqe->wqe_counter);

		do {
			last_wqe = (sqcc == wqe_counter);
			ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
			wi = &sq->db.wqe_info[ci];

			sqcc += wi->num_wqebbs;

			mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true, &bq);
		} while (!last_wqe);

		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
			netdev_WARN_ONCE(sq->channel->netdev,
					 "Bad OP in XDPSQ CQE: 0x%x\n",
					 get_cqe_opcode(cqe));
			mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
					     (struct mlx5_err_cqe *)cqe);
			mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
		}
	} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));

	xdp_flush_frame_bulk(&bq);

	if (xsk_frames)
		xsk_tx_completed(sq->xsk_pool, xsk_frames);

	sq->stats->cqes += i;

	mlx5_cqwq_update_db_record(&cq->wq);

	/* ensure cq space is freed before enabling more cqes */
	wmb();

	sq->cc = sqcc;
	return (i == MLX5E_TX_CQ_POLL_BUDGET);
}
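
/* Completion flow in short: each CQE completes every WQE up to its
 * wqe_counter, the per-WQE mlx5e_xdp_info entries are drained in FIFO order,
 * frames are returned through the xdp_frame_bulk cache, AF_XDP descriptors
 * are acknowledged with a single xsk_tx_completed() call, and the return
 * value tells the caller whether the poll budget was exhausted and more
 * completions may be pending.
 */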

void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
{
	struct xdp_frame_bulk bq;
	u32 xsk_frames = 0;

	xdp_frame_bulk_init(&bq);

	rcu_read_lock(); /* need for xdp_return_frame_bulk */

	while (sq->cc != sq->pc) {
		struct mlx5e_xdp_wqe_info *wi;
		u16 ci;

		ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
		wi = &sq->db.wqe_info[ci];

		sq->cc += wi->num_wqebbs;

		mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false, &bq);
	}

	xdp_flush_frame_bulk(&bq);
	rcu_read_unlock();

	if (xsk_frames)
		xsk_tx_completed(sq->xsk_pool, xsk_frames);
}

int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
		   u32 flags)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct mlx5e_xdpsq *sq;
	int nxmit = 0;
	int sq_num;
	int i;

	/* this flag is sufficient, no need to test internal sq state */
	if (unlikely(!mlx5e_xdp_tx_is_enabled(priv)))
		return -ENETDOWN;

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
		return -EINVAL;

	sq_num = smp_processor_id();

	if (unlikely(sq_num >= priv->channels.num))
		return -ENXIO;

	sq = &priv->channels.c[sq_num]->xdpsq;

	for (i = 0; i < n; i++) {
		struct xdp_frame *xdpf = frames[i];
		struct mlx5e_xmit_data xdptxd;
		struct mlx5e_xdp_info xdpi;
		bool ret;

		xdptxd.data = xdpf->data;
		xdptxd.len = xdpf->len;
		xdptxd.dma_addr = dma_map_single(sq->pdev, xdptxd.data,
						 xdptxd.len, DMA_TO_DEVICE);

		if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr)))
			break;

		xdpi.mode           = MLX5E_XDP_XMIT_MODE_FRAME;
		xdpi.frame.xdpf     = xdpf;
		xdpi.frame.dma_addr = xdptxd.dma_addr;

		ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
				      mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0);
		if (unlikely(!ret)) {
			dma_unmap_single(sq->pdev, xdptxd.dma_addr,
					 xdptxd.len, DMA_TO_DEVICE);
			break;
		}
		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
		nxmit++;
	}

	if (flags & XDP_XMIT_FLUSH) {
		if (sq->mpwqe.wqe)
			mlx5e_xdp_mpwqe_complete(sq);
		mlx5e_xmit_xdp_doorbell(sq);
	}

	return nxmit;
}
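
/* ndo_xdp_xmit contract as implemented above: the SQ is selected by the
 * current CPU, frames are posted until the first failure, and the number of
 * frames actually queued is returned; frames beyond nxmit remain owned by the
 * caller, which is expected to free them. XDP_XMIT_FLUSH closes any open
 * MPWQE session and rings the doorbell in the same call.
 */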

void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
{
	struct mlx5e_xdpsq *xdpsq = rq->xdpsq;

	if (xdpsq->mpwqe.wqe)
		mlx5e_xdp_mpwqe_complete(xdpsq);

	mlx5e_xmit_xdp_doorbell(xdpsq);

	if (test_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags)) {
		xdp_do_flush_map();
		__clear_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
	}
}

void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
{
	sq->xmit_xdp_frame_check = is_mpw ?
		mlx5e_xmit_xdp_frame_check_mpwqe : mlx5e_xmit_xdp_frame_check;
	sq->xmit_xdp_frame = is_mpw ?
		mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame;
}
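
/* The function pointers installed here match the two candidates listed at
 * every INDIRECT_CALL_2() site in this file, so the wrapper can compare the
 * pointer and branch to a direct call instead of paying for a retpolined
 * indirect call.
 */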