cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

ice_xsk.c (26840B)


      1// SPDX-License-Identifier: GPL-2.0
      2/* Copyright (c) 2019, Intel Corporation. */
      3
      4#include <linux/bpf_trace.h>
      5#include <net/xdp_sock_drv.h>
      6#include <net/xdp.h>
      7#include "ice.h"
      8#include "ice_base.h"
      9#include "ice_type.h"
     10#include "ice_xsk.h"
     11#include "ice_txrx.h"
     12#include "ice_txrx_lib.h"
     13#include "ice_lib.h"
     14
     15static struct xdp_buff **ice_xdp_buf(struct ice_rx_ring *rx_ring, u32 idx)
     16{
     17	return &rx_ring->xdp_buf[idx];
     18}
     19
     20/**
     21 * ice_qp_reset_stats - Resets all stats for rings of given index
     22 * @vsi: VSI that contains rings of interest
     23 * @q_idx: ring index in array
     24 */
     25static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx)
     26{
     27	memset(&vsi->rx_rings[q_idx]->rx_stats, 0,
     28	       sizeof(vsi->rx_rings[q_idx]->rx_stats));
     29	memset(&vsi->tx_rings[q_idx]->stats, 0,
     30	       sizeof(vsi->tx_rings[q_idx]->stats));
     31	if (ice_is_xdp_ena_vsi(vsi))
     32		memset(&vsi->xdp_rings[q_idx]->stats, 0,
     33		       sizeof(vsi->xdp_rings[q_idx]->stats));
     34}
     35
     36/**
     37 * ice_qp_clean_rings - Cleans all the rings of a given index
     38 * @vsi: VSI that contains rings of interest
     39 * @q_idx: ring index in array
     40 */
     41static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx)
     42{
     43	ice_clean_tx_ring(vsi->tx_rings[q_idx]);
     44	if (ice_is_xdp_ena_vsi(vsi)) {
     45		synchronize_rcu();
     46		ice_clean_tx_ring(vsi->xdp_rings[q_idx]);
     47	}
     48	ice_clean_rx_ring(vsi->rx_rings[q_idx]);
     49}
     50
     51/**
     52 * ice_qvec_toggle_napi - Enables/disables NAPI for a given q_vector
     53 * @vsi: VSI that has netdev
     54 * @q_vector: q_vector that has NAPI context
     55 * @enable: true for enable, false for disable
     56 */
     57static void
     58ice_qvec_toggle_napi(struct ice_vsi *vsi, struct ice_q_vector *q_vector,
     59		     bool enable)
     60{
     61	if (!vsi->netdev || !q_vector)
     62		return;
     63
     64	if (enable)
     65		napi_enable(&q_vector->napi);
     66	else
     67		napi_disable(&q_vector->napi);
     68}
     69
     70/**
     71 * ice_qvec_dis_irq - Mask off queue interrupt generation on given ring
     72 * @vsi: the VSI that contains queue vector being un-configured
     73 * @rx_ring: Rx ring that will have its IRQ disabled
     74 * @q_vector: queue vector
     75 */
     76static void
     77ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_rx_ring *rx_ring,
     78		 struct ice_q_vector *q_vector)
     79{
     80	struct ice_pf *pf = vsi->back;
     81	struct ice_hw *hw = &pf->hw;
     82	int base = vsi->base_vector;
     83	u16 reg;
     84	u32 val;
     85
      86	/* QINT_TQCTL is cleared in ice_vsi_stop_tx_ring, so only
      87	 * QINT_RQCTL needs to be handled here
      88	 */
     89	reg = rx_ring->reg_idx;
     90	val = rd32(hw, QINT_RQCTL(reg));
     91	val &= ~QINT_RQCTL_CAUSE_ENA_M;
     92	wr32(hw, QINT_RQCTL(reg), val);
     93
     94	if (q_vector) {
     95		u16 v_idx = q_vector->v_idx;
     96
     97		wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx), 0);
     98		ice_flush(hw);
     99		synchronize_irq(pf->msix_entries[v_idx + base].vector);
    100	}
    101}
    102
    103/**
    104 * ice_qvec_cfg_msix - Enable IRQ for given queue vector
    105 * @vsi: the VSI that contains queue vector
    106 * @q_vector: queue vector
    107 */
    108static void
    109ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
    110{
    111	u16 reg_idx = q_vector->reg_idx;
    112	struct ice_pf *pf = vsi->back;
    113	struct ice_hw *hw = &pf->hw;
    114	struct ice_tx_ring *tx_ring;
    115	struct ice_rx_ring *rx_ring;
    116
    117	ice_cfg_itr(hw, q_vector);
    118
    119	ice_for_each_tx_ring(tx_ring, q_vector->tx)
    120		ice_cfg_txq_interrupt(vsi, tx_ring->reg_idx, reg_idx,
    121				      q_vector->tx.itr_idx);
    122
    123	ice_for_each_rx_ring(rx_ring, q_vector->rx)
    124		ice_cfg_rxq_interrupt(vsi, rx_ring->reg_idx, reg_idx,
    125				      q_vector->rx.itr_idx);
    126
    127	ice_flush(hw);
    128}
    129
    130/**
    131 * ice_qvec_ena_irq - Enable IRQ for given queue vector
    132 * @vsi: the VSI that contains queue vector
    133 * @q_vector: queue vector
    134 */
    135static void ice_qvec_ena_irq(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
    136{
    137	struct ice_pf *pf = vsi->back;
    138	struct ice_hw *hw = &pf->hw;
    139
    140	ice_irq_dynamic_ena(hw, vsi, q_vector);
    141
    142	ice_flush(hw);
    143}
    144
    145/**
    146 * ice_qp_dis - Disables a queue pair
    147 * @vsi: VSI of interest
    148 * @q_idx: ring index in array
    149 *
    150 * Returns 0 on success, negative on failure.
    151 */
    152static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
    153{
    154	struct ice_txq_meta txq_meta = { };
    155	struct ice_q_vector *q_vector;
    156	struct ice_tx_ring *tx_ring;
    157	struct ice_rx_ring *rx_ring;
    158	int timeout = 50;
    159	int err;
    160
    161	if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
    162		return -EINVAL;
    163
    164	tx_ring = vsi->tx_rings[q_idx];
    165	rx_ring = vsi->rx_rings[q_idx];
    166	q_vector = rx_ring->q_vector;
    167
    168	while (test_and_set_bit(ICE_CFG_BUSY, vsi->state)) {
    169		timeout--;
    170		if (!timeout)
    171			return -EBUSY;
    172		usleep_range(1000, 2000);
    173	}
    174	netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
    175
    176	ice_qvec_dis_irq(vsi, rx_ring, q_vector);
    177
    178	ice_fill_txq_meta(vsi, tx_ring, &txq_meta);
    179	err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta);
    180	if (err)
    181		return err;
    182	if (ice_is_xdp_ena_vsi(vsi)) {
    183		struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx];
    184
    185		memset(&txq_meta, 0, sizeof(txq_meta));
    186		ice_fill_txq_meta(vsi, xdp_ring, &txq_meta);
    187		err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, xdp_ring,
    188					   &txq_meta);
    189		if (err)
    190			return err;
    191	}
    192	err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true);
    193	if (err)
    194		return err;
    195
    196	ice_qvec_toggle_napi(vsi, q_vector, false);
    197	ice_qp_clean_rings(vsi, q_idx);
    198	ice_qp_reset_stats(vsi, q_idx);
    199
    200	return 0;
    201}
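/* Teardown order implemented by ice_qp_dis(): stop the netdev Tx queue,
 * mask the Rx queue interrupt and synchronize_irq(), stop the Tx ring
 * (plus the XDP Tx ring when XDP is enabled), disable the Rx ring,
 * disable NAPI, then clean the rings and reset their stats.
 * ice_qp_ena() below brings the pair back up in roughly the reverse
 * order and finally clears ICE_CFG_BUSY and restarts the netdev queue.
 */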
    202
    203/**
    204 * ice_qp_ena - Enables a queue pair
    205 * @vsi: VSI of interest
    206 * @q_idx: ring index in array
    207 *
    208 * Returns 0 on success, negative on failure.
    209 */
    210static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx)
    211{
    212	struct ice_aqc_add_tx_qgrp *qg_buf;
    213	struct ice_q_vector *q_vector;
    214	struct ice_tx_ring *tx_ring;
    215	struct ice_rx_ring *rx_ring;
    216	u16 size;
    217	int err;
    218
    219	if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
    220		return -EINVAL;
    221
    222	size = struct_size(qg_buf, txqs, 1);
    223	qg_buf = kzalloc(size, GFP_KERNEL);
    224	if (!qg_buf)
    225		return -ENOMEM;
    226
    227	qg_buf->num_txqs = 1;
    228
    229	tx_ring = vsi->tx_rings[q_idx];
    230	rx_ring = vsi->rx_rings[q_idx];
    231	q_vector = rx_ring->q_vector;
    232
    233	err = ice_vsi_cfg_txq(vsi, tx_ring, qg_buf);
    234	if (err)
    235		goto free_buf;
    236
    237	if (ice_is_xdp_ena_vsi(vsi)) {
    238		struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx];
    239
    240		memset(qg_buf, 0, size);
    241		qg_buf->num_txqs = 1;
    242		err = ice_vsi_cfg_txq(vsi, xdp_ring, qg_buf);
    243		if (err)
    244			goto free_buf;
    245		ice_set_ring_xdp(xdp_ring);
    246		xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring);
    247	}
    248
    249	err = ice_vsi_cfg_rxq(rx_ring);
    250	if (err)
    251		goto free_buf;
    252
    253	ice_qvec_cfg_msix(vsi, q_vector);
    254
    255	err = ice_vsi_ctrl_one_rx_ring(vsi, true, q_idx, true);
    256	if (err)
    257		goto free_buf;
    258
    259	clear_bit(ICE_CFG_BUSY, vsi->state);
    260	ice_qvec_toggle_napi(vsi, q_vector, true);
    261	ice_qvec_ena_irq(vsi, q_vector);
    262
    263	netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
    264free_buf:
    265	kfree(qg_buf);
    266	return err;
    267}
    268
    269/**
    270 * ice_xsk_pool_disable - disable a buffer pool region
    271 * @vsi: Current VSI
    272 * @qid: queue ID
    273 *
    274 * Returns 0 on success, negative on failure
    275 */
    276static int ice_xsk_pool_disable(struct ice_vsi *vsi, u16 qid)
    277{
    278	struct xsk_buff_pool *pool = xsk_get_pool_from_qid(vsi->netdev, qid);
    279
    280	if (!pool)
    281		return -EINVAL;
    282
    283	clear_bit(qid, vsi->af_xdp_zc_qps);
    284	xsk_pool_dma_unmap(pool, ICE_RX_DMA_ATTR);
    285
    286	return 0;
    287}
    288
    289/**
    290 * ice_xsk_pool_enable - enable a buffer pool region
    291 * @vsi: Current VSI
    292 * @pool: pointer to a requested buffer pool region
    293 * @qid: queue ID
    294 *
    295 * Returns 0 on success, negative on failure
    296 */
    297static int
    298ice_xsk_pool_enable(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
    299{
    300	int err;
    301
    302	if (vsi->type != ICE_VSI_PF)
    303		return -EINVAL;
    304
    305	if (qid >= vsi->netdev->real_num_rx_queues ||
    306	    qid >= vsi->netdev->real_num_tx_queues)
    307		return -EINVAL;
    308
    309	err = xsk_pool_dma_map(pool, ice_pf_to_dev(vsi->back),
    310			       ICE_RX_DMA_ATTR);
    311	if (err)
    312		return err;
    313
    314	set_bit(qid, vsi->af_xdp_zc_qps);
    315
    316	return 0;
    317}
    318
    319/**
    320 * ice_xsk_pool_setup - enable/disable a buffer pool region depending on its state
    321 * @vsi: Current VSI
    322 * @pool: buffer pool to enable/associate to a ring, NULL to disable
    323 * @qid: queue ID
    324 *
    325 * Returns 0 on success, negative on failure
    326 */
    327int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
    328{
    329	bool if_running, pool_present = !!pool;
    330	int ret = 0, pool_failure = 0;
    331
    332	if (!is_power_of_2(vsi->rx_rings[qid]->count) ||
    333	    !is_power_of_2(vsi->tx_rings[qid]->count)) {
    334		netdev_err(vsi->netdev, "Please align ring sizes to power of 2\n");
    335		pool_failure = -EINVAL;
    336		goto failure;
    337	}
    338
    339	if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi);
    340
    341	if (if_running) {
    342		ret = ice_qp_dis(vsi, qid);
    343		if (ret) {
    344			netdev_err(vsi->netdev, "ice_qp_dis error = %d\n", ret);
    345			goto xsk_pool_if_up;
    346		}
    347	}
    348
    349	pool_failure = pool_present ? ice_xsk_pool_enable(vsi, pool, qid) :
    350				      ice_xsk_pool_disable(vsi, qid);
    351
    352xsk_pool_if_up:
    353	if (if_running) {
    354		ret = ice_qp_ena(vsi, qid);
    355		if (!ret && pool_present)
    356			napi_schedule(&vsi->xdp_rings[qid]->q_vector->napi);
    357		else if (ret)
    358			netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret);
    359	}
    360
    361failure:
    362	if (pool_failure) {
    363		netdev_err(vsi->netdev, "Could not %sable buffer pool, error = %d\n",
    364			   pool_present ? "en" : "dis", pool_failure);
    365		return pool_failure;
    366	}
    367
    368	return ret;
    369}
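/* Flow of ice_xsk_pool_setup() (reached from the driver's ndo_bpf handler
 * for XDP_SETUP_XSK_POOL): if the interface is up with XDP enabled, the
 * queue pair is first quiesced via ice_qp_dis(), the pool is then DMA
 * mapped (enable) or unmapped (disable) and the qid bit in af_xdp_zc_qps
 * is updated, and finally ice_qp_ena() brings the pair back up; on enable
 * NAPI is kicked so the zero-copy path starts using the new pool.
 */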
    370
    371/**
     372 * ice_fill_rx_descs - pick buffers from the XSK buffer pool and use them
    373 * @pool: XSK Buffer pool to pull the buffers from
    374 * @xdp: SW ring of xdp_buff that will hold the buffers
    375 * @rx_desc: Pointer to Rx descriptors that will be filled
    376 * @count: The number of buffers to allocate
    377 *
    378 * This function allocates a number of Rx buffers from the fill ring
    379 * or the internal recycle mechanism and places them on the Rx ring.
    380 *
     381 * Note that ring wrap should be handled by the caller of this function.
     382 *
     383 * Returns the number of allocated Rx descriptors
    384 */
    385static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
    386			     union ice_32b_rx_flex_desc *rx_desc, u16 count)
    387{
    388	dma_addr_t dma;
    389	u16 buffs;
    390	int i;
    391
    392	buffs = xsk_buff_alloc_batch(pool, xdp, count);
    393	for (i = 0; i < buffs; i++) {
    394		dma = xsk_buff_xdp_get_dma(*xdp);
    395		rx_desc->read.pkt_addr = cpu_to_le64(dma);
    396		rx_desc->wb.status_error0 = 0;
    397
    398		rx_desc++;
    399		xdp++;
    400	}
    401
    402	return buffs;
    403}
    404
    405/**
    406 * __ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
    407 * @rx_ring: Rx ring
    408 * @count: The number of buffers to allocate
    409 *
     410 * Place @count descriptors onto the Rx ring. Handle the ring wrap
     411 * for the case where the space from next_to_use up to the end of
     412 * the ring is less than @count. Finally, do a tail bump.
    413 *
    414 * Returns true if all allocations were successful, false if any fail.
    415 */
    416static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
    417{
    418	u32 nb_buffs_extra = 0, nb_buffs = 0;
    419	union ice_32b_rx_flex_desc *rx_desc;
    420	u16 ntu = rx_ring->next_to_use;
    421	u16 total_count = count;
    422	struct xdp_buff **xdp;
    423
    424	rx_desc = ICE_RX_DESC(rx_ring, ntu);
    425	xdp = ice_xdp_buf(rx_ring, ntu);
    426
    427	if (ntu + count >= rx_ring->count) {
    428		nb_buffs_extra = ice_fill_rx_descs(rx_ring->xsk_pool, xdp,
    429						   rx_desc,
    430						   rx_ring->count - ntu);
    431		if (nb_buffs_extra != rx_ring->count - ntu) {
    432			ntu += nb_buffs_extra;
    433			goto exit;
    434		}
    435		rx_desc = ICE_RX_DESC(rx_ring, 0);
    436		xdp = ice_xdp_buf(rx_ring, 0);
    437		ntu = 0;
    438		count -= nb_buffs_extra;
    439		ice_release_rx_desc(rx_ring, 0);
    440	}
    441
    442	nb_buffs = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, rx_desc, count);
    443
    444	ntu += nb_buffs;
    445	if (ntu == rx_ring->count)
    446		ntu = 0;
    447
    448exit:
    449	if (rx_ring->next_to_use != ntu)
    450		ice_release_rx_desc(rx_ring, ntu);
    451
    452	return total_count == (nb_buffs_extra + nb_buffs);
    453}
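/* Wrap example, assuming a 512-descriptor ring with next_to_use = 500 and
 * count = 64: the first ice_fill_rx_descs() call fills the 12 slots up to
 * the end of the ring, the tail is bumped at index 0, and the remaining
 * 52 buffers are placed starting from slot 0, leaving next_to_use = 52.
 * If fewer than 12 buffers could be allocated, the function bails out
 * early and bumps the tail to however far it got.
 */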
    454
    455/**
    456 * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
    457 * @rx_ring: Rx ring
    458 * @count: The number of buffers to allocate
    459 *
    460 * Wrapper for internal allocation routine; figure out how many tail
    461 * bumps should take place based on the given threshold
    462 *
    463 * Returns true if all calls to internal alloc routine succeeded
    464 */
    465bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
    466{
    467	u16 rx_thresh = ICE_RING_QUARTER(rx_ring);
    468	u16 batched, leftover, i, tail_bumps;
    469
    470	batched = ALIGN_DOWN(count, rx_thresh);
    471	tail_bumps = batched / rx_thresh;
    472	leftover = count & (rx_thresh - 1);
    473
    474	for (i = 0; i < tail_bumps; i++)
    475		if (!__ice_alloc_rx_bufs_zc(rx_ring, rx_thresh))
    476			return false;
    477	return __ice_alloc_rx_bufs_zc(rx_ring, leftover);
    478}
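/* Example, assuming a 512-descriptor ring (rx_thresh = 512 / 4 = 128) and
 * count = 300: batched = ALIGN_DOWN(300, 128) = 256, tail_bumps = 2 and
 * leftover = 300 & 127 = 44, i.e. two full 128-buffer fills followed by a
 * final 44-buffer fill. The bitwise AND is a valid modulo here because
 * ring sizes are restricted to powers of two (see ice_xsk_pool_setup()),
 * which makes rx_thresh a power of two as well.
 */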
    479
    480/**
    481 * ice_bump_ntc - Bump the next_to_clean counter of an Rx ring
    482 * @rx_ring: Rx ring
    483 */
    484static void ice_bump_ntc(struct ice_rx_ring *rx_ring)
    485{
    486	int ntc = rx_ring->next_to_clean + 1;
    487
    488	ntc = (ntc < rx_ring->count) ? ntc : 0;
    489	rx_ring->next_to_clean = ntc;
    490	prefetch(ICE_RX_DESC(rx_ring, ntc));
    491}
    492
    493/**
    494 * ice_construct_skb_zc - Create an sk_buff from zero-copy buffer
    495 * @rx_ring: Rx ring
    496 * @xdp: Pointer to XDP buffer
    497 *
    498 * This function allocates a new skb from a zero-copy Rx buffer.
    499 *
    500 * Returns the skb on success, NULL on failure.
    501 */
    502static struct sk_buff *
    503ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
    504{
    505	unsigned int totalsize = xdp->data_end - xdp->data_meta;
    506	unsigned int metasize = xdp->data - xdp->data_meta;
    507	struct sk_buff *skb;
    508
    509	net_prefetch(xdp->data_meta);
    510
    511	skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
    512			       GFP_ATOMIC | __GFP_NOWARN);
    513	if (unlikely(!skb))
    514		return NULL;
    515
    516	memcpy(__skb_put(skb, totalsize), xdp->data_meta,
    517	       ALIGN(totalsize, sizeof(long)));
    518
    519	if (metasize) {
    520		skb_metadata_set(skb, metasize);
    521		__skb_pull(skb, metasize);
    522	}
    523
    524	xsk_buff_free(xdp);
    525	return skb;
    526}
    527
    528/**
    529 * ice_run_xdp_zc - Executes an XDP program in zero-copy path
    530 * @rx_ring: Rx ring
    531 * @xdp: xdp_buff used as input to the XDP program
    532 * @xdp_prog: XDP program to run
    533 * @xdp_ring: ring to be used for XDP_TX action
    534 *
     535 * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR, EXIT}
    536 */
    537static int
    538ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
    539	       struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring)
    540{
    541	int err, result = ICE_XDP_PASS;
    542	u32 act;
    543
    544	act = bpf_prog_run_xdp(xdp_prog, xdp);
    545
    546	if (likely(act == XDP_REDIRECT)) {
    547		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
    548		if (!err)
    549			return ICE_XDP_REDIR;
    550		if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS)
    551			result = ICE_XDP_EXIT;
    552		else
    553			result = ICE_XDP_CONSUMED;
    554		goto out_failure;
    555	}
    556
    557	switch (act) {
    558	case XDP_PASS:
    559		break;
    560	case XDP_TX:
    561		result = ice_xmit_xdp_buff(xdp, xdp_ring);
    562		if (result == ICE_XDP_CONSUMED)
    563			goto out_failure;
    564		break;
    565	case XDP_DROP:
    566		result = ICE_XDP_CONSUMED;
    567		break;
    568	default:
    569		bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
    570		fallthrough;
    571	case XDP_ABORTED:
    572		result = ICE_XDP_CONSUMED;
    573out_failure:
    574		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
    575		break;
    576	}
    577
    578	return result;
    579}
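/* Verdict mapping used above: XDP_REDIRECT becomes ICE_XDP_REDIR on
 * success; a redirect that fails with -ENOBUFS while the pool uses
 * need_wakeup becomes ICE_XDP_EXIT so ice_clean_rx_irq_zc() can stop
 * polling and let user space catch up, any other redirect failure is
 * ICE_XDP_CONSUMED. XDP_TX maps to ICE_XDP_TX (or ICE_XDP_CONSUMED if
 * the transmit fails), XDP_PASS to ICE_XDP_PASS, and XDP_DROP,
 * XDP_ABORTED and unknown actions to ICE_XDP_CONSUMED.
 */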
    580
    581/**
    582 * ice_clean_rx_irq_zc - consumes packets from the hardware ring
    583 * @rx_ring: AF_XDP Rx ring
    584 * @budget: NAPI budget
    585 *
    586 * Returns number of processed packets on success, remaining budget on failure.
    587 */
    588int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
    589{
    590	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
    591	struct ice_tx_ring *xdp_ring;
    592	unsigned int xdp_xmit = 0;
    593	struct bpf_prog *xdp_prog;
    594	bool failure = false;
    595	int entries_to_alloc;
    596
     597	/* The ZC path is enabled only when an XDP program is set,
     598	 * so it cannot be NULL here
     599	 */
    600	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
    601	xdp_ring = rx_ring->xdp_ring;
    602
    603	while (likely(total_rx_packets < (unsigned int)budget)) {
    604		union ice_32b_rx_flex_desc *rx_desc;
    605		unsigned int size, xdp_res = 0;
    606		struct xdp_buff *xdp;
    607		struct sk_buff *skb;
    608		u16 stat_err_bits;
    609		u16 vlan_tag = 0;
    610		u16 rx_ptype;
    611
    612		rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);
    613
    614		stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
    615		if (!ice_test_staterr(rx_desc->wb.status_error0, stat_err_bits))
    616			break;
    617
    618		/* This memory barrier is needed to keep us from reading
    619		 * any other fields out of the rx_desc until we have
    620		 * verified the descriptor has been written back.
    621		 */
    622		dma_rmb();
    623
    624		if (unlikely(rx_ring->next_to_clean == rx_ring->next_to_use))
    625			break;
    626
    627		xdp = *ice_xdp_buf(rx_ring, rx_ring->next_to_clean);
    628
    629		size = le16_to_cpu(rx_desc->wb.pkt_len) &
    630				   ICE_RX_FLX_DESC_PKT_LEN_M;
    631		if (!size) {
    632			xdp->data = NULL;
    633			xdp->data_end = NULL;
    634			xdp->data_hard_start = NULL;
    635			xdp->data_meta = NULL;
    636			goto construct_skb;
    637		}
    638
    639		xsk_buff_set_size(xdp, size);
    640		xsk_buff_dma_sync_for_cpu(xdp, rx_ring->xsk_pool);
    641
    642		xdp_res = ice_run_xdp_zc(rx_ring, xdp, xdp_prog, xdp_ring);
    643		if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) {
    644			xdp_xmit |= xdp_res;
    645		} else if (xdp_res == ICE_XDP_EXIT) {
    646			failure = true;
    647			break;
    648		} else if (xdp_res == ICE_XDP_CONSUMED) {
    649			xsk_buff_free(xdp);
    650		} else if (xdp_res == ICE_XDP_PASS) {
    651			goto construct_skb;
    652		}
    653
    654		total_rx_bytes += size;
    655		total_rx_packets++;
    656
    657		ice_bump_ntc(rx_ring);
    658		continue;
    659
    660construct_skb:
    661		/* XDP_PASS path */
    662		skb = ice_construct_skb_zc(rx_ring, xdp);
    663		if (!skb) {
    664			rx_ring->rx_stats.alloc_buf_failed++;
    665			break;
    666		}
    667
    668		ice_bump_ntc(rx_ring);
    669
    670		if (eth_skb_pad(skb)) {
    671			skb = NULL;
    672			continue;
    673		}
    674
    675		total_rx_bytes += skb->len;
    676		total_rx_packets++;
    677
    678		vlan_tag = ice_get_vlan_tag_from_rx_desc(rx_desc);
    679
    680		rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
    681				       ICE_RX_FLEX_DESC_PTYPE_M;
    682
    683		ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
    684		ice_receive_skb(rx_ring, skb, vlan_tag);
    685	}
    686
    687	entries_to_alloc = ICE_DESC_UNUSED(rx_ring);
    688	if (entries_to_alloc > ICE_RING_QUARTER(rx_ring))
    689		failure |= !ice_alloc_rx_bufs_zc(rx_ring, entries_to_alloc);
    690
    691	ice_finalize_xdp_rx(xdp_ring, xdp_xmit);
    692	ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes);
    693
    694	if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) {
    695		if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
    696			xsk_set_rx_need_wakeup(rx_ring->xsk_pool);
    697		else
    698			xsk_clear_rx_need_wakeup(rx_ring->xsk_pool);
    699
    700		return (int)total_rx_packets;
    701	}
    702
    703	return failure ? budget : (int)total_rx_packets;
    704}
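/* When the pool uses need_wakeup, the Rx need_wakeup flag is set whenever
 * the loop hit a failure or caught up with next_to_use (ring empty), so
 * user space knows it must explicitly wake the queue; otherwise the flag
 * is cleared and the packet count is returned. Without need_wakeup the
 * usual convention applies: returning the full budget on failure keeps
 * NAPI polling.
 */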
    705
    706/**
    707 * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer
    708 * @xdp_ring: XDP Tx ring
    709 * @tx_buf: Tx buffer to clean
    710 */
    711static void
    712ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf)
    713{
    714	xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf);
    715	xdp_ring->xdp_tx_active--;
    716	dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
    717			 dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
    718	dma_unmap_len_set(tx_buf, len, 0);
    719}
    720
    721/**
    722 * ice_clean_xdp_irq_zc - Reclaim resources after transmit completes on XDP ring
    723 * @xdp_ring: XDP ring to clean
     724 * @napi_budget: number of descriptors that NAPI allows us to clean
    725 *
    726 * Returns count of cleaned descriptors
    727 */
    728static u16 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring, int napi_budget)
    729{
    730	u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
    731	int budget = napi_budget / tx_thresh;
    732	u16 next_dd = xdp_ring->next_dd;
    733	u16 ntc, cleared_dds = 0;
    734
    735	do {
    736		struct ice_tx_desc *next_dd_desc;
    737		u16 desc_cnt = xdp_ring->count;
    738		struct ice_tx_buf *tx_buf;
    739		u32 xsk_frames;
    740		u16 i;
    741
    742		next_dd_desc = ICE_TX_DESC(xdp_ring, next_dd);
    743		if (!(next_dd_desc->cmd_type_offset_bsz &
    744		    cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
    745			break;
    746
    747		cleared_dds++;
    748		xsk_frames = 0;
    749		if (likely(!xdp_ring->xdp_tx_active)) {
    750			xsk_frames = tx_thresh;
    751			goto skip;
    752		}
    753
    754		ntc = xdp_ring->next_to_clean;
    755
    756		for (i = 0; i < tx_thresh; i++) {
    757			tx_buf = &xdp_ring->tx_buf[ntc];
    758
    759			if (tx_buf->raw_buf) {
    760				ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
    761				tx_buf->raw_buf = NULL;
    762			} else {
    763				xsk_frames++;
    764			}
    765
    766			ntc++;
    767			if (ntc >= xdp_ring->count)
    768				ntc = 0;
    769		}
    770skip:
    771		xdp_ring->next_to_clean += tx_thresh;
    772		if (xdp_ring->next_to_clean >= desc_cnt)
    773			xdp_ring->next_to_clean -= desc_cnt;
    774		if (xsk_frames)
    775			xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
    776		next_dd_desc->cmd_type_offset_bsz = 0;
    777		next_dd = next_dd + tx_thresh;
    778		if (next_dd >= desc_cnt)
    779			next_dd = tx_thresh - 1;
    780	} while (--budget);
    781
    782	xdp_ring->next_dd = next_dd;
    783
    784	return cleared_dds * tx_thresh;
    785}
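/* Completions are tracked tx_thresh descriptors at a time: the RS bit is
 * requested on every tx_thresh-th descriptor (see ice_fill_tx_hw_ring()
 * and ice_xmit_zc()), so next_dd advances by tx_thresh per cleaned chunk
 * and wraps back to tx_thresh - 1. For example, on a 512-descriptor XDP
 * ring (tx_thresh = 128) next_dd cycles through 127, 255, 383, 511 and
 * then wraps back to 127.
 */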
    786
    787/**
    788 * ice_xmit_pkt - produce a single HW Tx descriptor out of AF_XDP descriptor
    789 * @xdp_ring: XDP ring to produce the HW Tx descriptor on
    790 * @desc: AF_XDP descriptor to pull the DMA address and length from
    791 * @total_bytes: bytes accumulator that will be used for stats update
    792 */
    793static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc,
    794			 unsigned int *total_bytes)
    795{
    796	struct ice_tx_desc *tx_desc;
    797	dma_addr_t dma;
    798
    799	dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc->addr);
    800	xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc->len);
    801
    802	tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use++);
    803	tx_desc->buf_addr = cpu_to_le64(dma);
    804	tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
    805						      0, desc->len, 0);
    806
    807	*total_bytes += desc->len;
    808}
    809
    810/**
    811 * ice_xmit_pkt_batch - produce a batch of HW Tx descriptors out of AF_XDP descriptors
    812 * @xdp_ring: XDP ring to produce the HW Tx descriptors on
    813 * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from
    814 * @total_bytes: bytes accumulator that will be used for stats update
    815 */
    816static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
    817			       unsigned int *total_bytes)
    818{
    819	u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
    820	u16 ntu = xdp_ring->next_to_use;
    821	struct ice_tx_desc *tx_desc;
    822	u32 i;
    823
    824	loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) {
    825		dma_addr_t dma;
    826
    827		dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, descs[i].addr);
    828		xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, descs[i].len);
    829
    830		tx_desc = ICE_TX_DESC(xdp_ring, ntu++);
    831		tx_desc->buf_addr = cpu_to_le64(dma);
    832		tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
    833							      0, descs[i].len, 0);
    834
    835		*total_bytes += descs[i].len;
    836	}
    837
    838	xdp_ring->next_to_use = ntu;
    839
    840	if (xdp_ring->next_to_use > xdp_ring->next_rs) {
    841		tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
    842		tx_desc->cmd_type_offset_bsz |=
    843			cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
    844		xdp_ring->next_rs += tx_thresh;
    845	}
    846}
    847
    848/**
     849 * ice_fill_tx_hw_ring - produce the requested number of Tx descriptors onto the ring
    850 * @xdp_ring: XDP ring to produce the HW Tx descriptors on
    851 * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from
     852 * @nb_pkts: count of packets to be sent
    853 * @total_bytes: bytes accumulator that will be used for stats update
    854 */
    855static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
    856				u32 nb_pkts, unsigned int *total_bytes)
    857{
    858	u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
    859	u32 batched, leftover, i;
    860
    861	batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH);
    862	leftover = nb_pkts & (PKTS_PER_BATCH - 1);
    863	for (i = 0; i < batched; i += PKTS_PER_BATCH)
    864		ice_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes);
    865	for (; i < batched + leftover; i++)
    866		ice_xmit_pkt(xdp_ring, &descs[i], total_bytes);
    867
    868	if (xdp_ring->next_to_use > xdp_ring->next_rs) {
    869		struct ice_tx_desc *tx_desc;
    870
    871		tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
    872		tx_desc->cmd_type_offset_bsz |=
    873			cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
    874		xdp_ring->next_rs += tx_thresh;
    875	}
    876}
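/* Example, assuming PKTS_PER_BATCH = 8 and nb_pkts = 37: batched =
 * ALIGN_DOWN(37, 8) = 32 and leftover = 37 & 7 = 5, so four unrolled
 * 8-packet batches go through ice_xmit_pkt_batch() and the remaining
 * 5 descriptors are produced one at a time via ice_xmit_pkt(). The
 * bitwise AND relies on PKTS_PER_BATCH being a power of two.
 */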
    877
    878/**
    879 * ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring
    880 * @xdp_ring: XDP ring to produce the HW Tx descriptors on
    881 * @budget: number of free descriptors on HW Tx ring that can be used
     883 * @napi_budget: number of descriptors that NAPI allows us to clean
    883 *
    884 * Returns true if there is no more work that needs to be done, false otherwise
    885 */
    886bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget)
    887{
    888	struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
    889	u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
    890	u32 nb_pkts, nb_processed = 0;
    891	unsigned int total_bytes = 0;
    892
    893	if (budget < tx_thresh)
    894		budget += ice_clean_xdp_irq_zc(xdp_ring, napi_budget);
    895
    896	nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
    897	if (!nb_pkts)
    898		return true;
    899
    900	if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
    901		struct ice_tx_desc *tx_desc;
    902
    903		nb_processed = xdp_ring->count - xdp_ring->next_to_use;
    904		ice_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes);
    905		tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
    906		tx_desc->cmd_type_offset_bsz |=
    907			cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
    908		xdp_ring->next_rs = tx_thresh - 1;
    909		xdp_ring->next_to_use = 0;
    910	}
    911
    912	ice_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed,
    913			    &total_bytes);
    914
    915	ice_xdp_ring_update_tail(xdp_ring);
    916	ice_update_tx_ring_stats(xdp_ring, nb_pkts, total_bytes);
    917
    918	if (xsk_uses_need_wakeup(xdp_ring->xsk_pool))
    919		xsk_set_tx_need_wakeup(xdp_ring->xsk_pool);
    920
    921	return nb_pkts < budget;
    922}
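/* If the peeked descriptors would run past the end of the HW ring, the
 * work is split: the first ice_fill_tx_hw_ring() call fills up to the
 * ring boundary, the RS bit is set on the pending next_rs descriptor,
 * next_to_use wraps to 0 (with next_rs reset to tx_thresh - 1), and the
 * remainder is produced from descs[nb_processed] onward. The
 * nb_pkts < budget return value tells the NAPI poll loop whether all
 * available Tx work was consumed.
 */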
    923
    924/**
    925 * ice_xsk_wakeup - Implements ndo_xsk_wakeup
    926 * @netdev: net_device
    927 * @queue_id: queue to wake up
    928 * @flags: ignored in our case, since we have Rx and Tx in the same NAPI
    929 *
    930 * Returns negative on error, zero otherwise.
    931 */
    932int
    933ice_xsk_wakeup(struct net_device *netdev, u32 queue_id,
    934	       u32 __always_unused flags)
    935{
    936	struct ice_netdev_priv *np = netdev_priv(netdev);
    937	struct ice_q_vector *q_vector;
    938	struct ice_vsi *vsi = np->vsi;
    939	struct ice_tx_ring *ring;
    940
    941	if (test_bit(ICE_VSI_DOWN, vsi->state))
    942		return -ENETDOWN;
    943
    944	if (!ice_is_xdp_ena_vsi(vsi))
    945		return -EINVAL;
    946
    947	if (queue_id >= vsi->num_txq)
    948		return -EINVAL;
    949
    950	if (!vsi->xdp_rings[queue_id]->xsk_pool)
    951		return -EINVAL;
    952
    953	ring = vsi->xdp_rings[queue_id];
    954
    955	/* The idea here is that if NAPI is running, mark a miss, so
    956	 * it will run again. If not, trigger an interrupt and
    957	 * schedule the NAPI from interrupt context. If NAPI would be
    958	 * scheduled here, the interrupt affinity would not be
    959	 * honored.
    960	 */
    961	q_vector = ring->q_vector;
    962	if (!napi_if_scheduled_mark_missed(&q_vector->napi))
    963		ice_trigger_sw_intr(&vsi->back->hw, q_vector);
    964
    965	return 0;
    966}
    967
    968/**
    969 * ice_xsk_any_rx_ring_ena - Checks if Rx rings have AF_XDP buff pool attached
    970 * @vsi: VSI to be checked
    971 *
    972 * Returns true if any of the Rx rings has an AF_XDP buff pool attached
    973 */
    974bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi)
    975{
    976	int i;
    977
    978	ice_for_each_rxq(vsi, i) {
    979		if (xsk_get_pool_from_qid(vsi->netdev, i))
    980			return true;
    981	}
    982
    983	return false;
    984}
    985
    986/**
    987 * ice_xsk_clean_rx_ring - clean buffer pool queues connected to a given Rx ring
    988 * @rx_ring: ring to be cleaned
    989 */
    990void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring)
    991{
    992	u16 count_mask = rx_ring->count - 1;
    993	u16 ntc = rx_ring->next_to_clean;
    994	u16 ntu = rx_ring->next_to_use;
    995
    996	for ( ; ntc != ntu; ntc = (ntc + 1) & count_mask) {
    997		struct xdp_buff *xdp = *ice_xdp_buf(rx_ring, ntc);
    998
    999		xsk_buff_free(xdp);
   1000	}
   1001}
   1002
   1003/**
   1004 * ice_xsk_clean_xdp_ring - Clean the XDP Tx ring and its buffer pool queues
   1005 * @xdp_ring: XDP_Tx ring
   1006 */
   1007void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring)
   1008{
   1009	u16 ntc = xdp_ring->next_to_clean, ntu = xdp_ring->next_to_use;
   1010	u32 xsk_frames = 0;
   1011
   1012	while (ntc != ntu) {
   1013		struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[ntc];
   1014
   1015		if (tx_buf->raw_buf)
   1016			ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
   1017		else
   1018			xsk_frames++;
   1019
   1020		tx_buf->raw_buf = NULL;
   1021
   1022		ntc++;
   1023		if (ntc >= xdp_ring->count)
   1024			ntc = 0;
   1025	}
   1026
   1027	if (xsk_frames)
   1028		xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
   1029}