cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

funeth_rx.c (23019B)


      1// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
      2
      3#include <linux/bpf_trace.h>
      4#include <linux/dma-mapping.h>
      5#include <linux/etherdevice.h>
      6#include <linux/filter.h>
      7#include <linux/irq.h>
      8#include <linux/pci.h>
      9#include <linux/skbuff.h>
     10#include "funeth_txrx.h"
     11#include "funeth.h"
     12#include "fun_queue.h"
     13
     14#define CREATE_TRACE_POINTS
     15#include "funeth_trace.h"
     16
     17/* Given the device's max supported MTU and pages of at least 4KB a packet can
     18 * be scattered into at most 4 buffers.
     19 */
     20#define RX_MAX_FRAGS 4
     21
     22/* Per packet headroom in non-XDP mode. Present only for 1-frag packets. */
     23#define FUN_RX_HEADROOM (NET_SKB_PAD + NET_IP_ALIGN)
     24
     25/* We try to reuse pages for our buffers. To avoid frequent page ref writes we
     26 * take EXTRA_PAGE_REFS references at once and then hand them out one per packet
     27 * occupying the buffer.
     28 */
     29#define EXTRA_PAGE_REFS 1000000
     30#define MIN_PAGE_REFS 1000
     31
     32enum {
     33	FUN_XDP_FLUSH_REDIR = 1,
     34	FUN_XDP_FLUSH_TX = 2,
     35};
     36
     37/* See if a page is running low on refs we are holding and if so take more. */
     38static void refresh_refs(struct funeth_rxbuf *buf)
     39{
     40	if (unlikely(buf->pg_refs < MIN_PAGE_REFS)) {
     41		buf->pg_refs += EXTRA_PAGE_REFS;
     42		page_ref_add(buf->page, EXTRA_PAGE_REFS);
     43	}
     44}
     45
     46/* Offer a buffer to the Rx buffer cache. The cache will hold the buffer if its
     47 * page is worth retaining and there's room for it. Otherwise the page is
     48 * unmapped and our references released.
     49 */
     50static void cache_offer(struct funeth_rxq *q, const struct funeth_rxbuf *buf)
     51{
     52	struct funeth_rx_cache *c = &q->cache;
     53
     54	if (c->prod_cnt - c->cons_cnt <= c->mask && buf->node == numa_mem_id()) {
     55		c->bufs[c->prod_cnt & c->mask] = *buf;
     56		c->prod_cnt++;
     57	} else {
     58		dma_unmap_page_attrs(q->dma_dev, buf->dma_addr, PAGE_SIZE,
     59				     DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
     60		__page_frag_cache_drain(buf->page, buf->pg_refs);
     61	}
     62}
     63
     64/* Get a page from the Rx buffer cache. We only consider the next available
     65 * page and return it if we own all its references.
     66 */
     67static bool cache_get(struct funeth_rxq *q, struct funeth_rxbuf *rb)
     68{
     69	struct funeth_rx_cache *c = &q->cache;
     70	struct funeth_rxbuf *buf;
     71
     72	if (c->prod_cnt == c->cons_cnt)
     73		return false;             /* empty cache */
     74
     75	buf = &c->bufs[c->cons_cnt & c->mask];
     76	if (page_ref_count(buf->page) == buf->pg_refs) {
     77		dma_sync_single_for_device(q->dma_dev, buf->dma_addr,
     78					   PAGE_SIZE, DMA_FROM_DEVICE);
     79		*rb = *buf;
     80		buf->page = NULL;
     81		refresh_refs(rb);
     82		c->cons_cnt++;
     83		return true;
     84	}
     85
     86	/* Page can't be reused. If the cache is full drop this page. */
     87	if (c->prod_cnt - c->cons_cnt > c->mask) {
     88		dma_unmap_page_attrs(q->dma_dev, buf->dma_addr, PAGE_SIZE,
     89				     DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
     90		__page_frag_cache_drain(buf->page, buf->pg_refs);
     91		buf->page = NULL;
     92		c->cons_cnt++;
     93	}
     94	return false;
     95}
     96
     97/* Allocate and DMA-map a page for receive. */
     98static int funeth_alloc_page(struct funeth_rxq *q, struct funeth_rxbuf *rb,
     99			     int node, gfp_t gfp)
    100{
    101	struct page *p;
    102
    103	if (cache_get(q, rb))
    104		return 0;
    105
    106	p = __alloc_pages_node(node, gfp | __GFP_NOWARN, 0);
    107	if (unlikely(!p))
    108		return -ENOMEM;
    109
    110	rb->dma_addr = dma_map_page(q->dma_dev, p, 0, PAGE_SIZE,
    111				    DMA_FROM_DEVICE);
    112	if (unlikely(dma_mapping_error(q->dma_dev, rb->dma_addr))) {
    113		FUN_QSTAT_INC(q, rx_map_err);
    114		__free_page(p);
    115		return -ENOMEM;
    116	}
    117
    118	FUN_QSTAT_INC(q, rx_page_alloc);
    119
    120	rb->page = p;
    121	rb->pg_refs = 1;
    122	refresh_refs(rb);
    123	rb->node = page_is_pfmemalloc(p) ? -1 : page_to_nid(p);
    124	return 0;
    125}
    126
    127static void funeth_free_page(struct funeth_rxq *q, struct funeth_rxbuf *rb)
    128{
    129	if (rb->page) {
    130		dma_unmap_page(q->dma_dev, rb->dma_addr, PAGE_SIZE,
    131			       DMA_FROM_DEVICE);
    132		__page_frag_cache_drain(rb->page, rb->pg_refs);
    133		rb->page = NULL;
    134	}
    135}
    136
    137/* Run the XDP program assigned to an Rx queue.
    138 * Return %NULL if the buffer is consumed, or the virtual address of the packet
    139 * to turn into an skb.
    140 */
    141static void *fun_run_xdp(struct funeth_rxq *q, skb_frag_t *frags, void *buf_va,
    142			 int ref_ok, struct funeth_txq *xdp_q)
    143{
    144	struct bpf_prog *xdp_prog;
    145	struct xdp_buff xdp;
    146	u32 act;
    147
    148	/* VA includes the headroom, frag size includes headroom + tailroom */
    149	xdp_init_buff(&xdp, ALIGN(skb_frag_size(frags), FUN_EPRQ_PKT_ALIGN),
    150		      &q->xdp_rxq);
    151	xdp_prepare_buff(&xdp, buf_va, FUN_XDP_HEADROOM, skb_frag_size(frags) -
    152			 (FUN_RX_TAILROOM + FUN_XDP_HEADROOM), false);
    153
    154	xdp_prog = READ_ONCE(q->xdp_prog);
    155	act = bpf_prog_run_xdp(xdp_prog, &xdp);
    156
    157	switch (act) {
    158	case XDP_PASS:
    159		/* remove headroom, which may not be FUN_XDP_HEADROOM now */
    160		skb_frag_size_set(frags, xdp.data_end - xdp.data);
    161		skb_frag_off_add(frags, xdp.data - xdp.data_hard_start);
    162		goto pass;
    163	case XDP_TX:
    164		if (unlikely(!ref_ok))
    165			goto pass;
    166		if (!fun_xdp_tx(xdp_q, xdp.data, xdp.data_end - xdp.data))
    167			goto xdp_error;
    168		FUN_QSTAT_INC(q, xdp_tx);
    169		q->xdp_flush |= FUN_XDP_FLUSH_TX;
    170		break;
    171	case XDP_REDIRECT:
    172		if (unlikely(!ref_ok))
    173			goto pass;
    174		if (unlikely(xdp_do_redirect(q->netdev, &xdp, xdp_prog)))
    175			goto xdp_error;
    176		FUN_QSTAT_INC(q, xdp_redir);
    177		q->xdp_flush |= FUN_XDP_FLUSH_REDIR;
    178		break;
    179	default:
    180		bpf_warn_invalid_xdp_action(q->netdev, xdp_prog, act);
    181		fallthrough;
    182	case XDP_ABORTED:
    183		trace_xdp_exception(q->netdev, xdp_prog, act);
    184xdp_error:
    185		q->cur_buf->pg_refs++; /* return frags' page reference */
    186		FUN_QSTAT_INC(q, xdp_err);
    187		break;
    188	case XDP_DROP:
    189		q->cur_buf->pg_refs++;
    190		FUN_QSTAT_INC(q, xdp_drops);
    191		break;
    192	}
    193	return NULL;
    194
    195pass:
    196	return xdp.data;
    197}
    198
    199/* A CQE contains a fixed completion structure along with optional metadata and
    200 * even packet data. Given the start address of a CQE return the start of the
    201 * contained fixed structure, which lies at the end.
    202 */
    203static const void *cqe_to_info(const void *cqe)
    204{
    205	return cqe + FUNETH_CQE_INFO_OFFSET;
    206}
    207
    208/* The inverse of cqe_to_info(). */
    209static const void *info_to_cqe(const void *cqe_info)
    210{
    211	return cqe_info - FUNETH_CQE_INFO_OFFSET;
    212}
    213
    214/* Return the type of hash provided by the device based on the L3 and L4
    215 * protocols it parsed for the packet.
    216 */
    217static enum pkt_hash_types cqe_to_pkt_hash_type(u16 pkt_parse)
    218{
    219	static const enum pkt_hash_types htype_map[] = {
    220		PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3,
    221		PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L4,
    222		PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3,
    223		PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3
    224	};
    225	u16 key;
    226
    227	/* Build the key from the TCP/UDP and IP/IPv6 bits */
    228	key = ((pkt_parse >> FUN_ETH_RX_CV_OL4_PROT_S) & 6) |
    229	      ((pkt_parse >> (FUN_ETH_RX_CV_OL3_PROT_S + 1)) & 1);
    230
    231	return htype_map[key];
    232}
    233
    234/* Each received packet can be scattered across several Rx buffers or can
    235 * share a buffer with previously received packets depending on the buffer
    236 * and packet sizes and the room available in the most recently used buffer.
    237 *
    238 * The rules are:
    239 * - If the buffer at the head of an RQ has not been used it gets (part of) the
    240 *   next incoming packet.
    241 * - Otherwise, if the packet fully fits in the buffer's remaining space the
    242 *   packet is written there.
    243 * - Otherwise, the packet goes into the next Rx buffer.
    244 *
    245 * This function returns the Rx buffer for a packet or fragment thereof of the
    246 * given length. If it isn't @buf it either recycles or frees that buffer
    247 * before advancing the queue to the next buffer.
    248 *
    249 * If called repeatedly with the remaining length of a packet it will walk
    250 * through all the buffers containing the packet.
    251 */
    252static struct funeth_rxbuf *
    253get_buf(struct funeth_rxq *q, struct funeth_rxbuf *buf, unsigned int len)
    254{
    255	if (q->buf_offset + len <= PAGE_SIZE || !q->buf_offset)
    256		return buf;            /* @buf holds (part of) the packet */
    257
    258	/* The packet occupies part of the next buffer. Move there after
    259	 * replenishing the current buffer slot either with the spare page or
    260	 * by reusing the slot's existing page. Note that if a spare page isn't
    261	 * available and the current packet occupies @buf it is a multi-frag
    262	 * packet that will be dropped leaving @buf available for reuse.
    263	 */
    264	if ((page_ref_count(buf->page) == buf->pg_refs &&
    265	     buf->node == numa_mem_id()) || !q->spare_buf.page) {
    266		dma_sync_single_for_device(q->dma_dev, buf->dma_addr,
    267					   PAGE_SIZE, DMA_FROM_DEVICE);
    268		refresh_refs(buf);
    269	} else {
    270		cache_offer(q, buf);
    271		*buf = q->spare_buf;
    272		q->spare_buf.page = NULL;
    273		q->rqes[q->rq_cons & q->rq_mask] =
    274			FUN_EPRQ_RQBUF_INIT(buf->dma_addr);
    275	}
    276	q->buf_offset = 0;
    277	q->rq_cons++;
    278	return &q->bufs[q->rq_cons & q->rq_mask];
    279}
    280
    281/* Gather the page fragments making up the first Rx packet on @q. Its total
    282 * length @tot_len includes optional head- and tail-rooms.
    283 *
    284 * Return 0 if the device retains ownership of at least some of the pages.
    285 * In this case the caller may only copy the packet.
    286 *
    287 * A non-zero return value gives the caller permission to use references to the
    288 * pages, e.g., attach them to skbs. Additionally, if the value is <0 at least
    289 * one of the pages is PF_MEMALLOC.
    290 *
    291 * Regardless of outcome the caller is granted a reference to each of the pages.
    292 */
    293static int fun_gather_pkt(struct funeth_rxq *q, unsigned int tot_len,
    294			  skb_frag_t *frags)
    295{
    296	struct funeth_rxbuf *buf = q->cur_buf;
    297	unsigned int frag_len;
    298	int ref_ok = 1;
    299
    300	for (;;) {
    301		buf = get_buf(q, buf, tot_len);
    302
    303		/* We always keep the RQ full of buffers so before we can give
    304		 * one of our pages to the stack we require that we can obtain
    305		 * a replacement page. If we can't the packet will either be
    306		 * copied or dropped so we can retain ownership of the page and
    307		 * reuse it.
    308		 */
    309		if (!q->spare_buf.page &&
    310		    funeth_alloc_page(q, &q->spare_buf, numa_mem_id(),
    311				      GFP_ATOMIC | __GFP_MEMALLOC))
    312			ref_ok = 0;
    313
    314		frag_len = min_t(unsigned int, tot_len,
    315				 PAGE_SIZE - q->buf_offset);
    316		dma_sync_single_for_cpu(q->dma_dev,
    317					buf->dma_addr + q->buf_offset,
    318					frag_len, DMA_FROM_DEVICE);
    319		buf->pg_refs--;
    320		if (ref_ok)
    321			ref_ok |= buf->node;
    322
    323		__skb_frag_set_page(frags, buf->page);
    324		skb_frag_off_set(frags, q->buf_offset);
    325		skb_frag_size_set(frags++, frag_len);
    326
    327		tot_len -= frag_len;
    328		if (!tot_len)
    329			break;
    330
    331		q->buf_offset = PAGE_SIZE;
    332	}
    333	q->buf_offset = ALIGN(q->buf_offset + frag_len, FUN_EPRQ_PKT_ALIGN);
    334	q->cur_buf = buf;
    335	return ref_ok;
    336}
    337
    338static bool rx_hwtstamp_enabled(const struct net_device *dev)
    339{
    340	const struct funeth_priv *d = netdev_priv(dev);
    341
    342	return d->hwtstamp_cfg.rx_filter == HWTSTAMP_FILTER_ALL;
    343}
    344
    345/* Advance the CQ pointers and phase tag to the next CQE. */
    346static void advance_cq(struct funeth_rxq *q)
    347{
    348	if (unlikely(q->cq_head == q->cq_mask)) {
    349		q->cq_head = 0;
    350		q->phase ^= 1;
    351		q->next_cqe_info = cqe_to_info(q->cqes);
    352	} else {
    353		q->cq_head++;
    354		q->next_cqe_info += FUNETH_CQE_SIZE;
    355	}
    356	prefetch(q->next_cqe_info);
    357}
    358
    359/* Process the packet represented by the head CQE of @q. Gather the packet's
    360 * fragments, run it through the optional XDP program, and if needed construct
    361 * an skb and pass it to the stack.
    362 */
    363static void fun_handle_cqe_pkt(struct funeth_rxq *q, struct funeth_txq *xdp_q)
    364{
    365	const struct fun_eth_cqe *rxreq = info_to_cqe(q->next_cqe_info);
    366	unsigned int i, tot_len, pkt_len = be32_to_cpu(rxreq->pkt_len);
    367	struct net_device *ndev = q->netdev;
    368	skb_frag_t frags[RX_MAX_FRAGS];
    369	struct skb_shared_info *si;
    370	unsigned int headroom;
    371	gro_result_t gro_res;
    372	struct sk_buff *skb;
    373	int ref_ok;
    374	void *va;
    375	u16 cv;
    376
    377	u64_stats_update_begin(&q->syncp);
    378	q->stats.rx_pkts++;
    379	q->stats.rx_bytes += pkt_len;
    380	u64_stats_update_end(&q->syncp);
    381
    382	advance_cq(q);
    383
    384	/* account for head- and tail-room, present only for 1-buffer packets */
    385	tot_len = pkt_len;
    386	headroom = be16_to_cpu(rxreq->headroom);
    387	if (likely(headroom))
    388		tot_len += FUN_RX_TAILROOM + headroom;
    389
    390	ref_ok = fun_gather_pkt(q, tot_len, frags);
    391	va = skb_frag_address(frags);
    392	if (xdp_q && headroom == FUN_XDP_HEADROOM) {
    393		va = fun_run_xdp(q, frags, va, ref_ok, xdp_q);
    394		if (!va)
    395			return;
    396		headroom = 0;   /* XDP_PASS trims it */
    397	}
    398	if (unlikely(!ref_ok))
    399		goto no_mem;
    400
    401	if (likely(headroom)) {
    402		/* headroom is either FUN_RX_HEADROOM or FUN_XDP_HEADROOM */
    403		prefetch(va + headroom);
    404		skb = napi_build_skb(va, ALIGN(tot_len, FUN_EPRQ_PKT_ALIGN));
    405		if (unlikely(!skb))
    406			goto no_mem;
    407
    408		skb_reserve(skb, headroom);
    409		__skb_put(skb, pkt_len);
    410		skb->protocol = eth_type_trans(skb, ndev);
    411	} else {
    412		prefetch(va);
    413		skb = napi_get_frags(q->napi);
    414		if (unlikely(!skb))
    415			goto no_mem;
    416
    417		if (ref_ok < 0)
    418			skb->pfmemalloc = 1;
    419
    420		si = skb_shinfo(skb);
    421		si->nr_frags = rxreq->nsgl;
    422		for (i = 0; i < si->nr_frags; i++)
    423			si->frags[i] = frags[i];
    424
    425		skb->len = pkt_len;
    426		skb->data_len = pkt_len;
    427		skb->truesize += round_up(pkt_len, FUN_EPRQ_PKT_ALIGN);
    428	}
    429
    430	skb_record_rx_queue(skb, q->qidx);
    431	cv = be16_to_cpu(rxreq->pkt_cv);
    432	if (likely((q->netdev->features & NETIF_F_RXHASH) && rxreq->hash))
    433		skb_set_hash(skb, be32_to_cpu(rxreq->hash),
    434			     cqe_to_pkt_hash_type(cv));
    435	if (likely((q->netdev->features & NETIF_F_RXCSUM) && rxreq->csum)) {
    436		FUN_QSTAT_INC(q, rx_cso);
    437		skb->ip_summed = CHECKSUM_UNNECESSARY;
    438		skb->csum_level = be16_to_cpu(rxreq->csum) - 1;
    439	}
    440	if (unlikely(rx_hwtstamp_enabled(q->netdev)))
    441		skb_hwtstamps(skb)->hwtstamp = be64_to_cpu(rxreq->timestamp);
    442
    443	trace_funeth_rx(q, rxreq->nsgl, pkt_len, skb->hash, cv);
    444
    445	gro_res = skb->data_len ? napi_gro_frags(q->napi) :
    446				  napi_gro_receive(q->napi, skb);
    447	if (gro_res == GRO_MERGED || gro_res == GRO_MERGED_FREE)
    448		FUN_QSTAT_INC(q, gro_merged);
    449	else if (gro_res == GRO_HELD)
    450		FUN_QSTAT_INC(q, gro_pkts);
    451	return;
    452
    453no_mem:
    454	FUN_QSTAT_INC(q, rx_mem_drops);
    455
    456	/* Release the references we've been granted for the frag pages.
    457	 * We return the ref of the last frag and free the rest.
    458	 */
    459	q->cur_buf->pg_refs++;
    460	for (i = 0; i < rxreq->nsgl - 1; i++)
    461		__free_page(skb_frag_page(frags + i));
    462}
    463
    464/* Return 0 if the phase tag of the CQE at the CQ's head matches expectations
    465 * indicating the CQE is new.
    466 */
    467static u16 cqe_phase_mismatch(const struct fun_cqe_info *ci, u16 phase)
    468{
    469	u16 sf_p = be16_to_cpu(ci->sf_p);
    470
    471	return (sf_p & 1) ^ phase;
    472}
    473
    474/* Walk through a CQ identifying and processing fresh CQEs up to the given
    475 * budget. Return the remaining budget.
    476 */
    477static int fun_process_cqes(struct funeth_rxq *q, int budget)
    478{
    479	struct funeth_priv *fp = netdev_priv(q->netdev);
    480	struct funeth_txq **xdpqs, *xdp_q = NULL;
    481
    482	xdpqs = rcu_dereference_bh(fp->xdpqs);
    483	if (xdpqs)
    484		xdp_q = xdpqs[smp_processor_id()];
    485
    486	while (budget && !cqe_phase_mismatch(q->next_cqe_info, q->phase)) {
    487		/* access other descriptor fields after the phase check */
    488		dma_rmb();
    489
    490		fun_handle_cqe_pkt(q, xdp_q);
    491		budget--;
    492	}
    493
    494	if (unlikely(q->xdp_flush)) {
    495		if (q->xdp_flush & FUN_XDP_FLUSH_TX)
    496			fun_txq_wr_db(xdp_q);
    497		if (q->xdp_flush & FUN_XDP_FLUSH_REDIR)
    498			xdp_do_flush();
    499		q->xdp_flush = 0;
    500	}
    501
    502	return budget;
    503}
    504
    505/* NAPI handler for Rx queues. Calls the CQE processing loop and writes RQ/CQ
    506 * doorbells as needed.
    507 */
    508int fun_rxq_napi_poll(struct napi_struct *napi, int budget)
    509{
    510	struct fun_irq *irq = container_of(napi, struct fun_irq, napi);
    511	struct funeth_rxq *q = irq->rxq;
    512	int work_done = budget - fun_process_cqes(q, budget);
    513	u32 cq_db_val = q->cq_head;
    514
    515	if (unlikely(work_done >= budget))
    516		FUN_QSTAT_INC(q, rx_budget);
    517	else if (napi_complete_done(napi, work_done))
    518		cq_db_val |= q->irq_db_val;
    519
    520	/* check whether to post new Rx buffers */
    521	if (q->rq_cons - q->rq_cons_db >= q->rq_db_thres) {
    522		u64_stats_update_begin(&q->syncp);
    523		q->stats.rx_bufs += q->rq_cons - q->rq_cons_db;
    524		u64_stats_update_end(&q->syncp);
    525		q->rq_cons_db = q->rq_cons;
    526		writel((q->rq_cons - 1) & q->rq_mask, q->rq_db);
    527	}
    528
    529	writel(cq_db_val, q->cq_db);
    530	return work_done;
    531}
    532
    533/* Free the Rx buffers of an Rx queue. */
    534static void fun_rxq_free_bufs(struct funeth_rxq *q)
    535{
    536	struct funeth_rxbuf *b = q->bufs;
    537	unsigned int i;
    538
    539	for (i = 0; i <= q->rq_mask; i++, b++)
    540		funeth_free_page(q, b);
    541
    542	funeth_free_page(q, &q->spare_buf);
    543	q->cur_buf = NULL;
    544}
    545
    546/* Initially provision an Rx queue with Rx buffers. */
    547static int fun_rxq_alloc_bufs(struct funeth_rxq *q, int node)
    548{
    549	struct funeth_rxbuf *b = q->bufs;
    550	unsigned int i;
    551
    552	for (i = 0; i <= q->rq_mask; i++, b++) {
    553		if (funeth_alloc_page(q, b, node, GFP_KERNEL)) {
    554			fun_rxq_free_bufs(q);
    555			return -ENOMEM;
    556		}
    557		q->rqes[i] = FUN_EPRQ_RQBUF_INIT(b->dma_addr);
    558	}
    559	q->cur_buf = q->bufs;
    560	return 0;
    561}
    562
    563/* Initialize a used-buffer cache of the given depth. */
    564static int fun_rxq_init_cache(struct funeth_rx_cache *c, unsigned int depth,
    565			      int node)
    566{
    567	c->mask = depth - 1;
    568	c->bufs = kvzalloc_node(depth * sizeof(*c->bufs), GFP_KERNEL, node);
    569	return c->bufs ? 0 : -ENOMEM;
    570}
    571
    572/* Deallocate an Rx queue's used-buffer cache and its contents. */
    573static void fun_rxq_free_cache(struct funeth_rxq *q)
    574{
    575	struct funeth_rxbuf *b = q->cache.bufs;
    576	unsigned int i;
    577
    578	for (i = 0; i <= q->cache.mask; i++, b++)
    579		funeth_free_page(q, b);
    580
    581	kvfree(q->cache.bufs);
    582	q->cache.bufs = NULL;
    583}
    584
    585int fun_rxq_set_bpf(struct funeth_rxq *q, struct bpf_prog *prog)
    586{
    587	struct funeth_priv *fp = netdev_priv(q->netdev);
    588	struct fun_admin_epcq_req cmd;
    589	u16 headroom;
    590	int err;
    591
    592	headroom = prog ? FUN_XDP_HEADROOM : FUN_RX_HEADROOM;
    593	if (headroom != q->headroom) {
    594		cmd.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_EPCQ,
    595							sizeof(cmd));
    596		cmd.u.modify =
    597			FUN_ADMIN_EPCQ_MODIFY_REQ_INIT(FUN_ADMIN_SUBOP_MODIFY,
    598						       0, q->hw_cqid, headroom);
    599		err = fun_submit_admin_sync_cmd(fp->fdev, &cmd.common, NULL, 0,
    600						0);
    601		if (err)
    602			return err;
    603		q->headroom = headroom;
    604	}
    605
    606	WRITE_ONCE(q->xdp_prog, prog);
    607	return 0;
    608}
    609
    610/* Create an Rx queue, allocating the host memory it needs. */
    611static struct funeth_rxq *fun_rxq_create_sw(struct net_device *dev,
    612					    unsigned int qidx,
    613					    unsigned int ncqe,
    614					    unsigned int nrqe,
    615					    struct fun_irq *irq)
    616{
    617	struct funeth_priv *fp = netdev_priv(dev);
    618	struct funeth_rxq *q;
    619	int err = -ENOMEM;
    620	int numa_node;
    621
    622	numa_node = fun_irq_node(irq);
    623	q = kzalloc_node(sizeof(*q), GFP_KERNEL, numa_node);
    624	if (!q)
    625		goto err;
    626
    627	q->qidx = qidx;
    628	q->netdev = dev;
    629	q->cq_mask = ncqe - 1;
    630	q->rq_mask = nrqe - 1;
    631	q->numa_node = numa_node;
    632	q->rq_db_thres = nrqe / 4;
    633	u64_stats_init(&q->syncp);
    634	q->dma_dev = &fp->pdev->dev;
    635
    636	q->rqes = fun_alloc_ring_mem(q->dma_dev, nrqe, sizeof(*q->rqes),
    637				     sizeof(*q->bufs), false, numa_node,
    638				     &q->rq_dma_addr, (void **)&q->bufs, NULL);
    639	if (!q->rqes)
    640		goto free_q;
    641
    642	q->cqes = fun_alloc_ring_mem(q->dma_dev, ncqe, FUNETH_CQE_SIZE, 0,
    643				     false, numa_node, &q->cq_dma_addr, NULL,
    644				     NULL);
    645	if (!q->cqes)
    646		goto free_rqes;
    647
    648	err = fun_rxq_init_cache(&q->cache, nrqe, numa_node);
    649	if (err)
    650		goto free_cqes;
    651
    652	err = fun_rxq_alloc_bufs(q, numa_node);
    653	if (err)
    654		goto free_cache;
    655
    656	q->stats.rx_bufs = q->rq_mask;
    657	q->init_state = FUN_QSTATE_INIT_SW;
    658	return q;
    659
    660free_cache:
    661	fun_rxq_free_cache(q);
    662free_cqes:
    663	dma_free_coherent(q->dma_dev, ncqe * FUNETH_CQE_SIZE, q->cqes,
    664			  q->cq_dma_addr);
    665free_rqes:
    666	fun_free_ring_mem(q->dma_dev, nrqe, sizeof(*q->rqes), false, q->rqes,
    667			  q->rq_dma_addr, q->bufs);
    668free_q:
    669	kfree(q);
    670err:
    671	netdev_err(dev, "Unable to allocate memory for Rx queue %u\n", qidx);
    672	return ERR_PTR(err);
    673}
    674
    675static void fun_rxq_free_sw(struct funeth_rxq *q)
    676{
    677	struct funeth_priv *fp = netdev_priv(q->netdev);
    678
    679	fun_rxq_free_cache(q);
    680	fun_rxq_free_bufs(q);
    681	fun_free_ring_mem(q->dma_dev, q->rq_mask + 1, sizeof(*q->rqes), false,
    682			  q->rqes, q->rq_dma_addr, q->bufs);
    683	dma_free_coherent(q->dma_dev, (q->cq_mask + 1) * FUNETH_CQE_SIZE,
    684			  q->cqes, q->cq_dma_addr);
    685
    686	/* Before freeing the queue transfer key counters to the device. */
    687	fp->rx_packets += q->stats.rx_pkts;
    688	fp->rx_bytes   += q->stats.rx_bytes;
    689	fp->rx_dropped += q->stats.rx_map_err + q->stats.rx_mem_drops;
    690
    691	kfree(q);
    692}
    693
    694/* Create an Rx queue's resources on the device. */
    695int fun_rxq_create_dev(struct funeth_rxq *q, struct fun_irq *irq)
    696{
    697	struct funeth_priv *fp = netdev_priv(q->netdev);
    698	unsigned int ncqe = q->cq_mask + 1;
    699	unsigned int nrqe = q->rq_mask + 1;
    700	int err;
    701
    702	err = xdp_rxq_info_reg(&q->xdp_rxq, q->netdev, q->qidx,
    703			       irq->napi.napi_id);
    704	if (err)
    705		goto out;
    706
    707	err = xdp_rxq_info_reg_mem_model(&q->xdp_rxq, MEM_TYPE_PAGE_SHARED,
    708					 NULL);
    709	if (err)
    710		goto xdp_unreg;
    711
    712	q->phase = 1;
    713	q->irq_cnt = 0;
    714	q->cq_head = 0;
    715	q->rq_cons = 0;
    716	q->rq_cons_db = 0;
    717	q->buf_offset = 0;
    718	q->napi = &irq->napi;
    719	q->irq_db_val = fp->cq_irq_db;
    720	q->next_cqe_info = cqe_to_info(q->cqes);
    721
    722	q->xdp_prog = fp->xdp_prog;
    723	q->headroom = fp->xdp_prog ? FUN_XDP_HEADROOM : FUN_RX_HEADROOM;
    724
    725	err = fun_sq_create(fp->fdev, FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR |
    726			    FUN_ADMIN_EPSQ_CREATE_FLAG_RQ, 0,
    727			    FUN_HCI_ID_INVALID, 0, nrqe, q->rq_dma_addr, 0, 0,
    728			    0, 0, fp->fdev->kern_end_qid, PAGE_SHIFT,
    729			    &q->hw_sqid, &q->rq_db);
    730	if (err)
    731		goto xdp_unreg;
    732
    733	err = fun_cq_create(fp->fdev, FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR |
    734			    FUN_ADMIN_EPCQ_CREATE_FLAG_RQ, 0,
    735			    q->hw_sqid, ilog2(FUNETH_CQE_SIZE), ncqe,
    736			    q->cq_dma_addr, q->headroom, FUN_RX_TAILROOM, 0, 0,
    737			    irq->irq_idx, 0, fp->fdev->kern_end_qid,
    738			    &q->hw_cqid, &q->cq_db);
    739	if (err)
    740		goto free_rq;
    741
    742	irq->rxq = q;
    743	writel(q->rq_mask, q->rq_db);
    744	q->init_state = FUN_QSTATE_INIT_FULL;
    745
    746	netif_info(fp, ifup, q->netdev,
    747		   "Rx queue %u, depth %u/%u, HW qid %u/%u, IRQ idx %u, node %d, headroom %u\n",
    748		   q->qidx, ncqe, nrqe, q->hw_cqid, q->hw_sqid, irq->irq_idx,
    749		   q->numa_node, q->headroom);
    750	return 0;
    751
    752free_rq:
    753	fun_destroy_sq(fp->fdev, q->hw_sqid);
    754xdp_unreg:
    755	xdp_rxq_info_unreg(&q->xdp_rxq);
    756out:
    757	netdev_err(q->netdev,
    758		   "Failed to create Rx queue %u on device, error %d\n",
    759		   q->qidx, err);
    760	return err;
    761}
    762
    763static void fun_rxq_free_dev(struct funeth_rxq *q)
    764{
    765	struct funeth_priv *fp = netdev_priv(q->netdev);
    766	struct fun_irq *irq;
    767
    768	if (q->init_state < FUN_QSTATE_INIT_FULL)
    769		return;
    770
    771	irq = container_of(q->napi, struct fun_irq, napi);
    772	netif_info(fp, ifdown, q->netdev,
    773		   "Freeing Rx queue %u (id %u/%u), IRQ %u\n",
    774		   q->qidx, q->hw_cqid, q->hw_sqid, irq->irq_idx);
    775
    776	irq->rxq = NULL;
    777	xdp_rxq_info_unreg(&q->xdp_rxq);
    778	fun_destroy_sq(fp->fdev, q->hw_sqid);
    779	fun_destroy_cq(fp->fdev, q->hw_cqid);
    780	q->init_state = FUN_QSTATE_INIT_SW;
    781}
    782
    783/* Create or advance an Rx queue, allocating all the host and device resources
    784 * needed to reach the target state.
    785 */
    786int funeth_rxq_create(struct net_device *dev, unsigned int qidx,
    787		      unsigned int ncqe, unsigned int nrqe, struct fun_irq *irq,
    788		      int state, struct funeth_rxq **qp)
    789{
    790	struct funeth_rxq *q = *qp;
    791	int err;
    792
    793	if (!q) {
    794		q = fun_rxq_create_sw(dev, qidx, ncqe, nrqe, irq);
    795		if (IS_ERR(q))
    796			return PTR_ERR(q);
    797	}
    798
    799	if (q->init_state >= state)
    800		goto out;
    801
    802	err = fun_rxq_create_dev(q, irq);
    803	if (err) {
    804		if (!*qp)
    805			fun_rxq_free_sw(q);
    806		return err;
    807	}
    808
    809out:
    810	*qp = q;
    811	return 0;
    812}
    813
    814/* Free Rx queue resources until it reaches the target state. */
    815struct funeth_rxq *funeth_rxq_free(struct funeth_rxq *q, int state)
    816{
    817	if (state < FUN_QSTATE_INIT_FULL)
    818		fun_rxq_free_dev(q);
    819
    820	if (state == FUN_QSTATE_DESTROYED) {
    821		fun_rxq_free_sw(q);
    822		q = NULL;
    823	}
    824
    825	return q;
    826}