cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

gve_rx.c (21479B)


// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/etherdevice.h>

static void gve_rx_free_buffer(struct device *dev,
			       struct gve_rx_slot_page_info *page_info,
			       union gve_rx_data_slot *data_slot)
{
	dma_addr_t dma = (dma_addr_t)(be64_to_cpu(data_slot->addr) &
				      GVE_DATA_SLOT_ADDR_PAGE_MASK);

	page_ref_sub(page_info->page, page_info->pagecnt_bias - 1);
	gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE);
}

static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	u32 slots = rx->mask + 1;
	int i;

	if (rx->data.raw_addressing) {
		for (i = 0; i < slots; i++)
			gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i],
					   &rx->data.data_ring[i]);
	} else {
		for (i = 0; i < slots; i++)
			page_ref_sub(rx->data.page_info[i].page,
				     rx->data.page_info[i].pagecnt_bias - 1);
		gve_unassign_qpl(priv, rx->data.qpl->id);
		rx->data.qpl = NULL;
	}
	kvfree(rx->data.page_info);
	rx->data.page_info = NULL;
}

static void gve_rx_free_ring(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *dev = &priv->pdev->dev;
	u32 slots = rx->mask + 1;
	size_t bytes;

	gve_rx_remove_from_block(priv, idx);

	bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
	dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
	rx->desc.desc_ring = NULL;

	dma_free_coherent(dev, sizeof(*rx->q_resources),
			  rx->q_resources, rx->q_resources_bus);
	rx->q_resources = NULL;

	gve_rx_unfill_pages(priv, rx);

	bytes = sizeof(*rx->data.data_ring) * slots;
	dma_free_coherent(dev, bytes, rx->data.data_ring,
			  rx->data.data_bus);
	rx->data.data_ring = NULL;
	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

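/* Point a data slot at @page and take a large up-front page reference.
 * The driver tracks its share of references in pagecnt_bias, so handing
 * a buffer to the stack only needs a local bias decrement
 * (gve_dec_pagecnt_bias()) instead of an atomic page reference per
 * packet, and gve_rx_can_recycle_buffer() can compare page_count()
 * against the bias to tell whether the stack still holds references.
 */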
static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
			     dma_addr_t addr, struct page *page, __be64 *slot_addr)
{
	page_info->page = page;
	page_info->page_offset = 0;
	page_info->page_address = page_address(page);
	*slot_addr = cpu_to_be64(addr);
	/* The page already has 1 ref */
	page_ref_add(page, INT_MAX - 1);
	page_info->pagecnt_bias = INT_MAX;
}

static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
			       struct gve_rx_slot_page_info *page_info,
			       union gve_rx_data_slot *data_slot)
{
	struct page *page;
	dma_addr_t dma;
	int err;

	err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE,
			     GFP_ATOMIC);
	if (err)
		return err;

	gve_setup_rx_buffer(page_info, dma, page, &data_slot->addr);
	return 0;
}

static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
{
	struct gve_priv *priv = rx->gve;
	u32 slots;
	int err;
	int i;

	/* Allocate one page per Rx queue slot. Each page is split into two
	 * packet buffers, when possible we "page flip" between the two.
	 */
	slots = rx->mask + 1;

	rx->data.page_info = kvzalloc(slots *
				      sizeof(*rx->data.page_info), GFP_KERNEL);
	if (!rx->data.page_info)
		return -ENOMEM;

	if (!rx->data.raw_addressing) {
		rx->data.qpl = gve_assign_rx_qpl(priv);
		if (!rx->data.qpl) {
			kvfree(rx->data.page_info);
			rx->data.page_info = NULL;
			return -ENOMEM;
		}
	}
	for (i = 0; i < slots; i++) {
		if (!rx->data.raw_addressing) {
			struct page *page = rx->data.qpl->pages[i];
			dma_addr_t addr = i * PAGE_SIZE;

			gve_setup_rx_buffer(&rx->data.page_info[i], addr, page,
					    &rx->data.data_ring[i].qpl_offset);
			continue;
		}
		err = gve_rx_alloc_buffer(priv, &priv->pdev->dev, &rx->data.page_info[i],
					  &rx->data.data_ring[i]);
		if (err)
			goto alloc_err;
	}

	return slots;
alloc_err:
	while (i--)
		gve_rx_free_buffer(&priv->pdev->dev,
				   &rx->data.page_info[i],
				   &rx->data.data_ring[i]);
	return err;
}

static void gve_rx_ctx_clear(struct gve_rx_ctx *ctx)
{
	ctx->curr_frag_cnt = 0;
	ctx->total_expected_size = 0;
	ctx->expected_frag_cnt = 0;
	ctx->skb_head = NULL;
	ctx->skb_tail = NULL;
	ctx->reuse_frags = false;
}

static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *hdev = &priv->pdev->dev;
	u32 slots, npages;
	int filled_pages;
	size_t bytes;
	int err;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring\n");
	/* Make sure everything is zeroed to start with */
	memset(rx, 0, sizeof(*rx));

	rx->gve = priv;
	rx->q_num = idx;

	slots = priv->rx_data_slot_cnt;
	rx->mask = slots - 1;
	rx->data.raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT;

	/* alloc rx data ring */
	bytes = sizeof(*rx->data.data_ring) * slots;
	rx->data.data_ring = dma_alloc_coherent(hdev, bytes,
						&rx->data.data_bus,
						GFP_KERNEL);
	if (!rx->data.data_ring)
		return -ENOMEM;
	filled_pages = gve_prefill_rx_pages(rx);
	if (filled_pages < 0) {
		err = -ENOMEM;
		goto abort_with_slots;
	}
	rx->fill_cnt = filled_pages;
	/* Ensure data ring slots (packet buffers) are visible. */
	dma_wmb();

	/* Alloc gve_queue_resources */
	rx->q_resources =
		dma_alloc_coherent(hdev,
				   sizeof(*rx->q_resources),
				   &rx->q_resources_bus,
				   GFP_KERNEL);
	if (!rx->q_resources) {
		err = -ENOMEM;
		goto abort_filled;
	}
	netif_dbg(priv, drv, priv->dev, "rx[%d]->data.data_bus=%lx\n", idx,
		  (unsigned long)rx->data.data_bus);

	/* alloc rx desc ring */
	bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
	npages = bytes / PAGE_SIZE;
	if (npages * PAGE_SIZE != bytes) {
		err = -EIO;
		goto abort_with_q_resources;
	}

	rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus,
						GFP_KERNEL);
	if (!rx->desc.desc_ring) {
		err = -ENOMEM;
		goto abort_with_q_resources;
	}
	rx->cnt = 0;
	rx->db_threshold = priv->rx_desc_cnt / 2;
	rx->desc.seqno = 1;

	/* Allocating half-page buffers allows page-flipping which is faster
	 * than copying or allocating new pages.
	 */
	rx->packet_buffer_size = PAGE_SIZE / 2;
	gve_rx_ctx_clear(&rx->ctx);
	gve_rx_add_to_block(priv, idx);

	return 0;

abort_with_q_resources:
	dma_free_coherent(hdev, sizeof(*rx->q_resources),
			  rx->q_resources, rx->q_resources_bus);
	rx->q_resources = NULL;
abort_filled:
	gve_rx_unfill_pages(priv, rx);
abort_with_slots:
	bytes = sizeof(*rx->data.data_ring) * slots;
	dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus);
	rx->data.data_ring = NULL;

	return err;
}

int gve_rx_alloc_rings(struct gve_priv *priv)
{
	int err = 0;
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		err = gve_rx_alloc_ring(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			break;
		}
	}
	/* Unallocate if there was an error */
	if (err) {
		int j;

		for (j = 0; j < i; j++)
			gve_rx_free_ring(priv, j);
	}
	return err;
}

void gve_rx_free_rings_gqi(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_rx_free_ring(priv, i);
}

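/* Tell the device how many buffers have been posted by writing the
 * ring's fill count (big-endian) to its doorbell in the second BAR.
 */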
void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	u32 db_idx = be32_to_cpu(rx->q_resources->db_index);

	iowrite32be(rx->fill_cnt, &priv->db_bar2[db_idx]);
}

static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
{
	if (likely(pkt_flags & (GVE_RXF_TCP | GVE_RXF_UDP)))
		return PKT_HASH_TYPE_L4;
	if (pkt_flags & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
		return PKT_HASH_TYPE_L3;
	return PKT_HASH_TYPE_L2;
}

static u16 gve_rx_ctx_padding(struct gve_rx_ctx *ctx)
{
	return (ctx->curr_frag_cnt == 0) ? GVE_RX_PAD : 0;
}

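/* Attach one buffer to ctx->skb_head (allocated via napi_get_frags()
 * for the first fragment) as a page fragment, skipping the device's
 * packet pad on the first fragment only.
 */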
static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi,
					struct gve_rx_slot_page_info *page_info,
					u16 packet_buffer_size, u16 len,
					struct gve_rx_ctx *ctx)
{
	u32 offset = page_info->page_offset + gve_rx_ctx_padding(ctx);
	struct sk_buff *skb;

	if (!ctx->skb_head)
		ctx->skb_head = napi_get_frags(napi);

	if (unlikely(!ctx->skb_head))
		return NULL;

	skb = ctx->skb_head;
	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page_info->page,
			offset, len, packet_buffer_size);

	return skb;
}

static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr)
{
	const __be64 offset = cpu_to_be64(PAGE_SIZE / 2);

	/* "flip" to other packet buffer on this page */
	page_info->page_offset ^= PAGE_SIZE / 2;
	*(slot_addr) ^= offset;
}

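/* Returns 1 if the page is only referenced by the driver (page_count()
 * equals our bias) and can be reused, 0 if the stack still holds a
 * reference, and -1 if the refcount dropped below the bias, which
 * callers treat as a fatal error.
 */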
static int gve_rx_can_recycle_buffer(struct gve_rx_slot_page_info *page_info)
{
	int pagecount = page_count(page_info->page);

	/* This page is not being used by any SKBs - reuse */
	if (pagecount == page_info->pagecnt_bias)
		return 1;
	/* This page is still being used by an SKB - we can't reuse */
	else if (pagecount > page_info->pagecnt_bias)
		return 0;
	WARN(pagecount < page_info->pagecnt_bias,
	     "Pagecount should never be less than the bias.");
	return -1;
}

static struct sk_buff *
gve_rx_raw_addressing(struct device *dev, struct net_device *netdev,
		      struct gve_rx_slot_page_info *page_info, u16 len,
		      struct napi_struct *napi,
		      union gve_rx_data_slot *data_slot,
		      u16 packet_buffer_size, struct gve_rx_ctx *ctx)
{
	struct sk_buff *skb = gve_rx_add_frags(napi, page_info, packet_buffer_size, len, ctx);

	if (!skb)
		return NULL;

	/* Optimistically stop the kernel from freeing the page.
	 * We will check again in refill to determine if we need to alloc a
	 * new page.
	 */
	gve_dec_pagecnt_bias(page_info);

	return skb;
}

static struct sk_buff *
gve_rx_qpl(struct device *dev, struct net_device *netdev,
	   struct gve_rx_ring *rx, struct gve_rx_slot_page_info *page_info,
	   u16 len, struct napi_struct *napi,
	   union gve_rx_data_slot *data_slot)
{
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct sk_buff *skb;

	/* if raw_addressing mode is not enabled gvnic can only receive into
	 * registered segments. If the buffer can't be recycled, our only
	 * choice is to copy the data out of it so that we can return it to the
	 * device.
	 */
	if (ctx->reuse_frags) {
		skb = gve_rx_add_frags(napi, page_info, rx->packet_buffer_size, len, ctx);
		/* No point in recycling if we didn't get the skb */
		if (skb) {
			/* Make sure that the page isn't freed. */
			gve_dec_pagecnt_bias(page_info);
			gve_rx_flip_buff(page_info, &data_slot->qpl_offset);
		}
	} else {
		const u16 padding = gve_rx_ctx_padding(ctx);

		skb = gve_rx_copy(netdev, napi, page_info, len, padding, ctx);
		if (skb) {
			u64_stats_update_begin(&rx->statss);
			rx->rx_frag_copy_cnt++;
			u64_stats_update_end(&rx->statss);
		}
	}
	return skb;
}

#define GVE_PKTCONT_BIT_IS_SET(x) (GVE_RXF_PKT_CONT & (x))
static u16 gve_rx_get_fragment_size(struct gve_rx_ctx *ctx, struct gve_rx_desc *desc)
{
	return be16_to_cpu(desc->len) - gve_rx_ctx_padding(ctx);
}

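/* Pre-scan all descriptors of the next (possibly multi-fragment) packet
 * without advancing rx->cnt: validate sequence numbers and fragment
 * sizes, decide whether every QPL buffer backing the packet can be
 * flipped, and record the expected fragment count and total size in
 * @ctx. Returns false if the packet must be dropped; buffer, seqno and
 * size errors additionally schedule a device reset.
 */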
static bool gve_rx_ctx_init(struct gve_rx_ctx *ctx, struct gve_rx_ring *rx)
{
	bool qpl_mode = !rx->data.raw_addressing, packet_size_error = false;
	bool buffer_error = false, desc_error = false, seqno_error = false;
	struct gve_rx_slot_page_info *page_info;
	struct gve_priv *priv = rx->gve;
	u32 idx = rx->cnt & rx->mask;
	bool reuse_frags, can_flip;
	struct gve_rx_desc *desc;
	u16 packet_size = 0;
	u16 n_frags = 0;
	int recycle;

	/* In QPL mode, we only flip buffers when all buffers containing the packet
	 * can be flipped. RDA can_flip decisions will be made later, per frag.
	 */
	can_flip = qpl_mode;
	reuse_frags = can_flip;
	do {
		u16 frag_size;

		n_frags++;
		desc = &rx->desc.desc_ring[idx];
		desc_error = unlikely(desc->flags_seq & GVE_RXF_ERR) || desc_error;
		if (GVE_SEQNO(desc->flags_seq) != rx->desc.seqno) {
			seqno_error = true;
			netdev_warn(priv->dev,
				    "RX seqno error: want=%d, got=%d, dropping packet and scheduling reset.",
				    rx->desc.seqno, GVE_SEQNO(desc->flags_seq));
		}
		frag_size = be16_to_cpu(desc->len);
		packet_size += frag_size;
		if (frag_size > rx->packet_buffer_size) {
			packet_size_error = true;
			netdev_warn(priv->dev,
				    "RX fragment error: packet_buffer_size=%d, frag_size=%d, dropping packet.",
				    rx->packet_buffer_size, be16_to_cpu(desc->len));
		}
		page_info = &rx->data.page_info[idx];
		if (can_flip) {
			recycle = gve_rx_can_recycle_buffer(page_info);
			reuse_frags = reuse_frags && recycle > 0;
			buffer_error = buffer_error || unlikely(recycle < 0);
		}
		idx = (idx + 1) & rx->mask;
		rx->desc.seqno = gve_next_seqno(rx->desc.seqno);
	} while (GVE_PKTCONT_BIT_IS_SET(desc->flags_seq));

	prefetch(rx->desc.desc_ring + idx);

	ctx->curr_frag_cnt = 0;
	ctx->total_expected_size = packet_size - GVE_RX_PAD;
	ctx->expected_frag_cnt = n_frags;
	ctx->skb_head = NULL;
	ctx->reuse_frags = reuse_frags;

	if (ctx->expected_frag_cnt > 1) {
		u64_stats_update_begin(&rx->statss);
		rx->rx_cont_packet_cnt++;
		u64_stats_update_end(&rx->statss);
	}
	if (ctx->total_expected_size > priv->rx_copybreak && !ctx->reuse_frags && qpl_mode) {
		u64_stats_update_begin(&rx->statss);
		rx->rx_copied_pkt++;
		u64_stats_update_end(&rx->statss);
	}

	if (unlikely(buffer_error || seqno_error || packet_size_error)) {
		gve_schedule_reset(priv);
		return false;
	}

	if (unlikely(desc_error)) {
		u64_stats_update_begin(&rx->statss);
		rx->rx_desc_err_dropped_pkt++;
		u64_stats_update_end(&rx->statss);
		return false;
	}
	return true;
}

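/* Build the skb for one fragment: single-fragment packets at or below
 * rx_copybreak are copied into a freshly allocated skb, anything larger
 * is attached as a page fragment via the raw-addressing or QPL path.
 */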
static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx,
				  struct gve_rx_slot_page_info *page_info, struct napi_struct *napi,
				  u16 len, union gve_rx_data_slot *data_slot)
{
	struct net_device *netdev = priv->dev;
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct sk_buff *skb = NULL;

	if (len <= priv->rx_copybreak && ctx->expected_frag_cnt == 1) {
		/* Just copy small packets */
		skb = gve_rx_copy(netdev, napi, page_info, len, GVE_RX_PAD, ctx);
		if (skb) {
			u64_stats_update_begin(&rx->statss);
			rx->rx_copied_pkt++;
			rx->rx_frag_copy_cnt++;
			rx->rx_copybreak_pkt++;
			u64_stats_update_end(&rx->statss);
		}
	} else {
		if (rx->data.raw_addressing) {
			int recycle = gve_rx_can_recycle_buffer(page_info);

			if (unlikely(recycle < 0)) {
				gve_schedule_reset(priv);
				return NULL;
			}
			page_info->can_flip = recycle;
			if (page_info->can_flip) {
				u64_stats_update_begin(&rx->statss);
				rx->rx_frag_flip_cnt++;
				u64_stats_update_end(&rx->statss);
			}
			skb = gve_rx_raw_addressing(&priv->pdev->dev, netdev,
						    page_info, len, napi,
						    data_slot,
						    rx->packet_buffer_size, ctx);
		} else {
			if (ctx->reuse_frags) {
				u64_stats_update_begin(&rx->statss);
				rx->rx_frag_flip_cnt++;
				u64_stats_update_end(&rx->statss);
			}
			skb = gve_rx_qpl(&priv->pdev->dev, netdev, rx,
					 page_info, len, napi, data_slot);
		}
	}
	return skb;
}

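/* Receive one packet: assemble all of its fragments into an skb and
 * pass it up through GRO. Returns true on success, false if the packet
 * was dropped (the remaining fragments are then skipped).
 */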
static bool gve_rx(struct gve_rx_ring *rx, netdev_features_t feat,
		   u64 *packet_size_bytes, u32 *work_done)
{
	struct gve_rx_slot_page_info *page_info;
	struct gve_rx_ctx *ctx = &rx->ctx;
	union gve_rx_data_slot *data_slot;
	struct gve_priv *priv = rx->gve;
	struct gve_rx_desc *first_desc;
	struct sk_buff *skb = NULL;
	struct gve_rx_desc *desc;
	struct napi_struct *napi;
	dma_addr_t page_bus;
	u32 work_cnt = 0;
	void *va;
	u32 idx;
	u16 len;

	idx = rx->cnt & rx->mask;
	first_desc = &rx->desc.desc_ring[idx];
	desc = first_desc;
	napi = &priv->ntfy_blocks[rx->ntfy_id].napi;

	if (unlikely(!gve_rx_ctx_init(ctx, rx)))
		goto skb_alloc_fail;

	while (ctx->curr_frag_cnt < ctx->expected_frag_cnt) {
		/* Prefetch two packet buffers ahead, we will need it soon. */
		page_info = &rx->data.page_info[(idx + 2) & rx->mask];
		va = page_info->page_address + page_info->page_offset;

		prefetch(page_info->page); /* Kernel page struct. */
		prefetch(va);              /* Packet header. */
		prefetch(va + 64);         /* Next cacheline too. */

		len = gve_rx_get_fragment_size(ctx, desc);

		page_info = &rx->data.page_info[idx];
		data_slot = &rx->data.data_ring[idx];
		page_bus = rx->data.raw_addressing ?
			   be64_to_cpu(data_slot->addr) - page_info->page_offset :
			   rx->data.qpl->page_buses[idx];
		dma_sync_single_for_cpu(&priv->pdev->dev, page_bus, PAGE_SIZE, DMA_FROM_DEVICE);

		skb = gve_rx_skb(priv, rx, page_info, napi, len, data_slot);
		if (!skb) {
			u64_stats_update_begin(&rx->statss);
			rx->rx_skb_alloc_fail++;
			u64_stats_update_end(&rx->statss);
			goto skb_alloc_fail;
		}

		ctx->curr_frag_cnt++;
		rx->cnt++;
		idx = rx->cnt & rx->mask;
		work_cnt++;
		desc = &rx->desc.desc_ring[idx];
	}

	if (likely(feat & NETIF_F_RXCSUM)) {
		/* NIC passes up the partial sum */
		if (first_desc->csum)
			skb->ip_summed = CHECKSUM_COMPLETE;
		else
			skb->ip_summed = CHECKSUM_NONE;
		skb->csum = csum_unfold(first_desc->csum);
	}

	/* parse flags & pass relevant info up */
	if (likely(feat & NETIF_F_RXHASH) &&
	    gve_needs_rss(first_desc->flags_seq))
		skb_set_hash(skb, be32_to_cpu(first_desc->rss_hash),
			     gve_rss_type(first_desc->flags_seq));

	*packet_size_bytes = skb->len + (skb->protocol ? ETH_HLEN : 0);
	*work_done = work_cnt;
	skb_record_rx_queue(skb, rx->q_num);
	if (skb_is_nonlinear(skb))
		napi_gro_frags(napi);
	else
		napi_gro_receive(napi, skb);

	gve_rx_ctx_clear(ctx);
	return true;

skb_alloc_fail:
	if (napi->skb)
		napi_free_frags(napi);
	*packet_size_bytes = 0;
	*work_done = ctx->expected_frag_cnt;
	while (ctx->curr_frag_cnt < ctx->expected_frag_cnt) {
		rx->cnt++;
		ctx->curr_frag_cnt++;
	}
	gve_rx_ctx_clear(ctx);
	return false;
}

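/* A descriptor whose sequence number matches the ring's expected seqno
 * has been written back by the device and is ready to be processed.
 */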
bool gve_rx_work_pending(struct gve_rx_ring *rx)
{
	struct gve_rx_desc *desc;
	__be16 flags_seq;
	u32 next_idx;

	next_idx = rx->cnt & rx->mask;
	desc = rx->desc.desc_ring + next_idx;

	flags_seq = desc->flags_seq;

	return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
}

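/* Repost buffers until the ring is full again: flip to the unused half
 * of a page when descriptor processing marked it flippable, reuse the
 * page outright if the stack has released it, otherwise free it and
 * allocate a fresh one. Returns false if a refcount underflow is seen.
 */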
static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	int refill_target = rx->mask + 1;
	u32 fill_cnt = rx->fill_cnt;

	while (fill_cnt - rx->cnt < refill_target) {
		struct gve_rx_slot_page_info *page_info;
		u32 idx = fill_cnt & rx->mask;

		page_info = &rx->data.page_info[idx];
		if (page_info->can_flip) {
			/* The other half of the page is free because it was
			 * free when we processed the descriptor. Flip to it.
			 */
			union gve_rx_data_slot *data_slot =
						&rx->data.data_ring[idx];

			gve_rx_flip_buff(page_info, &data_slot->addr);
			page_info->can_flip = 0;
		} else {
			/* It is possible that the networking stack has already
			 * finished processing all outstanding packets in the buffer
			 * and it can be reused.
			 * Flipping is unnecessary here - if the networking stack still
			 * owns half the page it is impossible to tell which half. Either
			 * the whole page is free or it needs to be replaced.
			 */
			int recycle = gve_rx_can_recycle_buffer(page_info);

			if (recycle < 0) {
				if (!rx->data.raw_addressing)
					gve_schedule_reset(priv);
				return false;
			}
			if (!recycle) {
				/* We can't reuse the buffer - alloc a new one */
				union gve_rx_data_slot *data_slot =
						&rx->data.data_ring[idx];
				struct device *dev = &priv->pdev->dev;
				gve_rx_free_buffer(dev, page_info, data_slot);
				page_info->page = NULL;
				if (gve_rx_alloc_buffer(priv, dev, page_info,
							data_slot)) {
					u64_stats_update_begin(&rx->statss);
					rx->rx_buf_alloc_fail++;
					u64_stats_update_end(&rx->statss);
					break;
				}
			}
		}
		fill_cnt++;
	}
	rx->fill_cnt = fill_cnt;
	return true;
}

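/* NAPI receive path: process completed descriptors up to @budget,
 * update byte/packet counters, restock ring slots and ring the
 * doorbell. QPL rings refill implicitly as descriptors are processed;
 * raw-addressing rings are refilled once the number of available
 * buffers drops to the doorbell threshold.
 */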
static int gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
			     netdev_features_t feat)
{
	u32 work_done = 0, total_packet_cnt = 0, ok_packet_cnt = 0;
	struct gve_priv *priv = rx->gve;
	u32 idx = rx->cnt & rx->mask;
	struct gve_rx_desc *desc;
	u64 bytes = 0;

	desc = &rx->desc.desc_ring[idx];
	while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) &&
	       work_done < budget) {
		u64 packet_size_bytes = 0;
		u32 work_cnt = 0;
		bool dropped;

		netif_info(priv, rx_status, priv->dev,
			   "[%d] idx=%d desc=%p desc->flags_seq=0x%x\n",
			   rx->q_num, idx, desc, desc->flags_seq);
		netif_info(priv, rx_status, priv->dev,
			   "[%d] seqno=%d rx->desc.seqno=%d\n",
			   rx->q_num, GVE_SEQNO(desc->flags_seq),
			   rx->desc.seqno);

		dropped = !gve_rx(rx, feat, &packet_size_bytes, &work_cnt);
		if (!dropped) {
			bytes += packet_size_bytes;
			ok_packet_cnt++;
		}
		total_packet_cnt++;
		idx = rx->cnt & rx->mask;
		desc = &rx->desc.desc_ring[idx];
		work_done += work_cnt;
	}

	if (!work_done && rx->fill_cnt - rx->cnt > rx->db_threshold)
		return 0;

	if (work_done) {
		u64_stats_update_begin(&rx->statss);
		rx->rpackets += ok_packet_cnt;
		rx->rbytes += bytes;
		u64_stats_update_end(&rx->statss);
	}

	/* restock ring slots */
	if (!rx->data.raw_addressing) {
		/* In QPL mode buffs are refilled as the desc are processed */
		rx->fill_cnt += work_done;
	} else if (rx->fill_cnt - rx->cnt <= rx->db_threshold) {
		/* In raw addressing mode buffs are only refilled if the avail
		 * falls below a threshold.
		 */
		if (!gve_rx_refill_buffers(priv, rx))
			return 0;

		/* If we were not able to completely refill buffers, we'll want
		 * to schedule this queue for work again to refill buffers.
		 */
		if (rx->fill_cnt - rx->cnt <= rx->db_threshold) {
			gve_rx_write_doorbell(priv, rx);
			return budget;
		}
	}

	gve_rx_write_doorbell(priv, rx);
	return total_packet_cnt;
}

int gve_rx_poll(struct gve_notify_block *block, int budget)
{
	struct gve_rx_ring *rx = block->rx;
	netdev_features_t feat;
	int work_done = 0;

	feat = block->napi.dev->features;

	/* If budget is 0, do all the work */
	if (budget == 0)
		budget = INT_MAX;

	if (budget > 0)
		work_done = gve_clean_rx_done(rx, budget, feat);

	return work_done;
}