rx_common.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
rx_common.c (31256B)
      1// SPDX-License-Identifier: GPL-2.0-only
      2/****************************************************************************
      3 * Driver for Solarflare network controllers and boards
      4 * Copyright 2018 Solarflare Communications Inc.
      5 *
      6 * This program is free software; you can redistribute it and/or modify it
      7 * under the terms of the GNU General Public License version 2 as published
      8 * by the Free Software Foundation, incorporated herein by reference.
      9 */
     10
     11#include "net_driver.h"
     12#include <linux/module.h>
     13#include <linux/iommu.h>
     14#include "efx.h"
     15#include "nic.h"
     16#include "rx_common.h"
     17
/* This is the percentage fill level below which new RX descriptors
 * will be added to the RX descriptor ring.
 *
 * The value is consumed by efx_init_rx_queue(): 0 selects the largest
 * trigger level computed there, and values above 100 are clamped to 100.
 * The parameter is registered with mode 0444.
 */
static unsigned int rx_refill_threshold;
module_param(rx_refill_threshold, uint, 0444);
MODULE_PARM_DESC(rx_refill_threshold,
		 "RX descriptor ring refill threshold (%)");
     25
/* RX maximum head room required.
 *
 * This must be at least 1 to prevent overflow, plus one packet-worth
 * to allow pipelined receives.
 *
 * efx_init_rx_queue() subtracts this from the configured ring size to
 * obtain the maximum fill level.
 */
#define EFX_RXD_HEAD_ROOM (1 + EFX_RX_MAX_FRAGS)
     32
     33/* Check the RX page recycle ring for a page that can be reused. */
     34static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue)
     35{
     36	struct efx_nic *efx = rx_queue->efx;
     37	struct efx_rx_page_state *state;
     38	unsigned int index;
     39	struct page *page;
     40
     41	if (unlikely(!rx_queue->page_ring))
     42		return NULL;
     43	index = rx_queue->page_remove & rx_queue->page_ptr_mask;
     44	page = rx_queue->page_ring[index];
     45	if (page == NULL)
     46		return NULL;
     47
     48	rx_queue->page_ring[index] = NULL;
     49	/* page_remove cannot exceed page_add. */
     50	if (rx_queue->page_remove != rx_queue->page_add)
     51		++rx_queue->page_remove;
     52
     53	/* If page_count is 1 then we hold the only reference to this page. */
     54	if (page_count(page) == 1) {
     55		++rx_queue->page_recycle_count;
     56		return page;
     57	} else {
     58		state = page_address(page);
     59		dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
     60			       PAGE_SIZE << efx->rx_buffer_order,
     61			       DMA_FROM_DEVICE);
     62		put_page(page);
     63		++rx_queue->page_recycle_failed;
     64	}
     65
     66	return NULL;
     67}
     68
     69/* Attempt to recycle the page if there is an RX recycle ring; the page can
     70 * only be added if this is the final RX buffer, to prevent pages being used in
     71 * the descriptor ring and appearing in the recycle ring simultaneously.
     72 */
     73static void efx_recycle_rx_page(struct efx_channel *channel,
     74				struct efx_rx_buffer *rx_buf)
     75{
     76	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
     77	struct efx_nic *efx = rx_queue->efx;
     78	struct page *page = rx_buf->page;
     79	unsigned int index;
     80
     81	/* Only recycle the page after processing the final buffer. */
     82	if (!(rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE))
     83		return;
     84
     85	index = rx_queue->page_add & rx_queue->page_ptr_mask;
     86	if (rx_queue->page_ring[index] == NULL) {
     87		unsigned int read_index = rx_queue->page_remove &
     88			rx_queue->page_ptr_mask;
     89
     90		/* The next slot in the recycle ring is available, but
     91		 * increment page_remove if the read pointer currently
     92		 * points here.
     93		 */
     94		if (read_index == index)
     95			++rx_queue->page_remove;
     96		rx_queue->page_ring[index] = page;
     97		++rx_queue->page_add;
     98		return;
     99	}
    100	++rx_queue->page_recycle_full;
    101	efx_unmap_rx_buffer(efx, rx_buf);
    102	put_page(rx_buf->page);
    103}
    104
    105/* Recycle the pages that are used by buffers that have just been received. */
    106void efx_recycle_rx_pages(struct efx_channel *channel,
    107			  struct efx_rx_buffer *rx_buf,
    108			  unsigned int n_frags)
    109{
    110	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
    111
    112	if (unlikely(!rx_queue->page_ring))
    113		return;
    114
    115	do {
    116		efx_recycle_rx_page(channel, rx_buf);
    117		rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
    118	} while (--n_frags);
    119}
    120
    121void efx_discard_rx_packet(struct efx_channel *channel,
    122			   struct efx_rx_buffer *rx_buf,
    123			   unsigned int n_frags)
    124{
    125	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
    126
    127	efx_recycle_rx_pages(channel, rx_buf, n_frags);
    128
    129	efx_free_rx_buffers(rx_queue, rx_buf, n_frags);
    130}
    131
    132static void efx_init_rx_recycle_ring(struct efx_rx_queue *rx_queue)
    133{
    134	unsigned int bufs_in_recycle_ring, page_ring_size;
    135	struct efx_nic *efx = rx_queue->efx;
    136
    137	bufs_in_recycle_ring = efx_rx_recycle_ring_size(efx);
    138	page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring /
    139					    efx->rx_bufs_per_page);
    140	rx_queue->page_ring = kcalloc(page_ring_size,
    141				      sizeof(*rx_queue->page_ring), GFP_KERNEL);
    142	if (!rx_queue->page_ring)
    143		rx_queue->page_ptr_mask = 0;
    144	else
    145		rx_queue->page_ptr_mask = page_ring_size - 1;
    146}
    147
    148static void efx_fini_rx_recycle_ring(struct efx_rx_queue *rx_queue)
    149{
    150	struct efx_nic *efx = rx_queue->efx;
    151	int i;
    152
    153	if (unlikely(!rx_queue->page_ring))
    154		return;
    155
    156	/* Unmap and release the pages in the recycle ring. Remove the ring. */
    157	for (i = 0; i <= rx_queue->page_ptr_mask; i++) {
    158		struct page *page = rx_queue->page_ring[i];
    159		struct efx_rx_page_state *state;
    160
    161		if (page == NULL)
    162			continue;
    163
    164		state = page_address(page);
    165		dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
    166			       PAGE_SIZE << efx->rx_buffer_order,
    167			       DMA_FROM_DEVICE);
    168		put_page(page);
    169	}
    170	kfree(rx_queue->page_ring);
    171	rx_queue->page_ring = NULL;
    172}
    173
    174static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue,
    175			       struct efx_rx_buffer *rx_buf)
    176{
    177	/* Release the page reference we hold for the buffer. */
    178	if (rx_buf->page)
    179		put_page(rx_buf->page);
    180
    181	/* If this is the last buffer in a page, unmap and free it. */
    182	if (rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE) {
    183		efx_unmap_rx_buffer(rx_queue->efx, rx_buf);
    184		efx_free_rx_buffers(rx_queue, rx_buf, 1);
    185	}
    186	rx_buf->page = NULL;
    187}
    188
    189int efx_probe_rx_queue(struct efx_rx_queue *rx_queue)
    190{
    191	struct efx_nic *efx = rx_queue->efx;
    192	unsigned int entries;
    193	int rc;
    194
    195	/* Create the smallest power-of-two aligned ring */
    196	entries = max(roundup_pow_of_two(efx->rxq_entries), EFX_MIN_DMAQ_SIZE);
    197	EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
    198	rx_queue->ptr_mask = entries - 1;
    199
    200	netif_dbg(efx, probe, efx->net_dev,
    201		  "creating RX queue %d size %#x mask %#x\n",
    202		  efx_rx_queue_index(rx_queue), efx->rxq_entries,
    203		  rx_queue->ptr_mask);
    204
    205	/* Allocate RX buffers */
    206	rx_queue->buffer = kcalloc(entries, sizeof(*rx_queue->buffer),
    207				   GFP_KERNEL);
    208	if (!rx_queue->buffer)
    209		return -ENOMEM;
    210
    211	rc = efx_nic_probe_rx(rx_queue);
    212	if (rc) {
    213		kfree(rx_queue->buffer);
    214		rx_queue->buffer = NULL;
    215	}
    216
    217	return rc;
    218}
    219
    220void efx_init_rx_queue(struct efx_rx_queue *rx_queue)
    221{
    222	unsigned int max_fill, trigger, max_trigger;
    223	struct efx_nic *efx = rx_queue->efx;
    224	int rc = 0;
    225
    226	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
    227		  "initialising RX queue %d\n", efx_rx_queue_index(rx_queue));
    228
    229	/* Initialise ptr fields */
    230	rx_queue->added_count = 0;
    231	rx_queue->notified_count = 0;
    232	rx_queue->removed_count = 0;
    233	rx_queue->min_fill = -1U;
    234	efx_init_rx_recycle_ring(rx_queue);
    235
    236	rx_queue->page_remove = 0;
    237	rx_queue->page_add = rx_queue->page_ptr_mask + 1;
    238	rx_queue->page_recycle_count = 0;
    239	rx_queue->page_recycle_failed = 0;
    240	rx_queue->page_recycle_full = 0;
    241
    242	/* Initialise limit fields */
    243	max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM;
    244	max_trigger =
    245		max_fill - efx->rx_pages_per_batch * efx->rx_bufs_per_page;
    246	if (rx_refill_threshold != 0) {
    247		trigger = max_fill * min(rx_refill_threshold, 100U) / 100U;
    248		if (trigger > max_trigger)
    249			trigger = max_trigger;
    250	} else {
    251		trigger = max_trigger;
    252	}
    253
    254	rx_queue->max_fill = max_fill;
    255	rx_queue->fast_fill_trigger = trigger;
    256	rx_queue->refill_enabled = true;
    257
    258	/* Initialise XDP queue information */
    259	rc = xdp_rxq_info_reg(&rx_queue->xdp_rxq_info, efx->net_dev,
    260			      rx_queue->core_index, 0);
    261
    262	if (rc) {
    263		netif_err(efx, rx_err, efx->net_dev,
    264			  "Failure to initialise XDP queue information rc=%d\n",
    265			  rc);
    266		efx->xdp_rxq_info_failed = true;
    267	} else {
    268		rx_queue->xdp_rxq_info_valid = true;
    269	}
    270
    271	/* Set up RX descriptor ring */
    272	efx_nic_init_rx(rx_queue);
    273}
    274
    275void efx_fini_rx_queue(struct efx_rx_queue *rx_queue)
    276{
    277	struct efx_rx_buffer *rx_buf;
    278	int i;
    279
    280	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
    281		  "shutting down RX queue %d\n", efx_rx_queue_index(rx_queue));
    282
    283	del_timer_sync(&rx_queue->slow_fill);
    284
    285	/* Release RX buffers from the current read ptr to the write ptr */
    286	if (rx_queue->buffer) {
    287		for (i = rx_queue->removed_count; i < rx_queue->added_count;
    288		     i++) {
    289			unsigned int index = i & rx_queue->ptr_mask;
    290
    291			rx_buf = efx_rx_buffer(rx_queue, index);
    292			efx_fini_rx_buffer(rx_queue, rx_buf);
    293		}
    294	}
    295
    296	efx_fini_rx_recycle_ring(rx_queue);
    297
    298	if (rx_queue->xdp_rxq_info_valid)
    299		xdp_rxq_info_unreg(&rx_queue->xdp_rxq_info);
    300
    301	rx_queue->xdp_rxq_info_valid = false;
    302}
    303
    304void efx_remove_rx_queue(struct efx_rx_queue *rx_queue)
    305{
    306	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
    307		  "destroying RX queue %d\n", efx_rx_queue_index(rx_queue));
    308
    309	efx_nic_remove_rx(rx_queue);
    310
    311	kfree(rx_queue->buffer);
    312	rx_queue->buffer = NULL;
    313}
    314
    315/* Unmap a DMA-mapped page.  This function is only called for the final RX
    316 * buffer in a page.
    317 */
    318void efx_unmap_rx_buffer(struct efx_nic *efx,
    319			 struct efx_rx_buffer *rx_buf)
    320{
    321	struct page *page = rx_buf->page;
    322
    323	if (page) {
    324		struct efx_rx_page_state *state = page_address(page);
    325
    326		dma_unmap_page(&efx->pci_dev->dev,
    327			       state->dma_addr,
    328			       PAGE_SIZE << efx->rx_buffer_order,
    329			       DMA_FROM_DEVICE);
    330	}
    331}
    332
    333void efx_free_rx_buffers(struct efx_rx_queue *rx_queue,
    334			 struct efx_rx_buffer *rx_buf,
    335			 unsigned int num_bufs)
    336{
    337	do {
    338		if (rx_buf->page) {
    339			put_page(rx_buf->page);
    340			rx_buf->page = NULL;
    341		}
    342		rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
    343	} while (--num_bufs);
    344}
    345
    346void efx_rx_slow_fill(struct timer_list *t)
    347{
    348	struct efx_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill);
    349
    350	/* Post an event to cause NAPI to run and refill the queue */
    351	efx_nic_generate_fill_event(rx_queue);
    352	++rx_queue->slow_fill_count;
    353}
    354
    355void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue)
    356{
    357	mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(10));
    358}
    359
    360/* efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers
    361 *
    362 * @rx_queue:		Efx RX queue
    363 *
    364 * This allocates a batch of pages, maps them for DMA, and populates
    365 * struct efx_rx_buffers for each one. Return a negative error code or
    366 * 0 on success. If a single page can be used for multiple buffers,
    367 * then the page will either be inserted fully, or not at all.
    368 */
    369static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic)
    370{
    371	unsigned int page_offset, index, count;
    372	struct efx_nic *efx = rx_queue->efx;
    373	struct efx_rx_page_state *state;
    374	struct efx_rx_buffer *rx_buf;
    375	dma_addr_t dma_addr;
    376	struct page *page;
    377
    378	count = 0;
    379	do {
    380		page = efx_reuse_page(rx_queue);
    381		if (page == NULL) {
    382			page = alloc_pages(__GFP_COMP |
    383					   (atomic ? GFP_ATOMIC : GFP_KERNEL),
    384					   efx->rx_buffer_order);
    385			if (unlikely(page == NULL))
    386				return -ENOMEM;
    387			dma_addr =
    388				dma_map_page(&efx->pci_dev->dev, page, 0,
    389					     PAGE_SIZE << efx->rx_buffer_order,
    390					     DMA_FROM_DEVICE);
    391			if (unlikely(dma_mapping_error(&efx->pci_dev->dev,
    392						       dma_addr))) {
    393				__free_pages(page, efx->rx_buffer_order);
    394				return -EIO;
    395			}
    396			state = page_address(page);
    397			state->dma_addr = dma_addr;
    398		} else {
    399			state = page_address(page);
    400			dma_addr = state->dma_addr;
    401		}
    402
    403		dma_addr += sizeof(struct efx_rx_page_state);
    404		page_offset = sizeof(struct efx_rx_page_state);
    405
    406		do {
    407			index = rx_queue->added_count & rx_queue->ptr_mask;
    408			rx_buf = efx_rx_buffer(rx_queue, index);
    409			rx_buf->dma_addr = dma_addr + efx->rx_ip_align +
    410					   EFX_XDP_HEADROOM;
    411			rx_buf->page = page;
    412			rx_buf->page_offset = page_offset + efx->rx_ip_align +
    413					      EFX_XDP_HEADROOM;
    414			rx_buf->len = efx->rx_dma_len;
    415			rx_buf->flags = 0;
    416			++rx_queue->added_count;
    417			get_page(page);
    418			dma_addr += efx->rx_page_buf_step;
    419			page_offset += efx->rx_page_buf_step;
    420		} while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE);
    421
    422		rx_buf->flags = EFX_RX_BUF_LAST_IN_PAGE;
    423	} while (++count < efx->rx_pages_per_batch);
    424
    425	return 0;
    426}
    427
    428void efx_rx_config_page_split(struct efx_nic *efx)
    429{
    430	efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + efx->rx_ip_align +
    431				      EFX_XDP_HEADROOM + EFX_XDP_TAILROOM,
    432				      EFX_RX_BUF_ALIGNMENT);
    433	efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 :
    434		((PAGE_SIZE - sizeof(struct efx_rx_page_state)) /
    435		efx->rx_page_buf_step);
    436	efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) /
    437		efx->rx_bufs_per_page;
    438	efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH,
    439					       efx->rx_bufs_per_page);
    440}
    441
    442/* efx_fast_push_rx_descriptors - push new RX descriptors quickly
    443 * @rx_queue:		RX descriptor queue
    444 *
    445 * This will aim to fill the RX descriptor queue up to
    446 * @rx_queue->@max_fill. If there is insufficient atomic
    447 * memory to do so, a slow fill will be scheduled.
    448 *
    449 * The caller must provide serialisation (none is used here). In practise,
    450 * this means this function must run from the NAPI handler, or be called
    451 * when NAPI is disabled.
    452 */
    453void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic)
    454{
    455	struct efx_nic *efx = rx_queue->efx;
    456	unsigned int fill_level, batch_size;
    457	int space, rc = 0;
    458
    459	if (!rx_queue->refill_enabled)
    460		return;
    461
    462	/* Calculate current fill level, and exit if we don't need to fill */
    463	fill_level = (rx_queue->added_count - rx_queue->removed_count);
    464	EFX_WARN_ON_ONCE_PARANOID(fill_level > rx_queue->efx->rxq_entries);
    465	if (fill_level >= rx_queue->fast_fill_trigger)
    466		goto out;
    467
    468	/* Record minimum fill level */
    469	if (unlikely(fill_level < rx_queue->min_fill)) {
    470		if (fill_level)
    471			rx_queue->min_fill = fill_level;
    472	}
    473
    474	batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page;
    475	space = rx_queue->max_fill - fill_level;
    476	EFX_WARN_ON_ONCE_PARANOID(space < batch_size);
    477
    478	netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
    479		   "RX queue %d fast-filling descriptor ring from"
    480		   " level %d to level %d\n",
    481		   efx_rx_queue_index(rx_queue), fill_level,
    482		   rx_queue->max_fill);
    483
    484	do {
    485		rc = efx_init_rx_buffers(rx_queue, atomic);
    486		if (unlikely(rc)) {
    487			/* Ensure that we don't leave the rx queue empty */
    488			efx_schedule_slow_fill(rx_queue);
    489			goto out;
    490		}
    491	} while ((space -= batch_size) >= batch_size);
    492
    493	netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
    494		   "RX queue %d fast-filled descriptor ring "
    495		   "to level %d\n", efx_rx_queue_index(rx_queue),
    496		   rx_queue->added_count - rx_queue->removed_count);
    497
    498 out:
    499	if (rx_queue->notified_count != rx_queue->added_count)
    500		efx_nic_notify_rx_desc(rx_queue);
    501}
    502
    503/* Pass a received packet up through GRO.  GRO can handle pages
    504 * regardless of checksum state and skbs with a good checksum.
    505 */
    506void
    507efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
    508		  unsigned int n_frags, u8 *eh, __wsum csum)
    509{
    510	struct napi_struct *napi = &channel->napi_str;
    511	struct efx_nic *efx = channel->efx;
    512	struct sk_buff *skb;
    513
    514	skb = napi_get_frags(napi);
    515	if (unlikely(!skb)) {
    516		struct efx_rx_queue *rx_queue;
    517
    518		rx_queue = efx_channel_get_rx_queue(channel);
    519		efx_free_rx_buffers(rx_queue, rx_buf, n_frags);
    520		return;
    521	}
    522
    523	if (efx->net_dev->features & NETIF_F_RXHASH &&
    524	    efx_rx_buf_hash_valid(efx, eh))
    525		skb_set_hash(skb, efx_rx_buf_hash(efx, eh),
    526			     PKT_HASH_TYPE_L3);
    527	if (csum) {
    528		skb->csum = csum;
    529		skb->ip_summed = CHECKSUM_COMPLETE;
    530	} else {
    531		skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ?
    532				  CHECKSUM_UNNECESSARY : CHECKSUM_NONE);
    533	}
    534	skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL);
    535
    536	for (;;) {
    537		skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
    538				   rx_buf->page, rx_buf->page_offset,
    539				   rx_buf->len);
    540		rx_buf->page = NULL;
    541		skb->len += rx_buf->len;
    542		if (skb_shinfo(skb)->nr_frags == n_frags)
    543			break;
    544
    545		rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
    546	}
    547
    548	skb->data_len = skb->len;
    549	skb->truesize += n_frags * efx->rx_buffer_truesize;
    550
    551	skb_record_rx_queue(skb, channel->rx_queue.core_index);
    552
    553	napi_gro_frags(napi);
    554}
    555
    556/* RSS contexts.  We're using linked lists and crappy O(n) algorithms, because
    557 * (a) this is an infrequent control-plane operation and (b) n is small (max 64)
    558 */
    559struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx)
    560{
    561	struct list_head *head = &efx->rss_context.list;
    562	struct efx_rss_context *ctx, *new;
    563	u32 id = 1; /* Don't use zero, that refers to the master RSS context */
    564
    565	WARN_ON(!mutex_is_locked(&efx->rss_lock));
    566
    567	/* Search for first gap in the numbering */
    568	list_for_each_entry(ctx, head, list) {
    569		if (ctx->user_id != id)
    570			break;
    571		id++;
    572		/* Check for wrap.  If this happens, we have nearly 2^32
    573		 * allocated RSS contexts, which seems unlikely.
    574		 */
    575		if (WARN_ON_ONCE(!id))
    576			return NULL;
    577	}
    578
    579	/* Create the new entry */
    580	new = kmalloc(sizeof(*new), GFP_KERNEL);
    581	if (!new)
    582		return NULL;
    583	new->context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
    584	new->rx_hash_udp_4tuple = false;
    585
    586	/* Insert the new entry into the gap */
    587	new->user_id = id;
    588	list_add_tail(&new->list, &ctx->list);
    589	return new;
    590}
    591
    592struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id)
    593{
    594	struct list_head *head = &efx->rss_context.list;
    595	struct efx_rss_context *ctx;
    596
    597	WARN_ON(!mutex_is_locked(&efx->rss_lock));
    598
    599	list_for_each_entry(ctx, head, list)
    600		if (ctx->user_id == id)
    601			return ctx;
    602	return NULL;
    603}
    604
    605void efx_free_rss_context_entry(struct efx_rss_context *ctx)
    606{
    607	list_del(&ctx->list);
    608	kfree(ctx);
    609}
    610
    611void efx_set_default_rx_indir_table(struct efx_nic *efx,
    612				    struct efx_rss_context *ctx)
    613{
    614	size_t i;
    615
    616	for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++)
    617		ctx->rx_indir_table[i] =
    618			ethtool_rxfh_indir_default(i, efx->rss_spread);
    619}
    620
    621/**
    622 * efx_filter_is_mc_recipient - test whether spec is a multicast recipient
    623 * @spec: Specification to test
    624 *
    625 * Return: %true if the specification is a non-drop RX filter that
    626 * matches a local MAC address I/G bit value of 1 or matches a local
    627 * IPv4 or IPv6 address value in the respective multicast address
    628 * range.  Otherwise %false.
    629 */
    630bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec)
    631{
    632	if (!(spec->flags & EFX_FILTER_FLAG_RX) ||
    633	    spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP)
    634		return false;
    635
    636	if (spec->match_flags &
    637	    (EFX_FILTER_MATCH_LOC_MAC | EFX_FILTER_MATCH_LOC_MAC_IG) &&
    638	    is_multicast_ether_addr(spec->loc_mac))
    639		return true;
    640
    641	if ((spec->match_flags &
    642	     (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) ==
    643	    (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) {
    644		if (spec->ether_type == htons(ETH_P_IP) &&
    645		    ipv4_is_multicast(spec->loc_host[0]))
    646			return true;
    647		if (spec->ether_type == htons(ETH_P_IPV6) &&
    648		    ((const u8 *)spec->loc_host)[0] == 0xff)
    649			return true;
    650	}
    651
    652	return false;
    653}
    654
    655bool efx_filter_spec_equal(const struct efx_filter_spec *left,
    656			   const struct efx_filter_spec *right)
    657{
    658	if ((left->match_flags ^ right->match_flags) |
    659	    ((left->flags ^ right->flags) &
    660	     (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX)))
    661		return false;
    662
    663	return memcmp(&left->outer_vid, &right->outer_vid,
    664		      sizeof(struct efx_filter_spec) -
    665		      offsetof(struct efx_filter_spec, outer_vid)) == 0;
    666}
    667
    668u32 efx_filter_spec_hash(const struct efx_filter_spec *spec)
    669{
    670	BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3);
    671	return jhash2((const u32 *)&spec->outer_vid,
    672		      (sizeof(struct efx_filter_spec) -
    673		       offsetof(struct efx_filter_spec, outer_vid)) / 4,
    674		      0);
    675}
    676
    677#ifdef CONFIG_RFS_ACCEL
    678bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx,
    679			bool *force)
    680{
    681	if (rule->filter_id == EFX_ARFS_FILTER_ID_PENDING) {
    682		/* ARFS is currently updating this entry, leave it */
    683		return false;
    684	}
    685	if (rule->filter_id == EFX_ARFS_FILTER_ID_ERROR) {
    686		/* ARFS tried and failed to update this, so it's probably out
    687		 * of date.  Remove the filter and the ARFS rule entry.
    688		 */
    689		rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING;
    690		*force = true;
    691		return true;
    692	} else if (WARN_ON(rule->filter_id != filter_idx)) { /* can't happen */
    693		/* ARFS has moved on, so old filter is not needed.  Since we did
    694		 * not mark the rule with EFX_ARFS_FILTER_ID_REMOVING, it will
    695		 * not be removed by efx_rps_hash_del() subsequently.
    696		 */
    697		*force = true;
    698		return true;
    699	}
    700	/* Remove it iff ARFS wants to. */
    701	return true;
    702}
    703
    704static
    705struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx,
    706				       const struct efx_filter_spec *spec)
    707{
    708	u32 hash = efx_filter_spec_hash(spec);
    709
    710	lockdep_assert_held(&efx->rps_hash_lock);
    711	if (!efx->rps_hash_table)
    712		return NULL;
    713	return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE];
    714}
    715
    716struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx,
    717					const struct efx_filter_spec *spec)
    718{
    719	struct efx_arfs_rule *rule;
    720	struct hlist_head *head;
    721	struct hlist_node *node;
    722
    723	head = efx_rps_hash_bucket(efx, spec);
    724	if (!head)
    725		return NULL;
    726	hlist_for_each(node, head) {
    727		rule = container_of(node, struct efx_arfs_rule, node);
    728		if (efx_filter_spec_equal(spec, &rule->spec))
    729			return rule;
    730	}
    731	return NULL;
    732}
    733
    734struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx,
    735				       const struct efx_filter_spec *spec,
    736				       bool *new)
    737{
    738	struct efx_arfs_rule *rule;
    739	struct hlist_head *head;
    740	struct hlist_node *node;
    741
    742	head = efx_rps_hash_bucket(efx, spec);
    743	if (!head)
    744		return NULL;
    745	hlist_for_each(node, head) {
    746		rule = container_of(node, struct efx_arfs_rule, node);
    747		if (efx_filter_spec_equal(spec, &rule->spec)) {
    748			*new = false;
    749			return rule;
    750		}
    751	}
    752	rule = kmalloc(sizeof(*rule), GFP_ATOMIC);
    753	*new = true;
    754	if (rule) {
    755		memcpy(&rule->spec, spec, sizeof(rule->spec));
    756		hlist_add_head(&rule->node, head);
    757	}
    758	return rule;
    759}
    760
    761void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec)
    762{
    763	struct efx_arfs_rule *rule;
    764	struct hlist_head *head;
    765	struct hlist_node *node;
    766
    767	head = efx_rps_hash_bucket(efx, spec);
    768	if (WARN_ON(!head))
    769		return;
    770	hlist_for_each(node, head) {
    771		rule = container_of(node, struct efx_arfs_rule, node);
    772		if (efx_filter_spec_equal(spec, &rule->spec)) {
    773			/* Someone already reused the entry.  We know that if
    774			 * this check doesn't fire (i.e. filter_id == REMOVING)
    775			 * then the REMOVING mark was put there by our caller,
    776			 * because caller is holding a lock on filter table and
    777			 * only holders of that lock set REMOVING.
    778			 */
    779			if (rule->filter_id != EFX_ARFS_FILTER_ID_REMOVING)
    780				return;
    781			hlist_del(node);
    782			kfree(rule);
    783			return;
    784		}
    785	}
    786	/* We didn't find it. */
    787	WARN_ON(1);
    788}
    789#endif
    790
    791int efx_probe_filters(struct efx_nic *efx)
    792{
    793	int rc;
    794
    795	mutex_lock(&efx->mac_lock);
    796	down_write(&efx->filter_sem);
    797	rc = efx->type->filter_table_probe(efx);
    798	if (rc)
    799		goto out_unlock;
    800
    801#ifdef CONFIG_RFS_ACCEL
    802	if (efx->type->offload_features & NETIF_F_NTUPLE) {
    803		struct efx_channel *channel;
    804		int i, success = 1;
    805
    806		efx_for_each_channel(channel, efx) {
    807			channel->rps_flow_id =
    808				kcalloc(efx->type->max_rx_ip_filters,
    809					sizeof(*channel->rps_flow_id),
    810					GFP_KERNEL);
    811			if (!channel->rps_flow_id)
    812				success = 0;
    813			else
    814				for (i = 0;
    815				     i < efx->type->max_rx_ip_filters;
    816				     ++i)
    817					channel->rps_flow_id[i] =
    818						RPS_FLOW_ID_INVALID;
    819			channel->rfs_expire_index = 0;
    820			channel->rfs_filter_count = 0;
    821		}
    822
    823		if (!success) {
    824			efx_for_each_channel(channel, efx)
    825				kfree(channel->rps_flow_id);
    826			efx->type->filter_table_remove(efx);
    827			rc = -ENOMEM;
    828			goto out_unlock;
    829		}
    830	}
    831#endif
    832out_unlock:
    833	up_write(&efx->filter_sem);
    834	mutex_unlock(&efx->mac_lock);
    835	return rc;
    836}
    837
    838void efx_remove_filters(struct efx_nic *efx)
    839{
    840#ifdef CONFIG_RFS_ACCEL
    841	struct efx_channel *channel;
    842
    843	efx_for_each_channel(channel, efx) {
    844		cancel_delayed_work_sync(&channel->filter_work);
    845		kfree(channel->rps_flow_id);
    846		channel->rps_flow_id = NULL;
    847	}
    848#endif
    849	down_write(&efx->filter_sem);
    850	efx->type->filter_table_remove(efx);
    851	up_write(&efx->filter_sem);
    852}
    853
    854#ifdef CONFIG_RFS_ACCEL
    855
    856static void efx_filter_rfs_work(struct work_struct *data)
    857{
    858	struct efx_async_filter_insertion *req = container_of(data, struct efx_async_filter_insertion,
    859							      work);
    860	struct efx_nic *efx = netdev_priv(req->net_dev);
    861	struct efx_channel *channel = efx_get_channel(efx, req->rxq_index);
    862	int slot_idx = req - efx->rps_slot;
    863	struct efx_arfs_rule *rule;
    864	u16 arfs_id = 0;
    865	int rc;
    866
    867	rc = efx->type->filter_insert(efx, &req->spec, true);
    868	if (rc >= 0)
    869		/* Discard 'priority' part of EF10+ filter ID (mcdi_filters) */
    870		rc %= efx->type->max_rx_ip_filters;
    871	if (efx->rps_hash_table) {
    872		spin_lock_bh(&efx->rps_hash_lock);
    873		rule = efx_rps_hash_find(efx, &req->spec);
    874		/* The rule might have already gone, if someone else's request
    875		 * for the same spec was already worked and then expired before
    876		 * we got around to our work.  In that case we have nothing
    877		 * tying us to an arfs_id, meaning that as soon as the filter
    878		 * is considered for expiry it will be removed.
    879		 */
    880		if (rule) {
    881			if (rc < 0)
    882				rule->filter_id = EFX_ARFS_FILTER_ID_ERROR;
    883			else
    884				rule->filter_id = rc;
    885			arfs_id = rule->arfs_id;
    886		}
    887		spin_unlock_bh(&efx->rps_hash_lock);
    888	}
    889	if (rc >= 0) {
    890		/* Remember this so we can check whether to expire the filter
    891		 * later.
    892		 */
    893		mutex_lock(&efx->rps_mutex);
    894		if (channel->rps_flow_id[rc] == RPS_FLOW_ID_INVALID)
    895			channel->rfs_filter_count++;
    896		channel->rps_flow_id[rc] = req->flow_id;
    897		mutex_unlock(&efx->rps_mutex);
    898
    899		if (req->spec.ether_type == htons(ETH_P_IP))
    900			netif_info(efx, rx_status, efx->net_dev,
    901				   "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d id %u]\n",
    902				   (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
    903				   req->spec.rem_host, ntohs(req->spec.rem_port),
    904				   req->spec.loc_host, ntohs(req->spec.loc_port),
    905				   req->rxq_index, req->flow_id, rc, arfs_id);
    906		else
    907			netif_info(efx, rx_status, efx->net_dev,
    908				   "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d id %u]\n",
    909				   (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
    910				   req->spec.rem_host, ntohs(req->spec.rem_port),
    911				   req->spec.loc_host, ntohs(req->spec.loc_port),
    912				   req->rxq_index, req->flow_id, rc, arfs_id);
    913		channel->n_rfs_succeeded++;
    914	} else {
    915		if (req->spec.ether_type == htons(ETH_P_IP))
    916			netif_dbg(efx, rx_status, efx->net_dev,
    917				  "failed to steer %s %pI4:%u:%pI4:%u to queue %u [flow %u rc %d id %u]\n",
    918				  (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
    919				  req->spec.rem_host, ntohs(req->spec.rem_port),
    920				  req->spec.loc_host, ntohs(req->spec.loc_port),
    921				  req->rxq_index, req->flow_id, rc, arfs_id);
    922		else
    923			netif_dbg(efx, rx_status, efx->net_dev,
    924				  "failed to steer %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u rc %d id %u]\n",
    925				  (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
    926				  req->spec.rem_host, ntohs(req->spec.rem_port),
    927				  req->spec.loc_host, ntohs(req->spec.loc_port),
    928				  req->rxq_index, req->flow_id, rc, arfs_id);
    929		channel->n_rfs_failed++;
    930		/* We're overloading the NIC's filter tables, so let's do a
    931		 * chunk of extra expiry work.
    932		 */
    933		__efx_filter_rfs_expire(channel, min(channel->rfs_filter_count,
    934						     100u));
    935	}
    936
    937	/* Release references */
    938	clear_bit(slot_idx, &efx->rps_slot_map);
    939	dev_put(req->net_dev);
    940}
    941
    942int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
    943		   u16 rxq_index, u32 flow_id)
    944{
    945	struct efx_nic *efx = netdev_priv(net_dev);
    946	struct efx_async_filter_insertion *req;
    947	struct efx_arfs_rule *rule;
    948	struct flow_keys fk;
    949	int slot_idx;
    950	bool new;
    951	int rc;
    952
    953	/* find a free slot */
    954	for (slot_idx = 0; slot_idx < EFX_RPS_MAX_IN_FLIGHT; slot_idx++)
    955		if (!test_and_set_bit(slot_idx, &efx->rps_slot_map))
    956			break;
    957	if (slot_idx >= EFX_RPS_MAX_IN_FLIGHT)
    958		return -EBUSY;
    959
    960	if (flow_id == RPS_FLOW_ID_INVALID) {
    961		rc = -EINVAL;
    962		goto out_clear;
    963	}
    964
    965	if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) {
    966		rc = -EPROTONOSUPPORT;
    967		goto out_clear;
    968	}
    969
    970	if (fk.basic.n_proto != htons(ETH_P_IP) && fk.basic.n_proto != htons(ETH_P_IPV6)) {
    971		rc = -EPROTONOSUPPORT;
    972		goto out_clear;
    973	}
    974	if (fk.control.flags & FLOW_DIS_IS_FRAGMENT) {
    975		rc = -EPROTONOSUPPORT;
    976		goto out_clear;
    977	}
    978
    979	req = efx->rps_slot + slot_idx;
    980	efx_filter_init_rx(&req->spec, EFX_FILTER_PRI_HINT,
    981			   efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0,
    982			   rxq_index);
    983	req->spec.match_flags =
    984		EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO |
    985		EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT |
    986		EFX_FILTER_MATCH_REM_HOST | EFX_FILTER_MATCH_REM_PORT;
    987	req->spec.ether_type = fk.basic.n_proto;
    988	req->spec.ip_proto = fk.basic.ip_proto;
    989
    990	if (fk.basic.n_proto == htons(ETH_P_IP)) {
    991		req->spec.rem_host[0] = fk.addrs.v4addrs.src;
    992		req->spec.loc_host[0] = fk.addrs.v4addrs.dst;
    993	} else {
    994		memcpy(req->spec.rem_host, &fk.addrs.v6addrs.src,
    995		       sizeof(struct in6_addr));
    996		memcpy(req->spec.loc_host, &fk.addrs.v6addrs.dst,
    997		       sizeof(struct in6_addr));
    998	}
    999
   1000	req->spec.rem_port = fk.ports.src;
   1001	req->spec.loc_port = fk.ports.dst;
   1002
   1003	if (efx->rps_hash_table) {
   1004		/* Add it to ARFS hash table */
   1005		spin_lock(&efx->rps_hash_lock);
   1006		rule = efx_rps_hash_add(efx, &req->spec, &new);
   1007		if (!rule) {
   1008			rc = -ENOMEM;
   1009			goto out_unlock;
   1010		}
   1011		if (new)
   1012			rule->arfs_id = efx->rps_next_id++ % RPS_NO_FILTER;
   1013		rc = rule->arfs_id;
   1014		/* Skip if existing or pending filter already does the right thing */
   1015		if (!new && rule->rxq_index == rxq_index &&
   1016		    rule->filter_id >= EFX_ARFS_FILTER_ID_PENDING)
   1017			goto out_unlock;
   1018		rule->rxq_index = rxq_index;
   1019		rule->filter_id = EFX_ARFS_FILTER_ID_PENDING;
   1020		spin_unlock(&efx->rps_hash_lock);
   1021	} else {
   1022		/* Without an ARFS hash table, we just use arfs_id 0 for all
   1023		 * filters.  This means if multiple flows hash to the same
   1024		 * flow_id, all but the most recently touched will be eligible
   1025		 * for expiry.
   1026		 */
   1027		rc = 0;
   1028	}
   1029
   1030	/* Queue the request */
   1031	dev_hold(req->net_dev = net_dev);
   1032	INIT_WORK(&req->work, efx_filter_rfs_work);
   1033	req->rxq_index = rxq_index;
   1034	req->flow_id = flow_id;
   1035	schedule_work(&req->work);
   1036	return rc;
   1037out_unlock:
   1038	spin_unlock(&efx->rps_hash_lock);
   1039out_clear:
   1040	clear_bit(slot_idx, &efx->rps_slot_map);
   1041	return rc;
   1042}
   1043
   1044bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota)
   1045{
   1046	bool (*expire_one)(struct efx_nic *efx, u32 flow_id, unsigned int index);
   1047	struct efx_nic *efx = channel->efx;
   1048	unsigned int index, size, start;
   1049	u32 flow_id;
   1050
   1051	if (!mutex_trylock(&efx->rps_mutex))
   1052		return false;
   1053	expire_one = efx->type->filter_rfs_expire_one;
   1054	index = channel->rfs_expire_index;
   1055	start = index;
   1056	size = efx->type->max_rx_ip_filters;
   1057	while (quota) {
   1058		flow_id = channel->rps_flow_id[index];
   1059
   1060		if (flow_id != RPS_FLOW_ID_INVALID) {
   1061			quota--;
   1062			if (expire_one(efx, flow_id, index)) {
   1063				netif_info(efx, rx_status, efx->net_dev,
   1064					   "expired filter %d [channel %u flow %u]\n",
   1065					   index, channel->channel, flow_id);
   1066				channel->rps_flow_id[index] = RPS_FLOW_ID_INVALID;
   1067				channel->rfs_filter_count--;
   1068			}
   1069		}
   1070		if (++index == size)
   1071			index = 0;
   1072		/* If we were called with a quota that exceeds the total number
   1073		 * of filters in the table (which shouldn't happen, but could
   1074		 * if two callers race), ensure that we don't loop forever -
   1075		 * stop when we've examined every row of the table.
   1076		 */
   1077		if (index == start)
   1078			break;
   1079	}
   1080
   1081	channel->rfs_expire_index = index;
   1082	mutex_unlock(&efx->rps_mutex);
   1083	return true;
   1084}
   1085
   1086#endif /* CONFIG_RFS_ACCEL */