cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

igc_main.c (177712B)


      1// SPDX-License-Identifier: GPL-2.0
      2/* Copyright (c)  2018 Intel Corporation */
      3
      4#include <linux/module.h>
      5#include <linux/types.h>
      6#include <linux/if_vlan.h>
      7#include <linux/aer.h>
      8#include <linux/tcp.h>
      9#include <linux/udp.h>
     10#include <linux/ip.h>
     11#include <linux/pm_runtime.h>
     12#include <net/pkt_sched.h>
     13#include <linux/bpf_trace.h>
     14#include <net/xdp_sock_drv.h>
     15#include <linux/pci.h>
     16
     17#include <net/ipv6.h>
     18
     19#include "igc.h"
     20#include "igc_hw.h"
     21#include "igc_tsn.h"
     22#include "igc_xdp.h"
     23
     24#define DRV_SUMMARY	"Intel(R) 2.5G Ethernet Linux Driver"
     25
     26#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
     27
     28#define IGC_XDP_PASS		0
     29#define IGC_XDP_CONSUMED	BIT(0)
     30#define IGC_XDP_TX		BIT(1)
     31#define IGC_XDP_REDIRECT	BIT(2)
     32
     33static int debug = -1;
     34
     35MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
     36MODULE_DESCRIPTION(DRV_SUMMARY);
     37MODULE_LICENSE("GPL v2");
     38module_param(debug, int, 0);
     39MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
     40
     41char igc_driver_name[] = "igc";
     42static const char igc_driver_string[] = DRV_SUMMARY;
     43static const char igc_copyright[] =
     44	"Copyright(c) 2018 Intel Corporation.";
     45
     46static const struct igc_info *igc_info_tbl[] = {
     47	[board_base] = &igc_base_info,
     48};
     49
     50static const struct pci_device_id igc_pci_tbl[] = {
     51	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LM), board_base },
     52	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_V), board_base },
     53	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_I), board_base },
     54	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I220_V), board_base },
     55	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K), board_base },
     56	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K2), board_base },
     57	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_K), board_base },
     58	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LMVP), board_base },
     59	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LMVP), board_base },
     60	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_IT), board_base },
     61	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LM), board_base },
     62	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_V), board_base },
     63	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_IT), board_base },
     64	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I221_V), board_base },
     65	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_BLANK_NVM), board_base },
     66	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_BLANK_NVM), board_base },
     67	/* required last entry */
     68	{0, }
     69};
     70
     71MODULE_DEVICE_TABLE(pci, igc_pci_tbl);
     72
     73enum latency_range {
     74	lowest_latency = 0,
     75	low_latency = 1,
     76	bulk_latency = 2,
     77	latency_invalid = 255
     78};
     79
     80void igc_reset(struct igc_adapter *adapter)
     81{
     82	struct net_device *dev = adapter->netdev;
     83	struct igc_hw *hw = &adapter->hw;
     84	struct igc_fc_info *fc = &hw->fc;
     85	u32 pba, hwm;
     86
     87	/* Repartition PBA for greater than 9k MTU if required */
     88	pba = IGC_PBA_34K;
     89
     90	/* flow control settings
     91	 * The high water mark must be low enough to fit one full frame
     92	 * after transmitting the pause frame.  As such we must have enough
     93	 * space to allow for us to complete our current transmit and then
     94	 * receive the frame that is in progress from the link partner.
     95	 * Set it to:
     96	 * - the full Rx FIFO size minus one full Tx plus one full Rx frame
     97	 */
     98	hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE);
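        	/* For example, with the 34 KB packet buffer selected above, the
        	 * mark leaves headroom for one in-progress transmit plus one
        	 * maximum-size (jumbo) receive before being rounded down to the
        	 * 16-byte granularity of the high-water register below.
        	 */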
     99
    100	fc->high_water = hwm & 0xFFFFFFF0;	/* 16-byte granularity */
    101	fc->low_water = fc->high_water - 16;
    102	fc->pause_time = 0xFFFF;
    103	fc->send_xon = 1;
    104	fc->current_mode = fc->requested_mode;
    105
    106	hw->mac.ops.reset_hw(hw);
    107
    108	if (hw->mac.ops.init_hw(hw))
    109		netdev_err(dev, "Error on hardware initialization\n");
    110
    111	/* Re-establish EEE setting */
    112	igc_set_eee_i225(hw, true, true, true);
    113
    114	if (!netif_running(adapter->netdev))
    115		igc_power_down_phy_copper_base(&adapter->hw);
    116
    117	/* Enable HW to recognize an 802.1Q VLAN Ethernet packet */
    118	wr32(IGC_VET, ETH_P_8021Q);
    119
    120	/* Re-enable PTP, where applicable. */
    121	igc_ptp_reset(adapter);
    122
    123	/* Re-enable TSN offloading, where applicable. */
    124	igc_tsn_reset(adapter);
    125
    126	igc_get_phy_info(hw);
    127}
    128
    129/**
    130 * igc_power_up_link - Power up the phy link
    131 * @adapter: address of board private structure
    132 */
    133static void igc_power_up_link(struct igc_adapter *adapter)
    134{
    135	igc_reset_phy(&adapter->hw);
    136
    137	igc_power_up_phy_copper(&adapter->hw);
    138
    139	igc_setup_link(&adapter->hw);
    140}
    141
    142/**
    143 * igc_release_hw_control - release control of the h/w to f/w
    144 * @adapter: address of board private structure
    145 *
    146 * igc_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
    147 * For ASF and Pass Through versions of f/w this means that the
    148 * driver is no longer loaded.
    149 */
    150static void igc_release_hw_control(struct igc_adapter *adapter)
    151{
    152	struct igc_hw *hw = &adapter->hw;
    153	u32 ctrl_ext;
    154
    155	if (!pci_device_is_present(adapter->pdev))
    156		return;
    157
    158	/* Let firmware take over control of h/w */
    159	ctrl_ext = rd32(IGC_CTRL_EXT);
    160	wr32(IGC_CTRL_EXT,
    161	     ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD);
    162}
    163
    164/**
    165 * igc_get_hw_control - get control of the h/w from f/w
    166 * @adapter: address of board private structure
    167 *
    168 * igc_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
    169 * For ASF and Pass Through versions of f/w this means that
    170 * the driver is loaded.
    171 */
    172static void igc_get_hw_control(struct igc_adapter *adapter)
    173{
    174	struct igc_hw *hw = &adapter->hw;
    175	u32 ctrl_ext;
    176
    177	/* Let firmware know the driver has taken over */
    178	ctrl_ext = rd32(IGC_CTRL_EXT);
    179	wr32(IGC_CTRL_EXT,
    180	     ctrl_ext | IGC_CTRL_EXT_DRV_LOAD);
    181}
    182
    183static void igc_unmap_tx_buffer(struct device *dev, struct igc_tx_buffer *buf)
    184{
    185	dma_unmap_single(dev, dma_unmap_addr(buf, dma),
    186			 dma_unmap_len(buf, len), DMA_TO_DEVICE);
    187
    188	dma_unmap_len_set(buf, len, 0);
    189}
    190
    191/**
    192 * igc_clean_tx_ring - Free Tx Buffers
    193 * @tx_ring: ring to be cleaned
    194 */
    195static void igc_clean_tx_ring(struct igc_ring *tx_ring)
    196{
    197	u16 i = tx_ring->next_to_clean;
    198	struct igc_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
    199	u32 xsk_frames = 0;
    200
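        	/* Walk the ring from next_to_clean to next_to_use and release
        	 * every buffer still outstanding on the ring.
        	 */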
    201	while (i != tx_ring->next_to_use) {
    202		union igc_adv_tx_desc *eop_desc, *tx_desc;
    203
    204		switch (tx_buffer->type) {
    205		case IGC_TX_BUFFER_TYPE_XSK:
    206			xsk_frames++;
    207			break;
    208		case IGC_TX_BUFFER_TYPE_XDP:
    209			xdp_return_frame(tx_buffer->xdpf);
    210			igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
    211			break;
    212		case IGC_TX_BUFFER_TYPE_SKB:
    213			dev_kfree_skb_any(tx_buffer->skb);
    214			igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
    215			break;
    216		default:
    217			netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n");
    218			break;
    219		}
    220
    221		/* check for eop_desc to determine the end of the packet */
    222		eop_desc = tx_buffer->next_to_watch;
    223		tx_desc = IGC_TX_DESC(tx_ring, i);
    224
    225		/* unmap remaining buffers */
    226		while (tx_desc != eop_desc) {
    227			tx_buffer++;
    228			tx_desc++;
    229			i++;
    230			if (unlikely(i == tx_ring->count)) {
    231				i = 0;
    232				tx_buffer = tx_ring->tx_buffer_info;
    233				tx_desc = IGC_TX_DESC(tx_ring, 0);
    234			}
    235
    236			/* unmap any remaining paged data */
    237			if (dma_unmap_len(tx_buffer, len))
    238				igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
    239		}
    240
    241		tx_buffer->next_to_watch = NULL;
    242
    243		/* move us one more past the eop_desc for start of next pkt */
    244		tx_buffer++;
    245		i++;
    246		if (unlikely(i == tx_ring->count)) {
    247			i = 0;
    248			tx_buffer = tx_ring->tx_buffer_info;
    249		}
    250	}
    251
    252	if (tx_ring->xsk_pool && xsk_frames)
    253		xsk_tx_completed(tx_ring->xsk_pool, xsk_frames);
    254
    255	/* reset BQL for queue */
    256	netdev_tx_reset_queue(txring_txq(tx_ring));
    257
    258	/* reset next_to_use and next_to_clean */
    259	tx_ring->next_to_use = 0;
    260	tx_ring->next_to_clean = 0;
    261}
    262
    263/**
    264 * igc_free_tx_resources - Free Tx Resources per Queue
    265 * @tx_ring: Tx descriptor ring for a specific queue
    266 *
    267 * Free all transmit software resources
    268 */
    269void igc_free_tx_resources(struct igc_ring *tx_ring)
    270{
    271	igc_clean_tx_ring(tx_ring);
    272
    273	vfree(tx_ring->tx_buffer_info);
    274	tx_ring->tx_buffer_info = NULL;
    275
    276	/* if not set, then don't free */
    277	if (!tx_ring->desc)
    278		return;
    279
    280	dma_free_coherent(tx_ring->dev, tx_ring->size,
    281			  tx_ring->desc, tx_ring->dma);
    282
    283	tx_ring->desc = NULL;
    284}
    285
    286/**
    287 * igc_free_all_tx_resources - Free Tx Resources for All Queues
    288 * @adapter: board private structure
    289 *
    290 * Free all transmit software resources
    291 */
    292static void igc_free_all_tx_resources(struct igc_adapter *adapter)
    293{
    294	int i;
    295
    296	for (i = 0; i < adapter->num_tx_queues; i++)
    297		igc_free_tx_resources(adapter->tx_ring[i]);
    298}
    299
    300/**
    301 * igc_clean_all_tx_rings - Free Tx Buffers for all queues
    302 * @adapter: board private structure
    303 */
    304static void igc_clean_all_tx_rings(struct igc_adapter *adapter)
    305{
    306	int i;
    307
    308	for (i = 0; i < adapter->num_tx_queues; i++)
    309		if (adapter->tx_ring[i])
    310			igc_clean_tx_ring(adapter->tx_ring[i]);
    311}
    312
    313/**
    314 * igc_setup_tx_resources - allocate Tx resources (Descriptors)
    315 * @tx_ring: tx descriptor ring (for a specific queue) to setup
    316 *
    317 * Return 0 on success, negative on failure
    318 */
    319int igc_setup_tx_resources(struct igc_ring *tx_ring)
    320{
    321	struct net_device *ndev = tx_ring->netdev;
    322	struct device *dev = tx_ring->dev;
    323	int size = 0;
    324
    325	size = sizeof(struct igc_tx_buffer) * tx_ring->count;
    326	tx_ring->tx_buffer_info = vzalloc(size);
    327	if (!tx_ring->tx_buffer_info)
    328		goto err;
    329
    330	/* round up to nearest 4K */
    331	tx_ring->size = tx_ring->count * sizeof(union igc_adv_tx_desc);
    332	tx_ring->size = ALIGN(tx_ring->size, 4096);
    333
    334	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
    335					   &tx_ring->dma, GFP_KERNEL);
    336
    337	if (!tx_ring->desc)
    338		goto err;
    339
    340	tx_ring->next_to_use = 0;
    341	tx_ring->next_to_clean = 0;
    342
    343	return 0;
    344
    345err:
    346	vfree(tx_ring->tx_buffer_info);
    347	netdev_err(ndev, "Unable to allocate memory for Tx descriptor ring\n");
    348	return -ENOMEM;
    349}
    350
    351/**
    352 * igc_setup_all_tx_resources - wrapper to allocate Tx resources for all queues
    353 * @adapter: board private structure
    354 *
    355 * Return 0 on success, negative on failure
    356 */
    357static int igc_setup_all_tx_resources(struct igc_adapter *adapter)
    358{
    359	struct net_device *dev = adapter->netdev;
    360	int i, err = 0;
    361
    362	for (i = 0; i < adapter->num_tx_queues; i++) {
    363		err = igc_setup_tx_resources(adapter->tx_ring[i]);
    364		if (err) {
    365			netdev_err(dev, "Error on Tx queue %u setup\n", i);
    366			for (i--; i >= 0; i--)
    367				igc_free_tx_resources(adapter->tx_ring[i]);
    368			break;
    369		}
    370	}
    371
    372	return err;
    373}
    374
    375static void igc_clean_rx_ring_page_shared(struct igc_ring *rx_ring)
    376{
    377	u16 i = rx_ring->next_to_clean;
    378
    379	dev_kfree_skb(rx_ring->skb);
    380	rx_ring->skb = NULL;
    381
    382	/* Free all the Rx ring sk_buffs */
    383	while (i != rx_ring->next_to_alloc) {
    384		struct igc_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
    385
    386		/* Invalidate cache lines that may have been written to by
    387		 * device so that we avoid corrupting memory.
    388		 */
    389		dma_sync_single_range_for_cpu(rx_ring->dev,
    390					      buffer_info->dma,
    391					      buffer_info->page_offset,
    392					      igc_rx_bufsz(rx_ring),
    393					      DMA_FROM_DEVICE);
    394
    395		/* free resources associated with mapping */
    396		dma_unmap_page_attrs(rx_ring->dev,
    397				     buffer_info->dma,
    398				     igc_rx_pg_size(rx_ring),
    399				     DMA_FROM_DEVICE,
    400				     IGC_RX_DMA_ATTR);
    401		__page_frag_cache_drain(buffer_info->page,
    402					buffer_info->pagecnt_bias);
    403
    404		i++;
    405		if (i == rx_ring->count)
    406			i = 0;
    407	}
    408}
    409
    410static void igc_clean_rx_ring_xsk_pool(struct igc_ring *ring)
    411{
    412	struct igc_rx_buffer *bi;
    413	u16 i;
    414
    415	for (i = 0; i < ring->count; i++) {
    416		bi = &ring->rx_buffer_info[i];
    417		if (!bi->xdp)
    418			continue;
    419
    420		xsk_buff_free(bi->xdp);
    421		bi->xdp = NULL;
    422	}
    423}
    424
    425/**
    426 * igc_clean_rx_ring - Free Rx Buffers per Queue
    427 * @ring: ring to free buffers from
    428 */
    429static void igc_clean_rx_ring(struct igc_ring *ring)
    430{
    431	if (ring->xsk_pool)
    432		igc_clean_rx_ring_xsk_pool(ring);
    433	else
    434		igc_clean_rx_ring_page_shared(ring);
    435
    436	clear_ring_uses_large_buffer(ring);
    437
    438	ring->next_to_alloc = 0;
    439	ring->next_to_clean = 0;
    440	ring->next_to_use = 0;
    441}
    442
    443/**
    444 * igc_clean_all_rx_rings - Free Rx Buffers for all queues
    445 * @adapter: board private structure
    446 */
    447static void igc_clean_all_rx_rings(struct igc_adapter *adapter)
    448{
    449	int i;
    450
    451	for (i = 0; i < adapter->num_rx_queues; i++)
    452		if (adapter->rx_ring[i])
    453			igc_clean_rx_ring(adapter->rx_ring[i]);
    454}
    455
    456/**
    457 * igc_free_rx_resources - Free Rx Resources
    458 * @rx_ring: ring to clean the resources from
    459 *
    460 * Free all receive software resources
    461 */
    462void igc_free_rx_resources(struct igc_ring *rx_ring)
    463{
    464	igc_clean_rx_ring(rx_ring);
    465
    466	xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
    467
    468	vfree(rx_ring->rx_buffer_info);
    469	rx_ring->rx_buffer_info = NULL;
    470
    471	/* if not set, then don't free */
    472	if (!rx_ring->desc)
    473		return;
    474
    475	dma_free_coherent(rx_ring->dev, rx_ring->size,
    476			  rx_ring->desc, rx_ring->dma);
    477
    478	rx_ring->desc = NULL;
    479}
    480
    481/**
    482 * igc_free_all_rx_resources - Free Rx Resources for All Queues
    483 * @adapter: board private structure
    484 *
    485 * Free all receive software resources
    486 */
    487static void igc_free_all_rx_resources(struct igc_adapter *adapter)
    488{
    489	int i;
    490
    491	for (i = 0; i < adapter->num_rx_queues; i++)
    492		igc_free_rx_resources(adapter->rx_ring[i]);
    493}
    494
    495/**
    496 * igc_setup_rx_resources - allocate Rx resources (Descriptors)
    497 * @rx_ring:    rx descriptor ring (for a specific queue) to setup
    498 *
    499 * Returns 0 on success, negative on failure
    500 */
    501int igc_setup_rx_resources(struct igc_ring *rx_ring)
    502{
    503	struct net_device *ndev = rx_ring->netdev;
    504	struct device *dev = rx_ring->dev;
    505	u8 index = rx_ring->queue_index;
    506	int size, desc_len, res;
    507
    508	/* XDP RX-queue info */
    509	if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
    510		xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
    511	res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, ndev, index,
    512			       rx_ring->q_vector->napi.napi_id);
    513	if (res < 0) {
    514		netdev_err(ndev, "Failed to register xdp_rxq index %u\n",
    515			   index);
    516		return res;
    517	}
    518
    519	size = sizeof(struct igc_rx_buffer) * rx_ring->count;
    520	rx_ring->rx_buffer_info = vzalloc(size);
    521	if (!rx_ring->rx_buffer_info)
    522		goto err;
    523
    524	desc_len = sizeof(union igc_adv_rx_desc);
    525
    526	/* Round up to nearest 4K */
    527	rx_ring->size = rx_ring->count * desc_len;
    528	rx_ring->size = ALIGN(rx_ring->size, 4096);
    529
    530	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
    531					   &rx_ring->dma, GFP_KERNEL);
    532
    533	if (!rx_ring->desc)
    534		goto err;
    535
    536	rx_ring->next_to_alloc = 0;
    537	rx_ring->next_to_clean = 0;
    538	rx_ring->next_to_use = 0;
    539
    540	return 0;
    541
    542err:
    543	xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
    544	vfree(rx_ring->rx_buffer_info);
    545	rx_ring->rx_buffer_info = NULL;
    546	netdev_err(ndev, "Unable to allocate memory for Rx descriptor ring\n");
    547	return -ENOMEM;
    548}
    549
    550/**
    551 * igc_setup_all_rx_resources - wrapper to allocate Rx resources
    552 *                                (Descriptors) for all queues
    553 * @adapter: board private structure
    554 *
    555 * Return 0 on success, negative on failure
    556 */
    557static int igc_setup_all_rx_resources(struct igc_adapter *adapter)
    558{
    559	struct net_device *dev = adapter->netdev;
    560	int i, err = 0;
    561
    562	for (i = 0; i < adapter->num_rx_queues; i++) {
    563		err = igc_setup_rx_resources(adapter->rx_ring[i]);
    564		if (err) {
    565			netdev_err(dev, "Error on Rx queue %u setup\n", i);
    566			for (i--; i >= 0; i--)
    567				igc_free_rx_resources(adapter->rx_ring[i]);
    568			break;
    569		}
    570	}
    571
    572	return err;
    573}
    574
    575static struct xsk_buff_pool *igc_get_xsk_pool(struct igc_adapter *adapter,
    576					      struct igc_ring *ring)
    577{
    578	if (!igc_xdp_is_enabled(adapter) ||
    579	    !test_bit(IGC_RING_FLAG_AF_XDP_ZC, &ring->flags))
    580		return NULL;
    581
    582	return xsk_get_pool_from_qid(ring->netdev, ring->queue_index);
    583}
    584
    585/**
    586 * igc_configure_rx_ring - Configure a receive ring after Reset
    587 * @adapter: board private structure
    588 * @ring: receive ring to be configured
    589 *
    590 * Configure the Rx unit of the MAC after a reset.
    591 */
    592static void igc_configure_rx_ring(struct igc_adapter *adapter,
    593				  struct igc_ring *ring)
    594{
    595	struct igc_hw *hw = &adapter->hw;
    596	union igc_adv_rx_desc *rx_desc;
    597	int reg_idx = ring->reg_idx;
    598	u32 srrctl = 0, rxdctl = 0;
    599	u64 rdba = ring->dma;
    600	u32 buf_size;
    601
    602	xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
    603	ring->xsk_pool = igc_get_xsk_pool(adapter, ring);
    604	if (ring->xsk_pool) {
    605		WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
    606						   MEM_TYPE_XSK_BUFF_POOL,
    607						   NULL));
    608		xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
    609	} else {
    610		WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
    611						   MEM_TYPE_PAGE_SHARED,
    612						   NULL));
    613	}
    614
    615	if (igc_xdp_is_enabled(adapter))
    616		set_ring_uses_large_buffer(ring);
    617
    618	/* disable the queue */
    619	wr32(IGC_RXDCTL(reg_idx), 0);
    620
    621	/* Set DMA base address registers */
    622	wr32(IGC_RDBAL(reg_idx),
    623	     rdba & 0x00000000ffffffffULL);
    624	wr32(IGC_RDBAH(reg_idx), rdba >> 32);
    625	wr32(IGC_RDLEN(reg_idx),
    626	     ring->count * sizeof(union igc_adv_rx_desc));
    627
    628	/* initialize head and tail */
    629	ring->tail = adapter->io_addr + IGC_RDT(reg_idx);
    630	wr32(IGC_RDH(reg_idx), 0);
    631	writel(0, ring->tail);
    632
     633	/* reset next-to-use/clean to place SW in sync with hardware */
    634	ring->next_to_clean = 0;
    635	ring->next_to_use = 0;
    636
    637	if (ring->xsk_pool)
    638		buf_size = xsk_pool_get_rx_frame_size(ring->xsk_pool);
    639	else if (ring_uses_large_buffer(ring))
    640		buf_size = IGC_RXBUFFER_3072;
    641	else
    642		buf_size = IGC_RXBUFFER_2048;
    643
    644	srrctl = IGC_RX_HDR_LEN << IGC_SRRCTL_BSIZEHDRSIZE_SHIFT;
    645	srrctl |= buf_size >> IGC_SRRCTL_BSIZEPKT_SHIFT;
    646	srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF;
    647
    648	wr32(IGC_SRRCTL(reg_idx), srrctl);
    649
    650	rxdctl |= IGC_RX_PTHRESH;
    651	rxdctl |= IGC_RX_HTHRESH << 8;
    652	rxdctl |= IGC_RX_WTHRESH << 16;
    653
    654	/* initialize rx_buffer_info */
    655	memset(ring->rx_buffer_info, 0,
    656	       sizeof(struct igc_rx_buffer) * ring->count);
    657
    658	/* initialize Rx descriptor 0 */
    659	rx_desc = IGC_RX_DESC(ring, 0);
    660	rx_desc->wb.upper.length = 0;
    661
    662	/* enable receive descriptor fetching */
    663	rxdctl |= IGC_RXDCTL_QUEUE_ENABLE;
    664
    665	wr32(IGC_RXDCTL(reg_idx), rxdctl);
    666}
    667
    668/**
    669 * igc_configure_rx - Configure receive Unit after Reset
    670 * @adapter: board private structure
    671 *
    672 * Configure the Rx unit of the MAC after a reset.
    673 */
    674static void igc_configure_rx(struct igc_adapter *adapter)
    675{
    676	int i;
    677
    678	/* Setup the HW Rx Head and Tail Descriptor Pointers and
    679	 * the Base and Length of the Rx Descriptor Ring
    680	 */
    681	for (i = 0; i < adapter->num_rx_queues; i++)
    682		igc_configure_rx_ring(adapter, adapter->rx_ring[i]);
    683}
    684
    685/**
    686 * igc_configure_tx_ring - Configure transmit ring after Reset
    687 * @adapter: board private structure
    688 * @ring: tx ring to configure
    689 *
    690 * Configure a transmit ring after a reset.
    691 */
    692static void igc_configure_tx_ring(struct igc_adapter *adapter,
    693				  struct igc_ring *ring)
    694{
    695	struct igc_hw *hw = &adapter->hw;
    696	int reg_idx = ring->reg_idx;
    697	u64 tdba = ring->dma;
    698	u32 txdctl = 0;
    699
    700	ring->xsk_pool = igc_get_xsk_pool(adapter, ring);
    701
    702	/* disable the queue */
    703	wr32(IGC_TXDCTL(reg_idx), 0);
    704	wrfl();
    705	mdelay(10);
    706
    707	wr32(IGC_TDLEN(reg_idx),
    708	     ring->count * sizeof(union igc_adv_tx_desc));
    709	wr32(IGC_TDBAL(reg_idx),
    710	     tdba & 0x00000000ffffffffULL);
    711	wr32(IGC_TDBAH(reg_idx), tdba >> 32);
    712
    713	ring->tail = adapter->io_addr + IGC_TDT(reg_idx);
    714	wr32(IGC_TDH(reg_idx), 0);
    715	writel(0, ring->tail);
    716
    717	txdctl |= IGC_TX_PTHRESH;
    718	txdctl |= IGC_TX_HTHRESH << 8;
    719	txdctl |= IGC_TX_WTHRESH << 16;
    720
    721	txdctl |= IGC_TXDCTL_QUEUE_ENABLE;
    722	wr32(IGC_TXDCTL(reg_idx), txdctl);
    723}
    724
    725/**
    726 * igc_configure_tx - Configure transmit Unit after Reset
    727 * @adapter: board private structure
    728 *
    729 * Configure the Tx unit of the MAC after a reset.
    730 */
    731static void igc_configure_tx(struct igc_adapter *adapter)
    732{
    733	int i;
    734
    735	for (i = 0; i < adapter->num_tx_queues; i++)
    736		igc_configure_tx_ring(adapter, adapter->tx_ring[i]);
    737}
    738
    739/**
    740 * igc_setup_mrqc - configure the multiple receive queue control registers
    741 * @adapter: Board private structure
    742 */
    743static void igc_setup_mrqc(struct igc_adapter *adapter)
    744{
    745	struct igc_hw *hw = &adapter->hw;
    746	u32 j, num_rx_queues;
    747	u32 mrqc, rxcsum;
    748	u32 rss_key[10];
    749
    750	netdev_rss_key_fill(rss_key, sizeof(rss_key));
    751	for (j = 0; j < 10; j++)
    752		wr32(IGC_RSSRK(j), rss_key[j]);
    753
    754	num_rx_queues = adapter->rss_queues;
    755
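        	/* Entry j of the indirection table maps to Rx queue
        	 * (j * num_rx_queues) / IGC_RETA_SIZE, spreading the table
        	 * entries evenly across the enabled RSS queues.
        	 */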
    756	if (adapter->rss_indir_tbl_init != num_rx_queues) {
    757		for (j = 0; j < IGC_RETA_SIZE; j++)
    758			adapter->rss_indir_tbl[j] =
    759			(j * num_rx_queues) / IGC_RETA_SIZE;
    760		adapter->rss_indir_tbl_init = num_rx_queues;
    761	}
    762	igc_write_rss_indir_tbl(adapter);
    763
    764	/* Disable raw packet checksumming so that RSS hash is placed in
    765	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
    766	 * offloads as they are enabled by default
    767	 */
    768	rxcsum = rd32(IGC_RXCSUM);
    769	rxcsum |= IGC_RXCSUM_PCSD;
    770
    771	/* Enable Receive Checksum Offload for SCTP */
    772	rxcsum |= IGC_RXCSUM_CRCOFL;
    773
    774	/* Don't need to set TUOFL or IPOFL, they default to 1 */
    775	wr32(IGC_RXCSUM, rxcsum);
    776
    777	/* Generate RSS hash based on packet types, TCP/UDP
    778	 * port numbers and/or IPv4/v6 src and dst addresses
    779	 */
    780	mrqc = IGC_MRQC_RSS_FIELD_IPV4 |
    781	       IGC_MRQC_RSS_FIELD_IPV4_TCP |
    782	       IGC_MRQC_RSS_FIELD_IPV6 |
    783	       IGC_MRQC_RSS_FIELD_IPV6_TCP |
    784	       IGC_MRQC_RSS_FIELD_IPV6_TCP_EX;
    785
    786	if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV4_UDP)
    787		mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP;
    788	if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV6_UDP)
    789		mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP;
    790
    791	mrqc |= IGC_MRQC_ENABLE_RSS_MQ;
    792
    793	wr32(IGC_MRQC, mrqc);
    794}
    795
    796/**
    797 * igc_setup_rctl - configure the receive control registers
    798 * @adapter: Board private structure
    799 */
    800static void igc_setup_rctl(struct igc_adapter *adapter)
    801{
    802	struct igc_hw *hw = &adapter->hw;
    803	u32 rctl;
    804
    805	rctl = rd32(IGC_RCTL);
    806
    807	rctl &= ~(3 << IGC_RCTL_MO_SHIFT);
    808	rctl &= ~(IGC_RCTL_LBM_TCVR | IGC_RCTL_LBM_MAC);
    809
    810	rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_RDMTS_HALF |
    811		(hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT);
    812
    813	/* enable stripping of CRC. Newer features require
    814	 * that the HW strips the CRC.
    815	 */
    816	rctl |= IGC_RCTL_SECRC;
    817
    818	/* disable store bad packets and clear size bits. */
    819	rctl &= ~(IGC_RCTL_SBP | IGC_RCTL_SZ_256);
    820
    821	/* enable LPE to allow for reception of jumbo frames */
    822	rctl |= IGC_RCTL_LPE;
    823
    824	/* disable queue 0 to prevent tail write w/o re-config */
    825	wr32(IGC_RXDCTL(0), 0);
    826
    827	/* This is useful for sniffing bad packets. */
    828	if (adapter->netdev->features & NETIF_F_RXALL) {
    829		/* UPE and MPE will be handled by normal PROMISC logic
    830		 * in set_rx_mode
    831		 */
    832		rctl |= (IGC_RCTL_SBP | /* Receive bad packets */
    833			 IGC_RCTL_BAM | /* RX All Bcast Pkts */
    834			 IGC_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
    835
    836		rctl &= ~(IGC_RCTL_DPF | /* Allow filtered pause */
    837			  IGC_RCTL_CFIEN); /* Disable VLAN CFIEN Filter */
    838	}
    839
    840	wr32(IGC_RCTL, rctl);
    841}
    842
    843/**
    844 * igc_setup_tctl - configure the transmit control registers
    845 * @adapter: Board private structure
    846 */
    847static void igc_setup_tctl(struct igc_adapter *adapter)
    848{
    849	struct igc_hw *hw = &adapter->hw;
    850	u32 tctl;
    851
     852	/* disable queue 0, which could be enabled by default */
    853	wr32(IGC_TXDCTL(0), 0);
    854
    855	/* Program the Transmit Control Register */
    856	tctl = rd32(IGC_TCTL);
    857	tctl &= ~IGC_TCTL_CT;
    858	tctl |= IGC_TCTL_PSP | IGC_TCTL_RTLC |
    859		(IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT);
    860
    861	/* Enable transmits */
    862	tctl |= IGC_TCTL_EN;
    863
    864	wr32(IGC_TCTL, tctl);
    865}
    866
    867/**
    868 * igc_set_mac_filter_hw() - Set MAC address filter in hardware
    869 * @adapter: Pointer to adapter where the filter should be set
    870 * @index: Filter index
    871 * @type: MAC address filter type (source or destination)
    872 * @addr: MAC address
    873 * @queue: If non-negative, queue assignment feature is enabled and frames
    874 *         matching the filter are enqueued onto 'queue'. Otherwise, queue
    875 *         assignment is disabled.
    876 */
    877static void igc_set_mac_filter_hw(struct igc_adapter *adapter, int index,
    878				  enum igc_mac_filter_type type,
    879				  const u8 *addr, int queue)
    880{
    881	struct net_device *dev = adapter->netdev;
    882	struct igc_hw *hw = &adapter->hw;
    883	u32 ral, rah;
    884
    885	if (WARN_ON(index >= hw->mac.rar_entry_count))
    886		return;
    887
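        	/* RAL holds the first four bytes of the MAC address, RAH the last
        	 * two bytes plus the address-select, queue-select and valid bits
        	 * set up below.
        	 */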
    888	ral = le32_to_cpup((__le32 *)(addr));
    889	rah = le16_to_cpup((__le16 *)(addr + 4));
    890
    891	if (type == IGC_MAC_FILTER_TYPE_SRC) {
    892		rah &= ~IGC_RAH_ASEL_MASK;
    893		rah |= IGC_RAH_ASEL_SRC_ADDR;
    894	}
    895
    896	if (queue >= 0) {
    897		rah &= ~IGC_RAH_QSEL_MASK;
    898		rah |= (queue << IGC_RAH_QSEL_SHIFT);
    899		rah |= IGC_RAH_QSEL_ENABLE;
    900	}
    901
    902	rah |= IGC_RAH_AV;
    903
    904	wr32(IGC_RAL(index), ral);
    905	wr32(IGC_RAH(index), rah);
    906
    907	netdev_dbg(dev, "MAC address filter set in HW: index %d", index);
    908}
    909
    910/**
    911 * igc_clear_mac_filter_hw() - Clear MAC address filter in hardware
    912 * @adapter: Pointer to adapter where the filter should be cleared
    913 * @index: Filter index
    914 */
    915static void igc_clear_mac_filter_hw(struct igc_adapter *adapter, int index)
    916{
    917	struct net_device *dev = adapter->netdev;
    918	struct igc_hw *hw = &adapter->hw;
    919
    920	if (WARN_ON(index >= hw->mac.rar_entry_count))
    921		return;
    922
    923	wr32(IGC_RAL(index), 0);
    924	wr32(IGC_RAH(index), 0);
    925
    926	netdev_dbg(dev, "MAC address filter cleared in HW: index %d", index);
    927}
    928
    929/* Set default MAC address for the PF in the first RAR entry */
    930static void igc_set_default_mac_filter(struct igc_adapter *adapter)
    931{
    932	struct net_device *dev = adapter->netdev;
    933	u8 *addr = adapter->hw.mac.addr;
    934
    935	netdev_dbg(dev, "Set default MAC address filter: address %pM", addr);
    936
    937	igc_set_mac_filter_hw(adapter, 0, IGC_MAC_FILTER_TYPE_DST, addr, -1);
    938}
    939
    940/**
    941 * igc_set_mac - Change the Ethernet Address of the NIC
    942 * @netdev: network interface device structure
    943 * @p: pointer to an address structure
    944 *
    945 * Returns 0 on success, negative on failure
    946 */
    947static int igc_set_mac(struct net_device *netdev, void *p)
    948{
    949	struct igc_adapter *adapter = netdev_priv(netdev);
    950	struct igc_hw *hw = &adapter->hw;
    951	struct sockaddr *addr = p;
    952
    953	if (!is_valid_ether_addr(addr->sa_data))
    954		return -EADDRNOTAVAIL;
    955
    956	eth_hw_addr_set(netdev, addr->sa_data);
    957	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
    958
    959	/* set the correct pool for the new PF MAC address in entry 0 */
    960	igc_set_default_mac_filter(adapter);
    961
    962	return 0;
    963}
    964
    965/**
    966 *  igc_write_mc_addr_list - write multicast addresses to MTA
    967 *  @netdev: network interface device structure
    968 *
    969 *  Writes multicast address list to the MTA hash table.
    970 *  Returns: -ENOMEM on failure
    971 *           0 on no addresses written
    972 *           X on writing X addresses to MTA
    973 **/
    974static int igc_write_mc_addr_list(struct net_device *netdev)
    975{
    976	struct igc_adapter *adapter = netdev_priv(netdev);
    977	struct igc_hw *hw = &adapter->hw;
    978	struct netdev_hw_addr *ha;
    979	u8  *mta_list;
    980	int i;
    981
    982	if (netdev_mc_empty(netdev)) {
    983		/* nothing to program, so clear mc list */
    984		igc_update_mc_addr_list(hw, NULL, 0);
    985		return 0;
    986	}
    987
    988	mta_list = kcalloc(netdev_mc_count(netdev), 6, GFP_ATOMIC);
    989	if (!mta_list)
    990		return -ENOMEM;
    991
    992	/* The shared function expects a packed array of only addresses. */
    993	i = 0;
    994	netdev_for_each_mc_addr(ha, netdev)
    995		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
    996
    997	igc_update_mc_addr_list(hw, mta_list, i);
    998	kfree(mta_list);
    999
   1000	return netdev_mc_count(netdev);
   1001}
   1002
   1003static __le32 igc_tx_launchtime(struct igc_adapter *adapter, ktime_t txtime)
   1004{
   1005	ktime_t cycle_time = adapter->cycle_time;
   1006	ktime_t base_time = adapter->base_time;
   1007	u32 launchtime;
   1008
   1009	/* FIXME: when using ETF together with taprio, we may have a
    1010	 * case where 'delta' is larger than the cycle_time. This may
    1011	 * cause problems if we don't read the current value of
    1012	 * IGC_BASET, as the value written into the launchtime
   1013	 * descriptor field may be misinterpreted.
   1014	 */
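        	/* launchtime is the offset of txtime within the current cycle,
        	 * i.e. (txtime - base_time) modulo cycle_time.
        	 */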
   1015	div_s64_rem(ktime_sub_ns(txtime, base_time), cycle_time, &launchtime);
   1016
   1017	return cpu_to_le32(launchtime);
   1018}
   1019
   1020static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
   1021			    struct igc_tx_buffer *first,
   1022			    u32 vlan_macip_lens, u32 type_tucmd,
   1023			    u32 mss_l4len_idx)
   1024{
   1025	struct igc_adv_tx_context_desc *context_desc;
   1026	u16 i = tx_ring->next_to_use;
   1027
   1028	context_desc = IGC_TX_CTXTDESC(tx_ring, i);
   1029
   1030	i++;
   1031	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
   1032
   1033	/* set bits to identify this as an advanced context descriptor */
   1034	type_tucmd |= IGC_TXD_CMD_DEXT | IGC_ADVTXD_DTYP_CTXT;
   1035
   1036	/* For i225, context index must be unique per ring. */
   1037	if (test_bit(IGC_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
   1038		mss_l4len_idx |= tx_ring->reg_idx << 4;
   1039
   1040	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
   1041	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
   1042	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
   1043
   1044	/* We assume there is always a valid Tx time available. Invalid times
   1045	 * should have been handled by the upper layers.
   1046	 */
   1047	if (tx_ring->launchtime_enable) {
   1048		struct igc_adapter *adapter = netdev_priv(tx_ring->netdev);
   1049		ktime_t txtime = first->skb->tstamp;
   1050
   1051		skb_txtime_consumed(first->skb);
   1052		context_desc->launch_time = igc_tx_launchtime(adapter,
   1053							      txtime);
   1054	} else {
   1055		context_desc->launch_time = 0;
   1056	}
   1057}
   1058
   1059static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first)
   1060{
   1061	struct sk_buff *skb = first->skb;
   1062	u32 vlan_macip_lens = 0;
   1063	u32 type_tucmd = 0;
   1064
   1065	if (skb->ip_summed != CHECKSUM_PARTIAL) {
   1066csum_failed:
   1067		if (!(first->tx_flags & IGC_TX_FLAGS_VLAN) &&
   1068		    !tx_ring->launchtime_enable)
   1069			return;
   1070		goto no_csum;
   1071	}
   1072
   1073	switch (skb->csum_offset) {
   1074	case offsetof(struct tcphdr, check):
   1075		type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP;
   1076		fallthrough;
   1077	case offsetof(struct udphdr, check):
   1078		break;
   1079	case offsetof(struct sctphdr, checksum):
   1080		/* validate that this is actually an SCTP request */
   1081		if (skb_csum_is_sctp(skb)) {
   1082			type_tucmd = IGC_ADVTXD_TUCMD_L4T_SCTP;
   1083			break;
   1084		}
   1085		fallthrough;
   1086	default:
   1087		skb_checksum_help(skb);
   1088		goto csum_failed;
   1089	}
   1090
   1091	/* update TX checksum flag */
   1092	first->tx_flags |= IGC_TX_FLAGS_CSUM;
   1093	vlan_macip_lens = skb_checksum_start_offset(skb) -
   1094			  skb_network_offset(skb);
   1095no_csum:
   1096	vlan_macip_lens |= skb_network_offset(skb) << IGC_ADVTXD_MACLEN_SHIFT;
   1097	vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;
   1098
   1099	igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, type_tucmd, 0);
   1100}
   1101
   1102static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
   1103{
   1104	struct net_device *netdev = tx_ring->netdev;
   1105
   1106	netif_stop_subqueue(netdev, tx_ring->queue_index);
   1107
    1108	/* memory barrier: order the stop above against the re-check below */
   1109	smp_mb();
   1110
    1111	/* We need to check again in case another CPU has just
   1112	 * made room available.
   1113	 */
   1114	if (igc_desc_unused(tx_ring) < size)
   1115		return -EBUSY;
   1116
   1117	/* A reprieve! */
   1118	netif_wake_subqueue(netdev, tx_ring->queue_index);
   1119
   1120	u64_stats_update_begin(&tx_ring->tx_syncp2);
   1121	tx_ring->tx_stats.restart_queue2++;
   1122	u64_stats_update_end(&tx_ring->tx_syncp2);
   1123
   1124	return 0;
   1125}
   1126
   1127static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
   1128{
   1129	if (igc_desc_unused(tx_ring) >= size)
   1130		return 0;
   1131	return __igc_maybe_stop_tx(tx_ring, size);
   1132}
   1133
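        /* IGC_SET_FLAG translates a single flag bit in _input into the
         * corresponding bit in _result without branching: the masked bit is
         * multiplied up by (_result / _flag) when _result is the larger bit,
         * or divided down by (_flag / _result) otherwise.
         */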
   1134#define IGC_SET_FLAG(_input, _flag, _result) \
   1135	(((_flag) <= (_result)) ?				\
   1136	 ((u32)((_input) & (_flag)) * ((_result) / (_flag))) :	\
   1137	 ((u32)((_input) & (_flag)) / ((_flag) / (_result))))
   1138
   1139static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags)
   1140{
   1141	/* set type for advanced descriptor with frame checksum insertion */
   1142	u32 cmd_type = IGC_ADVTXD_DTYP_DATA |
   1143		       IGC_ADVTXD_DCMD_DEXT |
   1144		       IGC_ADVTXD_DCMD_IFCS;
   1145
   1146	/* set HW vlan bit if vlan is present */
   1147	cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_VLAN,
   1148				 IGC_ADVTXD_DCMD_VLE);
   1149
   1150	/* set segmentation bits for TSO */
   1151	cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSO,
   1152				 (IGC_ADVTXD_DCMD_TSE));
   1153
   1154	/* set timestamp bit if present */
   1155	cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP,
   1156				 (IGC_ADVTXD_MAC_TSTAMP));
   1157
    1158	/* clear IFCS (insert frame checksum) when the skb requests no FCS */
   1159	cmd_type ^= IGC_SET_FLAG(skb->no_fcs, 1, IGC_ADVTXD_DCMD_IFCS);
   1160
   1161	return cmd_type;
   1162}
   1163
   1164static void igc_tx_olinfo_status(struct igc_ring *tx_ring,
   1165				 union igc_adv_tx_desc *tx_desc,
   1166				 u32 tx_flags, unsigned int paylen)
   1167{
   1168	u32 olinfo_status = paylen << IGC_ADVTXD_PAYLEN_SHIFT;
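        	/* The multiply/divide pattern below scales the CSUM and IPV4 flag
        	 * bits into their POPTS positions without branching, as in
        	 * IGC_SET_FLAG above.
        	 */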
   1169
   1170	/* insert L4 checksum */
   1171	olinfo_status |= (tx_flags & IGC_TX_FLAGS_CSUM) *
   1172			  ((IGC_TXD_POPTS_TXSM << 8) /
   1173			  IGC_TX_FLAGS_CSUM);
   1174
   1175	/* insert IPv4 checksum */
   1176	olinfo_status |= (tx_flags & IGC_TX_FLAGS_IPV4) *
   1177			  (((IGC_TXD_POPTS_IXSM << 8)) /
   1178			  IGC_TX_FLAGS_IPV4);
   1179
   1180	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
   1181}
   1182
   1183static int igc_tx_map(struct igc_ring *tx_ring,
   1184		      struct igc_tx_buffer *first,
   1185		      const u8 hdr_len)
   1186{
   1187	struct sk_buff *skb = first->skb;
   1188	struct igc_tx_buffer *tx_buffer;
   1189	union igc_adv_tx_desc *tx_desc;
   1190	u32 tx_flags = first->tx_flags;
   1191	skb_frag_t *frag;
   1192	u16 i = tx_ring->next_to_use;
   1193	unsigned int data_len, size;
   1194	dma_addr_t dma;
   1195	u32 cmd_type;
   1196
   1197	cmd_type = igc_tx_cmd_type(skb, tx_flags);
   1198	tx_desc = IGC_TX_DESC(tx_ring, i);
   1199
   1200	igc_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len);
   1201
   1202	size = skb_headlen(skb);
   1203	data_len = skb->data_len;
   1204
   1205	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
   1206
   1207	tx_buffer = first;
   1208
   1209	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
   1210		if (dma_mapping_error(tx_ring->dev, dma))
   1211			goto dma_error;
   1212
   1213		/* record length, and DMA address */
   1214		dma_unmap_len_set(tx_buffer, len, size);
   1215		dma_unmap_addr_set(tx_buffer, dma, dma);
   1216
   1217		tx_desc->read.buffer_addr = cpu_to_le64(dma);
   1218
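        		/* A single data descriptor can address at most
        		 * IGC_MAX_DATA_PER_TXD bytes, so oversized buffers are
        		 * split across several descriptors here.
        		 */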
   1219		while (unlikely(size > IGC_MAX_DATA_PER_TXD)) {
   1220			tx_desc->read.cmd_type_len =
   1221				cpu_to_le32(cmd_type ^ IGC_MAX_DATA_PER_TXD);
   1222
   1223			i++;
   1224			tx_desc++;
   1225			if (i == tx_ring->count) {
   1226				tx_desc = IGC_TX_DESC(tx_ring, 0);
   1227				i = 0;
   1228			}
   1229			tx_desc->read.olinfo_status = 0;
   1230
   1231			dma += IGC_MAX_DATA_PER_TXD;
   1232			size -= IGC_MAX_DATA_PER_TXD;
   1233
   1234			tx_desc->read.buffer_addr = cpu_to_le64(dma);
   1235		}
   1236
   1237		if (likely(!data_len))
   1238			break;
   1239
   1240		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size);
   1241
   1242		i++;
   1243		tx_desc++;
   1244		if (i == tx_ring->count) {
   1245			tx_desc = IGC_TX_DESC(tx_ring, 0);
   1246			i = 0;
   1247		}
   1248		tx_desc->read.olinfo_status = 0;
   1249
   1250		size = skb_frag_size(frag);
   1251		data_len -= size;
   1252
   1253		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
   1254				       size, DMA_TO_DEVICE);
   1255
   1256		tx_buffer = &tx_ring->tx_buffer_info[i];
   1257	}
   1258
   1259	/* write last descriptor with RS and EOP bits */
   1260	cmd_type |= size | IGC_TXD_DCMD;
   1261	tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
   1262
   1263	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
   1264
   1265	/* set the timestamp */
   1266	first->time_stamp = jiffies;
   1267
   1268	skb_tx_timestamp(skb);
   1269
   1270	/* Force memory writes to complete before letting h/w know there
   1271	 * are new descriptors to fetch.  (Only applicable for weak-ordered
   1272	 * memory model archs, such as IA-64).
   1273	 *
   1274	 * We also need this memory barrier to make certain all of the
   1275	 * status bits have been updated before next_to_watch is written.
   1276	 */
   1277	wmb();
   1278
   1279	/* set next_to_watch value indicating a packet is present */
   1280	first->next_to_watch = tx_desc;
   1281
   1282	i++;
   1283	if (i == tx_ring->count)
   1284		i = 0;
   1285
   1286	tx_ring->next_to_use = i;
   1287
   1288	/* Make sure there is space in the ring for the next send. */
   1289	igc_maybe_stop_tx(tx_ring, DESC_NEEDED);
   1290
   1291	if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
   1292		writel(i, tx_ring->tail);
   1293	}
   1294
   1295	return 0;
   1296dma_error:
   1297	netdev_err(tx_ring->netdev, "TX DMA map failed\n");
   1298	tx_buffer = &tx_ring->tx_buffer_info[i];
   1299
   1300	/* clear dma mappings for failed tx_buffer_info map */
   1301	while (tx_buffer != first) {
   1302		if (dma_unmap_len(tx_buffer, len))
   1303			igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
   1304
   1305		if (i-- == 0)
   1306			i += tx_ring->count;
   1307		tx_buffer = &tx_ring->tx_buffer_info[i];
   1308	}
   1309
   1310	if (dma_unmap_len(tx_buffer, len))
   1311		igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
   1312
   1313	dev_kfree_skb_any(tx_buffer->skb);
   1314	tx_buffer->skb = NULL;
   1315
   1316	tx_ring->next_to_use = i;
   1317
   1318	return -1;
   1319}
   1320
   1321static int igc_tso(struct igc_ring *tx_ring,
   1322		   struct igc_tx_buffer *first,
   1323		   u8 *hdr_len)
   1324{
   1325	u32 vlan_macip_lens, type_tucmd, mss_l4len_idx;
   1326	struct sk_buff *skb = first->skb;
   1327	union {
   1328		struct iphdr *v4;
   1329		struct ipv6hdr *v6;
   1330		unsigned char *hdr;
   1331	} ip;
   1332	union {
   1333		struct tcphdr *tcp;
   1334		struct udphdr *udp;
   1335		unsigned char *hdr;
   1336	} l4;
   1337	u32 paylen, l4_offset;
   1338	int err;
   1339
   1340	if (skb->ip_summed != CHECKSUM_PARTIAL)
   1341		return 0;
   1342
   1343	if (!skb_is_gso(skb))
   1344		return 0;
   1345
   1346	err = skb_cow_head(skb, 0);
   1347	if (err < 0)
   1348		return err;
   1349
   1350	ip.hdr = skb_network_header(skb);
   1351	l4.hdr = skb_checksum_start(skb);
   1352
   1353	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
   1354	type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP;
   1355
   1356	/* initialize outer IP header fields */
   1357	if (ip.v4->version == 4) {
   1358		unsigned char *csum_start = skb_checksum_start(skb);
   1359		unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4);
   1360
   1361		/* IP header will have to cancel out any data that
   1362		 * is not a part of the outer IP header
   1363		 */
   1364		ip.v4->check = csum_fold(csum_partial(trans_start,
   1365						      csum_start - trans_start,
   1366						      0));
   1367		type_tucmd |= IGC_ADVTXD_TUCMD_IPV4;
   1368
   1369		ip.v4->tot_len = 0;
   1370		first->tx_flags |= IGC_TX_FLAGS_TSO |
   1371				   IGC_TX_FLAGS_CSUM |
   1372				   IGC_TX_FLAGS_IPV4;
   1373	} else {
   1374		ip.v6->payload_len = 0;
   1375		first->tx_flags |= IGC_TX_FLAGS_TSO |
   1376				   IGC_TX_FLAGS_CSUM;
   1377	}
   1378
   1379	/* determine offset of inner transport header */
   1380	l4_offset = l4.hdr - skb->data;
   1381
   1382	/* remove payload length from inner checksum */
   1383	paylen = skb->len - l4_offset;
   1384	if (type_tucmd & IGC_ADVTXD_TUCMD_L4T_TCP) {
   1385		/* compute length of segmentation header */
   1386		*hdr_len = (l4.tcp->doff * 4) + l4_offset;
   1387		csum_replace_by_diff(&l4.tcp->check,
   1388				     (__force __wsum)htonl(paylen));
   1389	} else {
   1390		/* compute length of segmentation header */
   1391		*hdr_len = sizeof(*l4.udp) + l4_offset;
   1392		csum_replace_by_diff(&l4.udp->check,
   1393				     (__force __wsum)htonl(paylen));
   1394	}
   1395
   1396	/* update gso size and bytecount with header size */
   1397	first->gso_segs = skb_shinfo(skb)->gso_segs;
   1398	first->bytecount += (first->gso_segs - 1) * *hdr_len;
   1399
   1400	/* MSS L4LEN IDX */
   1401	mss_l4len_idx = (*hdr_len - l4_offset) << IGC_ADVTXD_L4LEN_SHIFT;
   1402	mss_l4len_idx |= skb_shinfo(skb)->gso_size << IGC_ADVTXD_MSS_SHIFT;
   1403
   1404	/* VLAN MACLEN IPLEN */
   1405	vlan_macip_lens = l4.hdr - ip.hdr;
   1406	vlan_macip_lens |= (ip.hdr - skb->data) << IGC_ADVTXD_MACLEN_SHIFT;
   1407	vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;
   1408
   1409	igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens,
   1410			type_tucmd, mss_l4len_idx);
   1411
   1412	return 1;
   1413}
   1414
   1415static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
   1416				       struct igc_ring *tx_ring)
   1417{
   1418	u16 count = TXD_USE_COUNT(skb_headlen(skb));
   1419	__be16 protocol = vlan_get_protocol(skb);
   1420	struct igc_tx_buffer *first;
   1421	u32 tx_flags = 0;
   1422	unsigned short f;
   1423	u8 hdr_len = 0;
   1424	int tso = 0;
   1425
   1426	/* need: 1 descriptor per page * PAGE_SIZE/IGC_MAX_DATA_PER_TXD,
   1427	 *	+ 1 desc for skb_headlen/IGC_MAX_DATA_PER_TXD,
   1428	 *	+ 2 desc gap to keep tail from touching head,
   1429	 *	+ 1 desc for context descriptor,
   1430	 * otherwise try next time
   1431	 */
   1432	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
   1433		count += TXD_USE_COUNT(skb_frag_size(
   1434						&skb_shinfo(skb)->frags[f]));
   1435
   1436	if (igc_maybe_stop_tx(tx_ring, count + 3)) {
   1437		/* this is a hard error */
   1438		return NETDEV_TX_BUSY;
   1439	}
   1440
   1441	/* record the location of the first descriptor for this packet */
   1442	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
   1443	first->type = IGC_TX_BUFFER_TYPE_SKB;
   1444	first->skb = skb;
   1445	first->bytecount = skb->len;
   1446	first->gso_segs = 1;
   1447
   1448	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
   1449		struct igc_adapter *adapter = netdev_priv(tx_ring->netdev);
   1450
   1451		/* FIXME: add support for retrieving timestamps from
   1452		 * the other timer registers before skipping the
   1453		 * timestamping request.
   1454		 */
   1455		if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON &&
   1456		    !test_and_set_bit_lock(__IGC_PTP_TX_IN_PROGRESS,
   1457					   &adapter->state)) {
   1458			skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
   1459			tx_flags |= IGC_TX_FLAGS_TSTAMP;
   1460
   1461			adapter->ptp_tx_skb = skb_get(skb);
   1462			adapter->ptp_tx_start = jiffies;
   1463		} else {
   1464			adapter->tx_hwtstamp_skipped++;
   1465		}
   1466	}
   1467
   1468	if (skb_vlan_tag_present(skb)) {
   1469		tx_flags |= IGC_TX_FLAGS_VLAN;
   1470		tx_flags |= (skb_vlan_tag_get(skb) << IGC_TX_FLAGS_VLAN_SHIFT);
   1471	}
   1472
   1473	/* record initial flags and protocol */
   1474	first->tx_flags = tx_flags;
   1475	first->protocol = protocol;
   1476
   1477	tso = igc_tso(tx_ring, first, &hdr_len);
   1478	if (tso < 0)
   1479		goto out_drop;
   1480	else if (!tso)
   1481		igc_tx_csum(tx_ring, first);
   1482
   1483	igc_tx_map(tx_ring, first, hdr_len);
   1484
   1485	return NETDEV_TX_OK;
   1486
   1487out_drop:
   1488	dev_kfree_skb_any(first->skb);
   1489	first->skb = NULL;
   1490
   1491	return NETDEV_TX_OK;
   1492}
   1493
   1494static inline struct igc_ring *igc_tx_queue_mapping(struct igc_adapter *adapter,
   1495						    struct sk_buff *skb)
   1496{
   1497	unsigned int r_idx = skb->queue_mapping;
   1498
   1499	if (r_idx >= adapter->num_tx_queues)
   1500		r_idx = r_idx % adapter->num_tx_queues;
   1501
   1502	return adapter->tx_ring[r_idx];
   1503}
   1504
   1505static netdev_tx_t igc_xmit_frame(struct sk_buff *skb,
   1506				  struct net_device *netdev)
   1507{
   1508	struct igc_adapter *adapter = netdev_priv(netdev);
   1509
   1510	/* The minimum packet size with TCTL.PSP set is 17 so pad the skb
   1511	 * in order to meet this minimum size requirement.
   1512	 */
   1513	if (skb->len < 17) {
   1514		if (skb_padto(skb, 17))
   1515			return NETDEV_TX_OK;
   1516		skb->len = 17;
   1517	}
   1518
   1519	return igc_xmit_frame_ring(skb, igc_tx_queue_mapping(adapter, skb));
   1520}
   1521
   1522static void igc_rx_checksum(struct igc_ring *ring,
   1523			    union igc_adv_rx_desc *rx_desc,
   1524			    struct sk_buff *skb)
   1525{
   1526	skb_checksum_none_assert(skb);
   1527
   1528	/* Ignore Checksum bit is set */
   1529	if (igc_test_staterr(rx_desc, IGC_RXD_STAT_IXSM))
   1530		return;
   1531
   1532	/* Rx checksum disabled via ethtool */
   1533	if (!(ring->netdev->features & NETIF_F_RXCSUM))
   1534		return;
   1535
   1536	/* TCP/UDP checksum error bit is set */
   1537	if (igc_test_staterr(rx_desc,
   1538			     IGC_RXDEXT_STATERR_L4E |
   1539			     IGC_RXDEXT_STATERR_IPE)) {
   1540		/* work around errata with sctp packets where the TCPE aka
   1541		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
   1542		 * packets (aka let the stack check the crc32c)
   1543		 */
   1544		if (!(skb->len == 60 &&
   1545		      test_bit(IGC_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
   1546			u64_stats_update_begin(&ring->rx_syncp);
   1547			ring->rx_stats.csum_err++;
   1548			u64_stats_update_end(&ring->rx_syncp);
   1549		}
   1550		/* let the stack verify checksum errors */
   1551		return;
   1552	}
   1553	/* It must be a TCP or UDP packet with a valid checksum */
   1554	if (igc_test_staterr(rx_desc, IGC_RXD_STAT_TCPCS |
   1555				      IGC_RXD_STAT_UDPCS))
   1556		skb->ip_summed = CHECKSUM_UNNECESSARY;
   1557
   1558	netdev_dbg(ring->netdev, "cksum success: bits %08X\n",
   1559		   le32_to_cpu(rx_desc->wb.upper.status_error));
   1560}
   1561
   1562static inline void igc_rx_hash(struct igc_ring *ring,
   1563			       union igc_adv_rx_desc *rx_desc,
   1564			       struct sk_buff *skb)
   1565{
   1566	if (ring->netdev->features & NETIF_F_RXHASH)
   1567		skb_set_hash(skb,
   1568			     le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
   1569			     PKT_HASH_TYPE_L3);
   1570}
   1571
   1572static void igc_rx_vlan(struct igc_ring *rx_ring,
   1573			union igc_adv_rx_desc *rx_desc,
   1574			struct sk_buff *skb)
   1575{
   1576	struct net_device *dev = rx_ring->netdev;
   1577	u16 vid;
   1578
   1579	if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
   1580	    igc_test_staterr(rx_desc, IGC_RXD_STAT_VP)) {
   1581		if (igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_LB) &&
   1582		    test_bit(IGC_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags))
   1583			vid = be16_to_cpu((__force __be16)rx_desc->wb.upper.vlan);
   1584		else
   1585			vid = le16_to_cpu(rx_desc->wb.upper.vlan);
   1586
   1587		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
   1588	}
   1589}
   1590
   1591/**
   1592 * igc_process_skb_fields - Populate skb header fields from Rx descriptor
   1593 * @rx_ring: rx descriptor ring packet is being transacted on
   1594 * @rx_desc: pointer to the EOP Rx descriptor
   1595 * @skb: pointer to current skb being populated
   1596 *
   1597 * This function checks the ring, descriptor, and packet information in order
   1598 * to populate the hash, checksum, VLAN, protocol, and other fields within the
   1599 * skb.
   1600 */
   1601static void igc_process_skb_fields(struct igc_ring *rx_ring,
   1602				   union igc_adv_rx_desc *rx_desc,
   1603				   struct sk_buff *skb)
   1604{
   1605	igc_rx_hash(rx_ring, rx_desc, skb);
   1606
   1607	igc_rx_checksum(rx_ring, rx_desc, skb);
   1608
   1609	igc_rx_vlan(rx_ring, rx_desc, skb);
   1610
   1611	skb_record_rx_queue(skb, rx_ring->queue_index);
   1612
   1613	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
   1614}
   1615
   1616static void igc_vlan_mode(struct net_device *netdev, netdev_features_t features)
   1617{
   1618	bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX);
   1619	struct igc_adapter *adapter = netdev_priv(netdev);
   1620	struct igc_hw *hw = &adapter->hw;
   1621	u32 ctrl;
   1622
   1623	ctrl = rd32(IGC_CTRL);
   1624
   1625	if (enable) {
   1626		/* enable VLAN tag insert/strip */
   1627		ctrl |= IGC_CTRL_VME;
   1628	} else {
   1629		/* disable VLAN tag insert/strip */
   1630		ctrl &= ~IGC_CTRL_VME;
   1631	}
   1632	wr32(IGC_CTRL, ctrl);
   1633}
   1634
   1635static void igc_restore_vlan(struct igc_adapter *adapter)
   1636{
   1637	igc_vlan_mode(adapter->netdev, adapter->netdev->features);
   1638}
   1639
   1640static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring,
   1641					       const unsigned int size,
   1642					       int *rx_buffer_pgcnt)
   1643{
   1644	struct igc_rx_buffer *rx_buffer;
   1645
   1646	rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
   1647	*rx_buffer_pgcnt =
   1648#if (PAGE_SIZE < 8192)
   1649		page_count(rx_buffer->page);
   1650#else
   1651		0;
   1652#endif
   1653	prefetchw(rx_buffer->page);
   1654
   1655	/* we are reusing so sync this buffer for CPU use */
   1656	dma_sync_single_range_for_cpu(rx_ring->dev,
   1657				      rx_buffer->dma,
   1658				      rx_buffer->page_offset,
   1659				      size,
   1660				      DMA_FROM_DEVICE);
   1661
   1662	rx_buffer->pagecnt_bias--;
   1663
   1664	return rx_buffer;
   1665}
   1666
   1667static void igc_rx_buffer_flip(struct igc_rx_buffer *buffer,
   1668			       unsigned int truesize)
   1669{
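        	/* With pages smaller than 8 KB each buffer uses half a page, so
        	 * the offset toggles between the two halves for reuse; on larger
        	 * pages it simply advances by truesize.
        	 */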
   1670#if (PAGE_SIZE < 8192)
   1671	buffer->page_offset ^= truesize;
   1672#else
   1673	buffer->page_offset += truesize;
   1674#endif
   1675}
   1676
   1677static unsigned int igc_get_rx_frame_truesize(struct igc_ring *ring,
   1678					      unsigned int size)
   1679{
   1680	unsigned int truesize;
   1681
   1682#if (PAGE_SIZE < 8192)
   1683	truesize = igc_rx_pg_size(ring) / 2;
   1684#else
   1685	truesize = ring_uses_build_skb(ring) ?
   1686		   SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
   1687		   SKB_DATA_ALIGN(IGC_SKB_PAD + size) :
   1688		   SKB_DATA_ALIGN(size);
   1689#endif
   1690	return truesize;
   1691}
   1692
   1693/**
   1694 * igc_add_rx_frag - Add contents of Rx buffer to sk_buff
   1695 * @rx_ring: rx descriptor ring to transact packets on
   1696 * @rx_buffer: buffer containing page to add
   1697 * @skb: sk_buff to place the data into
   1698 * @size: size of buffer to be added
   1699 *
   1700 * This function will add the data contained in rx_buffer->page to the skb.
   1701 */
   1702static void igc_add_rx_frag(struct igc_ring *rx_ring,
   1703			    struct igc_rx_buffer *rx_buffer,
   1704			    struct sk_buff *skb,
   1705			    unsigned int size)
   1706{
   1707	unsigned int truesize;
   1708
   1709#if (PAGE_SIZE < 8192)
   1710	truesize = igc_rx_pg_size(rx_ring) / 2;
   1711#else
   1712	truesize = ring_uses_build_skb(rx_ring) ?
   1713		   SKB_DATA_ALIGN(IGC_SKB_PAD + size) :
   1714		   SKB_DATA_ALIGN(size);
   1715#endif
   1716	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
   1717			rx_buffer->page_offset, size, truesize);
   1718
   1719	igc_rx_buffer_flip(rx_buffer, truesize);
   1720}
   1721
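        /* Build an skb directly around the existing page buffer without
         * copying the payload; used when the ring operates in build_skb mode.
         * The buffer is flipped afterwards so the rest of the page can be
         * reused.
         */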
   1722static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring,
   1723				     struct igc_rx_buffer *rx_buffer,
   1724				     struct xdp_buff *xdp)
   1725{
   1726	unsigned int size = xdp->data_end - xdp->data;
   1727	unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size);
   1728	unsigned int metasize = xdp->data - xdp->data_meta;
   1729	struct sk_buff *skb;
   1730
   1731	/* prefetch first cache line of first page */
   1732	net_prefetch(xdp->data_meta);
   1733
   1734	/* build an skb around the page buffer */
   1735	skb = napi_build_skb(xdp->data_hard_start, truesize);
   1736	if (unlikely(!skb))
   1737		return NULL;
   1738
   1739	/* update pointers within the skb to store the data */
   1740	skb_reserve(skb, xdp->data - xdp->data_hard_start);
   1741	__skb_put(skb, size);
   1742	if (metasize)
   1743		skb_metadata_set(skb, metasize);
   1744
   1745	igc_rx_buffer_flip(rx_buffer, truesize);
   1746	return skb;
   1747}
   1748
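        /* Allocate a new skb and copy the packet headers (plus any XDP
         * metadata) into its linear area; the remaining payload, if any, is
         * attached as a page fragment. The hardware timestamp, when present,
         * is stored in the skb as well.
         */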
   1749static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
   1750					 struct igc_rx_buffer *rx_buffer,
   1751					 struct xdp_buff *xdp,
   1752					 ktime_t timestamp)
   1753{
   1754	unsigned int metasize = xdp->data - xdp->data_meta;
   1755	unsigned int size = xdp->data_end - xdp->data;
   1756	unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size);
   1757	void *va = xdp->data;
   1758	unsigned int headlen;
   1759	struct sk_buff *skb;
   1760
   1761	/* prefetch first cache line of first page */
   1762	net_prefetch(xdp->data_meta);
   1763
   1764	/* allocate a skb to store the frags */
   1765	skb = napi_alloc_skb(&rx_ring->q_vector->napi,
   1766			     IGC_RX_HDR_LEN + metasize);
   1767	if (unlikely(!skb))
   1768		return NULL;
   1769
   1770	if (timestamp)
   1771		skb_hwtstamps(skb)->hwtstamp = timestamp;
   1772
    1773	/* Determine how much of the packet header to pull into the linear area */
   1774	headlen = size;
   1775	if (headlen > IGC_RX_HDR_LEN)
   1776		headlen = eth_get_headlen(skb->dev, va, IGC_RX_HDR_LEN);
   1777
   1778	/* align pull length to size of long to optimize memcpy performance */
   1779	memcpy(__skb_put(skb, headlen + metasize), xdp->data_meta,
   1780	       ALIGN(headlen + metasize, sizeof(long)));
   1781
   1782	if (metasize) {
   1783		skb_metadata_set(skb, metasize);
   1784		__skb_pull(skb, metasize);
   1785	}
   1786
   1787	/* update all of the pointers */
   1788	size -= headlen;
   1789	if (size) {
   1790		skb_add_rx_frag(skb, 0, rx_buffer->page,
   1791				(va + headlen) - page_address(rx_buffer->page),
   1792				size, truesize);
   1793		igc_rx_buffer_flip(rx_buffer, truesize);
   1794	} else {
   1795		rx_buffer->pagecnt_bias++;
   1796	}
   1797
   1798	return skb;
   1799}
   1800
   1801/**
   1802 * igc_reuse_rx_page - page flip buffer and store it back on the ring
   1803 * @rx_ring: rx descriptor ring to store buffers on
   1804 * @old_buff: donor buffer to have page reused
   1805 *
   1806 * Synchronizes page for reuse by the adapter
   1807 */
   1808static void igc_reuse_rx_page(struct igc_ring *rx_ring,
   1809			      struct igc_rx_buffer *old_buff)
   1810{
   1811	u16 nta = rx_ring->next_to_alloc;
   1812	struct igc_rx_buffer *new_buff;
   1813
   1814	new_buff = &rx_ring->rx_buffer_info[nta];
   1815
   1816	/* update, and store next to alloc */
   1817	nta++;
   1818	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
   1819
   1820	/* Transfer page from old buffer to new buffer.
   1821	 * Move each member individually to avoid possible store
   1822	 * forwarding stalls.
   1823	 */
   1824	new_buff->dma		= old_buff->dma;
   1825	new_buff->page		= old_buff->page;
   1826	new_buff->page_offset	= old_buff->page_offset;
   1827	new_buff->pagecnt_bias	= old_buff->pagecnt_bias;
   1828}
   1829
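        /* Decide whether the page backing this Rx buffer can be recycled: it
         * must not be a remote or pfmemalloc page, and either the driver must
         * be the only owner (PAGE_SIZE < 8192) or there must still be room
         * left in the page (larger page sizes).
         */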
   1830static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer,
   1831				  int rx_buffer_pgcnt)
   1832{
   1833	unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
   1834	struct page *page = rx_buffer->page;
   1835
   1836	/* avoid re-using remote and pfmemalloc pages */
   1837	if (!dev_page_is_reusable(page))
   1838		return false;
   1839
   1840#if (PAGE_SIZE < 8192)
   1841	/* if we are only owner of page we can reuse it */
   1842	if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1))
   1843		return false;
   1844#else
   1845#define IGC_LAST_OFFSET \
   1846	(SKB_WITH_OVERHEAD(PAGE_SIZE) - IGC_RXBUFFER_2048)
   1847
   1848	if (rx_buffer->page_offset > IGC_LAST_OFFSET)
   1849		return false;
   1850#endif
   1851
   1852	/* If we have drained the page fragment pool we need to update
   1853	 * the pagecnt_bias and page count so that we fully restock the
   1854	 * number of references the driver holds.
   1855	 */
   1856	if (unlikely(pagecnt_bias == 1)) {
   1857		page_ref_add(page, USHRT_MAX - 1);
   1858		rx_buffer->pagecnt_bias = USHRT_MAX;
   1859	}
   1860
   1861	return true;
   1862}
   1863
   1864/**
   1865 * igc_is_non_eop - process handling of non-EOP buffers
   1866 * @rx_ring: Rx ring being processed
   1867 * @rx_desc: Rx descriptor for current buffer
   1868 *
    1869 * This function updates next to clean and prefetches the next descriptor.
    1870 * If the buffer just processed carries the EOP (end of packet) bit it
    1871 * returns false; otherwise it returns true, indicating that the frame
    1872 * continues in the following buffer.
   1873 */
   1874static bool igc_is_non_eop(struct igc_ring *rx_ring,
   1875			   union igc_adv_rx_desc *rx_desc)
   1876{
   1877	u32 ntc = rx_ring->next_to_clean + 1;
   1878
   1879	/* fetch, update, and store next to clean */
   1880	ntc = (ntc < rx_ring->count) ? ntc : 0;
   1881	rx_ring->next_to_clean = ntc;
   1882
   1883	prefetch(IGC_RX_DESC(rx_ring, ntc));
   1884
   1885	if (likely(igc_test_staterr(rx_desc, IGC_RXD_STAT_EOP)))
   1886		return false;
   1887
   1888	return true;
   1889}
   1890
   1891/**
   1892 * igc_cleanup_headers - Correct corrupted or empty headers
   1893 * @rx_ring: rx descriptor ring packet is being transacted on
   1894 * @rx_desc: pointer to the EOP Rx descriptor
   1895 * @skb: pointer to current skb being fixed
   1896 *
    1897 * Check the descriptor for receive errors and free the skb when the
    1898 * RXE error bit is set (unless NETIF_F_RXALL is enabled).
    1899 *
    1900 * In addition, if the skb is not at least 60 bytes we need to pad it so
    1901 * that it is large enough to qualify as a valid Ethernet frame.
   1902 *
   1903 * Returns true if an error was encountered and skb was freed.
   1904 */
   1905static bool igc_cleanup_headers(struct igc_ring *rx_ring,
   1906				union igc_adv_rx_desc *rx_desc,
   1907				struct sk_buff *skb)
   1908{
   1909	/* XDP packets use error pointer so abort at this point */
   1910	if (IS_ERR(skb))
   1911		return true;
   1912
   1913	if (unlikely(igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_RXE))) {
   1914		struct net_device *netdev = rx_ring->netdev;
   1915
   1916		if (!(netdev->features & NETIF_F_RXALL)) {
   1917			dev_kfree_skb_any(skb);
   1918			return true;
   1919		}
   1920	}
   1921
   1922	/* if eth_skb_pad returns an error the skb was freed */
   1923	if (eth_skb_pad(skb))
   1924		return true;
   1925
   1926	return false;
   1927}
   1928
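        /* Recycle the buffer back onto the ring when possible; otherwise
         * unmap the page and release the references held on it.
         */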
   1929static void igc_put_rx_buffer(struct igc_ring *rx_ring,
   1930			      struct igc_rx_buffer *rx_buffer,
   1931			      int rx_buffer_pgcnt)
   1932{
   1933	if (igc_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) {
   1934		/* hand second half of page back to the ring */
   1935		igc_reuse_rx_page(rx_ring, rx_buffer);
   1936	} else {
   1937		/* We are not reusing the buffer so unmap it and free
   1938		 * any references we are holding to it
   1939		 */
   1940		dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
   1941				     igc_rx_pg_size(rx_ring), DMA_FROM_DEVICE,
   1942				     IGC_RX_DMA_ATTR);
   1943		__page_frag_cache_drain(rx_buffer->page,
   1944					rx_buffer->pagecnt_bias);
   1945	}
   1946
   1947	/* clear contents of rx_buffer */
   1948	rx_buffer->page = NULL;
   1949}
   1950
   1951static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring)
   1952{
   1953	struct igc_adapter *adapter = rx_ring->q_vector->adapter;
   1954
   1955	if (ring_uses_build_skb(rx_ring))
   1956		return IGC_SKB_PAD;
   1957	if (igc_xdp_is_enabled(adapter))
   1958		return XDP_PACKET_HEADROOM;
   1959
   1960	return 0;
   1961}
   1962
   1963static bool igc_alloc_mapped_page(struct igc_ring *rx_ring,
   1964				  struct igc_rx_buffer *bi)
   1965{
   1966	struct page *page = bi->page;
   1967	dma_addr_t dma;
   1968
   1969	/* since we are recycling buffers we should seldom need to alloc */
   1970	if (likely(page))
   1971		return true;
   1972
   1973	/* alloc new page for storage */
   1974	page = dev_alloc_pages(igc_rx_pg_order(rx_ring));
   1975	if (unlikely(!page)) {
   1976		rx_ring->rx_stats.alloc_failed++;
   1977		return false;
   1978	}
   1979
   1980	/* map page for use */
   1981	dma = dma_map_page_attrs(rx_ring->dev, page, 0,
   1982				 igc_rx_pg_size(rx_ring),
   1983				 DMA_FROM_DEVICE,
   1984				 IGC_RX_DMA_ATTR);
   1985
   1986	/* if mapping failed free memory back to system since
   1987	 * there isn't much point in holding memory we can't use
   1988	 */
   1989	if (dma_mapping_error(rx_ring->dev, dma)) {
   1990		__free_page(page);
   1991
   1992		rx_ring->rx_stats.alloc_failed++;
   1993		return false;
   1994	}
   1995
   1996	bi->dma = dma;
   1997	bi->page = page;
   1998	bi->page_offset = igc_rx_offset(rx_ring);
   1999	page_ref_add(page, USHRT_MAX - 1);
   2000	bi->pagecnt_bias = USHRT_MAX;
   2001
   2002	return true;
   2003}
   2004
   2005/**
    2006 * igc_alloc_rx_buffers - Replace used receive buffers
    2007 * @rx_ring: rx descriptor ring
    2008 * @cleaned_count: number of buffers that were cleaned and need replacing
   2009 */
   2010static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count)
   2011{
   2012	union igc_adv_rx_desc *rx_desc;
   2013	u16 i = rx_ring->next_to_use;
   2014	struct igc_rx_buffer *bi;
   2015	u16 bufsz;
   2016
   2017	/* nothing to do */
   2018	if (!cleaned_count)
   2019		return;
   2020
   2021	rx_desc = IGC_RX_DESC(rx_ring, i);
   2022	bi = &rx_ring->rx_buffer_info[i];
   2023	i -= rx_ring->count;
   2024
   2025	bufsz = igc_rx_bufsz(rx_ring);
   2026
   2027	do {
   2028		if (!igc_alloc_mapped_page(rx_ring, bi))
   2029			break;
   2030
   2031		/* sync the buffer for use by the device */
   2032		dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
   2033						 bi->page_offset, bufsz,
   2034						 DMA_FROM_DEVICE);
   2035
   2036		/* Refresh the desc even if buffer_addrs didn't change
   2037		 * because each write-back erases this info.
   2038		 */
   2039		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
   2040
   2041		rx_desc++;
   2042		bi++;
   2043		i++;
   2044		if (unlikely(!i)) {
   2045			rx_desc = IGC_RX_DESC(rx_ring, 0);
   2046			bi = rx_ring->rx_buffer_info;
   2047			i -= rx_ring->count;
   2048		}
   2049
   2050		/* clear the length for the next_to_use descriptor */
   2051		rx_desc->wb.upper.length = 0;
   2052
   2053		cleaned_count--;
   2054	} while (cleaned_count);
   2055
   2056	i += rx_ring->count;
   2057
   2058	if (rx_ring->next_to_use != i) {
   2059		/* record the next descriptor to use */
   2060		rx_ring->next_to_use = i;
   2061
   2062		/* update next to alloc since we have filled the ring */
   2063		rx_ring->next_to_alloc = i;
   2064
   2065		/* Force memory writes to complete before letting h/w
   2066		 * know there are new descriptors to fetch.  (Only
   2067		 * applicable for weak-ordered memory model archs,
   2068		 * such as IA-64).
   2069		 */
   2070		wmb();
   2071		writel(i, rx_ring->tail);
   2072	}
   2073}
   2074
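        /* AF_XDP zero-copy counterpart of igc_alloc_rx_buffers(): pull
         * buffers from the XSK pool and write their DMA addresses into the
         * Rx descriptors. Returns false if the pool ran out of buffers before
         * 'count' descriptors could be filled.
         */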
   2075static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count)
   2076{
   2077	union igc_adv_rx_desc *desc;
   2078	u16 i = ring->next_to_use;
   2079	struct igc_rx_buffer *bi;
   2080	dma_addr_t dma;
   2081	bool ok = true;
   2082
   2083	if (!count)
   2084		return ok;
   2085
   2086	desc = IGC_RX_DESC(ring, i);
   2087	bi = &ring->rx_buffer_info[i];
   2088	i -= ring->count;
   2089
   2090	do {
   2091		bi->xdp = xsk_buff_alloc(ring->xsk_pool);
   2092		if (!bi->xdp) {
   2093			ok = false;
   2094			break;
   2095		}
   2096
   2097		dma = xsk_buff_xdp_get_dma(bi->xdp);
   2098		desc->read.pkt_addr = cpu_to_le64(dma);
   2099
   2100		desc++;
   2101		bi++;
   2102		i++;
   2103		if (unlikely(!i)) {
   2104			desc = IGC_RX_DESC(ring, 0);
   2105			bi = ring->rx_buffer_info;
   2106			i -= ring->count;
   2107		}
   2108
   2109		/* Clear the length for the next_to_use descriptor. */
   2110		desc->wb.upper.length = 0;
   2111
   2112		count--;
   2113	} while (count);
   2114
   2115	i += ring->count;
   2116
   2117	if (ring->next_to_use != i) {
   2118		ring->next_to_use = i;
   2119
   2120		/* Force memory writes to complete before letting h/w
   2121		 * know there are new descriptors to fetch.  (Only
   2122		 * applicable for weak-ordered memory model archs,
   2123		 * such as IA-64).
   2124		 */
   2125		wmb();
   2126		writel(i, ring->tail);
   2127	}
   2128
   2129	return ok;
   2130}
   2131
   2132static int igc_xdp_init_tx_buffer(struct igc_tx_buffer *buffer,
   2133				  struct xdp_frame *xdpf,
   2134				  struct igc_ring *ring)
   2135{
   2136	dma_addr_t dma;
   2137
   2138	dma = dma_map_single(ring->dev, xdpf->data, xdpf->len, DMA_TO_DEVICE);
   2139	if (dma_mapping_error(ring->dev, dma)) {
   2140		netdev_err_once(ring->netdev, "Failed to map DMA for TX\n");
   2141		return -ENOMEM;
   2142	}
   2143
   2144	buffer->type = IGC_TX_BUFFER_TYPE_XDP;
   2145	buffer->xdpf = xdpf;
   2146	buffer->protocol = 0;
   2147	buffer->bytecount = xdpf->len;
   2148	buffer->gso_segs = 1;
   2149	buffer->time_stamp = jiffies;
   2150	dma_unmap_len_set(buffer, len, xdpf->len);
   2151	dma_unmap_addr_set(buffer, dma, dma);
   2152	return 0;
   2153}
   2154
    2155/* This function requires __netif_tx_lock to be held by the caller. */
   2156static int igc_xdp_init_tx_descriptor(struct igc_ring *ring,
   2157				      struct xdp_frame *xdpf)
   2158{
   2159	struct igc_tx_buffer *buffer;
   2160	union igc_adv_tx_desc *desc;
   2161	u32 cmd_type, olinfo_status;
   2162	int err;
   2163
   2164	if (!igc_desc_unused(ring))
   2165		return -EBUSY;
   2166
   2167	buffer = &ring->tx_buffer_info[ring->next_to_use];
   2168	err = igc_xdp_init_tx_buffer(buffer, xdpf, ring);
   2169	if (err)
   2170		return err;
   2171
   2172	cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
   2173		   IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
   2174		   buffer->bytecount;
   2175	olinfo_status = buffer->bytecount << IGC_ADVTXD_PAYLEN_SHIFT;
   2176
   2177	desc = IGC_TX_DESC(ring, ring->next_to_use);
   2178	desc->read.cmd_type_len = cpu_to_le32(cmd_type);
   2179	desc->read.olinfo_status = cpu_to_le32(olinfo_status);
   2180	desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(buffer, dma));
   2181
   2182	netdev_tx_sent_queue(txring_txq(ring), buffer->bytecount);
   2183
   2184	buffer->next_to_watch = desc;
   2185
   2186	ring->next_to_use++;
   2187	if (ring->next_to_use == ring->count)
   2188		ring->next_to_use = 0;
   2189
   2190	return 0;
   2191}
   2192
   2193static struct igc_ring *igc_xdp_get_tx_ring(struct igc_adapter *adapter,
   2194					    int cpu)
   2195{
   2196	int index = cpu;
   2197
   2198	if (unlikely(index < 0))
   2199		index = 0;
   2200
   2201	while (index >= adapter->num_tx_queues)
   2202		index -= adapter->num_tx_queues;
   2203
   2204	return adapter->tx_ring[index];
   2205}
   2206
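        /* Transmit an XDP_TX frame on the Tx ring assigned to the current
         * CPU, taking the queue's __netif_tx_lock around the descriptor
         * setup.
         */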
   2207static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp)
   2208{
   2209	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
   2210	int cpu = smp_processor_id();
   2211	struct netdev_queue *nq;
   2212	struct igc_ring *ring;
   2213	int res;
   2214
   2215	if (unlikely(!xdpf))
   2216		return -EFAULT;
   2217
   2218	ring = igc_xdp_get_tx_ring(adapter, cpu);
   2219	nq = txring_txq(ring);
   2220
   2221	__netif_tx_lock(nq, cpu);
   2222	res = igc_xdp_init_tx_descriptor(ring, xdpf);
   2223	__netif_tx_unlock(nq);
   2224	return res;
   2225}
   2226
   2227/* This function assumes rcu_read_lock() is held by the caller. */
   2228static int __igc_xdp_run_prog(struct igc_adapter *adapter,
   2229			      struct bpf_prog *prog,
   2230			      struct xdp_buff *xdp)
   2231{
   2232	u32 act = bpf_prog_run_xdp(prog, xdp);
   2233
   2234	switch (act) {
   2235	case XDP_PASS:
   2236		return IGC_XDP_PASS;
   2237	case XDP_TX:
   2238		if (igc_xdp_xmit_back(adapter, xdp) < 0)
   2239			goto out_failure;
   2240		return IGC_XDP_TX;
    2241	case XDP_REDIRECT:
    2242		if (xdp_do_redirect(adapter->netdev, xdp, prog) < 0)
    2243			goto out_failure;
    2244		return IGC_XDP_REDIRECT;
   2246	default:
   2247		bpf_warn_invalid_xdp_action(adapter->netdev, prog, act);
   2248		fallthrough;
   2249	case XDP_ABORTED:
   2250out_failure:
   2251		trace_xdp_exception(adapter->netdev, prog, act);
   2252		fallthrough;
   2253	case XDP_DROP:
   2254		return IGC_XDP_CONSUMED;
   2255	}
   2256}
   2257
   2258static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
   2259					struct xdp_buff *xdp)
   2260{
   2261	struct bpf_prog *prog;
   2262	int res;
   2263
   2264	prog = READ_ONCE(adapter->xdp_prog);
   2265	if (!prog) {
   2266		res = IGC_XDP_PASS;
   2267		goto out;
   2268	}
   2269
   2270	res = __igc_xdp_run_prog(adapter, prog, xdp);
   2271
   2272out:
   2273	return ERR_PTR(-res);
   2274}
   2275
   2276/* This function assumes __netif_tx_lock is held by the caller. */
   2277static void igc_flush_tx_descriptors(struct igc_ring *ring)
   2278{
   2279	/* Once tail pointer is updated, hardware can fetch the descriptors
   2280	 * any time so we issue a write membar here to ensure all memory
   2281	 * writes are complete before the tail pointer is updated.
   2282	 */
   2283	wmb();
   2284	writel(ring->next_to_use, ring->tail);
   2285}
   2286
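        /* Called once per NAPI poll after the Rx loop: ring the Tx doorbell
         * for any queued XDP_TX frames and flush pending XDP_REDIRECT work.
         */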
   2287static void igc_finalize_xdp(struct igc_adapter *adapter, int status)
   2288{
   2289	int cpu = smp_processor_id();
   2290	struct netdev_queue *nq;
   2291	struct igc_ring *ring;
   2292
   2293	if (status & IGC_XDP_TX) {
   2294		ring = igc_xdp_get_tx_ring(adapter, cpu);
   2295		nq = txring_txq(ring);
   2296
   2297		__netif_tx_lock(nq, cpu);
   2298		igc_flush_tx_descriptors(ring);
   2299		__netif_tx_unlock(nq);
   2300	}
   2301
   2302	if (status & IGC_XDP_REDIRECT)
   2303		xdp_do_flush();
   2304}
   2305
   2306static void igc_update_rx_stats(struct igc_q_vector *q_vector,
   2307				unsigned int packets, unsigned int bytes)
   2308{
   2309	struct igc_ring *ring = q_vector->rx.ring;
   2310
   2311	u64_stats_update_begin(&ring->rx_syncp);
   2312	ring->rx_stats.packets += packets;
   2313	ring->rx_stats.bytes += bytes;
   2314	u64_stats_update_end(&ring->rx_syncp);
   2315
   2316	q_vector->rx.total_packets += packets;
   2317	q_vector->rx.total_bytes += bytes;
   2318}
   2319
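        /* Main Rx poll loop: periodically refill descriptors, run the
         * attached XDP program (if any), build or extend the skb and hand
         * completed frames to the stack via GRO. Returns the number of
         * packets processed, bounded by the NAPI budget.
         */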
   2320static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
   2321{
   2322	unsigned int total_bytes = 0, total_packets = 0;
   2323	struct igc_adapter *adapter = q_vector->adapter;
   2324	struct igc_ring *rx_ring = q_vector->rx.ring;
   2325	struct sk_buff *skb = rx_ring->skb;
   2326	u16 cleaned_count = igc_desc_unused(rx_ring);
   2327	int xdp_status = 0, rx_buffer_pgcnt;
   2328
   2329	while (likely(total_packets < budget)) {
   2330		union igc_adv_rx_desc *rx_desc;
   2331		struct igc_rx_buffer *rx_buffer;
   2332		unsigned int size, truesize;
   2333		ktime_t timestamp = 0;
   2334		struct xdp_buff xdp;
   2335		int pkt_offset = 0;
   2336		void *pktbuf;
   2337
   2338		/* return some buffers to hardware, one at a time is too slow */
   2339		if (cleaned_count >= IGC_RX_BUFFER_WRITE) {
   2340			igc_alloc_rx_buffers(rx_ring, cleaned_count);
   2341			cleaned_count = 0;
   2342		}
   2343
   2344		rx_desc = IGC_RX_DESC(rx_ring, rx_ring->next_to_clean);
   2345		size = le16_to_cpu(rx_desc->wb.upper.length);
   2346		if (!size)
   2347			break;
   2348
   2349		/* This memory barrier is needed to keep us from reading
   2350		 * any other fields out of the rx_desc until we know the
   2351		 * descriptor has been written back
   2352		 */
   2353		dma_rmb();
   2354
   2355		rx_buffer = igc_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt);
   2356		truesize = igc_get_rx_frame_truesize(rx_ring, size);
   2357
   2358		pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset;
   2359
   2360		if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) {
   2361			timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
   2362							pktbuf);
   2363			pkt_offset = IGC_TS_HDR_LEN;
   2364			size -= IGC_TS_HDR_LEN;
   2365		}
   2366
   2367		if (!skb) {
   2368			xdp_init_buff(&xdp, truesize, &rx_ring->xdp_rxq);
   2369			xdp_prepare_buff(&xdp, pktbuf - igc_rx_offset(rx_ring),
   2370					 igc_rx_offset(rx_ring) + pkt_offset,
   2371					 size, true);
   2372
   2373			skb = igc_xdp_run_prog(adapter, &xdp);
   2374		}
   2375
   2376		if (IS_ERR(skb)) {
   2377			unsigned int xdp_res = -PTR_ERR(skb);
   2378
   2379			switch (xdp_res) {
   2380			case IGC_XDP_CONSUMED:
   2381				rx_buffer->pagecnt_bias++;
   2382				break;
   2383			case IGC_XDP_TX:
   2384			case IGC_XDP_REDIRECT:
   2385				igc_rx_buffer_flip(rx_buffer, truesize);
   2386				xdp_status |= xdp_res;
   2387				break;
   2388			}
   2389
   2390			total_packets++;
   2391			total_bytes += size;
   2392		} else if (skb)
   2393			igc_add_rx_frag(rx_ring, rx_buffer, skb, size);
   2394		else if (ring_uses_build_skb(rx_ring))
   2395			skb = igc_build_skb(rx_ring, rx_buffer, &xdp);
   2396		else
   2397			skb = igc_construct_skb(rx_ring, rx_buffer, &xdp,
   2398						timestamp);
   2399
   2400		/* exit if we failed to retrieve a buffer */
   2401		if (!skb) {
   2402			rx_ring->rx_stats.alloc_failed++;
   2403			rx_buffer->pagecnt_bias++;
   2404			break;
   2405		}
   2406
   2407		igc_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt);
   2408		cleaned_count++;
   2409
   2410		/* fetch next buffer in frame if non-eop */
   2411		if (igc_is_non_eop(rx_ring, rx_desc))
   2412			continue;
   2413
   2414		/* verify the packet layout is correct */
   2415		if (igc_cleanup_headers(rx_ring, rx_desc, skb)) {
   2416			skb = NULL;
   2417			continue;
   2418		}
   2419
   2420		/* probably a little skewed due to removing CRC */
   2421		total_bytes += skb->len;
   2422
   2423		/* populate checksum, VLAN, and protocol */
   2424		igc_process_skb_fields(rx_ring, rx_desc, skb);
   2425
   2426		napi_gro_receive(&q_vector->napi, skb);
   2427
   2428		/* reset skb pointer */
   2429		skb = NULL;
   2430
   2431		/* update budget accounting */
   2432		total_packets++;
   2433	}
   2434
   2435	if (xdp_status)
   2436		igc_finalize_xdp(adapter, xdp_status);
   2437
   2438	/* place incomplete frames back on ring for completion */
   2439	rx_ring->skb = skb;
   2440
   2441	igc_update_rx_stats(q_vector, total_packets, total_bytes);
   2442
   2443	if (cleaned_count)
   2444		igc_alloc_rx_buffers(rx_ring, cleaned_count);
   2445
   2446	return total_packets;
   2447}
   2448
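        /* Copy an XSK (zero-copy) buffer, including any XDP metadata, into a
         * freshly allocated skb so it can be passed up the regular stack.
         */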
   2449static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring,
   2450					    struct xdp_buff *xdp)
   2451{
   2452	unsigned int totalsize = xdp->data_end - xdp->data_meta;
   2453	unsigned int metasize = xdp->data - xdp->data_meta;
   2454	struct sk_buff *skb;
   2455
   2456	net_prefetch(xdp->data_meta);
   2457
   2458	skb = __napi_alloc_skb(&ring->q_vector->napi, totalsize,
   2459			       GFP_ATOMIC | __GFP_NOWARN);
   2460	if (unlikely(!skb))
   2461		return NULL;
   2462
   2463	memcpy(__skb_put(skb, totalsize), xdp->data_meta,
   2464	       ALIGN(totalsize, sizeof(long)));
   2465
   2466	if (metasize) {
   2467		skb_metadata_set(skb, metasize);
   2468		__skb_pull(skb, metasize);
   2469	}
   2470
   2471	return skb;
   2472}
   2473
   2474static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector,
   2475				union igc_adv_rx_desc *desc,
   2476				struct xdp_buff *xdp,
   2477				ktime_t timestamp)
   2478{
   2479	struct igc_ring *ring = q_vector->rx.ring;
   2480	struct sk_buff *skb;
   2481
   2482	skb = igc_construct_skb_zc(ring, xdp);
   2483	if (!skb) {
   2484		ring->rx_stats.alloc_failed++;
   2485		return;
   2486	}
   2487
   2488	if (timestamp)
   2489		skb_hwtstamps(skb)->hwtstamp = timestamp;
   2490
   2491	if (igc_cleanup_headers(ring, desc, skb))
   2492		return;
   2493
   2494	igc_process_skb_fields(ring, desc, skb);
   2495	napi_gro_receive(&q_vector->napi, skb);
   2496}
   2497
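        /* AF_XDP zero-copy Rx poll loop: every frame is passed to the XDP
         * program and is then either transmitted, redirected, dropped or
         * copied into an skb for the regular stack, after which the XSK
         * buffer is freed back to the pool or handed over to the XDP path.
         */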
   2498static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
   2499{
   2500	struct igc_adapter *adapter = q_vector->adapter;
   2501	struct igc_ring *ring = q_vector->rx.ring;
   2502	u16 cleaned_count = igc_desc_unused(ring);
   2503	int total_bytes = 0, total_packets = 0;
   2504	u16 ntc = ring->next_to_clean;
   2505	struct bpf_prog *prog;
   2506	bool failure = false;
   2507	int xdp_status = 0;
   2508
   2509	rcu_read_lock();
   2510
   2511	prog = READ_ONCE(adapter->xdp_prog);
   2512
   2513	while (likely(total_packets < budget)) {
   2514		union igc_adv_rx_desc *desc;
   2515		struct igc_rx_buffer *bi;
   2516		ktime_t timestamp = 0;
   2517		unsigned int size;
   2518		int res;
   2519
   2520		desc = IGC_RX_DESC(ring, ntc);
   2521		size = le16_to_cpu(desc->wb.upper.length);
   2522		if (!size)
   2523			break;
   2524
   2525		/* This memory barrier is needed to keep us from reading
   2526		 * any other fields out of the rx_desc until we know the
   2527		 * descriptor has been written back
   2528		 */
   2529		dma_rmb();
   2530
   2531		bi = &ring->rx_buffer_info[ntc];
   2532
   2533		if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) {
   2534			timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
   2535							bi->xdp->data);
   2536
   2537			bi->xdp->data += IGC_TS_HDR_LEN;
   2538
    2539		/* The HW timestamp has already been copied into a local
    2540		 * variable, so the metadata length seen by the XDP program
    2541		 * should be 0.
    2542		 */
   2542			bi->xdp->data_meta += IGC_TS_HDR_LEN;
   2543			size -= IGC_TS_HDR_LEN;
   2544		}
   2545
   2546		bi->xdp->data_end = bi->xdp->data + size;
   2547		xsk_buff_dma_sync_for_cpu(bi->xdp, ring->xsk_pool);
   2548
   2549		res = __igc_xdp_run_prog(adapter, prog, bi->xdp);
   2550		switch (res) {
   2551		case IGC_XDP_PASS:
   2552			igc_dispatch_skb_zc(q_vector, desc, bi->xdp, timestamp);
   2553			fallthrough;
   2554		case IGC_XDP_CONSUMED:
   2555			xsk_buff_free(bi->xdp);
   2556			break;
   2557		case IGC_XDP_TX:
   2558		case IGC_XDP_REDIRECT:
   2559			xdp_status |= res;
   2560			break;
   2561		}
   2562
   2563		bi->xdp = NULL;
   2564		total_bytes += size;
   2565		total_packets++;
   2566		cleaned_count++;
   2567		ntc++;
   2568		if (ntc == ring->count)
   2569			ntc = 0;
   2570	}
   2571
   2572	ring->next_to_clean = ntc;
   2573	rcu_read_unlock();
   2574
   2575	if (cleaned_count >= IGC_RX_BUFFER_WRITE)
   2576		failure = !igc_alloc_rx_buffers_zc(ring, cleaned_count);
   2577
   2578	if (xdp_status)
   2579		igc_finalize_xdp(adapter, xdp_status);
   2580
   2581	igc_update_rx_stats(q_vector, total_packets, total_bytes);
   2582
   2583	if (xsk_uses_need_wakeup(ring->xsk_pool)) {
   2584		if (failure || ring->next_to_clean == ring->next_to_use)
   2585			xsk_set_rx_need_wakeup(ring->xsk_pool);
   2586		else
   2587			xsk_clear_rx_need_wakeup(ring->xsk_pool);
   2588		return total_packets;
   2589	}
   2590
   2591	return failure ? budget : total_packets;
   2592}
   2593
   2594static void igc_update_tx_stats(struct igc_q_vector *q_vector,
   2595				unsigned int packets, unsigned int bytes)
   2596{
   2597	struct igc_ring *ring = q_vector->tx.ring;
   2598
   2599	u64_stats_update_begin(&ring->tx_syncp);
   2600	ring->tx_stats.bytes += bytes;
   2601	ring->tx_stats.packets += packets;
   2602	u64_stats_update_end(&ring->tx_syncp);
   2603
   2604	q_vector->tx.total_bytes += bytes;
   2605	q_vector->tx.total_packets += packets;
   2606}
   2607
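        /* Transmit frames queued on the AF_XDP Tx ring: peek descriptors from
         * the XSK pool, translate them into hardware Tx descriptors and ring
         * the doorbell once the batch has been queued. Runs under the queue's
         * __netif_tx_lock.
         */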
   2608static void igc_xdp_xmit_zc(struct igc_ring *ring)
   2609{
   2610	struct xsk_buff_pool *pool = ring->xsk_pool;
   2611	struct netdev_queue *nq = txring_txq(ring);
   2612	union igc_adv_tx_desc *tx_desc = NULL;
   2613	int cpu = smp_processor_id();
   2614	u16 ntu = ring->next_to_use;
   2615	struct xdp_desc xdp_desc;
   2616	u16 budget;
   2617
   2618	if (!netif_carrier_ok(ring->netdev))
   2619		return;
   2620
   2621	__netif_tx_lock(nq, cpu);
   2622
   2623	budget = igc_desc_unused(ring);
   2624
   2625	while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) {
   2626		u32 cmd_type, olinfo_status;
   2627		struct igc_tx_buffer *bi;
   2628		dma_addr_t dma;
   2629
   2630		cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
   2631			   IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
   2632			   xdp_desc.len;
   2633		olinfo_status = xdp_desc.len << IGC_ADVTXD_PAYLEN_SHIFT;
   2634
   2635		dma = xsk_buff_raw_get_dma(pool, xdp_desc.addr);
   2636		xsk_buff_raw_dma_sync_for_device(pool, dma, xdp_desc.len);
   2637
   2638		tx_desc = IGC_TX_DESC(ring, ntu);
   2639		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
   2640		tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
   2641		tx_desc->read.buffer_addr = cpu_to_le64(dma);
   2642
   2643		bi = &ring->tx_buffer_info[ntu];
   2644		bi->type = IGC_TX_BUFFER_TYPE_XSK;
   2645		bi->protocol = 0;
   2646		bi->bytecount = xdp_desc.len;
   2647		bi->gso_segs = 1;
   2648		bi->time_stamp = jiffies;
   2649		bi->next_to_watch = tx_desc;
   2650
   2651		netdev_tx_sent_queue(txring_txq(ring), xdp_desc.len);
   2652
   2653		ntu++;
   2654		if (ntu == ring->count)
   2655			ntu = 0;
   2656	}
   2657
   2658	ring->next_to_use = ntu;
   2659	if (tx_desc) {
   2660		igc_flush_tx_descriptors(ring);
   2661		xsk_tx_release(pool);
   2662	}
   2663
   2664	__netif_tx_unlock(nq);
   2665}
   2666
   2667/**
   2668 * igc_clean_tx_irq - Reclaim resources after transmit completes
   2669 * @q_vector: pointer to q_vector containing needed info
   2670 * @napi_budget: Used to determine if we are in netpoll
   2671 *
   2672 * returns true if ring is completely cleaned
   2673 */
   2674static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
   2675{
   2676	struct igc_adapter *adapter = q_vector->adapter;
   2677	unsigned int total_bytes = 0, total_packets = 0;
   2678	unsigned int budget = q_vector->tx.work_limit;
   2679	struct igc_ring *tx_ring = q_vector->tx.ring;
   2680	unsigned int i = tx_ring->next_to_clean;
   2681	struct igc_tx_buffer *tx_buffer;
   2682	union igc_adv_tx_desc *tx_desc;
   2683	u32 xsk_frames = 0;
   2684
   2685	if (test_bit(__IGC_DOWN, &adapter->state))
   2686		return true;
   2687
   2688	tx_buffer = &tx_ring->tx_buffer_info[i];
   2689	tx_desc = IGC_TX_DESC(tx_ring, i);
   2690	i -= tx_ring->count;
   2691
   2692	do {
   2693		union igc_adv_tx_desc *eop_desc = tx_buffer->next_to_watch;
   2694
   2695		/* if next_to_watch is not set then there is no work pending */
   2696		if (!eop_desc)
   2697			break;
   2698
   2699		/* prevent any other reads prior to eop_desc */
   2700		smp_rmb();
   2701
   2702		/* if DD is not set pending work has not been completed */
   2703		if (!(eop_desc->wb.status & cpu_to_le32(IGC_TXD_STAT_DD)))
   2704			break;
   2705
   2706		/* clear next_to_watch to prevent false hangs */
   2707		tx_buffer->next_to_watch = NULL;
   2708
   2709		/* update the statistics for this packet */
   2710		total_bytes += tx_buffer->bytecount;
   2711		total_packets += tx_buffer->gso_segs;
   2712
   2713		switch (tx_buffer->type) {
   2714		case IGC_TX_BUFFER_TYPE_XSK:
   2715			xsk_frames++;
   2716			break;
   2717		case IGC_TX_BUFFER_TYPE_XDP:
   2718			xdp_return_frame(tx_buffer->xdpf);
   2719			igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
   2720			break;
   2721		case IGC_TX_BUFFER_TYPE_SKB:
   2722			napi_consume_skb(tx_buffer->skb, napi_budget);
   2723			igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
   2724			break;
   2725		default:
   2726			netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n");
   2727			break;
   2728		}
   2729
   2730		/* clear last DMA location and unmap remaining buffers */
   2731		while (tx_desc != eop_desc) {
   2732			tx_buffer++;
   2733			tx_desc++;
   2734			i++;
   2735			if (unlikely(!i)) {
   2736				i -= tx_ring->count;
   2737				tx_buffer = tx_ring->tx_buffer_info;
   2738				tx_desc = IGC_TX_DESC(tx_ring, 0);
   2739			}
   2740
   2741			/* unmap any remaining paged data */
   2742			if (dma_unmap_len(tx_buffer, len))
   2743				igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
   2744		}
   2745
   2746		/* move us one more past the eop_desc for start of next pkt */
   2747		tx_buffer++;
   2748		tx_desc++;
   2749		i++;
   2750		if (unlikely(!i)) {
   2751			i -= tx_ring->count;
   2752			tx_buffer = tx_ring->tx_buffer_info;
   2753			tx_desc = IGC_TX_DESC(tx_ring, 0);
   2754		}
   2755
   2756		/* issue prefetch for next Tx descriptor */
   2757		prefetch(tx_desc);
   2758
   2759		/* update budget accounting */
   2760		budget--;
   2761	} while (likely(budget));
   2762
   2763	netdev_tx_completed_queue(txring_txq(tx_ring),
   2764				  total_packets, total_bytes);
   2765
   2766	i += tx_ring->count;
   2767	tx_ring->next_to_clean = i;
   2768
   2769	igc_update_tx_stats(q_vector, total_packets, total_bytes);
   2770
   2771	if (tx_ring->xsk_pool) {
   2772		if (xsk_frames)
   2773			xsk_tx_completed(tx_ring->xsk_pool, xsk_frames);
   2774		if (xsk_uses_need_wakeup(tx_ring->xsk_pool))
   2775			xsk_set_tx_need_wakeup(tx_ring->xsk_pool);
   2776		igc_xdp_xmit_zc(tx_ring);
   2777	}
   2778
   2779	if (test_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
   2780		struct igc_hw *hw = &adapter->hw;
   2781
   2782		/* Detect a transmit hang in hardware, this serializes the
   2783		 * check with the clearing of time_stamp and movement of i
   2784		 */
   2785		clear_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
   2786		if (tx_buffer->next_to_watch &&
   2787		    time_after(jiffies, tx_buffer->time_stamp +
   2788		    (adapter->tx_timeout_factor * HZ)) &&
   2789		    !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF)) {
   2790			/* detected Tx unit hang */
   2791			netdev_err(tx_ring->netdev,
   2792				   "Detected Tx Unit Hang\n"
   2793				   "  Tx Queue             <%d>\n"
   2794				   "  TDH                  <%x>\n"
   2795				   "  TDT                  <%x>\n"
   2796				   "  next_to_use          <%x>\n"
   2797				   "  next_to_clean        <%x>\n"
   2798				   "buffer_info[next_to_clean]\n"
   2799				   "  time_stamp           <%lx>\n"
   2800				   "  next_to_watch        <%p>\n"
   2801				   "  jiffies              <%lx>\n"
   2802				   "  desc.status          <%x>\n",
   2803				   tx_ring->queue_index,
   2804				   rd32(IGC_TDH(tx_ring->reg_idx)),
   2805				   readl(tx_ring->tail),
   2806				   tx_ring->next_to_use,
   2807				   tx_ring->next_to_clean,
   2808				   tx_buffer->time_stamp,
   2809				   tx_buffer->next_to_watch,
   2810				   jiffies,
   2811				   tx_buffer->next_to_watch->wb.status);
   2812			netif_stop_subqueue(tx_ring->netdev,
   2813					    tx_ring->queue_index);
   2814
   2815			/* we are about to reset, no point in enabling stuff */
   2816			return true;
   2817		}
   2818	}
   2819
   2820#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
   2821	if (unlikely(total_packets &&
   2822		     netif_carrier_ok(tx_ring->netdev) &&
   2823		     igc_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) {
   2824		/* Make sure that anybody stopping the queue after this
   2825		 * sees the new next_to_clean.
   2826		 */
   2827		smp_mb();
   2828		if (__netif_subqueue_stopped(tx_ring->netdev,
   2829					     tx_ring->queue_index) &&
   2830		    !(test_bit(__IGC_DOWN, &adapter->state))) {
   2831			netif_wake_subqueue(tx_ring->netdev,
   2832					    tx_ring->queue_index);
   2833
   2834			u64_stats_update_begin(&tx_ring->tx_syncp);
   2835			tx_ring->tx_stats.restart_queue++;
   2836			u64_stats_update_end(&tx_ring->tx_syncp);
   2837		}
   2838	}
   2839
   2840	return !!budget;
   2841}
   2842
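        /* Scan the RAL/RAH receive address registers for an enabled filter
         * matching the given address and type; returns its index, or -1 if
         * no match is found.
         */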
   2843static int igc_find_mac_filter(struct igc_adapter *adapter,
   2844			       enum igc_mac_filter_type type, const u8 *addr)
   2845{
   2846	struct igc_hw *hw = &adapter->hw;
   2847	int max_entries = hw->mac.rar_entry_count;
   2848	u32 ral, rah;
   2849	int i;
   2850
   2851	for (i = 0; i < max_entries; i++) {
   2852		ral = rd32(IGC_RAL(i));
   2853		rah = rd32(IGC_RAH(i));
   2854
   2855		if (!(rah & IGC_RAH_AV))
   2856			continue;
   2857		if (!!(rah & IGC_RAH_ASEL_SRC_ADDR) != type)
   2858			continue;
   2859		if ((rah & IGC_RAH_RAH_MASK) !=
   2860		    le16_to_cpup((__le16 *)(addr + 4)))
   2861			continue;
   2862		if (ral != le32_to_cpup((__le32 *)(addr)))
   2863			continue;
   2864
   2865		return i;
   2866	}
   2867
   2868	return -1;
   2869}
   2870
   2871static int igc_get_avail_mac_filter_slot(struct igc_adapter *adapter)
   2872{
   2873	struct igc_hw *hw = &adapter->hw;
   2874	int max_entries = hw->mac.rar_entry_count;
   2875	u32 rah;
   2876	int i;
   2877
   2878	for (i = 0; i < max_entries; i++) {
   2879		rah = rd32(IGC_RAH(i));
   2880
   2881		if (!(rah & IGC_RAH_AV))
   2882			return i;
   2883	}
   2884
   2885	return -1;
   2886}
   2887
   2888/**
   2889 * igc_add_mac_filter() - Add MAC address filter
   2890 * @adapter: Pointer to adapter where the filter should be added
   2891 * @type: MAC address filter type (source or destination)
   2892 * @addr: MAC address
   2893 * @queue: If non-negative, queue assignment feature is enabled and frames
   2894 *         matching the filter are enqueued onto 'queue'. Otherwise, queue
   2895 *         assignment is disabled.
   2896 *
   2897 * Return: 0 in case of success, negative errno code otherwise.
   2898 */
   2899static int igc_add_mac_filter(struct igc_adapter *adapter,
   2900			      enum igc_mac_filter_type type, const u8 *addr,
   2901			      int queue)
   2902{
   2903	struct net_device *dev = adapter->netdev;
   2904	int index;
   2905
   2906	index = igc_find_mac_filter(adapter, type, addr);
   2907	if (index >= 0)
   2908		goto update_filter;
   2909
   2910	index = igc_get_avail_mac_filter_slot(adapter);
   2911	if (index < 0)
   2912		return -ENOSPC;
   2913
   2914	netdev_dbg(dev, "Add MAC address filter: index %d type %s address %pM queue %d\n",
   2915		   index, type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src",
   2916		   addr, queue);
   2917
   2918update_filter:
   2919	igc_set_mac_filter_hw(adapter, index, type, addr, queue);
   2920	return 0;
   2921}
   2922
   2923/**
   2924 * igc_del_mac_filter() - Delete MAC address filter
   2925 * @adapter: Pointer to adapter where the filter should be deleted from
   2926 * @type: MAC address filter type (source or destination)
   2927 * @addr: MAC address
   2928 */
   2929static void igc_del_mac_filter(struct igc_adapter *adapter,
   2930			       enum igc_mac_filter_type type, const u8 *addr)
   2931{
   2932	struct net_device *dev = adapter->netdev;
   2933	int index;
   2934
   2935	index = igc_find_mac_filter(adapter, type, addr);
   2936	if (index < 0)
   2937		return;
   2938
   2939	if (index == 0) {
   2940		/* If this is the default filter, we don't actually delete it.
   2941		 * We just reset to its default value i.e. disable queue
   2942		 * assignment.
   2943		 */
   2944		netdev_dbg(dev, "Disable default MAC filter queue assignment");
   2945
   2946		igc_set_mac_filter_hw(adapter, 0, type, addr, -1);
   2947	} else {
   2948		netdev_dbg(dev, "Delete MAC address filter: index %d type %s address %pM\n",
   2949			   index,
   2950			   type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src",
   2951			   addr);
   2952
   2953		igc_clear_mac_filter_hw(adapter, index);
   2954	}
   2955}
   2956
   2957/**
   2958 * igc_add_vlan_prio_filter() - Add VLAN priority filter
   2959 * @adapter: Pointer to adapter where the filter should be added
   2960 * @prio: VLAN priority value
   2961 * @queue: Queue number which matching frames are assigned to
   2962 *
   2963 * Return: 0 in case of success, negative errno code otherwise.
   2964 */
   2965static int igc_add_vlan_prio_filter(struct igc_adapter *adapter, int prio,
   2966				    int queue)
   2967{
   2968	struct net_device *dev = adapter->netdev;
   2969	struct igc_hw *hw = &adapter->hw;
   2970	u32 vlanpqf;
   2971
   2972	vlanpqf = rd32(IGC_VLANPQF);
   2973
   2974	if (vlanpqf & IGC_VLANPQF_VALID(prio)) {
   2975		netdev_dbg(dev, "VLAN priority filter already in use\n");
   2976		return -EEXIST;
   2977	}
   2978
   2979	vlanpqf |= IGC_VLANPQF_QSEL(prio, queue);
   2980	vlanpqf |= IGC_VLANPQF_VALID(prio);
   2981
   2982	wr32(IGC_VLANPQF, vlanpqf);
   2983
   2984	netdev_dbg(dev, "Add VLAN priority filter: prio %d queue %d\n",
   2985		   prio, queue);
   2986	return 0;
   2987}
   2988
   2989/**
   2990 * igc_del_vlan_prio_filter() - Delete VLAN priority filter
   2991 * @adapter: Pointer to adapter where the filter should be deleted from
   2992 * @prio: VLAN priority value
   2993 */
   2994static void igc_del_vlan_prio_filter(struct igc_adapter *adapter, int prio)
   2995{
   2996	struct igc_hw *hw = &adapter->hw;
   2997	u32 vlanpqf;
   2998
   2999	vlanpqf = rd32(IGC_VLANPQF);
   3000
   3001	vlanpqf &= ~IGC_VLANPQF_VALID(prio);
   3002	vlanpqf &= ~IGC_VLANPQF_QSEL(prio, IGC_VLANPQF_QUEUE_MASK);
   3003
   3004	wr32(IGC_VLANPQF, vlanpqf);
   3005
   3006	netdev_dbg(adapter->netdev, "Delete VLAN priority filter: prio %d\n",
   3007		   prio);
   3008}
   3009
   3010static int igc_get_avail_etype_filter_slot(struct igc_adapter *adapter)
   3011{
   3012	struct igc_hw *hw = &adapter->hw;
   3013	int i;
   3014
   3015	for (i = 0; i < MAX_ETYPE_FILTER; i++) {
   3016		u32 etqf = rd32(IGC_ETQF(i));
   3017
   3018		if (!(etqf & IGC_ETQF_FILTER_ENABLE))
   3019			return i;
   3020	}
   3021
   3022	return -1;
   3023}
   3024
   3025/**
   3026 * igc_add_etype_filter() - Add ethertype filter
   3027 * @adapter: Pointer to adapter where the filter should be added
   3028 * @etype: Ethertype value
   3029 * @queue: If non-negative, queue assignment feature is enabled and frames
   3030 *         matching the filter are enqueued onto 'queue'. Otherwise, queue
   3031 *         assignment is disabled.
   3032 *
   3033 * Return: 0 in case of success, negative errno code otherwise.
   3034 */
   3035static int igc_add_etype_filter(struct igc_adapter *adapter, u16 etype,
   3036				int queue)
   3037{
   3038	struct igc_hw *hw = &adapter->hw;
   3039	int index;
   3040	u32 etqf;
   3041
   3042	index = igc_get_avail_etype_filter_slot(adapter);
   3043	if (index < 0)
   3044		return -ENOSPC;
   3045
   3046	etqf = rd32(IGC_ETQF(index));
   3047
   3048	etqf &= ~IGC_ETQF_ETYPE_MASK;
   3049	etqf |= etype;
   3050
   3051	if (queue >= 0) {
   3052		etqf &= ~IGC_ETQF_QUEUE_MASK;
   3053		etqf |= (queue << IGC_ETQF_QUEUE_SHIFT);
   3054		etqf |= IGC_ETQF_QUEUE_ENABLE;
   3055	}
   3056
   3057	etqf |= IGC_ETQF_FILTER_ENABLE;
   3058
   3059	wr32(IGC_ETQF(index), etqf);
   3060
   3061	netdev_dbg(adapter->netdev, "Add ethertype filter: etype %04x queue %d\n",
   3062		   etype, queue);
   3063	return 0;
   3064}
   3065
   3066static int igc_find_etype_filter(struct igc_adapter *adapter, u16 etype)
   3067{
   3068	struct igc_hw *hw = &adapter->hw;
   3069	int i;
   3070
   3071	for (i = 0; i < MAX_ETYPE_FILTER; i++) {
   3072		u32 etqf = rd32(IGC_ETQF(i));
   3073
   3074		if ((etqf & IGC_ETQF_ETYPE_MASK) == etype)
   3075			return i;
   3076	}
   3077
   3078	return -1;
   3079}
   3080
   3081/**
   3082 * igc_del_etype_filter() - Delete ethertype filter
   3083 * @adapter: Pointer to adapter where the filter should be deleted from
   3084 * @etype: Ethertype value
   3085 */
   3086static void igc_del_etype_filter(struct igc_adapter *adapter, u16 etype)
   3087{
   3088	struct igc_hw *hw = &adapter->hw;
   3089	int index;
   3090
   3091	index = igc_find_etype_filter(adapter, etype);
   3092	if (index < 0)
   3093		return;
   3094
   3095	wr32(IGC_ETQF(index), 0);
   3096
   3097	netdev_dbg(adapter->netdev, "Delete ethertype filter: etype %04x\n",
   3098		   etype);
   3099}
   3100
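        /* Select the register bank holding the requested flex filter via
         * IGC_FHFTSL and return the base address of the corresponding flex
         * host filter table in *fhft.
         */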
   3101static int igc_flex_filter_select(struct igc_adapter *adapter,
   3102				  struct igc_flex_filter *input,
   3103				  u32 *fhft)
   3104{
   3105	struct igc_hw *hw = &adapter->hw;
   3106	u8 fhft_index;
   3107	u32 fhftsl;
   3108
   3109	if (input->index >= MAX_FLEX_FILTER) {
   3110		dev_err(&adapter->pdev->dev, "Wrong Flex Filter index selected!\n");
   3111		return -EINVAL;
   3112	}
   3113
   3114	/* Indirect table select register */
   3115	fhftsl = rd32(IGC_FHFTSL);
   3116	fhftsl &= ~IGC_FHFTSL_FTSL_MASK;
   3117	switch (input->index) {
   3118	case 0 ... 7:
   3119		fhftsl |= 0x00;
   3120		break;
   3121	case 8 ... 15:
   3122		fhftsl |= 0x01;
   3123		break;
   3124	case 16 ... 23:
   3125		fhftsl |= 0x02;
   3126		break;
   3127	case 24 ... 31:
   3128		fhftsl |= 0x03;
   3129		break;
   3130	}
   3131	wr32(IGC_FHFTSL, fhftsl);
   3132
   3133	/* Normalize index down to host table register */
   3134	fhft_index = input->index % 8;
   3135
   3136	*fhft = (fhft_index < 4) ? IGC_FHFT(fhft_index) :
   3137		IGC_FHFT_EXT(fhft_index - 4);
   3138
   3139	return 0;
   3140}
   3141
   3142static int igc_write_flex_filter_ll(struct igc_adapter *adapter,
   3143				    struct igc_flex_filter *input)
   3144{
   3145	struct device *dev = &adapter->pdev->dev;
   3146	struct igc_hw *hw = &adapter->hw;
   3147	u8 *data = input->data;
   3148	u8 *mask = input->mask;
   3149	u32 queuing;
   3150	u32 fhft;
   3151	u32 wufc;
   3152	int ret;
   3153	int i;
   3154
   3155	/* Length has to be aligned to 8. Otherwise the filter will fail. Bail
   3156	 * out early to avoid surprises later.
   3157	 */
   3158	if (input->length % 8 != 0) {
   3159		dev_err(dev, "The length of a flex filter has to be 8 byte aligned!\n");
   3160		return -EINVAL;
   3161	}
   3162
   3163	/* Select corresponding flex filter register and get base for host table. */
   3164	ret = igc_flex_filter_select(adapter, input, &fhft);
   3165	if (ret)
   3166		return ret;
   3167
    3168	/* When adding a filter, globally disable the flex filter feature
    3169	 * first, as recommended by the datasheet.
    3170	 */
   3171	wufc = rd32(IGC_WUFC);
   3172	wufc &= ~IGC_WUFC_FLEX_HQ;
   3173	wr32(IGC_WUFC, wufc);
   3174
   3175	/* Configure filter */
   3176	queuing = input->length & IGC_FHFT_LENGTH_MASK;
   3177	queuing |= (input->rx_queue << IGC_FHFT_QUEUE_SHIFT) & IGC_FHFT_QUEUE_MASK;
   3178	queuing |= (input->prio << IGC_FHFT_PRIO_SHIFT) & IGC_FHFT_PRIO_MASK;
   3179
   3180	if (input->immediate_irq)
   3181		queuing |= IGC_FHFT_IMM_INT;
   3182
   3183	if (input->drop)
   3184		queuing |= IGC_FHFT_DROP;
   3185
   3186	wr32(fhft + 0xFC, queuing);
   3187
   3188	/* Write data (128 byte) and mask (128 bit) */
   3189	for (i = 0; i < 16; ++i) {
   3190		const size_t data_idx = i * 8;
   3191		const size_t row_idx = i * 16;
   3192		u32 dw0 =
   3193			(data[data_idx + 0] << 0) |
   3194			(data[data_idx + 1] << 8) |
   3195			(data[data_idx + 2] << 16) |
   3196			(data[data_idx + 3] << 24);
   3197		u32 dw1 =
   3198			(data[data_idx + 4] << 0) |
   3199			(data[data_idx + 5] << 8) |
   3200			(data[data_idx + 6] << 16) |
   3201			(data[data_idx + 7] << 24);
   3202		u32 tmp;
   3203
   3204		/* Write row: dw0, dw1 and mask */
   3205		wr32(fhft + row_idx, dw0);
   3206		wr32(fhft + row_idx + 4, dw1);
   3207
   3208		/* mask is only valid for MASK(7, 0) */
   3209		tmp = rd32(fhft + row_idx + 8);
   3210		tmp &= ~GENMASK(7, 0);
   3211		tmp |= mask[i];
   3212		wr32(fhft + row_idx + 8, tmp);
   3213	}
   3214
   3215	/* Enable filter. */
   3216	wufc |= IGC_WUFC_FLEX_HQ;
    3217	if (input->index >= 8) {
    3218		/* Filters 0-7 are enabled via WUFC; filters 8-31 via WUFC_EXT. */
   3219		u32 wufc_ext = rd32(IGC_WUFC_EXT);
   3220
   3221		wufc_ext |= (IGC_WUFC_EXT_FLX8 << (input->index - 8));
   3222
   3223		wr32(IGC_WUFC_EXT, wufc_ext);
   3224	} else {
   3225		wufc |= (IGC_WUFC_FLX0 << input->index);
   3226	}
   3227	wr32(IGC_WUFC, wufc);
   3228
   3229	dev_dbg(&adapter->pdev->dev, "Added flex filter %u to HW.\n",
   3230		input->index);
   3231
   3232	return 0;
   3233}
   3234
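        /* Copy one match field into the flex filter data block at 'offset'
         * and mark the corresponding bytes in the per-byte mask. If a mask is
         * given, only bytes with a non-zero mask byte are marked as relevant;
         * otherwise every byte of the field is matched.
         */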
   3235static void igc_flex_filter_add_field(struct igc_flex_filter *flex,
   3236				      const void *src, unsigned int offset,
   3237				      size_t len, const void *mask)
   3238{
   3239	int i;
   3240
   3241	/* data */
   3242	memcpy(&flex->data[offset], src, len);
   3243
   3244	/* mask */
   3245	for (i = 0; i < len; ++i) {
   3246		const unsigned int idx = i + offset;
   3247		const u8 *ptr = mask;
   3248
   3249		if (mask) {
   3250			if (ptr[i] & 0xff)
   3251				flex->mask[idx / 8] |= BIT(idx % 8);
   3252
   3253			continue;
   3254		}
   3255
   3256		flex->mask[idx / 8] |= BIT(idx % 8);
   3257	}
   3258}
   3259
   3260static int igc_find_avail_flex_filter_slot(struct igc_adapter *adapter)
   3261{
   3262	struct igc_hw *hw = &adapter->hw;
   3263	u32 wufc, wufc_ext;
   3264	int i;
   3265
   3266	wufc = rd32(IGC_WUFC);
   3267	wufc_ext = rd32(IGC_WUFC_EXT);
   3268
   3269	for (i = 0; i < MAX_FLEX_FILTER; i++) {
   3270		if (i < 8) {
   3271			if (!(wufc & (IGC_WUFC_FLX0 << i)))
   3272				return i;
   3273		} else {
   3274			if (!(wufc_ext & (IGC_WUFC_EXT_FLX8 << (i - 8))))
   3275				return i;
   3276		}
   3277	}
   3278
   3279	return -ENOSPC;
   3280}
   3281
   3282static bool igc_flex_filter_in_use(struct igc_adapter *adapter)
   3283{
   3284	struct igc_hw *hw = &adapter->hw;
   3285	u32 wufc, wufc_ext;
   3286
   3287	wufc = rd32(IGC_WUFC);
   3288	wufc_ext = rd32(IGC_WUFC_EXT);
   3289
   3290	if (wufc & IGC_WUFC_FILTER_MASK)
   3291		return true;
   3292
   3293	if (wufc_ext & IGC_WUFC_EXT_FILTER_MASK)
   3294		return true;
   3295
   3296	return false;
   3297}
   3298
   3299static int igc_add_flex_filter(struct igc_adapter *adapter,
   3300			       struct igc_nfc_rule *rule)
   3301{
   3302	struct igc_flex_filter flex = { };
   3303	struct igc_nfc_filter *filter = &rule->filter;
   3304	unsigned int eth_offset, user_offset;
   3305	int ret, index;
   3306	bool vlan;
   3307
   3308	index = igc_find_avail_flex_filter_slot(adapter);
   3309	if (index < 0)
   3310		return -ENOSPC;
   3311
   3312	/* Construct the flex filter:
   3313	 *  -> dest_mac [6]
   3314	 *  -> src_mac [6]
   3315	 *  -> tpid [2]
   3316	 *  -> vlan tci [2]
   3317	 *  -> ether type [2]
   3318	 *  -> user data [8]
    3319	 *  -> = 26 bytes, padded to a filter length of 32 (multiple of 8)
   3320	 */
   3321	flex.index    = index;
   3322	flex.length   = 32;
   3323	flex.rx_queue = rule->action;
   3324
   3325	vlan = rule->filter.vlan_tci || rule->filter.vlan_etype;
   3326	eth_offset = vlan ? 16 : 12;
   3327	user_offset = vlan ? 18 : 14;
   3328
   3329	/* Add destination MAC  */
   3330	if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR)
   3331		igc_flex_filter_add_field(&flex, &filter->dst_addr, 0,
   3332					  ETH_ALEN, NULL);
   3333
   3334	/* Add source MAC */
   3335	if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR)
   3336		igc_flex_filter_add_field(&flex, &filter->src_addr, 6,
   3337					  ETH_ALEN, NULL);
   3338
   3339	/* Add VLAN etype */
   3340	if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE)
   3341		igc_flex_filter_add_field(&flex, &filter->vlan_etype, 12,
   3342					  sizeof(filter->vlan_etype),
   3343					  NULL);
   3344
   3345	/* Add VLAN TCI */
   3346	if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI)
   3347		igc_flex_filter_add_field(&flex, &filter->vlan_tci, 14,
   3348					  sizeof(filter->vlan_tci), NULL);
   3349
   3350	/* Add Ether type */
   3351	if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
   3352		__be16 etype = cpu_to_be16(filter->etype);
   3353
   3354		igc_flex_filter_add_field(&flex, &etype, eth_offset,
   3355					  sizeof(etype), NULL);
   3356	}
   3357
   3358	/* Add user data */
   3359	if (rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA)
   3360		igc_flex_filter_add_field(&flex, &filter->user_data,
   3361					  user_offset,
   3362					  sizeof(filter->user_data),
   3363					  filter->user_mask);
   3364
   3365	/* Add it down to the hardware and enable it. */
   3366	ret = igc_write_flex_filter_ll(adapter, &flex);
   3367	if (ret)
   3368		return ret;
   3369
   3370	filter->flex_index = index;
   3371
   3372	return 0;
   3373}
   3374
   3375static void igc_del_flex_filter(struct igc_adapter *adapter,
   3376				u16 reg_index)
   3377{
   3378	struct igc_hw *hw = &adapter->hw;
   3379	u32 wufc;
   3380
    3381	/* Just disable the filter; the filter table itself is kept
    3382	 * intact. A subsequent igc_add_flex_filter() will simply overwrite
    3383	 * the old data.
    3384	 */
    3385	if (reg_index >= 8) {
   3386		u32 wufc_ext = rd32(IGC_WUFC_EXT);
   3387
   3388		wufc_ext &= ~(IGC_WUFC_EXT_FLX8 << (reg_index - 8));
   3389		wr32(IGC_WUFC_EXT, wufc_ext);
   3390	} else {
   3391		wufc = rd32(IGC_WUFC);
   3392
   3393		wufc &= ~(IGC_WUFC_FLX0 << reg_index);
   3394		wr32(IGC_WUFC, wufc);
   3395	}
   3396
   3397	if (igc_flex_filter_in_use(adapter))
   3398		return;
   3399
   3400	/* No filters are in use, we may disable flex filters */
   3401	wufc = rd32(IGC_WUFC);
   3402	wufc &= ~IGC_WUFC_FLEX_HQ;
   3403	wr32(IGC_WUFC, wufc);
   3404}
   3405
   3406static int igc_enable_nfc_rule(struct igc_adapter *adapter,
   3407			       struct igc_nfc_rule *rule)
   3408{
   3409	int err;
   3410
    3411	if (rule->flex)
    3412		return igc_add_flex_filter(adapter, rule);
   3414
   3415	if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
   3416		err = igc_add_etype_filter(adapter, rule->filter.etype,
   3417					   rule->action);
   3418		if (err)
   3419			return err;
   3420	}
   3421
   3422	if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) {
   3423		err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC,
   3424					 rule->filter.src_addr, rule->action);
   3425		if (err)
   3426			return err;
   3427	}
   3428
   3429	if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) {
   3430		err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST,
   3431					 rule->filter.dst_addr, rule->action);
   3432		if (err)
   3433			return err;
   3434	}
   3435
   3436	if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
   3437		int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >>
   3438			   VLAN_PRIO_SHIFT;
   3439
   3440		err = igc_add_vlan_prio_filter(adapter, prio, rule->action);
   3441		if (err)
   3442			return err;
   3443	}
   3444
   3445	return 0;
   3446}
   3447
   3448static void igc_disable_nfc_rule(struct igc_adapter *adapter,
   3449				 const struct igc_nfc_rule *rule)
   3450{
   3451	if (rule->flex) {
   3452		igc_del_flex_filter(adapter, rule->filter.flex_index);
   3453		return;
   3454	}
   3455
   3456	if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE)
   3457		igc_del_etype_filter(adapter, rule->filter.etype);
   3458
   3459	if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
   3460		int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >>
   3461			   VLAN_PRIO_SHIFT;
   3462
   3463		igc_del_vlan_prio_filter(adapter, prio);
   3464	}
   3465
   3466	if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR)
   3467		igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC,
   3468				   rule->filter.src_addr);
   3469
   3470	if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR)
   3471		igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST,
   3472				   rule->filter.dst_addr);
   3473}
   3474
   3475/**
   3476 * igc_get_nfc_rule() - Get NFC rule
   3477 * @adapter: Pointer to adapter
   3478 * @location: Rule location
   3479 *
   3480 * Context: Expects adapter->nfc_rule_lock to be held by caller.
   3481 *
   3482 * Return: Pointer to NFC rule at @location. If not found, NULL.
   3483 */
   3484struct igc_nfc_rule *igc_get_nfc_rule(struct igc_adapter *adapter,
   3485				      u32 location)
   3486{
   3487	struct igc_nfc_rule *rule;
   3488
   3489	list_for_each_entry(rule, &adapter->nfc_rule_list, list) {
   3490		if (rule->location == location)
   3491			return rule;
   3492		if (rule->location > location)
   3493			break;
   3494	}
   3495
   3496	return NULL;
   3497}
   3498
   3499/**
   3500 * igc_del_nfc_rule() - Delete NFC rule
   3501 * @adapter: Pointer to adapter
   3502 * @rule: Pointer to rule to be deleted
   3503 *
   3504 * Disable NFC rule in hardware and delete it from adapter.
   3505 *
   3506 * Context: Expects adapter->nfc_rule_lock to be held by caller.
   3507 */
   3508void igc_del_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule)
   3509{
   3510	igc_disable_nfc_rule(adapter, rule);
   3511
   3512	list_del(&rule->list);
   3513	adapter->nfc_rule_count--;
   3514
   3515	kfree(rule);
   3516}
   3517
   3518static void igc_flush_nfc_rules(struct igc_adapter *adapter)
   3519{
   3520	struct igc_nfc_rule *rule, *tmp;
   3521
   3522	mutex_lock(&adapter->nfc_rule_lock);
   3523
   3524	list_for_each_entry_safe(rule, tmp, &adapter->nfc_rule_list, list)
   3525		igc_del_nfc_rule(adapter, rule);
   3526
   3527	mutex_unlock(&adapter->nfc_rule_lock);
   3528}
   3529
   3530/**
   3531 * igc_add_nfc_rule() - Add NFC rule
   3532 * @adapter: Pointer to adapter
   3533 * @rule: Pointer to rule to be added
   3534 *
   3535 * Enable NFC rule in hardware and add it to adapter.
   3536 *
   3537 * Context: Expects adapter->nfc_rule_lock to be held by caller.
   3538 *
   3539 * Return: 0 on success, negative errno on failure.
   3540 */
   3541int igc_add_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule)
   3542{
   3543	struct igc_nfc_rule *pred, *cur;
   3544	int err;
   3545
   3546	err = igc_enable_nfc_rule(adapter, rule);
   3547	if (err)
   3548		return err;
   3549
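        	/* Keep the list sorted by ascending rule->location: find the
        	 * last rule that precedes the new one and insert right after
        	 * it (or at the head of the list if there is none).
        	 */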
   3550	pred = NULL;
   3551	list_for_each_entry(cur, &adapter->nfc_rule_list, list) {
   3552		if (cur->location >= rule->location)
   3553			break;
   3554		pred = cur;
   3555	}
   3556
   3557	list_add(&rule->list, pred ? &pred->list : &adapter->nfc_rule_list);
   3558	adapter->nfc_rule_count++;
   3559	return 0;
   3560}
   3561
   3562static void igc_restore_nfc_rules(struct igc_adapter *adapter)
   3563{
   3564	struct igc_nfc_rule *rule;
   3565
   3566	mutex_lock(&adapter->nfc_rule_lock);
   3567
   3568	list_for_each_entry_reverse(rule, &adapter->nfc_rule_list, list)
   3569		igc_enable_nfc_rule(adapter, rule);
   3570
   3571	mutex_unlock(&adapter->nfc_rule_lock);
   3572}
   3573
   3574static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr)
   3575{
   3576	struct igc_adapter *adapter = netdev_priv(netdev);
   3577
   3578	return igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr, -1);
   3579}
   3580
   3581static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr)
   3582{
   3583	struct igc_adapter *adapter = netdev_priv(netdev);
   3584
   3585	igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr);
   3586	return 0;
   3587}
   3588
   3589/**
   3590 * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
   3591 * @netdev: network interface device structure
   3592 *
   3593 * The set_rx_mode entry point is called whenever the unicast or multicast
   3594 * address lists or the network interface flags are updated.  This routine is
   3595 * responsible for configuring the hardware for proper unicast, multicast,
   3596 * promiscuous mode, and all-multi behavior.
   3597 */
   3598static void igc_set_rx_mode(struct net_device *netdev)
   3599{
   3600	struct igc_adapter *adapter = netdev_priv(netdev);
   3601	struct igc_hw *hw = &adapter->hw;
   3602	u32 rctl = 0, rlpml = MAX_JUMBO_FRAME_SIZE;
   3603	int count;
   3604
   3605	/* Check for Promiscuous and All Multicast modes */
   3606	if (netdev->flags & IFF_PROMISC) {
   3607		rctl |= IGC_RCTL_UPE | IGC_RCTL_MPE;
   3608	} else {
   3609		if (netdev->flags & IFF_ALLMULTI) {
   3610			rctl |= IGC_RCTL_MPE;
   3611		} else {
    3612			/* Write addresses to the MTA; if the attempt fails,
    3613			 * fall back to multicast promiscuous mode so that we
    3614			 * can at least receive multicast traffic.
    3615			 */
   3616			count = igc_write_mc_addr_list(netdev);
   3617			if (count < 0)
   3618				rctl |= IGC_RCTL_MPE;
   3619		}
   3620	}
   3621
   3622	/* Write addresses to available RAR registers, if there is not
   3623	 * sufficient space to store all the addresses then enable
   3624	 * unicast promiscuous mode
   3625	 */
   3626	if (__dev_uc_sync(netdev, igc_uc_sync, igc_uc_unsync))
   3627		rctl |= IGC_RCTL_UPE;
   3628
   3629	/* update state of unicast and multicast */
   3630	rctl |= rd32(IGC_RCTL) & ~(IGC_RCTL_UPE | IGC_RCTL_MPE);
   3631	wr32(IGC_RCTL, rctl);
   3632
   3633#if (PAGE_SIZE < 8192)
   3634	if (adapter->max_frame_size <= IGC_MAX_FRAME_BUILD_SKB)
   3635		rlpml = IGC_MAX_FRAME_BUILD_SKB;
   3636#endif
   3637	wr32(IGC_RLPML, rlpml);
   3638}
   3639
   3640/**
   3641 * igc_configure - configure the hardware for RX and TX
   3642 * @adapter: private board structure
   3643 */
   3644static void igc_configure(struct igc_adapter *adapter)
   3645{
   3646	struct net_device *netdev = adapter->netdev;
   3647	int i = 0;
   3648
   3649	igc_get_hw_control(adapter);
   3650	igc_set_rx_mode(netdev);
   3651
   3652	igc_restore_vlan(adapter);
   3653
   3654	igc_setup_tctl(adapter);
   3655	igc_setup_mrqc(adapter);
   3656	igc_setup_rctl(adapter);
   3657
   3658	igc_set_default_mac_filter(adapter);
   3659	igc_restore_nfc_rules(adapter);
   3660
   3661	igc_configure_tx(adapter);
   3662	igc_configure_rx(adapter);
   3663
   3664	igc_rx_fifo_flush_base(&adapter->hw);
   3665
   3666	/* call igc_desc_unused which always leaves
   3667	 * at least 1 descriptor unused to make sure
   3668	 * next_to_use != next_to_clean
   3669	 */
   3670	for (i = 0; i < adapter->num_rx_queues; i++) {
   3671		struct igc_ring *ring = adapter->rx_ring[i];
   3672
   3673		if (ring->xsk_pool)
   3674			igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring));
   3675		else
   3676			igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
   3677	}
   3678}
   3679
   3680/**
   3681 * igc_write_ivar - configure ivar for given MSI-X vector
   3682 * @hw: pointer to the HW structure
   3683 * @msix_vector: vector number we are allocating to a given ring
   3684 * @index: row index of IVAR register to write within IVAR table
   3685 * @offset: column offset of in IVAR, should be multiple of 8
   3686 *
   3687 * The IVAR table consists of 2 columns,
    3688 * each containing a cause allocation for an Rx and Tx ring, and a
   3689 * variable number of rows depending on the number of queues supported.
   3690 */
   3691static void igc_write_ivar(struct igc_hw *hw, int msix_vector,
   3692			   int index, int offset)
   3693{
   3694	u32 ivar = array_rd32(IGC_IVAR0, index);
   3695
   3696	/* clear any bits that are currently set */
   3697	ivar &= ~((u32)0xFF << offset);
   3698
   3699	/* write vector and valid bit */
   3700	ivar |= (msix_vector | IGC_IVAR_VALID) << offset;
   3701
   3702	array_wr32(IGC_IVAR0, index, ivar);
   3703}
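
        /* Worked example (derived from igc_assign_vector() below): Rx queue 3
         * maps to IVAR0 row 3 >> 1 = 1 with column offset (3 & 0x1) << 4 = 16,
         * so its cause allocation occupies bits 23:16 of that register; the
         * matching Tx queue uses offset 16 + 8 = 24, i.e. bits 31:24.
         */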
   3704
   3705static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector)
   3706{
   3707	struct igc_adapter *adapter = q_vector->adapter;
   3708	struct igc_hw *hw = &adapter->hw;
   3709	int rx_queue = IGC_N0_QUEUE;
   3710	int tx_queue = IGC_N0_QUEUE;
   3711
   3712	if (q_vector->rx.ring)
   3713		rx_queue = q_vector->rx.ring->reg_idx;
   3714	if (q_vector->tx.ring)
   3715		tx_queue = q_vector->tx.ring->reg_idx;
   3716
   3717	switch (hw->mac.type) {
   3718	case igc_i225:
   3719		if (rx_queue > IGC_N0_QUEUE)
   3720			igc_write_ivar(hw, msix_vector,
   3721				       rx_queue >> 1,
   3722				       (rx_queue & 0x1) << 4);
   3723		if (tx_queue > IGC_N0_QUEUE)
   3724			igc_write_ivar(hw, msix_vector,
   3725				       tx_queue >> 1,
   3726				       ((tx_queue & 0x1) << 4) + 8);
   3727		q_vector->eims_value = BIT(msix_vector);
   3728		break;
   3729	default:
   3730		WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n");
   3731		break;
   3732	}
   3733
   3734	/* add q_vector eims value to global eims_enable_mask */
   3735	adapter->eims_enable_mask |= q_vector->eims_value;
   3736
   3737	/* configure q_vector to set itr on first interrupt */
   3738	q_vector->set_itr = 1;
   3739}
   3740
   3741/**
   3742 * igc_configure_msix - Configure MSI-X hardware
   3743 * @adapter: Pointer to adapter structure
   3744 *
   3745 * igc_configure_msix sets up the hardware to properly
   3746 * generate MSI-X interrupts.
   3747 */
   3748static void igc_configure_msix(struct igc_adapter *adapter)
   3749{
   3750	struct igc_hw *hw = &adapter->hw;
   3751	int i, vector = 0;
   3752	u32 tmp;
   3753
   3754	adapter->eims_enable_mask = 0;
   3755
   3756	/* set vector for other causes, i.e. link changes */
   3757	switch (hw->mac.type) {
   3758	case igc_i225:
   3759		/* Turn on MSI-X capability first, or our settings
   3760		 * won't stick.  And it will take days to debug.
   3761		 */
   3762		wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE |
   3763		     IGC_GPIE_PBA | IGC_GPIE_EIAME |
   3764		     IGC_GPIE_NSICR);
   3765
   3766		/* enable msix_other interrupt */
   3767		adapter->eims_other = BIT(vector);
   3768		tmp = (vector++ | IGC_IVAR_VALID) << 8;
   3769
   3770		wr32(IGC_IVAR_MISC, tmp);
   3771		break;
   3772	default:
   3773		/* do nothing, since nothing else supports MSI-X */
   3774		break;
   3775	} /* switch (hw->mac.type) */
   3776
   3777	adapter->eims_enable_mask |= adapter->eims_other;
   3778
   3779	for (i = 0; i < adapter->num_q_vectors; i++)
   3780		igc_assign_vector(adapter->q_vector[i], vector++);
   3781
   3782	wrfl();
   3783}
   3784
   3785/**
   3786 * igc_irq_enable - Enable default interrupt generation settings
   3787 * @adapter: board private structure
   3788 */
   3789static void igc_irq_enable(struct igc_adapter *adapter)
   3790{
   3791	struct igc_hw *hw = &adapter->hw;
   3792
   3793	if (adapter->msix_entries) {
   3794		u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA;
   3795		u32 regval = rd32(IGC_EIAC);
   3796
   3797		wr32(IGC_EIAC, regval | adapter->eims_enable_mask);
   3798		regval = rd32(IGC_EIAM);
   3799		wr32(IGC_EIAM, regval | adapter->eims_enable_mask);
   3800		wr32(IGC_EIMS, adapter->eims_enable_mask);
   3801		wr32(IGC_IMS, ims);
   3802	} else {
   3803		wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA);
   3804		wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA);
   3805	}
   3806}
   3807
   3808/**
   3809 * igc_irq_disable - Mask off interrupt generation on the NIC
   3810 * @adapter: board private structure
   3811 */
   3812static void igc_irq_disable(struct igc_adapter *adapter)
   3813{
   3814	struct igc_hw *hw = &adapter->hw;
   3815
   3816	if (adapter->msix_entries) {
   3817		u32 regval = rd32(IGC_EIAM);
   3818
   3819		wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask);
   3820		wr32(IGC_EIMC, adapter->eims_enable_mask);
   3821		regval = rd32(IGC_EIAC);
   3822		wr32(IGC_EIAC, regval & ~adapter->eims_enable_mask);
   3823	}
   3824
   3825	wr32(IGC_IAM, 0);
   3826	wr32(IGC_IMC, ~0);
   3827	wrfl();
   3828
   3829	if (adapter->msix_entries) {
   3830		int vector = 0, i;
   3831
   3832		synchronize_irq(adapter->msix_entries[vector++].vector);
   3833
   3834		for (i = 0; i < adapter->num_q_vectors; i++)
   3835			synchronize_irq(adapter->msix_entries[vector++].vector);
   3836	} else {
   3837		synchronize_irq(adapter->pdev->irq);
   3838	}
   3839}
   3840
   3841void igc_set_flag_queue_pairs(struct igc_adapter *adapter,
   3842			      const u32 max_rss_queues)
   3843{
    3844	/* Determine if we need to pair queues: if rss_queues is more than
    3845	 * half of max_rss_queues, pair the queues in order to conserve
    3846	 * interrupts due to the limited supply of vectors.
    3847	 */
   3848	if (adapter->rss_queues > (max_rss_queues / 2))
   3849		adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
   3850	else
   3851		adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS;
   3852}
   3853
   3854unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter)
   3855{
   3856	return IGC_MAX_RX_QUEUES;
   3857}
   3858
   3859static void igc_init_queue_configuration(struct igc_adapter *adapter)
   3860{
   3861	u32 max_rss_queues;
   3862
   3863	max_rss_queues = igc_get_max_rss_queues(adapter);
   3864	adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus());
   3865
   3866	igc_set_flag_queue_pairs(adapter, max_rss_queues);
   3867}
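
        /* Example (illustrative, assuming the usual four hardware queue pairs
         * on i225): a host with four or more online CPUs ends up with
         * rss_queues = 4, which is more than max_rss_queues / 2, so
         * IGC_FLAG_QUEUE_PAIRS is set and each interrupt vector services one
         * Rx and one Tx ring.
         */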
   3868
   3869/**
   3870 * igc_reset_q_vector - Reset config for interrupt vector
   3871 * @adapter: board private structure to initialize
   3872 * @v_idx: Index of vector to be reset
   3873 *
   3874 * If NAPI is enabled it will delete any references to the
   3875 * NAPI struct. This is preparation for igc_free_q_vector.
   3876 */
   3877static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx)
   3878{
   3879	struct igc_q_vector *q_vector = adapter->q_vector[v_idx];
   3880
   3881	/* if we're coming from igc_set_interrupt_capability, the vectors are
   3882	 * not yet allocated
   3883	 */
   3884	if (!q_vector)
   3885		return;
   3886
   3887	if (q_vector->tx.ring)
   3888		adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL;
   3889
   3890	if (q_vector->rx.ring)
   3891		adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL;
   3892
   3893	netif_napi_del(&q_vector->napi);
   3894}
   3895
   3896/**
   3897 * igc_free_q_vector - Free memory allocated for specific interrupt vector
   3898 * @adapter: board private structure to initialize
   3899 * @v_idx: Index of vector to be freed
   3900 *
   3901 * This function frees the memory allocated to the q_vector.
   3902 */
   3903static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx)
   3904{
   3905	struct igc_q_vector *q_vector = adapter->q_vector[v_idx];
   3906
   3907	adapter->q_vector[v_idx] = NULL;
   3908
   3909	/* igc_get_stats64() might access the rings on this vector,
   3910	 * we must wait a grace period before freeing it.
   3911	 */
   3912	if (q_vector)
   3913		kfree_rcu(q_vector, rcu);
   3914}
   3915
   3916/**
   3917 * igc_free_q_vectors - Free memory allocated for interrupt vectors
   3918 * @adapter: board private structure to initialize
   3919 *
   3920 * This function frees the memory allocated to the q_vectors.  In addition if
   3921 * NAPI is enabled it will delete any references to the NAPI struct prior
   3922 * to freeing the q_vector.
   3923 */
   3924static void igc_free_q_vectors(struct igc_adapter *adapter)
   3925{
   3926	int v_idx = adapter->num_q_vectors;
   3927
   3928	adapter->num_tx_queues = 0;
   3929	adapter->num_rx_queues = 0;
   3930	adapter->num_q_vectors = 0;
   3931
   3932	while (v_idx--) {
   3933		igc_reset_q_vector(adapter, v_idx);
   3934		igc_free_q_vector(adapter, v_idx);
   3935	}
   3936}
   3937
   3938/**
   3939 * igc_update_itr - update the dynamic ITR value based on statistics
   3940 * @q_vector: pointer to q_vector
   3941 * @ring_container: ring info to update the itr for
   3942 *
   3943 * Stores a new ITR value based on packets and byte
   3944 * counts during the last interrupt.  The advantage of per interrupt
   3945 * computation is faster updates and more accurate ITR for the current
   3946 * traffic pattern.  Constants in this function were computed
   3947 * based on theoretical maximum wire speed and thresholds were set based
   3948 * on testing data as well as attempting to minimize response time
   3949 * while increasing bulk throughput.
   3950 * NOTE: These calculations are only valid when operating in a single-
   3951 * queue environment.
   3952 */
   3953static void igc_update_itr(struct igc_q_vector *q_vector,
   3954			   struct igc_ring_container *ring_container)
   3955{
   3956	unsigned int packets = ring_container->total_packets;
   3957	unsigned int bytes = ring_container->total_bytes;
   3958	u8 itrval = ring_container->itr;
   3959
   3960	/* no packets, exit with status unchanged */
   3961	if (packets == 0)
   3962		return;
   3963
   3964	switch (itrval) {
   3965	case lowest_latency:
   3966		/* handle TSO and jumbo frames */
   3967		if (bytes / packets > 8000)
   3968			itrval = bulk_latency;
   3969		else if ((packets < 5) && (bytes > 512))
   3970			itrval = low_latency;
   3971		break;
   3972	case low_latency:  /* 50 usec aka 20000 ints/s */
   3973		if (bytes > 10000) {
   3974			/* this if handles the TSO accounting */
   3975			if (bytes / packets > 8000)
   3976				itrval = bulk_latency;
   3977			else if ((packets < 10) || ((bytes / packets) > 1200))
   3978				itrval = bulk_latency;
    3979			else if (packets > 35)
   3980				itrval = lowest_latency;
   3981		} else if (bytes / packets > 2000) {
   3982			itrval = bulk_latency;
   3983		} else if (packets <= 2 && bytes < 512) {
   3984			itrval = lowest_latency;
   3985		}
   3986		break;
   3987	case bulk_latency: /* 250 usec aka 4000 ints/s */
   3988		if (bytes > 25000) {
   3989			if (packets > 35)
   3990				itrval = low_latency;
   3991		} else if (bytes < 1500) {
   3992			itrval = low_latency;
   3993		}
   3994		break;
   3995	}
   3996
   3997	/* clear work counters since we have the values we need */
   3998	ring_container->total_bytes = 0;
   3999	ring_container->total_packets = 0;
   4000
   4001	/* write updated itr to ring container */
   4002	ring_container->itr = itrval;
   4003}
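
        /* Worked example for the heuristic above (illustrative): 20 packets
         * totalling 30000 bytes while in low_latency give bytes > 10000 and
         * bytes / packets = 1500 > 1200, so the ring moves to bulk_latency;
         * two packets totalling under 512 bytes would instead drop it to
         * lowest_latency.
         */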
   4004
   4005static void igc_set_itr(struct igc_q_vector *q_vector)
   4006{
   4007	struct igc_adapter *adapter = q_vector->adapter;
   4008	u32 new_itr = q_vector->itr_val;
   4009	u8 current_itr = 0;
   4010
   4011	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
   4012	switch (adapter->link_speed) {
   4013	case SPEED_10:
   4014	case SPEED_100:
   4015		current_itr = 0;
   4016		new_itr = IGC_4K_ITR;
   4017		goto set_itr_now;
   4018	default:
   4019		break;
   4020	}
   4021
   4022	igc_update_itr(q_vector, &q_vector->tx);
   4023	igc_update_itr(q_vector, &q_vector->rx);
   4024
   4025	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
   4026
   4027	/* conservative mode (itr 3) eliminates the lowest_latency setting */
   4028	if (current_itr == lowest_latency &&
   4029	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
   4030	    (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
   4031		current_itr = low_latency;
   4032
   4033	switch (current_itr) {
   4034	/* counts and packets in update_itr are dependent on these numbers */
   4035	case lowest_latency:
   4036		new_itr = IGC_70K_ITR; /* 70,000 ints/sec */
   4037		break;
   4038	case low_latency:
   4039		new_itr = IGC_20K_ITR; /* 20,000 ints/sec */
   4040		break;
   4041	case bulk_latency:
   4042		new_itr = IGC_4K_ITR;  /* 4,000 ints/sec */
   4043		break;
   4044	default:
   4045		break;
   4046	}
   4047
   4048set_itr_now:
   4049	if (new_itr != q_vector->itr_val) {
    4050		/* this attempts to bias the interrupt rate towards Bulk
    4051		 * by adding intermediate steps when the ITR value is
    4052		 * increasing (i.e. the interrupt rate is dropping)
    4053		 */
   4054		new_itr = new_itr > q_vector->itr_val ?
   4055			  max((new_itr * q_vector->itr_val) /
   4056			  (new_itr + (q_vector->itr_val >> 2)),
   4057			  new_itr) : new_itr;
   4058		/* Don't write the value here; it resets the adapter's
   4059		 * internal timer, and causes us to delay far longer than
   4060		 * we should between interrupts.  Instead, we write the ITR
   4061		 * value at the beginning of the next interrupt so the timing
   4062		 * ends up being correct.
   4063		 */
   4064		q_vector->itr_val = new_itr;
   4065		q_vector->set_itr = 1;
   4066	}
   4067}
   4068
   4069static void igc_reset_interrupt_capability(struct igc_adapter *adapter)
   4070{
   4071	int v_idx = adapter->num_q_vectors;
   4072
   4073	if (adapter->msix_entries) {
   4074		pci_disable_msix(adapter->pdev);
   4075		kfree(adapter->msix_entries);
   4076		adapter->msix_entries = NULL;
   4077	} else if (adapter->flags & IGC_FLAG_HAS_MSI) {
   4078		pci_disable_msi(adapter->pdev);
   4079	}
   4080
   4081	while (v_idx--)
   4082		igc_reset_q_vector(adapter, v_idx);
   4083}
   4084
   4085/**
   4086 * igc_set_interrupt_capability - set MSI or MSI-X if supported
   4087 * @adapter: Pointer to adapter structure
   4088 * @msix: boolean value for MSI-X capability
   4089 *
   4090 * Attempt to configure interrupts using the best available
   4091 * capabilities of the hardware and kernel.
   4092 */
   4093static void igc_set_interrupt_capability(struct igc_adapter *adapter,
   4094					 bool msix)
   4095{
   4096	int numvecs, i;
   4097	int err;
   4098
   4099	if (!msix)
   4100		goto msi_only;
   4101	adapter->flags |= IGC_FLAG_HAS_MSIX;
   4102
   4103	/* Number of supported queues. */
   4104	adapter->num_rx_queues = adapter->rss_queues;
   4105
   4106	adapter->num_tx_queues = adapter->rss_queues;
   4107
   4108	/* start with one vector for every Rx queue */
   4109	numvecs = adapter->num_rx_queues;
   4110
   4111	/* if Tx handler is separate add 1 for every Tx queue */
   4112	if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS))
   4113		numvecs += adapter->num_tx_queues;
   4114
   4115	/* store the number of vectors reserved for queues */
   4116	adapter->num_q_vectors = numvecs;
   4117
   4118	/* add 1 vector for link status interrupts */
   4119	numvecs++;
   4120
   4121	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
   4122					GFP_KERNEL);
   4123
   4124	if (!adapter->msix_entries)
   4125		return;
   4126
   4127	/* populate entry values */
   4128	for (i = 0; i < numvecs; i++)
   4129		adapter->msix_entries[i].entry = i;
   4130
   4131	err = pci_enable_msix_range(adapter->pdev,
   4132				    adapter->msix_entries,
   4133				    numvecs,
   4134				    numvecs);
   4135	if (err > 0)
   4136		return;
   4137
   4138	kfree(adapter->msix_entries);
   4139	adapter->msix_entries = NULL;
   4140
   4141	igc_reset_interrupt_capability(adapter);
   4142
   4143msi_only:
   4144	adapter->flags &= ~IGC_FLAG_HAS_MSIX;
   4145
   4146	adapter->rss_queues = 1;
   4147	adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
   4148	adapter->num_rx_queues = 1;
   4149	adapter->num_tx_queues = 1;
   4150	adapter->num_q_vectors = 1;
   4151	if (!pci_enable_msi(adapter->pdev))
   4152		adapter->flags |= IGC_FLAG_HAS_MSI;
   4153}
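
        /* Example vector budget (illustrative): with 4 RSS queues and queue
         * pairing enabled, numvecs = 4 queue vectors + 1 link/other vector =
         * 5 MSI-X entries; without pairing it would be 4 Rx + 4 Tx + 1 = 9.
         */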
   4154
   4155/**
   4156 * igc_update_ring_itr - update the dynamic ITR value based on packet size
   4157 * @q_vector: pointer to q_vector
   4158 *
    4159 * Stores a new ITR value based strictly on packet size.  This
   4160 * algorithm is less sophisticated than that used in igc_update_itr,
   4161 * due to the difficulty of synchronizing statistics across multiple
   4162 * receive rings.  The divisors and thresholds used by this function
   4163 * were determined based on theoretical maximum wire speed and testing
   4164 * data, in order to minimize response time while increasing bulk
   4165 * throughput.
   4166 * NOTE: This function is called only when operating in a multiqueue
   4167 * receive environment.
   4168 */
   4169static void igc_update_ring_itr(struct igc_q_vector *q_vector)
   4170{
   4171	struct igc_adapter *adapter = q_vector->adapter;
   4172	int new_val = q_vector->itr_val;
   4173	int avg_wire_size = 0;
   4174	unsigned int packets;
   4175
   4176	/* For non-gigabit speeds, just fix the interrupt rate at 4000
   4177	 * ints/sec - ITR timer value of 120 ticks.
   4178	 */
   4179	switch (adapter->link_speed) {
   4180	case SPEED_10:
   4181	case SPEED_100:
   4182		new_val = IGC_4K_ITR;
   4183		goto set_itr_val;
   4184	default:
   4185		break;
   4186	}
   4187
   4188	packets = q_vector->rx.total_packets;
   4189	if (packets)
   4190		avg_wire_size = q_vector->rx.total_bytes / packets;
   4191
   4192	packets = q_vector->tx.total_packets;
   4193	if (packets)
   4194		avg_wire_size = max_t(u32, avg_wire_size,
   4195				      q_vector->tx.total_bytes / packets);
   4196
   4197	/* if avg_wire_size isn't set no work was done */
   4198	if (!avg_wire_size)
   4199		goto clear_counts;
   4200
   4201	/* Add 24 bytes to size to account for CRC, preamble, and gap */
   4202	avg_wire_size += 24;
   4203
   4204	/* Don't starve jumbo frames */
   4205	avg_wire_size = min(avg_wire_size, 3000);
   4206
   4207	/* Give a little boost to mid-size frames */
   4208	if (avg_wire_size > 300 && avg_wire_size < 1200)
   4209		new_val = avg_wire_size / 3;
   4210	else
   4211		new_val = avg_wire_size / 2;
   4212
   4213	/* conservative mode (itr 3) eliminates the lowest_latency setting */
   4214	if (new_val < IGC_20K_ITR &&
   4215	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
   4216	    (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
   4217		new_val = IGC_20K_ITR;
   4218
   4219set_itr_val:
   4220	if (new_val != q_vector->itr_val) {
   4221		q_vector->itr_val = new_val;
   4222		q_vector->set_itr = 1;
   4223	}
   4224clear_counts:
   4225	q_vector->rx.total_bytes = 0;
   4226	q_vector->rx.total_packets = 0;
   4227	q_vector->tx.total_bytes = 0;
   4228	q_vector->tx.total_packets = 0;
   4229}
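
        /* Worked example (illustrative): an average frame of 700 bytes becomes
         * 700 + 24 = 724 after the CRC/preamble/gap adjustment, which falls in
         * the 300..1200 "boost" window, so new_val = 724 / 3 = 241; a
         * 1400-byte average would instead use 1424 / 2 = 712.
         */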
   4230
   4231static void igc_ring_irq_enable(struct igc_q_vector *q_vector)
   4232{
   4233	struct igc_adapter *adapter = q_vector->adapter;
   4234	struct igc_hw *hw = &adapter->hw;
   4235
   4236	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
   4237	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
   4238		if (adapter->num_q_vectors == 1)
   4239			igc_set_itr(q_vector);
   4240		else
   4241			igc_update_ring_itr(q_vector);
   4242	}
   4243
   4244	if (!test_bit(__IGC_DOWN, &adapter->state)) {
   4245		if (adapter->msix_entries)
   4246			wr32(IGC_EIMS, q_vector->eims_value);
   4247		else
   4248			igc_irq_enable(adapter);
   4249	}
   4250}
   4251
   4252static void igc_add_ring(struct igc_ring *ring,
   4253			 struct igc_ring_container *head)
   4254{
   4255	head->ring = ring;
   4256	head->count++;
   4257}
   4258
   4259/**
   4260 * igc_cache_ring_register - Descriptor ring to register mapping
   4261 * @adapter: board private structure to initialize
   4262 *
   4263 * Once we know the feature-set enabled for the device, we'll cache
   4264 * the register offset the descriptor ring is assigned to.
   4265 */
   4266static void igc_cache_ring_register(struct igc_adapter *adapter)
   4267{
   4268	int i = 0, j = 0;
   4269
   4270	switch (adapter->hw.mac.type) {
   4271	case igc_i225:
   4272	default:
   4273		for (; i < adapter->num_rx_queues; i++)
   4274			adapter->rx_ring[i]->reg_idx = i;
   4275		for (; j < adapter->num_tx_queues; j++)
   4276			adapter->tx_ring[j]->reg_idx = j;
   4277		break;
   4278	}
   4279}
   4280
   4281/**
   4282 * igc_poll - NAPI Rx polling callback
   4283 * @napi: napi polling structure
   4284 * @budget: count of how many packets we should handle
   4285 */
   4286static int igc_poll(struct napi_struct *napi, int budget)
   4287{
   4288	struct igc_q_vector *q_vector = container_of(napi,
   4289						     struct igc_q_vector,
   4290						     napi);
   4291	struct igc_ring *rx_ring = q_vector->rx.ring;
   4292	bool clean_complete = true;
   4293	int work_done = 0;
   4294
   4295	if (q_vector->tx.ring)
   4296		clean_complete = igc_clean_tx_irq(q_vector, budget);
   4297
   4298	if (rx_ring) {
   4299		int cleaned = rx_ring->xsk_pool ?
   4300			      igc_clean_rx_irq_zc(q_vector, budget) :
   4301			      igc_clean_rx_irq(q_vector, budget);
   4302
   4303		work_done += cleaned;
   4304		if (cleaned >= budget)
   4305			clean_complete = false;
   4306	}
   4307
   4308	/* If all work not completed, return budget and keep polling */
   4309	if (!clean_complete)
   4310		return budget;
   4311
   4312	/* Exit the polling mode, but don't re-enable interrupts if stack might
   4313	 * poll us due to busy-polling
   4314	 */
   4315	if (likely(napi_complete_done(napi, work_done)))
   4316		igc_ring_irq_enable(q_vector);
   4317
   4318	return min(work_done, budget - 1);
   4319}
   4320
   4321/**
   4322 * igc_alloc_q_vector - Allocate memory for a single interrupt vector
   4323 * @adapter: board private structure to initialize
   4324 * @v_count: q_vectors allocated on adapter, used for ring interleaving
   4325 * @v_idx: index of vector in adapter struct
   4326 * @txr_count: total number of Tx rings to allocate
   4327 * @txr_idx: index of first Tx ring to allocate
   4328 * @rxr_count: total number of Rx rings to allocate
   4329 * @rxr_idx: index of first Rx ring to allocate
   4330 *
   4331 * We allocate one q_vector.  If allocation fails we return -ENOMEM.
   4332 */
   4333static int igc_alloc_q_vector(struct igc_adapter *adapter,
   4334			      unsigned int v_count, unsigned int v_idx,
   4335			      unsigned int txr_count, unsigned int txr_idx,
   4336			      unsigned int rxr_count, unsigned int rxr_idx)
   4337{
   4338	struct igc_q_vector *q_vector;
   4339	struct igc_ring *ring;
   4340	int ring_count;
   4341
   4342	/* igc only supports 1 Tx and/or 1 Rx queue per vector */
   4343	if (txr_count > 1 || rxr_count > 1)
   4344		return -ENOMEM;
   4345
   4346	ring_count = txr_count + rxr_count;
   4347
   4348	/* allocate q_vector and rings */
   4349	q_vector = adapter->q_vector[v_idx];
   4350	if (!q_vector)
   4351		q_vector = kzalloc(struct_size(q_vector, ring, ring_count),
   4352				   GFP_KERNEL);
   4353	else
   4354		memset(q_vector, 0, struct_size(q_vector, ring, ring_count));
   4355	if (!q_vector)
   4356		return -ENOMEM;
   4357
   4358	/* initialize NAPI */
   4359	netif_napi_add(adapter->netdev, &q_vector->napi,
   4360		       igc_poll, 64);
   4361
   4362	/* tie q_vector and adapter together */
   4363	adapter->q_vector[v_idx] = q_vector;
   4364	q_vector->adapter = adapter;
   4365
   4366	/* initialize work limits */
   4367	q_vector->tx.work_limit = adapter->tx_work_limit;
   4368
   4369	/* initialize ITR configuration */
   4370	q_vector->itr_register = adapter->io_addr + IGC_EITR(0);
   4371	q_vector->itr_val = IGC_START_ITR;
   4372
   4373	/* initialize pointer to rings */
   4374	ring = q_vector->ring;
   4375
   4376	/* initialize ITR */
   4377	if (rxr_count) {
   4378		/* rx or rx/tx vector */
   4379		if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3)
   4380			q_vector->itr_val = adapter->rx_itr_setting;
   4381	} else {
   4382		/* tx only vector */
   4383		if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3)
   4384			q_vector->itr_val = adapter->tx_itr_setting;
   4385	}
   4386
   4387	if (txr_count) {
   4388		/* assign generic ring traits */
   4389		ring->dev = &adapter->pdev->dev;
   4390		ring->netdev = adapter->netdev;
   4391
   4392		/* configure backlink on ring */
   4393		ring->q_vector = q_vector;
   4394
   4395		/* update q_vector Tx values */
   4396		igc_add_ring(ring, &q_vector->tx);
   4397
   4398		/* apply Tx specific ring traits */
   4399		ring->count = adapter->tx_ring_count;
   4400		ring->queue_index = txr_idx;
   4401
   4402		/* assign ring to adapter */
   4403		adapter->tx_ring[txr_idx] = ring;
   4404
   4405		/* push pointer to next ring */
   4406		ring++;
   4407	}
   4408
   4409	if (rxr_count) {
   4410		/* assign generic ring traits */
   4411		ring->dev = &adapter->pdev->dev;
   4412		ring->netdev = adapter->netdev;
   4413
   4414		/* configure backlink on ring */
   4415		ring->q_vector = q_vector;
   4416
   4417		/* update q_vector Rx values */
   4418		igc_add_ring(ring, &q_vector->rx);
   4419
   4420		/* apply Rx specific ring traits */
   4421		ring->count = adapter->rx_ring_count;
   4422		ring->queue_index = rxr_idx;
   4423
   4424		/* assign ring to adapter */
   4425		adapter->rx_ring[rxr_idx] = ring;
   4426	}
   4427
   4428	return 0;
   4429}
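
        /* Note: the rings live in the q_vector's trailing flexible array, so
         * when a vector carries both a Tx and an Rx ring, ring[0] is the Tx
         * ring and ring[1] the Rx ring, which is why the ring pointer is
         * advanced between the two blocks above.
         */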
   4430
   4431/**
   4432 * igc_alloc_q_vectors - Allocate memory for interrupt vectors
   4433 * @adapter: board private structure to initialize
   4434 *
   4435 * We allocate one q_vector per queue interrupt.  If allocation fails we
   4436 * return -ENOMEM.
   4437 */
   4438static int igc_alloc_q_vectors(struct igc_adapter *adapter)
   4439{
   4440	int rxr_remaining = adapter->num_rx_queues;
   4441	int txr_remaining = adapter->num_tx_queues;
   4442	int rxr_idx = 0, txr_idx = 0, v_idx = 0;
   4443	int q_vectors = adapter->num_q_vectors;
   4444	int err;
   4445
   4446	if (q_vectors >= (rxr_remaining + txr_remaining)) {
   4447		for (; rxr_remaining; v_idx++) {
   4448			err = igc_alloc_q_vector(adapter, q_vectors, v_idx,
   4449						 0, 0, 1, rxr_idx);
   4450
   4451			if (err)
   4452				goto err_out;
   4453
   4454			/* update counts and index */
   4455			rxr_remaining--;
   4456			rxr_idx++;
   4457		}
   4458	}
   4459
   4460	for (; v_idx < q_vectors; v_idx++) {
   4461		int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
   4462		int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
   4463
   4464		err = igc_alloc_q_vector(adapter, q_vectors, v_idx,
   4465					 tqpv, txr_idx, rqpv, rxr_idx);
   4466
   4467		if (err)
   4468			goto err_out;
   4469
   4470		/* update counts and index */
   4471		rxr_remaining -= rqpv;
   4472		txr_remaining -= tqpv;
   4473		rxr_idx++;
   4474		txr_idx++;
   4475	}
   4476
   4477	return 0;
   4478
   4479err_out:
   4480	adapter->num_tx_queues = 0;
   4481	adapter->num_rx_queues = 0;
   4482	adapter->num_q_vectors = 0;
   4483
   4484	while (v_idx--)
   4485		igc_free_q_vector(adapter, v_idx);
   4486
   4487	return -ENOMEM;
   4488}
   4489
   4490/**
   4491 * igc_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
   4492 * @adapter: Pointer to adapter structure
   4493 * @msix: boolean for MSI-X capability
   4494 *
   4495 * This function initializes the interrupts and allocates all of the queues.
   4496 */
   4497static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix)
   4498{
   4499	struct net_device *dev = adapter->netdev;
   4500	int err = 0;
   4501
   4502	igc_set_interrupt_capability(adapter, msix);
   4503
   4504	err = igc_alloc_q_vectors(adapter);
   4505	if (err) {
   4506		netdev_err(dev, "Unable to allocate memory for vectors\n");
   4507		goto err_alloc_q_vectors;
   4508	}
   4509
   4510	igc_cache_ring_register(adapter);
   4511
   4512	return 0;
   4513
   4514err_alloc_q_vectors:
   4515	igc_reset_interrupt_capability(adapter);
   4516	return err;
   4517}
   4518
   4519/**
   4520 * igc_sw_init - Initialize general software structures (struct igc_adapter)
   4521 * @adapter: board private structure to initialize
   4522 *
   4523 * igc_sw_init initializes the Adapter private data structure.
   4524 * Fields are initialized based on PCI device information and
   4525 * OS network device settings (MTU size).
   4526 */
   4527static int igc_sw_init(struct igc_adapter *adapter)
   4528{
   4529	struct net_device *netdev = adapter->netdev;
   4530	struct pci_dev *pdev = adapter->pdev;
   4531	struct igc_hw *hw = &adapter->hw;
   4532
   4533	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
   4534
   4535	/* set default ring sizes */
   4536	adapter->tx_ring_count = IGC_DEFAULT_TXD;
   4537	adapter->rx_ring_count = IGC_DEFAULT_RXD;
   4538
   4539	/* set default ITR values */
   4540	adapter->rx_itr_setting = IGC_DEFAULT_ITR;
   4541	adapter->tx_itr_setting = IGC_DEFAULT_ITR;
   4542
   4543	/* set default work limits */
   4544	adapter->tx_work_limit = IGC_DEFAULT_TX_WORK;
   4545
   4546	/* adjust max frame to be at least the size of a standard frame */
   4547	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
   4548				VLAN_HLEN;
   4549	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
   4550
   4551	mutex_init(&adapter->nfc_rule_lock);
   4552	INIT_LIST_HEAD(&adapter->nfc_rule_list);
   4553	adapter->nfc_rule_count = 0;
   4554
   4555	spin_lock_init(&adapter->stats64_lock);
   4556	/* Assume MSI-X interrupts, will be checked during IRQ allocation */
   4557	adapter->flags |= IGC_FLAG_HAS_MSIX;
   4558
   4559	igc_init_queue_configuration(adapter);
   4560
   4561	/* This call may decrease the number of queues */
   4562	if (igc_init_interrupt_scheme(adapter, true)) {
   4563		netdev_err(netdev, "Unable to allocate memory for queues\n");
   4564		return -ENOMEM;
   4565	}
   4566
   4567	/* Explicitly disable IRQ since the NIC can be in any state. */
   4568	igc_irq_disable(adapter);
   4569
   4570	set_bit(__IGC_DOWN, &adapter->state);
   4571
   4572	return 0;
   4573}
   4574
   4575/**
   4576 * igc_up - Open the interface and prepare it to handle traffic
   4577 * @adapter: board private structure
   4578 */
   4579void igc_up(struct igc_adapter *adapter)
   4580{
   4581	struct igc_hw *hw = &adapter->hw;
   4582	int i = 0;
   4583
   4584	/* hardware has been reset, we need to reload some things */
   4585	igc_configure(adapter);
   4586
   4587	clear_bit(__IGC_DOWN, &adapter->state);
   4588
   4589	for (i = 0; i < adapter->num_q_vectors; i++)
   4590		napi_enable(&adapter->q_vector[i]->napi);
   4591
   4592	if (adapter->msix_entries)
   4593		igc_configure_msix(adapter);
   4594	else
   4595		igc_assign_vector(adapter->q_vector[0], 0);
   4596
   4597	/* Clear any pending interrupts. */
   4598	rd32(IGC_ICR);
   4599	igc_irq_enable(adapter);
   4600
   4601	netif_tx_start_all_queues(adapter->netdev);
   4602
   4603	/* start the watchdog. */
   4604	hw->mac.get_link_status = true;
   4605	schedule_work(&adapter->watchdog_task);
   4606}
   4607
   4608/**
   4609 * igc_update_stats - Update the board statistics counters
   4610 * @adapter: board private structure
   4611 */
   4612void igc_update_stats(struct igc_adapter *adapter)
   4613{
   4614	struct rtnl_link_stats64 *net_stats = &adapter->stats64;
   4615	struct pci_dev *pdev = adapter->pdev;
   4616	struct igc_hw *hw = &adapter->hw;
   4617	u64 _bytes, _packets;
   4618	u64 bytes, packets;
   4619	unsigned int start;
   4620	u32 mpc;
   4621	int i;
   4622
   4623	/* Prevent stats update while adapter is being reset, or if the pci
   4624	 * connection is down.
   4625	 */
   4626	if (adapter->link_speed == 0)
   4627		return;
   4628	if (pci_channel_offline(pdev))
   4629		return;
   4630
   4631	packets = 0;
   4632	bytes = 0;
   4633
   4634	rcu_read_lock();
   4635	for (i = 0; i < adapter->num_rx_queues; i++) {
   4636		struct igc_ring *ring = adapter->rx_ring[i];
   4637		u32 rqdpc = rd32(IGC_RQDPC(i));
   4638
   4639		if (hw->mac.type >= igc_i225)
   4640			wr32(IGC_RQDPC(i), 0);
   4641
   4642		if (rqdpc) {
   4643			ring->rx_stats.drops += rqdpc;
   4644			net_stats->rx_fifo_errors += rqdpc;
   4645		}
   4646
   4647		do {
   4648			start = u64_stats_fetch_begin_irq(&ring->rx_syncp);
   4649			_bytes = ring->rx_stats.bytes;
   4650			_packets = ring->rx_stats.packets;
   4651		} while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start));
   4652		bytes += _bytes;
   4653		packets += _packets;
   4654	}
   4655
   4656	net_stats->rx_bytes = bytes;
   4657	net_stats->rx_packets = packets;
   4658
   4659	packets = 0;
   4660	bytes = 0;
   4661	for (i = 0; i < adapter->num_tx_queues; i++) {
   4662		struct igc_ring *ring = adapter->tx_ring[i];
   4663
   4664		do {
   4665			start = u64_stats_fetch_begin_irq(&ring->tx_syncp);
   4666			_bytes = ring->tx_stats.bytes;
   4667			_packets = ring->tx_stats.packets;
   4668		} while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start));
   4669		bytes += _bytes;
   4670		packets += _packets;
   4671	}
   4672	net_stats->tx_bytes = bytes;
   4673	net_stats->tx_packets = packets;
   4674	rcu_read_unlock();
   4675
   4676	/* read stats registers */
   4677	adapter->stats.crcerrs += rd32(IGC_CRCERRS);
   4678	adapter->stats.gprc += rd32(IGC_GPRC);
   4679	adapter->stats.gorc += rd32(IGC_GORCL);
   4680	rd32(IGC_GORCH); /* clear GORCL */
   4681	adapter->stats.bprc += rd32(IGC_BPRC);
   4682	adapter->stats.mprc += rd32(IGC_MPRC);
   4683	adapter->stats.roc += rd32(IGC_ROC);
   4684
   4685	adapter->stats.prc64 += rd32(IGC_PRC64);
   4686	adapter->stats.prc127 += rd32(IGC_PRC127);
   4687	adapter->stats.prc255 += rd32(IGC_PRC255);
   4688	adapter->stats.prc511 += rd32(IGC_PRC511);
   4689	adapter->stats.prc1023 += rd32(IGC_PRC1023);
   4690	adapter->stats.prc1522 += rd32(IGC_PRC1522);
   4691	adapter->stats.tlpic += rd32(IGC_TLPIC);
   4692	adapter->stats.rlpic += rd32(IGC_RLPIC);
   4693	adapter->stats.hgptc += rd32(IGC_HGPTC);
   4694
   4695	mpc = rd32(IGC_MPC);
   4696	adapter->stats.mpc += mpc;
   4697	net_stats->rx_fifo_errors += mpc;
   4698	adapter->stats.scc += rd32(IGC_SCC);
   4699	adapter->stats.ecol += rd32(IGC_ECOL);
   4700	adapter->stats.mcc += rd32(IGC_MCC);
   4701	adapter->stats.latecol += rd32(IGC_LATECOL);
   4702	adapter->stats.dc += rd32(IGC_DC);
   4703	adapter->stats.rlec += rd32(IGC_RLEC);
   4704	adapter->stats.xonrxc += rd32(IGC_XONRXC);
   4705	adapter->stats.xontxc += rd32(IGC_XONTXC);
   4706	adapter->stats.xoffrxc += rd32(IGC_XOFFRXC);
   4707	adapter->stats.xofftxc += rd32(IGC_XOFFTXC);
   4708	adapter->stats.fcruc += rd32(IGC_FCRUC);
   4709	adapter->stats.gptc += rd32(IGC_GPTC);
   4710	adapter->stats.gotc += rd32(IGC_GOTCL);
   4711	rd32(IGC_GOTCH); /* clear GOTCL */
   4712	adapter->stats.rnbc += rd32(IGC_RNBC);
   4713	adapter->stats.ruc += rd32(IGC_RUC);
   4714	adapter->stats.rfc += rd32(IGC_RFC);
   4715	adapter->stats.rjc += rd32(IGC_RJC);
   4716	adapter->stats.tor += rd32(IGC_TORH);
   4717	adapter->stats.tot += rd32(IGC_TOTH);
   4718	adapter->stats.tpr += rd32(IGC_TPR);
   4719
   4720	adapter->stats.ptc64 += rd32(IGC_PTC64);
   4721	adapter->stats.ptc127 += rd32(IGC_PTC127);
   4722	adapter->stats.ptc255 += rd32(IGC_PTC255);
   4723	adapter->stats.ptc511 += rd32(IGC_PTC511);
   4724	adapter->stats.ptc1023 += rd32(IGC_PTC1023);
   4725	adapter->stats.ptc1522 += rd32(IGC_PTC1522);
   4726
   4727	adapter->stats.mptc += rd32(IGC_MPTC);
   4728	adapter->stats.bptc += rd32(IGC_BPTC);
   4729
   4730	adapter->stats.tpt += rd32(IGC_TPT);
   4731	adapter->stats.colc += rd32(IGC_COLC);
   4732	adapter->stats.colc += rd32(IGC_RERC);
   4733
   4734	adapter->stats.algnerrc += rd32(IGC_ALGNERRC);
   4735
   4736	adapter->stats.tsctc += rd32(IGC_TSCTC);
   4737
   4738	adapter->stats.iac += rd32(IGC_IAC);
   4739
   4740	/* Fill out the OS statistics structure */
   4741	net_stats->multicast = adapter->stats.mprc;
   4742	net_stats->collisions = adapter->stats.colc;
   4743
   4744	/* Rx Errors */
   4745
   4746	/* RLEC on some newer hardware can be incorrect so build
   4747	 * our own version based on RUC and ROC
   4748	 */
   4749	net_stats->rx_errors = adapter->stats.rxerrc +
   4750		adapter->stats.crcerrs + adapter->stats.algnerrc +
   4751		adapter->stats.ruc + adapter->stats.roc +
   4752		adapter->stats.cexterr;
   4753	net_stats->rx_length_errors = adapter->stats.ruc +
   4754				      adapter->stats.roc;
   4755	net_stats->rx_crc_errors = adapter->stats.crcerrs;
   4756	net_stats->rx_frame_errors = adapter->stats.algnerrc;
   4757	net_stats->rx_missed_errors = adapter->stats.mpc;
   4758
   4759	/* Tx Errors */
   4760	net_stats->tx_errors = adapter->stats.ecol +
   4761			       adapter->stats.latecol;
   4762	net_stats->tx_aborted_errors = adapter->stats.ecol;
   4763	net_stats->tx_window_errors = adapter->stats.latecol;
   4764	net_stats->tx_carrier_errors = adapter->stats.tncrs;
   4765
   4766	/* Tx Dropped needs to be maintained elsewhere */
   4767
   4768	/* Management Stats */
   4769	adapter->stats.mgptc += rd32(IGC_MGTPTC);
   4770	adapter->stats.mgprc += rd32(IGC_MGTPRC);
   4771	adapter->stats.mgpdc += rd32(IGC_MGTPDC);
   4772}
   4773
   4774/**
   4775 * igc_down - Close the interface
   4776 * @adapter: board private structure
   4777 */
   4778void igc_down(struct igc_adapter *adapter)
   4779{
   4780	struct net_device *netdev = adapter->netdev;
   4781	struct igc_hw *hw = &adapter->hw;
   4782	u32 tctl, rctl;
   4783	int i = 0;
   4784
   4785	set_bit(__IGC_DOWN, &adapter->state);
   4786
   4787	igc_ptp_suspend(adapter);
   4788
   4789	if (pci_device_is_present(adapter->pdev)) {
   4790		/* disable receives in the hardware */
   4791		rctl = rd32(IGC_RCTL);
   4792		wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN);
   4793		/* flush and sleep below */
   4794	}
   4795	/* set trans_start so we don't get spurious watchdogs during reset */
   4796	netif_trans_update(netdev);
   4797
   4798	netif_carrier_off(netdev);
   4799	netif_tx_stop_all_queues(netdev);
   4800
   4801	if (pci_device_is_present(adapter->pdev)) {
   4802		/* disable transmits in the hardware */
   4803		tctl = rd32(IGC_TCTL);
   4804		tctl &= ~IGC_TCTL_EN;
   4805		wr32(IGC_TCTL, tctl);
   4806		/* flush both disables and wait for them to finish */
   4807		wrfl();
   4808		usleep_range(10000, 20000);
   4809
   4810		igc_irq_disable(adapter);
   4811	}
   4812
   4813	adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
   4814
   4815	for (i = 0; i < adapter->num_q_vectors; i++) {
   4816		if (adapter->q_vector[i]) {
   4817			napi_synchronize(&adapter->q_vector[i]->napi);
   4818			napi_disable(&adapter->q_vector[i]->napi);
   4819		}
   4820	}
   4821
   4822	del_timer_sync(&adapter->watchdog_timer);
   4823	del_timer_sync(&adapter->phy_info_timer);
   4824
    4825	/* record the stats before reset */
   4826	spin_lock(&adapter->stats64_lock);
   4827	igc_update_stats(adapter);
   4828	spin_unlock(&adapter->stats64_lock);
   4829
   4830	adapter->link_speed = 0;
   4831	adapter->link_duplex = 0;
   4832
   4833	if (!pci_channel_offline(adapter->pdev))
   4834		igc_reset(adapter);
   4835
   4836	/* clear VLAN promisc flag so VFTA will be updated if necessary */
   4837	adapter->flags &= ~IGC_FLAG_VLAN_PROMISC;
   4838
   4839	igc_clean_all_tx_rings(adapter);
   4840	igc_clean_all_rx_rings(adapter);
   4841}
   4842
   4843void igc_reinit_locked(struct igc_adapter *adapter)
   4844{
   4845	while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
   4846		usleep_range(1000, 2000);
   4847	igc_down(adapter);
   4848	igc_up(adapter);
   4849	clear_bit(__IGC_RESETTING, &adapter->state);
   4850}
   4851
   4852static void igc_reset_task(struct work_struct *work)
   4853{
   4854	struct igc_adapter *adapter;
   4855
   4856	adapter = container_of(work, struct igc_adapter, reset_task);
   4857
   4858	rtnl_lock();
   4859	/* If we're already down or resetting, just bail */
   4860	if (test_bit(__IGC_DOWN, &adapter->state) ||
   4861	    test_bit(__IGC_RESETTING, &adapter->state)) {
   4862		rtnl_unlock();
   4863		return;
   4864	}
   4865
   4866	igc_rings_dump(adapter);
   4867	igc_regs_dump(adapter);
   4868	netdev_err(adapter->netdev, "Reset adapter\n");
   4869	igc_reinit_locked(adapter);
   4870	rtnl_unlock();
   4871}
   4872
   4873/**
   4874 * igc_change_mtu - Change the Maximum Transfer Unit
   4875 * @netdev: network interface device structure
    4876 * @new_mtu: requested new maximum transmission unit (MTU)
   4877 *
   4878 * Returns 0 on success, negative on failure
   4879 */
   4880static int igc_change_mtu(struct net_device *netdev, int new_mtu)
   4881{
   4882	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
   4883	struct igc_adapter *adapter = netdev_priv(netdev);
   4884
   4885	if (igc_xdp_is_enabled(adapter) && new_mtu > ETH_DATA_LEN) {
   4886		netdev_dbg(netdev, "Jumbo frames not supported with XDP");
   4887		return -EINVAL;
   4888	}
   4889
   4890	/* adjust max frame to be at least the size of a standard frame */
   4891	if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
   4892		max_frame = ETH_FRAME_LEN + ETH_FCS_LEN;
   4893
   4894	while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
   4895		usleep_range(1000, 2000);
   4896
   4897	/* igc_down has a dependency on max_frame_size */
   4898	adapter->max_frame_size = max_frame;
   4899
   4900	if (netif_running(netdev))
   4901		igc_down(adapter);
   4902
   4903	netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu);
   4904	netdev->mtu = new_mtu;
   4905
   4906	if (netif_running(netdev))
   4907		igc_up(adapter);
   4908	else
   4909		igc_reset(adapter);
   4910
   4911	clear_bit(__IGC_RESETTING, &adapter->state);
   4912
   4913	return 0;
   4914}
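
        /* Example (illustrative): for new_mtu = 1500 the frame budget becomes
         * 1500 + ETH_HLEN (14) + ETH_FCS_LEN (4) + VLAN_HLEN (4) = 1522 bytes,
         * comfortably above the ETH_FRAME_LEN + ETH_FCS_LEN floor of 1518.
         */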
   4915
   4916/**
   4917 * igc_get_stats64 - Get System Network Statistics
   4918 * @netdev: network interface device structure
   4919 * @stats: rtnl_link_stats64 pointer
   4920 *
    4921 * Fills @stats with the device statistics.
    4922 * The statistics are updated here and also from the timer callback.
   4923 */
   4924static void igc_get_stats64(struct net_device *netdev,
   4925			    struct rtnl_link_stats64 *stats)
   4926{
   4927	struct igc_adapter *adapter = netdev_priv(netdev);
   4928
   4929	spin_lock(&adapter->stats64_lock);
   4930	if (!test_bit(__IGC_RESETTING, &adapter->state))
   4931		igc_update_stats(adapter);
   4932	memcpy(stats, &adapter->stats64, sizeof(*stats));
   4933	spin_unlock(&adapter->stats64_lock);
   4934}
   4935
   4936static netdev_features_t igc_fix_features(struct net_device *netdev,
   4937					  netdev_features_t features)
   4938{
    4939	/* Since there is no support for separate Rx/Tx vlan accel
    4940	 * enable/disable, keep the Tx flag in the same state as Rx.
    4941	 */
   4942	if (features & NETIF_F_HW_VLAN_CTAG_RX)
   4943		features |= NETIF_F_HW_VLAN_CTAG_TX;
   4944	else
   4945		features &= ~NETIF_F_HW_VLAN_CTAG_TX;
   4946
   4947	return features;
   4948}
   4949
   4950static int igc_set_features(struct net_device *netdev,
   4951			    netdev_features_t features)
   4952{
   4953	netdev_features_t changed = netdev->features ^ features;
   4954	struct igc_adapter *adapter = netdev_priv(netdev);
   4955
   4956	if (changed & NETIF_F_HW_VLAN_CTAG_RX)
   4957		igc_vlan_mode(netdev, features);
   4958
    4959	/* Nothing else to do unless RXALL or NTUPLE changed */
   4960	if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE)))
   4961		return 0;
   4962
   4963	if (!(features & NETIF_F_NTUPLE))
   4964		igc_flush_nfc_rules(adapter);
   4965
   4966	netdev->features = features;
   4967
   4968	if (netif_running(netdev))
   4969		igc_reinit_locked(adapter);
   4970	else
   4971		igc_reset(adapter);
   4972
   4973	return 1;
   4974}
   4975
   4976static netdev_features_t
   4977igc_features_check(struct sk_buff *skb, struct net_device *dev,
   4978		   netdev_features_t features)
   4979{
   4980	unsigned int network_hdr_len, mac_hdr_len;
   4981
   4982	/* Make certain the headers can be described by a context descriptor */
   4983	mac_hdr_len = skb_network_header(skb) - skb->data;
   4984	if (unlikely(mac_hdr_len > IGC_MAX_MAC_HDR_LEN))
   4985		return features & ~(NETIF_F_HW_CSUM |
   4986				    NETIF_F_SCTP_CRC |
   4987				    NETIF_F_HW_VLAN_CTAG_TX |
   4988				    NETIF_F_TSO |
   4989				    NETIF_F_TSO6);
   4990
   4991	network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb);
    4992	if (unlikely(network_hdr_len > IGC_MAX_NETWORK_HDR_LEN))
   4993		return features & ~(NETIF_F_HW_CSUM |
   4994				    NETIF_F_SCTP_CRC |
   4995				    NETIF_F_TSO |
   4996				    NETIF_F_TSO6);
   4997
   4998	/* We can only support IPv4 TSO in tunnels if we can mangle the
   4999	 * inner IP ID field, so strip TSO if MANGLEID is not supported.
   5000	 */
   5001	if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID))
   5002		features &= ~NETIF_F_TSO;
   5003
   5004	return features;
   5005}
   5006
   5007static void igc_tsync_interrupt(struct igc_adapter *adapter)
   5008{
   5009	u32 ack, tsauxc, sec, nsec, tsicr;
   5010	struct igc_hw *hw = &adapter->hw;
   5011	struct ptp_clock_event event;
   5012	struct timespec64 ts;
   5013
   5014	tsicr = rd32(IGC_TSICR);
   5015	ack = 0;
   5016
   5017	if (tsicr & IGC_TSICR_SYS_WRAP) {
   5018		event.type = PTP_CLOCK_PPS;
   5019		if (adapter->ptp_caps.pps)
   5020			ptp_clock_event(adapter->ptp_clock, &event);
   5021		ack |= IGC_TSICR_SYS_WRAP;
   5022	}
   5023
   5024	if (tsicr & IGC_TSICR_TXTS) {
   5025		/* retrieve hardware timestamp */
   5026		schedule_work(&adapter->ptp_tx_work);
   5027		ack |= IGC_TSICR_TXTS;
   5028	}
   5029
   5030	if (tsicr & IGC_TSICR_TT0) {
   5031		spin_lock(&adapter->tmreg_lock);
   5032		ts = timespec64_add(adapter->perout[0].start,
   5033				    adapter->perout[0].period);
   5034		wr32(IGC_TRGTTIML0, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0);
   5035		wr32(IGC_TRGTTIMH0, (u32)ts.tv_sec);
   5036		tsauxc = rd32(IGC_TSAUXC);
   5037		tsauxc |= IGC_TSAUXC_EN_TT0;
   5038		wr32(IGC_TSAUXC, tsauxc);
   5039		adapter->perout[0].start = ts;
   5040		spin_unlock(&adapter->tmreg_lock);
   5041		ack |= IGC_TSICR_TT0;
   5042	}
   5043
   5044	if (tsicr & IGC_TSICR_TT1) {
   5045		spin_lock(&adapter->tmreg_lock);
   5046		ts = timespec64_add(adapter->perout[1].start,
   5047				    adapter->perout[1].period);
   5048		wr32(IGC_TRGTTIML1, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0);
   5049		wr32(IGC_TRGTTIMH1, (u32)ts.tv_sec);
   5050		tsauxc = rd32(IGC_TSAUXC);
   5051		tsauxc |= IGC_TSAUXC_EN_TT1;
   5052		wr32(IGC_TSAUXC, tsauxc);
   5053		adapter->perout[1].start = ts;
   5054		spin_unlock(&adapter->tmreg_lock);
   5055		ack |= IGC_TSICR_TT1;
   5056	}
   5057
   5058	if (tsicr & IGC_TSICR_AUTT0) {
   5059		nsec = rd32(IGC_AUXSTMPL0);
   5060		sec  = rd32(IGC_AUXSTMPH0);
   5061		event.type = PTP_CLOCK_EXTTS;
   5062		event.index = 0;
   5063		event.timestamp = sec * NSEC_PER_SEC + nsec;
   5064		ptp_clock_event(adapter->ptp_clock, &event);
   5065		ack |= IGC_TSICR_AUTT0;
   5066	}
   5067
   5068	if (tsicr & IGC_TSICR_AUTT1) {
   5069		nsec = rd32(IGC_AUXSTMPL1);
   5070		sec  = rd32(IGC_AUXSTMPH1);
   5071		event.type = PTP_CLOCK_EXTTS;
   5072		event.index = 1;
   5073		event.timestamp = sec * NSEC_PER_SEC + nsec;
   5074		ptp_clock_event(adapter->ptp_clock, &event);
   5075		ack |= IGC_TSICR_AUTT1;
   5076	}
   5077
   5078	/* acknowledge the interrupts */
   5079	wr32(IGC_TSICR, ack);
   5080}
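
        /* The TT0/TT1 branches above re-arm the periodic outputs: the next
         * target time is simply previous start + period, written back to
         * TRGTTIM{L,H}0/1 with the corresponding enable bit refreshed in
         * TSAUXC.
         */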
   5081
   5082/**
   5083 * igc_msix_other - msix other interrupt handler
   5084 * @irq: interrupt number
   5085 * @data: pointer to a q_vector
   5086 */
   5087static irqreturn_t igc_msix_other(int irq, void *data)
   5088{
   5089	struct igc_adapter *adapter = data;
   5090	struct igc_hw *hw = &adapter->hw;
   5091	u32 icr = rd32(IGC_ICR);
   5092
   5093	/* reading ICR causes bit 31 of EICR to be cleared */
   5094	if (icr & IGC_ICR_DRSTA)
   5095		schedule_work(&adapter->reset_task);
   5096
   5097	if (icr & IGC_ICR_DOUTSYNC) {
   5098		/* HW is reporting DMA is out of sync */
   5099		adapter->stats.doosync++;
   5100	}
   5101
   5102	if (icr & IGC_ICR_LSC) {
   5103		hw->mac.get_link_status = true;
   5104		/* guard against interrupt when we're going down */
   5105		if (!test_bit(__IGC_DOWN, &adapter->state))
   5106			mod_timer(&adapter->watchdog_timer, jiffies + 1);
   5107	}
   5108
   5109	if (icr & IGC_ICR_TS)
   5110		igc_tsync_interrupt(adapter);
   5111
   5112	wr32(IGC_EIMS, adapter->eims_other);
   5113
   5114	return IRQ_HANDLED;
   5115}
   5116
   5117static void igc_write_itr(struct igc_q_vector *q_vector)
   5118{
   5119	u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK;
   5120
   5121	if (!q_vector->set_itr)
   5122		return;
   5123
   5124	if (!itr_val)
   5125		itr_val = IGC_ITR_VAL_MASK;
   5126
   5127	itr_val |= IGC_EITR_CNT_IGNR;
   5128
   5129	writel(itr_val, q_vector->itr_register);
   5130	q_vector->set_itr = 0;
   5131}
   5132
   5133static irqreturn_t igc_msix_ring(int irq, void *data)
   5134{
   5135	struct igc_q_vector *q_vector = data;
   5136
   5137	/* Write the ITR value calculated from the previous interrupt. */
   5138	igc_write_itr(q_vector);
   5139
   5140	napi_schedule(&q_vector->napi);
   5141
   5142	return IRQ_HANDLED;
   5143}
   5144
   5145/**
   5146 * igc_request_msix - Initialize MSI-X interrupts
   5147 * @adapter: Pointer to adapter structure
   5148 *
   5149 * igc_request_msix allocates MSI-X vectors and requests interrupts from the
   5150 * kernel.
   5151 */
   5152static int igc_request_msix(struct igc_adapter *adapter)
   5153{
   5154	unsigned int num_q_vectors = adapter->num_q_vectors;
   5155	int i = 0, err = 0, vector = 0, free_vector = 0;
   5156	struct net_device *netdev = adapter->netdev;
   5157
   5158	err = request_irq(adapter->msix_entries[vector].vector,
   5159			  &igc_msix_other, 0, netdev->name, adapter);
   5160	if (err)
   5161		goto err_out;
   5162
   5163	if (num_q_vectors > MAX_Q_VECTORS) {
   5164		num_q_vectors = MAX_Q_VECTORS;
   5165		dev_warn(&adapter->pdev->dev,
   5166			 "The number of queue vectors (%d) is higher than max allowed (%d)\n",
   5167			 adapter->num_q_vectors, MAX_Q_VECTORS);
   5168	}
   5169	for (i = 0; i < num_q_vectors; i++) {
   5170		struct igc_q_vector *q_vector = adapter->q_vector[i];
   5171
   5172		vector++;
   5173
   5174		q_vector->itr_register = adapter->io_addr + IGC_EITR(vector);
   5175
   5176		if (q_vector->rx.ring && q_vector->tx.ring)
   5177			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
   5178				q_vector->rx.ring->queue_index);
   5179		else if (q_vector->tx.ring)
   5180			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
   5181				q_vector->tx.ring->queue_index);
   5182		else if (q_vector->rx.ring)
   5183			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
   5184				q_vector->rx.ring->queue_index);
   5185		else
   5186			sprintf(q_vector->name, "%s-unused", netdev->name);
   5187
   5188		err = request_irq(adapter->msix_entries[vector].vector,
   5189				  igc_msix_ring, 0, q_vector->name,
   5190				  q_vector);
   5191		if (err)
   5192			goto err_free;
   5193	}
   5194
   5195	igc_configure_msix(adapter);
   5196	return 0;
   5197
   5198err_free:
   5199	/* free already assigned IRQs */
   5200	free_irq(adapter->msix_entries[free_vector++].vector, adapter);
   5201
   5202	vector--;
   5203	for (i = 0; i < vector; i++) {
   5204		free_irq(adapter->msix_entries[free_vector++].vector,
   5205			 adapter->q_vector[i]);
   5206	}
   5207err_out:
   5208	return err;
   5209}
   5210
   5211/**
   5212 * igc_clear_interrupt_scheme - reset the device to a state of no interrupts
   5213 * @adapter: Pointer to adapter structure
   5214 *
    5215 * This function resets the device so that it has no Rx queues, Tx queues,
    5216 * or MSI-X interrupts allocated.
   5217 */
   5218static void igc_clear_interrupt_scheme(struct igc_adapter *adapter)
   5219{
   5220	igc_free_q_vectors(adapter);
   5221	igc_reset_interrupt_capability(adapter);
   5222}
   5223
   5224/* Need to wait a few seconds after link up to get diagnostic information from
   5225 * the phy
   5226 */
   5227static void igc_update_phy_info(struct timer_list *t)
   5228{
   5229	struct igc_adapter *adapter = from_timer(adapter, t, phy_info_timer);
   5230
   5231	igc_get_phy_info(&adapter->hw);
   5232}
   5233
   5234/**
   5235 * igc_has_link - check shared code for link and determine up/down
   5236 * @adapter: pointer to driver private info
   5237 */
   5238bool igc_has_link(struct igc_adapter *adapter)
   5239{
   5240	struct igc_hw *hw = &adapter->hw;
   5241	bool link_active = false;
   5242
   5243	/* get_link_status is set on LSC (link status) interrupt or
   5244	 * rx sequence error interrupt.  get_link_status will stay
    5245	 * true until igc_check_for_link establishes link
   5246	 * for copper adapters ONLY
   5247	 */
   5248	if (!hw->mac.get_link_status)
   5249		return true;
   5250	hw->mac.ops.check_for_link(hw);
   5251	link_active = !hw->mac.get_link_status;
   5252
   5253	if (hw->mac.type == igc_i225) {
   5254		if (!netif_carrier_ok(adapter->netdev)) {
   5255			adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
   5256		} else if (!(adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)) {
   5257			adapter->flags |= IGC_FLAG_NEED_LINK_UPDATE;
   5258			adapter->link_check_timeout = jiffies;
   5259		}
   5260	}
   5261
   5262	return link_active;
   5263}
   5264
   5265/**
   5266 * igc_watchdog - Timer Call-back
   5267 * @t: timer for the watchdog
   5268 */
   5269static void igc_watchdog(struct timer_list *t)
   5270{
   5271	struct igc_adapter *adapter = from_timer(adapter, t, watchdog_timer);
   5272	/* Do the rest outside of interrupt context */
   5273	schedule_work(&adapter->watchdog_task);
   5274}
   5275
   5276static void igc_watchdog_task(struct work_struct *work)
   5277{
   5278	struct igc_adapter *adapter = container_of(work,
   5279						   struct igc_adapter,
   5280						   watchdog_task);
   5281	struct net_device *netdev = adapter->netdev;
   5282	struct igc_hw *hw = &adapter->hw;
   5283	struct igc_phy_info *phy = &hw->phy;
   5284	u16 phy_data, retry_count = 20;
   5285	u32 link;
   5286	int i;
   5287
   5288	link = igc_has_link(adapter);
   5289
   5290	if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) {
   5291		if (time_after(jiffies, (adapter->link_check_timeout + HZ)))
   5292			adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
   5293		else
   5294			link = false;
   5295	}
   5296
   5297	if (link) {
   5298		/* Cancel scheduled suspend requests. */
   5299		pm_runtime_resume(netdev->dev.parent);
   5300
   5301		if (!netif_carrier_ok(netdev)) {
   5302			u32 ctrl;
   5303
   5304			hw->mac.ops.get_speed_and_duplex(hw,
   5305							 &adapter->link_speed,
   5306							 &adapter->link_duplex);
   5307
   5308			ctrl = rd32(IGC_CTRL);
   5309			/* Link status message must follow this format */
   5310			netdev_info(netdev,
   5311				    "NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n",
   5312				    adapter->link_speed,
   5313				    adapter->link_duplex == FULL_DUPLEX ?
   5314				    "Full" : "Half",
   5315				    (ctrl & IGC_CTRL_TFCE) &&
   5316				    (ctrl & IGC_CTRL_RFCE) ? "RX/TX" :
   5317				    (ctrl & IGC_CTRL_RFCE) ?  "RX" :
   5318				    (ctrl & IGC_CTRL_TFCE) ?  "TX" : "None");
   5319
   5320			/* disable EEE if enabled */
   5321			if ((adapter->flags & IGC_FLAG_EEE) &&
   5322			    adapter->link_duplex == HALF_DUPLEX) {
   5323				netdev_info(netdev,
   5324					    "EEE Disabled: unsupported at half duplex. Re-enable using ethtool when at full duplex\n");
   5325				adapter->hw.dev_spec._base.eee_enable = false;
   5326				adapter->flags &= ~IGC_FLAG_EEE;
   5327			}
   5328
   5329			/* check if SmartSpeed worked */
   5330			igc_check_downshift(hw);
   5331			if (phy->speed_downgraded)
   5332				netdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n");
   5333
   5334			/* adjust timeout factor according to speed/duplex */
   5335			adapter->tx_timeout_factor = 1;
   5336			switch (adapter->link_speed) {
   5337			case SPEED_10:
   5338				adapter->tx_timeout_factor = 14;
   5339				break;
   5340			case SPEED_100:
   5341			case SPEED_1000:
   5342			case SPEED_2500:
   5343				adapter->tx_timeout_factor = 7;
   5344				break;
   5345			}
   5346
   5347			if (adapter->link_speed != SPEED_1000)
   5348				goto no_wait;
   5349
   5350			/* wait for Remote receiver status OK */
   5351retry_read_status:
   5352			if (!igc_read_phy_reg(hw, PHY_1000T_STATUS,
   5353					      &phy_data)) {
   5354				if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) &&
   5355				    retry_count) {
   5356					msleep(100);
   5357					retry_count--;
   5358					goto retry_read_status;
   5359				} else if (!retry_count) {
    5360					netdev_err(netdev, "exceeded max 2 second wait\n");
   5361				}
   5362			} else {
    5363				netdev_err(netdev, "failed to read 1000Base-T Status Reg\n");
   5364			}
   5365no_wait:
   5366			netif_carrier_on(netdev);
   5367
   5368			/* link state has changed, schedule phy info update */
   5369			if (!test_bit(__IGC_DOWN, &adapter->state))
   5370				mod_timer(&adapter->phy_info_timer,
   5371					  round_jiffies(jiffies + 2 * HZ));
   5372		}
   5373	} else {
   5374		if (netif_carrier_ok(netdev)) {
   5375			adapter->link_speed = 0;
   5376			adapter->link_duplex = 0;
   5377
    5378			/* Link status message must follow this format */
   5379			netdev_info(netdev, "NIC Link is Down\n");
   5380			netif_carrier_off(netdev);
   5381
   5382			/* link state has changed, schedule phy info update */
   5383			if (!test_bit(__IGC_DOWN, &adapter->state))
   5384				mod_timer(&adapter->phy_info_timer,
   5385					  round_jiffies(jiffies + 2 * HZ));
   5386
   5387			/* link is down, time to check for alternate media */
   5388			if (adapter->flags & IGC_FLAG_MAS_ENABLE) {
   5389				if (adapter->flags & IGC_FLAG_MEDIA_RESET) {
   5390					schedule_work(&adapter->reset_task);
   5391					/* return immediately */
   5392					return;
   5393				}
   5394			}
   5395			pm_schedule_suspend(netdev->dev.parent,
   5396					    MSEC_PER_SEC * 5);
   5397
   5398		/* also check for alternate media here */
   5399		} else if (!netif_carrier_ok(netdev) &&
   5400			   (adapter->flags & IGC_FLAG_MAS_ENABLE)) {
   5401			if (adapter->flags & IGC_FLAG_MEDIA_RESET) {
   5402				schedule_work(&adapter->reset_task);
   5403				/* return immediately */
   5404				return;
   5405			}
   5406		}
   5407	}
   5408
   5409	spin_lock(&adapter->stats64_lock);
   5410	igc_update_stats(adapter);
   5411	spin_unlock(&adapter->stats64_lock);
   5412
   5413	for (i = 0; i < adapter->num_tx_queues; i++) {
   5414		struct igc_ring *tx_ring = adapter->tx_ring[i];
   5415
   5416		if (!netif_carrier_ok(netdev)) {
   5417			/* We've lost link, so the controller stops DMA,
   5418			 * but we've got queued Tx work that's never going
   5419			 * to get done, so reset controller to flush Tx.
   5420			 * (Do the reset outside of interrupt context).
   5421			 */
   5422			if (igc_desc_unused(tx_ring) + 1 < tx_ring->count) {
   5423				adapter->tx_timeout_count++;
   5424				schedule_work(&adapter->reset_task);
   5425				/* return immediately since reset is imminent */
   5426				return;
   5427			}
   5428		}
   5429
   5430		/* Force detection of hung controller every watchdog period */
   5431		set_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
   5432	}
   5433
   5434	/* Cause software interrupt to ensure Rx ring is cleaned */
   5435	if (adapter->flags & IGC_FLAG_HAS_MSIX) {
   5436		u32 eics = 0;
   5437
   5438		for (i = 0; i < adapter->num_q_vectors; i++)
   5439			eics |= adapter->q_vector[i]->eims_value;
   5440		wr32(IGC_EICS, eics);
   5441	} else {
   5442		wr32(IGC_ICS, IGC_ICS_RXDMT0);
   5443	}
   5444
   5445	igc_ptp_tx_hang(adapter);
   5446
   5447	/* Reset the timer */
   5448	if (!test_bit(__IGC_DOWN, &adapter->state)) {
   5449		if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)
   5450			mod_timer(&adapter->watchdog_timer,
   5451				  round_jiffies(jiffies +  HZ));
   5452		else
   5453			mod_timer(&adapter->watchdog_timer,
   5454				  round_jiffies(jiffies + 2 * HZ));
   5455	}
   5456}
   5457
   5458/**
   5459 * igc_intr_msi - Interrupt Handler
   5460 * @irq: interrupt number
    5461 * @data: pointer to the adapter structure
   5462 */
   5463static irqreturn_t igc_intr_msi(int irq, void *data)
   5464{
   5465	struct igc_adapter *adapter = data;
   5466	struct igc_q_vector *q_vector = adapter->q_vector[0];
   5467	struct igc_hw *hw = &adapter->hw;
   5468	/* read ICR disables interrupts using IAM */
   5469	u32 icr = rd32(IGC_ICR);
   5470
   5471	igc_write_itr(q_vector);
   5472
   5473	if (icr & IGC_ICR_DRSTA)
   5474		schedule_work(&adapter->reset_task);
   5475
   5476	if (icr & IGC_ICR_DOUTSYNC) {
   5477		/* HW is reporting DMA is out of sync */
   5478		adapter->stats.doosync++;
   5479	}
   5480
   5481	if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
   5482		hw->mac.get_link_status = true;
   5483		if (!test_bit(__IGC_DOWN, &adapter->state))
   5484			mod_timer(&adapter->watchdog_timer, jiffies + 1);
   5485	}
   5486
   5487	if (icr & IGC_ICR_TS)
   5488		igc_tsync_interrupt(adapter);
   5489
   5490	napi_schedule(&q_vector->napi);
   5491
   5492	return IRQ_HANDLED;
   5493}
   5494
   5495/**
   5496 * igc_intr - Legacy Interrupt Handler
   5497 * @irq: interrupt number
    5498 * @data: pointer to the adapter structure
   5499 */
   5500static irqreturn_t igc_intr(int irq, void *data)
   5501{
   5502	struct igc_adapter *adapter = data;
   5503	struct igc_q_vector *q_vector = adapter->q_vector[0];
   5504	struct igc_hw *hw = &adapter->hw;
   5505	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
   5506	 * need for the IMC write
   5507	 */
   5508	u32 icr = rd32(IGC_ICR);
   5509
   5510	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
   5511	 * not set, then the adapter didn't send an interrupt
   5512	 */
   5513	if (!(icr & IGC_ICR_INT_ASSERTED))
   5514		return IRQ_NONE;
   5515
   5516	igc_write_itr(q_vector);
   5517
   5518	if (icr & IGC_ICR_DRSTA)
   5519		schedule_work(&adapter->reset_task);
   5520
   5521	if (icr & IGC_ICR_DOUTSYNC) {
   5522		/* HW is reporting DMA is out of sync */
   5523		adapter->stats.doosync++;
   5524	}
   5525
   5526	if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
   5527		hw->mac.get_link_status = true;
   5528		/* guard against interrupt when we're going down */
   5529		if (!test_bit(__IGC_DOWN, &adapter->state))
   5530			mod_timer(&adapter->watchdog_timer, jiffies + 1);
   5531	}
   5532
   5533	if (icr & IGC_ICR_TS)
   5534		igc_tsync_interrupt(adapter);
   5535
   5536	napi_schedule(&q_vector->napi);
   5537
   5538	return IRQ_HANDLED;
   5539}
   5540
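        /* igc_free_irq - release the IRQs obtained by igc_request_irq
         *
         * In MSI-X mode vector 0 (the "other" interrupt) was registered with
         * the adapter as its cookie and the remaining vectors with their
         * queue vectors, so the arguments to free_irq() mirror that; in
         * MSI/legacy mode the single line is simply released.
         */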
   5541static void igc_free_irq(struct igc_adapter *adapter)
   5542{
   5543	if (adapter->msix_entries) {
   5544		int vector = 0, i;
   5545
   5546		free_irq(adapter->msix_entries[vector++].vector, adapter);
   5547
   5548		for (i = 0; i < adapter->num_q_vectors; i++)
   5549			free_irq(adapter->msix_entries[vector++].vector,
   5550				 adapter->q_vector[i]);
   5551	} else {
   5552		free_irq(adapter->pdev->irq, adapter);
   5553	}
   5554}
   5555
   5556/**
   5557 * igc_request_irq - initialize interrupts
   5558 * @adapter: Pointer to adapter structure
   5559 *
   5560 * Attempts to configure interrupts using the best available
   5561 * capabilities of the hardware and kernel.
   5562 */
   5563static int igc_request_irq(struct igc_adapter *adapter)
   5564{
   5565	struct net_device *netdev = adapter->netdev;
   5566	struct pci_dev *pdev = adapter->pdev;
   5567	int err = 0;
   5568
   5569	if (adapter->flags & IGC_FLAG_HAS_MSIX) {
   5570		err = igc_request_msix(adapter);
   5571		if (!err)
   5572			goto request_done;
   5573		/* fall back to MSI */
   5574		igc_free_all_tx_resources(adapter);
   5575		igc_free_all_rx_resources(adapter);
   5576
   5577		igc_clear_interrupt_scheme(adapter);
   5578		err = igc_init_interrupt_scheme(adapter, false);
   5579		if (err)
   5580			goto request_done;
   5581		igc_setup_all_tx_resources(adapter);
   5582		igc_setup_all_rx_resources(adapter);
   5583		igc_configure(adapter);
   5584	}
   5585
   5586	igc_assign_vector(adapter->q_vector[0], 0);
   5587
   5588	if (adapter->flags & IGC_FLAG_HAS_MSI) {
   5589		err = request_irq(pdev->irq, &igc_intr_msi, 0,
   5590				  netdev->name, adapter);
   5591		if (!err)
   5592			goto request_done;
   5593
   5594		/* fall back to legacy interrupts */
   5595		igc_reset_interrupt_capability(adapter);
   5596		adapter->flags &= ~IGC_FLAG_HAS_MSI;
   5597	}
   5598
   5599	err = request_irq(pdev->irq, &igc_intr, IRQF_SHARED,
   5600			  netdev->name, adapter);
   5601
   5602	if (err)
   5603		netdev_err(netdev, "Error %d getting interrupt\n", err);
   5604
   5605request_done:
   5606	return err;
   5607}
   5608
   5609/**
   5610 * __igc_open - Called when a network interface is made active
   5611 * @netdev: network interface device structure
   5612 * @resuming: boolean indicating if the device is resuming
   5613 *
   5614 * Returns 0 on success, negative value on failure
   5615 *
   5616 * The open entry point is called when a network interface is made
   5617 * active by the system (IFF_UP).  At this point all resources needed
   5618 * for transmit and receive operations are allocated, the interrupt
   5619 * handler is registered with the OS, the watchdog timer is started,
   5620 * and the stack is notified that the interface is ready.
   5621 */
   5622static int __igc_open(struct net_device *netdev, bool resuming)
   5623{
   5624	struct igc_adapter *adapter = netdev_priv(netdev);
   5625	struct pci_dev *pdev = adapter->pdev;
   5626	struct igc_hw *hw = &adapter->hw;
   5627	int err = 0;
   5628	int i = 0;
   5629
   5630	/* disallow open during test */
   5631
   5632	if (test_bit(__IGC_TESTING, &adapter->state)) {
   5633		WARN_ON(resuming);
   5634		return -EBUSY;
   5635	}
   5636
   5637	if (!resuming)
   5638		pm_runtime_get_sync(&pdev->dev);
   5639
   5640	netif_carrier_off(netdev);
   5641
   5642	/* allocate transmit descriptors */
   5643	err = igc_setup_all_tx_resources(adapter);
   5644	if (err)
   5645		goto err_setup_tx;
   5646
   5647	/* allocate receive descriptors */
   5648	err = igc_setup_all_rx_resources(adapter);
   5649	if (err)
   5650		goto err_setup_rx;
   5651
   5652	igc_power_up_link(adapter);
   5653
   5654	igc_configure(adapter);
   5655
   5656	err = igc_request_irq(adapter);
   5657	if (err)
   5658		goto err_req_irq;
   5659
   5660	/* Notify the stack of the actual queue counts. */
   5661	err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
   5662	if (err)
   5663		goto err_set_queues;
   5664
   5665	err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
   5666	if (err)
   5667		goto err_set_queues;
   5668
   5669	clear_bit(__IGC_DOWN, &adapter->state);
   5670
   5671	for (i = 0; i < adapter->num_q_vectors; i++)
   5672		napi_enable(&adapter->q_vector[i]->napi);
   5673
   5674	/* Clear any pending interrupts. */
   5675	rd32(IGC_ICR);
   5676	igc_irq_enable(adapter);
   5677
   5678	if (!resuming)
   5679		pm_runtime_put(&pdev->dev);
   5680
   5681	netif_tx_start_all_queues(netdev);
   5682
   5683	/* start the watchdog. */
   5684	hw->mac.get_link_status = true;
   5685	schedule_work(&adapter->watchdog_task);
   5686
   5687	return IGC_SUCCESS;
   5688
   5689err_set_queues:
   5690	igc_free_irq(adapter);
   5691err_req_irq:
   5692	igc_release_hw_control(adapter);
   5693	igc_power_down_phy_copper_base(&adapter->hw);
   5694	igc_free_all_rx_resources(adapter);
   5695err_setup_rx:
   5696	igc_free_all_tx_resources(adapter);
   5697err_setup_tx:
   5698	igc_reset(adapter);
   5699	if (!resuming)
   5700		pm_runtime_put(&pdev->dev);
   5701
   5702	return err;
   5703}
   5704
   5705int igc_open(struct net_device *netdev)
   5706{
   5707	return __igc_open(netdev, false);
   5708}
   5709
   5710/**
   5711 * __igc_close - Disables a network interface
   5712 * @netdev: network interface device structure
   5713 * @suspending: boolean indicating the device is suspending
   5714 *
   5715 * Returns 0, this is not allowed to fail
   5716 *
   5717 * The close entry point is called when an interface is de-activated
   5718 * by the OS.  The hardware is still under the driver's control, but
   5719 * needs to be disabled.  A global MAC reset is issued to stop the
   5720 * hardware, and all transmit and receive resources are freed.
   5721 */
   5722static int __igc_close(struct net_device *netdev, bool suspending)
   5723{
   5724	struct igc_adapter *adapter = netdev_priv(netdev);
   5725	struct pci_dev *pdev = adapter->pdev;
   5726
   5727	WARN_ON(test_bit(__IGC_RESETTING, &adapter->state));
   5728
   5729	if (!suspending)
   5730		pm_runtime_get_sync(&pdev->dev);
   5731
   5732	igc_down(adapter);
   5733
   5734	igc_release_hw_control(adapter);
   5735
   5736	igc_free_irq(adapter);
   5737
   5738	igc_free_all_tx_resources(adapter);
   5739	igc_free_all_rx_resources(adapter);
   5740
   5741	if (!suspending)
   5742		pm_runtime_put_sync(&pdev->dev);
   5743
   5744	return 0;
   5745}
   5746
   5747int igc_close(struct net_device *netdev)
   5748{
   5749	if (netif_device_present(netdev) || netdev->dismantle)
   5750		return __igc_close(netdev, false);
   5751	return 0;
   5752}
   5753
   5754/**
   5755 * igc_ioctl - Access the hwtstamp interface
   5756 * @netdev: network interface device structure
   5757 * @ifr: interface request data
   5758 * @cmd: ioctl command
   5759 **/
   5760static int igc_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
   5761{
   5762	switch (cmd) {
   5763	case SIOCGHWTSTAMP:
   5764		return igc_ptp_get_ts_config(netdev, ifr);
   5765	case SIOCSHWTSTAMP:
   5766		return igc_ptp_set_ts_config(netdev, ifr);
   5767	default:
   5768		return -EOPNOTSUPP;
   5769	}
   5770}
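
        /* Illustrative sketch, not part of the driver: user space reaches the
         * hwtstamp handlers above through the SIOCSHWTSTAMP/SIOCGHWTSTAMP
         * ioctls.  The interface name and filter choice here are example
         * values only.
         *
         *	#include <linux/net_tstamp.h>
         *	#include <linux/sockios.h>
         *	#include <net/if.h>
         *	#include <string.h>
         *	#include <sys/ioctl.h>
         *	#include <sys/socket.h>
         *	#include <unistd.h>
         *
         *	// Enable TX and RX hardware timestamping on the given interface.
         *	static int enable_hw_timestamping(const char *ifname)
         *	{
         *		struct hwtstamp_config cfg = { 0 };
         *		struct ifreq ifr = { 0 };
         *		int fd, err;
         *
         *		cfg.tx_type = HWTSTAMP_TX_ON;
         *		cfg.rx_filter = HWTSTAMP_FILTER_ALL;
         *
         *		strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
         *		ifr.ifr_data = (void *)&cfg;
         *
         *		fd = socket(AF_INET, SOCK_DGRAM, 0);
         *		if (fd < 0)
         *			return -1;
         *		err = ioctl(fd, SIOCSHWTSTAMP, &ifr);
         *		close(fd);
         *		return err;
         *	}
         */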
   5771
   5772static int igc_save_launchtime_params(struct igc_adapter *adapter, int queue,
   5773				      bool enable)
   5774{
   5775	struct igc_ring *ring;
   5776
   5777	if (queue < 0 || queue >= adapter->num_tx_queues)
   5778		return -EINVAL;
   5779
   5780	ring = adapter->tx_ring[queue];
   5781	ring->launchtime_enable = enable;
   5782
   5783	return 0;
   5784}
   5785
   5786static bool is_base_time_past(ktime_t base_time, const struct timespec64 *now)
   5787{
   5788	struct timespec64 b;
   5789
   5790	b = ktime_to_timespec64(base_time);
   5791
   5792	return timespec64_compare(now, &b) > 0;
   5793}
   5794
   5795static bool validate_schedule(struct igc_adapter *adapter,
   5796			      const struct tc_taprio_qopt_offload *qopt)
   5797{
   5798	int queue_uses[IGC_MAX_TX_QUEUES] = { };
   5799	struct timespec64 now;
   5800	size_t n;
   5801
   5802	if (qopt->cycle_time_extension)
   5803		return false;
   5804
   5805	igc_ptp_read(adapter, &now);
   5806
   5807	/* If we program the controller's BASET registers with a time
   5808	 * in the future, it will hold all the packets until that
   5809	 * time, causing a lot of TX Hangs, so to avoid that, we
   5810	 * reject schedules that would start in the future.
   5811	 */
   5812	if (!is_base_time_past(qopt->base_time, &now))
   5813		return false;
   5814
   5815	for (n = 0; n < qopt->num_entries; n++) {
   5816		const struct tc_taprio_sched_entry *e;
   5817		int i;
   5818
   5819		e = &qopt->entries[n];
   5820
   5821		/* i225 only supports "global" frame preemption
   5822		 * settings.
   5823		 */
   5824		if (e->command != TC_TAPRIO_CMD_SET_GATES)
   5825			return false;
   5826
   5827		for (i = 0; i < adapter->num_tx_queues; i++) {
   5828			if (e->gate_mask & BIT(i))
   5829				queue_uses[i]++;
   5830
   5831			if (queue_uses[i] > 1)
   5832				return false;
   5833		}
   5834	}
   5835
   5836	return true;
   5837}
   5838
   5839static int igc_tsn_enable_launchtime(struct igc_adapter *adapter,
   5840				     struct tc_etf_qopt_offload *qopt)
   5841{
   5842	struct igc_hw *hw = &adapter->hw;
   5843	int err;
   5844
   5845	if (hw->mac.type != igc_i225)
   5846		return -EOPNOTSUPP;
   5847
   5848	err = igc_save_launchtime_params(adapter, qopt->queue, qopt->enable);
   5849	if (err)
   5850		return err;
   5851
   5852	return igc_tsn_offload_apply(adapter);
   5853}
   5854
   5855static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
   5856{
   5857	int i;
   5858
   5859	adapter->base_time = 0;
   5860	adapter->cycle_time = NSEC_PER_SEC;
   5861
   5862	for (i = 0; i < adapter->num_tx_queues; i++) {
   5863		struct igc_ring *ring = adapter->tx_ring[i];
   5864
   5865		ring->start_time = 0;
   5866		ring->end_time = NSEC_PER_SEC;
   5867	}
   5868
   5869	return 0;
   5870}
   5871
   5872static int igc_save_qbv_schedule(struct igc_adapter *adapter,
   5873				 struct tc_taprio_qopt_offload *qopt)
   5874{
   5875	u32 start_time = 0, end_time = 0;
   5876	size_t n;
   5877
   5878	if (!qopt->enable)
   5879		return igc_tsn_clear_schedule(adapter);
   5880
   5881	if (adapter->base_time)
   5882		return -EALREADY;
   5883
   5884	if (!validate_schedule(adapter, qopt))
   5885		return -EINVAL;
   5886
   5887	adapter->cycle_time = qopt->cycle_time;
   5888	adapter->base_time = qopt->base_time;
   5889
   5890	/* FIXME: be a little smarter about cases when the gate for a
   5891	 * queue stays open for more than one entry.
   5892	 */
   5893	for (n = 0; n < qopt->num_entries; n++) {
   5894		struct tc_taprio_sched_entry *e = &qopt->entries[n];
   5895		int i;
   5896
   5897		end_time += e->interval;
   5898
   5899		for (i = 0; i < adapter->num_tx_queues; i++) {
   5900			struct igc_ring *ring = adapter->tx_ring[i];
   5901
   5902			if (!(e->gate_mask & BIT(i)))
   5903				continue;
   5904
   5905			ring->start_time = start_time;
   5906			ring->end_time = end_time;
   5907		}
   5908
   5909		start_time += e->interval;
   5910	}
   5911
   5912	return 0;
   5913}
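
        /* Worked example with illustrative numbers: a taprio schedule whose
         * cycle_time is 1000000 ns and whose two entries are 600000 ns with
         * gate_mask 0x1 followed by 400000 ns with gate_mask 0xE is stored by
         * the loop above as tx_ring[0]->start_time = 0, end_time = 600000 and
         * tx_ring[1..3]->start_time = 600000, end_time = 1000000.  Such a
         * schedule typically arrives from the tc taprio qdisc in full-offload
         * mode, which hands the driver the struct tc_taprio_qopt_offload
         * handled above.
         */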
   5914
   5915static int igc_tsn_enable_qbv_scheduling(struct igc_adapter *adapter,
   5916					 struct tc_taprio_qopt_offload *qopt)
   5917{
   5918	struct igc_hw *hw = &adapter->hw;
   5919	int err;
   5920
   5921	if (hw->mac.type != igc_i225)
   5922		return -EOPNOTSUPP;
   5923
   5924	err = igc_save_qbv_schedule(adapter, qopt);
   5925	if (err)
   5926		return err;
   5927
   5928	return igc_tsn_offload_apply(adapter);
   5929}
   5930
   5931static int igc_save_cbs_params(struct igc_adapter *adapter, int queue,
   5932			       bool enable, int idleslope, int sendslope,
   5933			       int hicredit, int locredit)
   5934{
   5935	bool cbs_status[IGC_MAX_SR_QUEUES] = { false };
   5936	struct net_device *netdev = adapter->netdev;
   5937	struct igc_ring *ring;
   5938	int i;
   5939
    5940	/* i225 has two sets of credit-based shaper logic, so CBS is
    5941	 * supported only on the two highest priority queues.
    5942	 */
   5943	if (queue < 0 || queue > 1)
   5944		return -EINVAL;
   5945
   5946	ring = adapter->tx_ring[queue];
   5947
   5948	for (i = 0; i < IGC_MAX_SR_QUEUES; i++)
   5949		if (adapter->tx_ring[i])
   5950			cbs_status[i] = adapter->tx_ring[i]->cbs_enable;
   5951
   5952	/* CBS should be enabled on the highest priority queue first in order
   5953	 * for the CBS algorithm to operate as intended.
   5954	 */
   5955	if (enable) {
   5956		if (queue == 1 && !cbs_status[0]) {
   5957			netdev_err(netdev,
   5958				   "Enabling CBS on queue1 before queue0\n");
   5959			return -EINVAL;
   5960		}
   5961	} else {
   5962		if (queue == 0 && cbs_status[1]) {
   5963			netdev_err(netdev,
   5964				   "Disabling CBS on queue0 before queue1\n");
   5965			return -EINVAL;
   5966		}
   5967	}
   5968
   5969	ring->cbs_enable = enable;
   5970	ring->idleslope = idleslope;
   5971	ring->sendslope = sendslope;
   5972	ring->hicredit = hicredit;
   5973	ring->locredit = locredit;
   5974
   5975	return 0;
   5976}
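
        /* The values cached above come from the tc "cbs" qdisc offload
         * (struct tc_cbs_qopt_offload); per tc-cbs(8), idleslope/sendslope
         * are in kilobits per second and hicredit/locredit in bytes.  They
         * are only stored here; igc_tsn_offload_apply(), called by
         * igc_tsn_enable_cbs(), programs them into the i225 credit-based
         * shapers for queues 0 and 1.
         */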
   5977
   5978static int igc_tsn_enable_cbs(struct igc_adapter *adapter,
   5979			      struct tc_cbs_qopt_offload *qopt)
   5980{
   5981	struct igc_hw *hw = &adapter->hw;
   5982	int err;
   5983
   5984	if (hw->mac.type != igc_i225)
   5985		return -EOPNOTSUPP;
   5986
   5987	if (qopt->queue < 0 || qopt->queue > 1)
   5988		return -EINVAL;
   5989
   5990	err = igc_save_cbs_params(adapter, qopt->queue, qopt->enable,
   5991				  qopt->idleslope, qopt->sendslope,
   5992				  qopt->hicredit, qopt->locredit);
   5993	if (err)
   5994		return err;
   5995
   5996	return igc_tsn_offload_apply(adapter);
   5997}
   5998
   5999static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type,
   6000			void *type_data)
   6001{
   6002	struct igc_adapter *adapter = netdev_priv(dev);
   6003
   6004	switch (type) {
   6005	case TC_SETUP_QDISC_TAPRIO:
   6006		return igc_tsn_enable_qbv_scheduling(adapter, type_data);
   6007
   6008	case TC_SETUP_QDISC_ETF:
   6009		return igc_tsn_enable_launchtime(adapter, type_data);
   6010
   6011	case TC_SETUP_QDISC_CBS:
   6012		return igc_tsn_enable_cbs(adapter, type_data);
   6013
   6014	default:
   6015		return -EOPNOTSUPP;
   6016	}
   6017}
   6018
   6019static int igc_bpf(struct net_device *dev, struct netdev_bpf *bpf)
   6020{
   6021	struct igc_adapter *adapter = netdev_priv(dev);
   6022
   6023	switch (bpf->command) {
   6024	case XDP_SETUP_PROG:
   6025		return igc_xdp_set_prog(adapter, bpf->prog, bpf->extack);
   6026	case XDP_SETUP_XSK_POOL:
   6027		return igc_xdp_setup_pool(adapter, bpf->xsk.pool,
   6028					  bpf->xsk.queue_id);
   6029	default:
   6030		return -EOPNOTSUPP;
   6031	}
   6032}
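
        /* Illustrative sketch, not part of the driver: a native-mode XDP
         * attach from user space (libbpf 1.x API assumed) lands in igc_bpf()
         * above via XDP_SETUP_PROG.  Object path and interface name are
         * example values.
         *
         *	#include <bpf/libbpf.h>
         *	#include <linux/if_link.h>
         *	#include <net/if.h>
         *
         *	static int attach_xdp(const char *ifname, const char *obj_path)
         *	{
         *		struct bpf_object *obj;
         *		struct bpf_program *prog;
         *		int ifindex = if_nametoindex(ifname);
         *
         *		obj = bpf_object__open_file(obj_path, NULL);
         *		if (!obj || bpf_object__load(obj))
         *			return -1;
         *
         *		// first program in the object, for brevity
         *		prog = bpf_object__next_program(obj, NULL);
         *		if (!prog)
         *			return -1;
         *
         *		// XDP_FLAGS_DRV_MODE requests the driver (native) path
         *		return bpf_xdp_attach(ifindex, bpf_program__fd(prog),
         *				      XDP_FLAGS_DRV_MODE, NULL);
         *	}
         */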
   6033
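        /* igc_xdp_xmit - ndo_xdp_xmit handler
         *
         * Queues up to @num_frames XDP frames on the Tx ring mapped to the
         * current CPU and returns how many were accepted; frames that cannot
         * get a descriptor are handed back via xdp_return_frame_rx_napi().
         * XDP_XMIT_FLUSH makes the queued descriptors visible to hardware via
         * igc_flush_tx_descriptors().
         */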
   6034static int igc_xdp_xmit(struct net_device *dev, int num_frames,
   6035			struct xdp_frame **frames, u32 flags)
   6036{
   6037	struct igc_adapter *adapter = netdev_priv(dev);
   6038	int cpu = smp_processor_id();
   6039	struct netdev_queue *nq;
   6040	struct igc_ring *ring;
   6041	int i, drops;
   6042
   6043	if (unlikely(test_bit(__IGC_DOWN, &adapter->state)))
   6044		return -ENETDOWN;
   6045
   6046	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
   6047		return -EINVAL;
   6048
   6049	ring = igc_xdp_get_tx_ring(adapter, cpu);
   6050	nq = txring_txq(ring);
   6051
   6052	__netif_tx_lock(nq, cpu);
   6053
   6054	drops = 0;
   6055	for (i = 0; i < num_frames; i++) {
   6056		int err;
   6057		struct xdp_frame *xdpf = frames[i];
   6058
   6059		err = igc_xdp_init_tx_descriptor(ring, xdpf);
   6060		if (err) {
   6061			xdp_return_frame_rx_napi(xdpf);
   6062			drops++;
   6063		}
   6064	}
   6065
   6066	if (flags & XDP_XMIT_FLUSH)
   6067		igc_flush_tx_descriptors(ring);
   6068
   6069	__netif_tx_unlock(nq);
   6070
   6071	return num_frames - drops;
   6072}
   6073
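        /* Fire a software-generated interrupt for the given queue vector by
         * writing its EIMS bit into EICS, forcing its NAPI poll to run.
         */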
   6074static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter,
   6075					struct igc_q_vector *q_vector)
   6076{
   6077	struct igc_hw *hw = &adapter->hw;
   6078	u32 eics = 0;
   6079
   6080	eics |= q_vector->eims_value;
   6081	wr32(IGC_EICS, eics);
   6082}
   6083
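        /* igc_xsk_wakeup - ndo_xsk_wakeup handler for AF_XDP zero-copy
         *
         * Called when user space needs the kernel to make progress on an XSK
         * ring.  If the queue's NAPI context is not already scheduled
         * (napi_if_scheduled_mark_missed), a software interrupt is raised on
         * the matching vector so the rings get serviced.
         */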
   6084int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
   6085{
   6086	struct igc_adapter *adapter = netdev_priv(dev);
   6087	struct igc_q_vector *q_vector;
   6088	struct igc_ring *ring;
   6089
   6090	if (test_bit(__IGC_DOWN, &adapter->state))
   6091		return -ENETDOWN;
   6092
   6093	if (!igc_xdp_is_enabled(adapter))
   6094		return -ENXIO;
   6095
   6096	if (queue_id >= adapter->num_rx_queues)
   6097		return -EINVAL;
   6098
   6099	ring = adapter->rx_ring[queue_id];
   6100
   6101	if (!ring->xsk_pool)
   6102		return -ENXIO;
   6103
   6104	q_vector = adapter->q_vector[queue_id];
   6105	if (!napi_if_scheduled_mark_missed(&q_vector->napi))
   6106		igc_trigger_rxtxq_interrupt(adapter, q_vector);
   6107
   6108	return 0;
   6109}
   6110
   6111static const struct net_device_ops igc_netdev_ops = {
   6112	.ndo_open		= igc_open,
   6113	.ndo_stop		= igc_close,
   6114	.ndo_start_xmit		= igc_xmit_frame,
   6115	.ndo_set_rx_mode	= igc_set_rx_mode,
   6116	.ndo_set_mac_address	= igc_set_mac,
   6117	.ndo_change_mtu		= igc_change_mtu,
   6118	.ndo_get_stats64	= igc_get_stats64,
   6119	.ndo_fix_features	= igc_fix_features,
   6120	.ndo_set_features	= igc_set_features,
   6121	.ndo_features_check	= igc_features_check,
   6122	.ndo_eth_ioctl		= igc_ioctl,
   6123	.ndo_setup_tc		= igc_setup_tc,
   6124	.ndo_bpf		= igc_bpf,
   6125	.ndo_xdp_xmit		= igc_xdp_xmit,
   6126	.ndo_xsk_wakeup		= igc_xsk_wakeup,
   6127};
   6128
   6129/* PCIe configuration access */
   6130void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value)
   6131{
   6132	struct igc_adapter *adapter = hw->back;
   6133
   6134	pci_read_config_word(adapter->pdev, reg, value);
   6135}
   6136
   6137void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value)
   6138{
   6139	struct igc_adapter *adapter = hw->back;
   6140
   6141	pci_write_config_word(adapter->pdev, reg, *value);
   6142}
   6143
   6144s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
   6145{
   6146	struct igc_adapter *adapter = hw->back;
   6147
   6148	if (!pci_is_pcie(adapter->pdev))
   6149		return -IGC_ERR_CONFIG;
   6150
   6151	pcie_capability_read_word(adapter->pdev, reg, value);
   6152
   6153	return IGC_SUCCESS;
   6154}
   6155
   6156s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
   6157{
   6158	struct igc_adapter *adapter = hw->back;
   6159
   6160	if (!pci_is_pcie(adapter->pdev))
   6161		return -IGC_ERR_CONFIG;
   6162
   6163	pcie_capability_write_word(adapter->pdev, reg, *value);
   6164
   6165	return IGC_SUCCESS;
   6166}
   6167
   6168u32 igc_rd32(struct igc_hw *hw, u32 reg)
   6169{
   6170	struct igc_adapter *igc = container_of(hw, struct igc_adapter, hw);
   6171	u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
   6172	u32 value = 0;
   6173
   6174	value = readl(&hw_addr[reg]);
   6175
   6176	/* reads should not return all F's */
   6177	if (!(~value) && (!reg || !(~readl(hw_addr)))) {
   6178		struct net_device *netdev = igc->netdev;
   6179
   6180		hw->hw_addr = NULL;
   6181		netif_device_detach(netdev);
   6182		netdev_err(netdev, "PCIe link lost, device now detached\n");
   6183		WARN(pci_device_is_present(igc->pdev),
   6184		     "igc: Failed to read reg 0x%x!\n", reg);
   6185	}
   6186
   6187	return value;
   6188}
   6189
   6190/**
   6191 * igc_probe - Device Initialization Routine
   6192 * @pdev: PCI device information struct
   6193 * @ent: entry in igc_pci_tbl
   6194 *
   6195 * Returns 0 on success, negative on failure
   6196 *
   6197 * igc_probe initializes an adapter identified by a pci_dev structure.
   6198 * The OS initialization, configuring the adapter private structure,
   6199 * and a hardware reset occur.
   6200 */
   6201static int igc_probe(struct pci_dev *pdev,
   6202		     const struct pci_device_id *ent)
   6203{
   6204	struct igc_adapter *adapter;
   6205	struct net_device *netdev;
   6206	struct igc_hw *hw;
   6207	const struct igc_info *ei = igc_info_tbl[ent->driver_data];
   6208	int err;
   6209
   6210	err = pci_enable_device_mem(pdev);
   6211	if (err)
   6212		return err;
   6213
   6214	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
   6215	if (err) {
   6216		dev_err(&pdev->dev,
   6217			"No usable DMA configuration, aborting\n");
   6218		goto err_dma;
   6219	}
   6220
   6221	err = pci_request_mem_regions(pdev, igc_driver_name);
   6222	if (err)
   6223		goto err_pci_reg;
   6224
   6225	pci_enable_pcie_error_reporting(pdev);
   6226
   6227	err = pci_enable_ptm(pdev, NULL);
   6228	if (err < 0)
   6229		dev_info(&pdev->dev, "PCIe PTM not supported by PCIe bus/controller\n");
   6230
   6231	pci_set_master(pdev);
   6232
   6233	err = -ENOMEM;
   6234	netdev = alloc_etherdev_mq(sizeof(struct igc_adapter),
   6235				   IGC_MAX_TX_QUEUES);
   6236
   6237	if (!netdev)
   6238		goto err_alloc_etherdev;
   6239
   6240	SET_NETDEV_DEV(netdev, &pdev->dev);
   6241
   6242	pci_set_drvdata(pdev, netdev);
   6243	adapter = netdev_priv(netdev);
   6244	adapter->netdev = netdev;
   6245	adapter->pdev = pdev;
   6246	hw = &adapter->hw;
   6247	hw->back = adapter;
   6248	adapter->port_num = hw->bus.func;
   6249	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
   6250
   6251	err = pci_save_state(pdev);
   6252	if (err)
   6253		goto err_ioremap;
   6254
   6255	err = -EIO;
   6256	adapter->io_addr = ioremap(pci_resource_start(pdev, 0),
   6257				   pci_resource_len(pdev, 0));
   6258	if (!adapter->io_addr)
   6259		goto err_ioremap;
   6260
   6261	/* hw->hw_addr can be zeroed, so use adapter->io_addr for unmap */
   6262	hw->hw_addr = adapter->io_addr;
   6263
   6264	netdev->netdev_ops = &igc_netdev_ops;
   6265	igc_ethtool_set_ops(netdev);
   6266	netdev->watchdog_timeo = 5 * HZ;
   6267
   6268	netdev->mem_start = pci_resource_start(pdev, 0);
   6269	netdev->mem_end = pci_resource_end(pdev, 0);
   6270
   6271	/* PCI config space info */
   6272	hw->vendor_id = pdev->vendor;
   6273	hw->device_id = pdev->device;
   6274	hw->revision_id = pdev->revision;
   6275	hw->subsystem_vendor_id = pdev->subsystem_vendor;
   6276	hw->subsystem_device_id = pdev->subsystem_device;
   6277
   6278	/* Copy the default MAC and PHY function pointers */
   6279	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
   6280	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
   6281
   6282	/* Initialize skew-specific constants */
   6283	err = ei->get_invariants(hw);
   6284	if (err)
   6285		goto err_sw_init;
   6286
    6287	/* Add supported features to the features list */
   6288	netdev->features |= NETIF_F_SG;
   6289	netdev->features |= NETIF_F_TSO;
   6290	netdev->features |= NETIF_F_TSO6;
   6291	netdev->features |= NETIF_F_TSO_ECN;
   6292	netdev->features |= NETIF_F_RXCSUM;
   6293	netdev->features |= NETIF_F_HW_CSUM;
   6294	netdev->features |= NETIF_F_SCTP_CRC;
   6295	netdev->features |= NETIF_F_HW_TC;
   6296
   6297#define IGC_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
   6298				  NETIF_F_GSO_GRE_CSUM | \
   6299				  NETIF_F_GSO_IPXIP4 | \
   6300				  NETIF_F_GSO_IPXIP6 | \
   6301				  NETIF_F_GSO_UDP_TUNNEL | \
   6302				  NETIF_F_GSO_UDP_TUNNEL_CSUM)
   6303
   6304	netdev->gso_partial_features = IGC_GSO_PARTIAL_FEATURES;
   6305	netdev->features |= NETIF_F_GSO_PARTIAL | IGC_GSO_PARTIAL_FEATURES;
   6306
   6307	/* setup the private structure */
   6308	err = igc_sw_init(adapter);
   6309	if (err)
   6310		goto err_sw_init;
   6311
   6312	/* copy netdev features into list of user selectable features */
   6313	netdev->hw_features |= NETIF_F_NTUPLE;
   6314	netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
   6315	netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
   6316	netdev->hw_features |= netdev->features;
   6317
   6318	netdev->features |= NETIF_F_HIGHDMA;
   6319
   6320	netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
   6321	netdev->mpls_features |= NETIF_F_HW_CSUM;
   6322	netdev->hw_enc_features |= netdev->vlan_features;
   6323
   6324	/* MTU range: 68 - 9216 */
   6325	netdev->min_mtu = ETH_MIN_MTU;
   6326	netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE;
   6327
   6328	/* before reading the NVM, reset the controller to put the device in a
   6329	 * known good starting state
   6330	 */
   6331	hw->mac.ops.reset_hw(hw);
   6332
   6333	if (igc_get_flash_presence_i225(hw)) {
   6334		if (hw->nvm.ops.validate(hw) < 0) {
   6335			dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
   6336			err = -EIO;
   6337			goto err_eeprom;
   6338		}
   6339	}
   6340
   6341	if (eth_platform_get_mac_address(&pdev->dev, hw->mac.addr)) {
   6342		/* copy the MAC address out of the NVM */
   6343		if (hw->mac.ops.read_mac_addr(hw))
   6344			dev_err(&pdev->dev, "NVM Read Error\n");
   6345	}
   6346
   6347	eth_hw_addr_set(netdev, hw->mac.addr);
   6348
   6349	if (!is_valid_ether_addr(netdev->dev_addr)) {
   6350		dev_err(&pdev->dev, "Invalid MAC Address\n");
   6351		err = -EIO;
   6352		goto err_eeprom;
   6353	}
   6354
   6355	/* configure RXPBSIZE and TXPBSIZE */
   6356	wr32(IGC_RXPBS, I225_RXPBSIZE_DEFAULT);
   6357	wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT);
   6358
   6359	timer_setup(&adapter->watchdog_timer, igc_watchdog, 0);
   6360	timer_setup(&adapter->phy_info_timer, igc_update_phy_info, 0);
   6361
   6362	INIT_WORK(&adapter->reset_task, igc_reset_task);
   6363	INIT_WORK(&adapter->watchdog_task, igc_watchdog_task);
   6364
   6365	/* Initialize link properties that are user-changeable */
   6366	adapter->fc_autoneg = true;
   6367	hw->mac.autoneg = true;
   6368	hw->phy.autoneg_advertised = 0xaf;
   6369
   6370	hw->fc.requested_mode = igc_fc_default;
   6371	hw->fc.current_mode = igc_fc_default;
   6372
   6373	/* By default, support wake on port A */
   6374	adapter->flags |= IGC_FLAG_WOL_SUPPORTED;
   6375
   6376	/* initialize the wol settings based on the eeprom settings */
   6377	if (adapter->flags & IGC_FLAG_WOL_SUPPORTED)
   6378		adapter->wol |= IGC_WUFC_MAG;
   6379
   6380	device_set_wakeup_enable(&adapter->pdev->dev,
   6381				 adapter->flags & IGC_FLAG_WOL_SUPPORTED);
   6382
   6383	igc_ptp_init(adapter);
   6384
   6385	igc_tsn_clear_schedule(adapter);
   6386
   6387	/* reset the hardware with the new settings */
   6388	igc_reset(adapter);
   6389
   6390	/* let the f/w know that the h/w is now under the control of the
   6391	 * driver.
   6392	 */
   6393	igc_get_hw_control(adapter);
   6394
   6395	strncpy(netdev->name, "eth%d", IFNAMSIZ);
   6396	err = register_netdev(netdev);
   6397	if (err)
   6398		goto err_register;
   6399
   6400	 /* carrier off reporting is important to ethtool even BEFORE open */
   6401	netif_carrier_off(netdev);
   6402
    6403	/* keep a copy of the board-specific info */
   6404	adapter->ei = *ei;
   6405
   6406	/* print pcie link status and MAC address */
   6407	pcie_print_link_status(pdev);
   6408	netdev_info(netdev, "MAC: %pM\n", netdev->dev_addr);
   6409
   6410	dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
   6411	/* Disable EEE for internal PHY devices */
   6412	hw->dev_spec._base.eee_enable = false;
   6413	adapter->flags &= ~IGC_FLAG_EEE;
   6414	igc_set_eee_i225(hw, false, false, false);
   6415
   6416	pm_runtime_put_noidle(&pdev->dev);
   6417
   6418	return 0;
   6419
   6420err_register:
   6421	igc_release_hw_control(adapter);
   6422err_eeprom:
   6423	if (!igc_check_reset_block(hw))
   6424		igc_reset_phy(hw);
   6425err_sw_init:
   6426	igc_clear_interrupt_scheme(adapter);
   6427	iounmap(adapter->io_addr);
   6428err_ioremap:
   6429	free_netdev(netdev);
   6430err_alloc_etherdev:
   6431	pci_disable_pcie_error_reporting(pdev);
   6432	pci_release_mem_regions(pdev);
   6433err_pci_reg:
   6434err_dma:
   6435	pci_disable_device(pdev);
   6436	return err;
   6437}
   6438
   6439/**
   6440 * igc_remove - Device Removal Routine
   6441 * @pdev: PCI device information struct
   6442 *
   6443 * igc_remove is called by the PCI subsystem to alert the driver
   6444 * that it should release a PCI device.  This could be caused by a
   6445 * Hot-Plug event, or because the driver is going to be removed from
   6446 * memory.
   6447 */
   6448static void igc_remove(struct pci_dev *pdev)
   6449{
   6450	struct net_device *netdev = pci_get_drvdata(pdev);
   6451	struct igc_adapter *adapter = netdev_priv(netdev);
   6452
   6453	pm_runtime_get_noresume(&pdev->dev);
   6454
   6455	igc_flush_nfc_rules(adapter);
   6456
   6457	igc_ptp_stop(adapter);
   6458
   6459	set_bit(__IGC_DOWN, &adapter->state);
   6460
   6461	del_timer_sync(&adapter->watchdog_timer);
   6462	del_timer_sync(&adapter->phy_info_timer);
   6463
   6464	cancel_work_sync(&adapter->reset_task);
   6465	cancel_work_sync(&adapter->watchdog_task);
   6466
   6467	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
   6468	 * would have already happened in close and is redundant.
   6469	 */
   6470	igc_release_hw_control(adapter);
   6471	unregister_netdev(netdev);
   6472
   6473	igc_clear_interrupt_scheme(adapter);
   6474	pci_iounmap(pdev, adapter->io_addr);
   6475	pci_release_mem_regions(pdev);
   6476
   6477	free_netdev(netdev);
   6478
   6479	pci_disable_pcie_error_reporting(pdev);
   6480
   6481	pci_disable_device(pdev);
   6482}
   6483
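        /* __igc_shutdown - common suspend/hibernate/shutdown path
         *
         * Detaches and closes the netdev, then either arms the Wake Up Filter
         * Control register with the requested WoL events (link change only
         * for runtime suspend) or clears it, and reports through @enable_wake
         * whether the device should remain wake-capable.
         */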
   6484static int __igc_shutdown(struct pci_dev *pdev, bool *enable_wake,
   6485			  bool runtime)
   6486{
   6487	struct net_device *netdev = pci_get_drvdata(pdev);
   6488	struct igc_adapter *adapter = netdev_priv(netdev);
   6489	u32 wufc = runtime ? IGC_WUFC_LNKC : adapter->wol;
   6490	struct igc_hw *hw = &adapter->hw;
   6491	u32 ctrl, rctl, status;
   6492	bool wake;
   6493
   6494	rtnl_lock();
   6495	netif_device_detach(netdev);
   6496
   6497	if (netif_running(netdev))
   6498		__igc_close(netdev, true);
   6499
   6500	igc_ptp_suspend(adapter);
   6501
   6502	igc_clear_interrupt_scheme(adapter);
   6503	rtnl_unlock();
   6504
   6505	status = rd32(IGC_STATUS);
   6506	if (status & IGC_STATUS_LU)
   6507		wufc &= ~IGC_WUFC_LNKC;
   6508
   6509	if (wufc) {
   6510		igc_setup_rctl(adapter);
   6511		igc_set_rx_mode(netdev);
   6512
   6513		/* turn on all-multi mode if wake on multicast is enabled */
   6514		if (wufc & IGC_WUFC_MC) {
   6515			rctl = rd32(IGC_RCTL);
   6516			rctl |= IGC_RCTL_MPE;
   6517			wr32(IGC_RCTL, rctl);
   6518		}
   6519
   6520		ctrl = rd32(IGC_CTRL);
   6521		ctrl |= IGC_CTRL_ADVD3WUC;
   6522		wr32(IGC_CTRL, ctrl);
   6523
   6524		/* Allow time for pending master requests to run */
   6525		igc_disable_pcie_master(hw);
   6526
   6527		wr32(IGC_WUC, IGC_WUC_PME_EN);
   6528		wr32(IGC_WUFC, wufc);
   6529	} else {
   6530		wr32(IGC_WUC, 0);
   6531		wr32(IGC_WUFC, 0);
   6532	}
   6533
   6534	wake = wufc || adapter->en_mng_pt;
   6535	if (!wake)
   6536		igc_power_down_phy_copper_base(&adapter->hw);
   6537	else
   6538		igc_power_up_link(adapter);
   6539
   6540	if (enable_wake)
   6541		*enable_wake = wake;
   6542
   6543	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
   6544	 * would have already happened in close and is redundant.
   6545	 */
   6546	igc_release_hw_control(adapter);
   6547
   6548	pci_disable_device(pdev);
   6549
   6550	return 0;
   6551}
   6552
   6553#ifdef CONFIG_PM
   6554static int __maybe_unused igc_runtime_suspend(struct device *dev)
   6555{
   6556	return __igc_shutdown(to_pci_dev(dev), NULL, 1);
   6557}
   6558
   6559static void igc_deliver_wake_packet(struct net_device *netdev)
   6560{
   6561	struct igc_adapter *adapter = netdev_priv(netdev);
   6562	struct igc_hw *hw = &adapter->hw;
   6563	struct sk_buff *skb;
   6564	u32 wupl;
   6565
   6566	wupl = rd32(IGC_WUPL) & IGC_WUPL_MASK;
   6567
   6568	/* WUPM stores only the first 128 bytes of the wake packet.
   6569	 * Read the packet only if we have the whole thing.
   6570	 */
   6571	if (wupl == 0 || wupl > IGC_WUPM_BYTES)
   6572		return;
   6573
   6574	skb = netdev_alloc_skb_ip_align(netdev, IGC_WUPM_BYTES);
   6575	if (!skb)
   6576		return;
   6577
   6578	skb_put(skb, wupl);
   6579
   6580	/* Ensure reads are 32-bit aligned */
   6581	wupl = roundup(wupl, 4);
   6582
   6583	memcpy_fromio(skb->data, hw->hw_addr + IGC_WUPM_REG(0), wupl);
   6584
   6585	skb->protocol = eth_type_trans(skb, netdev);
   6586	netif_rx(skb);
   6587}
   6588
   6589static int __maybe_unused igc_resume(struct device *dev)
   6590{
   6591	struct pci_dev *pdev = to_pci_dev(dev);
   6592	struct net_device *netdev = pci_get_drvdata(pdev);
   6593	struct igc_adapter *adapter = netdev_priv(netdev);
   6594	struct igc_hw *hw = &adapter->hw;
   6595	u32 err, val;
   6596
   6597	pci_set_power_state(pdev, PCI_D0);
   6598	pci_restore_state(pdev);
   6599	pci_save_state(pdev);
   6600
   6601	if (!pci_device_is_present(pdev))
   6602		return -ENODEV;
   6603	err = pci_enable_device_mem(pdev);
   6604	if (err) {
   6605		netdev_err(netdev, "Cannot enable PCI device from suspend\n");
   6606		return err;
   6607	}
   6608	pci_set_master(pdev);
   6609
   6610	pci_enable_wake(pdev, PCI_D3hot, 0);
   6611	pci_enable_wake(pdev, PCI_D3cold, 0);
   6612
   6613	if (igc_init_interrupt_scheme(adapter, true)) {
   6614		netdev_err(netdev, "Unable to allocate memory for queues\n");
   6615		return -ENOMEM;
   6616	}
   6617
   6618	igc_reset(adapter);
   6619
   6620	/* let the f/w know that the h/w is now under the control of the
   6621	 * driver.
   6622	 */
   6623	igc_get_hw_control(adapter);
   6624
   6625	val = rd32(IGC_WUS);
   6626	if (val & WAKE_PKT_WUS)
   6627		igc_deliver_wake_packet(netdev);
   6628
   6629	wr32(IGC_WUS, ~0);
   6630
   6631	rtnl_lock();
   6632	if (!err && netif_running(netdev))
   6633		err = __igc_open(netdev, true);
   6634
   6635	if (!err)
   6636		netif_device_attach(netdev);
   6637	rtnl_unlock();
   6638
   6639	return err;
   6640}
   6641
   6642static int __maybe_unused igc_runtime_resume(struct device *dev)
   6643{
   6644	return igc_resume(dev);
   6645}
   6646
   6647static int __maybe_unused igc_suspend(struct device *dev)
   6648{
   6649	return __igc_shutdown(to_pci_dev(dev), NULL, 0);
   6650}
   6651
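        /* Runtime-idle callback: never allow an immediate runtime suspend
         * (always returns -EBUSY); instead, when there is no link, queue a
         * delayed suspend five seconds out.
         */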
   6652static int __maybe_unused igc_runtime_idle(struct device *dev)
   6653{
   6654	struct net_device *netdev = dev_get_drvdata(dev);
   6655	struct igc_adapter *adapter = netdev_priv(netdev);
   6656
   6657	if (!igc_has_link(adapter))
   6658		pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
   6659
   6660	return -EBUSY;
   6661}
   6662#endif /* CONFIG_PM */
   6663
   6664static void igc_shutdown(struct pci_dev *pdev)
   6665{
   6666	bool wake;
   6667
   6668	__igc_shutdown(pdev, &wake, 0);
   6669
   6670	if (system_state == SYSTEM_POWER_OFF) {
   6671		pci_wake_from_d3(pdev, wake);
   6672		pci_set_power_state(pdev, PCI_D3hot);
   6673	}
   6674}
   6675
   6676/**
   6677 *  igc_io_error_detected - called when PCI error is detected
   6678 *  @pdev: Pointer to PCI device
   6679 *  @state: The current PCI connection state
   6680 *
   6681 *  This function is called after a PCI bus error affecting
   6682 *  this device has been detected.
   6683 **/
   6684static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev,
   6685					      pci_channel_state_t state)
   6686{
   6687	struct net_device *netdev = pci_get_drvdata(pdev);
   6688	struct igc_adapter *adapter = netdev_priv(netdev);
   6689
   6690	netif_device_detach(netdev);
   6691
   6692	if (state == pci_channel_io_perm_failure)
   6693		return PCI_ERS_RESULT_DISCONNECT;
   6694
   6695	if (netif_running(netdev))
   6696		igc_down(adapter);
   6697	pci_disable_device(pdev);
   6698
   6699	/* Request a slot reset. */
   6700	return PCI_ERS_RESULT_NEED_RESET;
   6701}
   6702
   6703/**
   6704 *  igc_io_slot_reset - called after the PCI bus has been reset.
   6705 *  @pdev: Pointer to PCI device
   6706 *
   6707 *  Restart the card from scratch, as if from a cold-boot. Implementation
   6708 *  resembles the first-half of the igc_resume routine.
   6709 **/
   6710static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev)
   6711{
   6712	struct net_device *netdev = pci_get_drvdata(pdev);
   6713	struct igc_adapter *adapter = netdev_priv(netdev);
   6714	struct igc_hw *hw = &adapter->hw;
   6715	pci_ers_result_t result;
   6716
   6717	if (pci_enable_device_mem(pdev)) {
   6718		netdev_err(netdev, "Could not re-enable PCI device after reset\n");
   6719		result = PCI_ERS_RESULT_DISCONNECT;
   6720	} else {
   6721		pci_set_master(pdev);
   6722		pci_restore_state(pdev);
   6723		pci_save_state(pdev);
   6724
   6725		pci_enable_wake(pdev, PCI_D3hot, 0);
   6726		pci_enable_wake(pdev, PCI_D3cold, 0);
   6727
   6728		/* In case of PCI error, adapter loses its HW address
   6729		 * so we should re-assign it here.
   6730		 */
   6731		hw->hw_addr = adapter->io_addr;
   6732
   6733		igc_reset(adapter);
   6734		wr32(IGC_WUS, ~0);
   6735		result = PCI_ERS_RESULT_RECOVERED;
   6736	}
   6737
   6738	return result;
   6739}
   6740
   6741/**
   6742 *  igc_io_resume - called when traffic can start to flow again.
   6743 *  @pdev: Pointer to PCI device
   6744 *
   6745 *  This callback is called when the error recovery driver tells us that
    6746 *  it's OK to resume normal operation. Implementation resembles the
   6747 *  second-half of the igc_resume routine.
   6748 */
   6749static void igc_io_resume(struct pci_dev *pdev)
   6750{
   6751	struct net_device *netdev = pci_get_drvdata(pdev);
   6752	struct igc_adapter *adapter = netdev_priv(netdev);
   6753
   6754	rtnl_lock();
   6755	if (netif_running(netdev)) {
   6756		if (igc_open(netdev)) {
   6757			netdev_err(netdev, "igc_open failed after reset\n");
   6758			return;
   6759		}
   6760	}
   6761
   6762	netif_device_attach(netdev);
   6763
   6764	/* let the f/w know that the h/w is now under the control of the
   6765	 * driver.
   6766	 */
   6767	igc_get_hw_control(adapter);
   6768	rtnl_unlock();
   6769}
   6770
   6771static const struct pci_error_handlers igc_err_handler = {
   6772	.error_detected = igc_io_error_detected,
   6773	.slot_reset = igc_io_slot_reset,
   6774	.resume = igc_io_resume,
   6775};
   6776
   6777#ifdef CONFIG_PM
   6778static const struct dev_pm_ops igc_pm_ops = {
   6779	SET_SYSTEM_SLEEP_PM_OPS(igc_suspend, igc_resume)
   6780	SET_RUNTIME_PM_OPS(igc_runtime_suspend, igc_runtime_resume,
   6781			   igc_runtime_idle)
   6782};
   6783#endif
   6784
   6785static struct pci_driver igc_driver = {
   6786	.name     = igc_driver_name,
   6787	.id_table = igc_pci_tbl,
   6788	.probe    = igc_probe,
   6789	.remove   = igc_remove,
   6790#ifdef CONFIG_PM
   6791	.driver.pm = &igc_pm_ops,
   6792#endif
   6793	.shutdown = igc_shutdown,
   6794	.err_handler = &igc_err_handler,
   6795};
   6796
   6797/**
    6799 * igc_reinit_queues - reinitialize the interrupt scheme and queues
   6799 * @adapter: pointer to adapter structure
   6800 */
   6801int igc_reinit_queues(struct igc_adapter *adapter)
   6802{
   6803	struct net_device *netdev = adapter->netdev;
   6804	int err = 0;
   6805
   6806	if (netif_running(netdev))
   6807		igc_close(netdev);
   6808
   6809	igc_reset_interrupt_capability(adapter);
   6810
   6811	if (igc_init_interrupt_scheme(adapter, true)) {
   6812		netdev_err(netdev, "Unable to allocate memory for queues\n");
   6813		return -ENOMEM;
   6814	}
   6815
   6816	if (netif_running(netdev))
   6817		err = igc_open(netdev);
   6818
   6819	return err;
   6820}
   6821
   6822/**
   6823 * igc_get_hw_dev - return device
   6824 * @hw: pointer to hardware structure
   6825 *
   6826 * used by hardware layer to print debugging information
   6827 */
   6828struct net_device *igc_get_hw_dev(struct igc_hw *hw)
   6829{
   6830	struct igc_adapter *adapter = hw->back;
   6831
   6832	return adapter->netdev;
   6833}
   6834
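        /* Disable the Rx queue in hardware and request a software flush
         * (SWFLUSH) of descriptors still in flight; the software ring state
         * is cleaned separately by igc_clean_rx_ring().
         */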
   6835static void igc_disable_rx_ring_hw(struct igc_ring *ring)
   6836{
   6837	struct igc_hw *hw = &ring->q_vector->adapter->hw;
   6838	u8 idx = ring->reg_idx;
   6839	u32 rxdctl;
   6840
   6841	rxdctl = rd32(IGC_RXDCTL(idx));
   6842	rxdctl &= ~IGC_RXDCTL_QUEUE_ENABLE;
   6843	rxdctl |= IGC_RXDCTL_SWFLUSH;
   6844	wr32(IGC_RXDCTL(idx), rxdctl);
   6845}
   6846
   6847void igc_disable_rx_ring(struct igc_ring *ring)
   6848{
   6849	igc_disable_rx_ring_hw(ring);
   6850	igc_clean_rx_ring(ring);
   6851}
   6852
   6853void igc_enable_rx_ring(struct igc_ring *ring)
   6854{
   6855	struct igc_adapter *adapter = ring->q_vector->adapter;
   6856
   6857	igc_configure_rx_ring(adapter, ring);
   6858
   6859	if (ring->xsk_pool)
   6860		igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring));
   6861	else
   6862		igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
   6863}
   6864
   6865static void igc_disable_tx_ring_hw(struct igc_ring *ring)
   6866{
   6867	struct igc_hw *hw = &ring->q_vector->adapter->hw;
   6868	u8 idx = ring->reg_idx;
   6869	u32 txdctl;
   6870
   6871	txdctl = rd32(IGC_TXDCTL(idx));
   6872	txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE;
   6873	txdctl |= IGC_TXDCTL_SWFLUSH;
   6874	wr32(IGC_TXDCTL(idx), txdctl);
   6875}
   6876
   6877void igc_disable_tx_ring(struct igc_ring *ring)
   6878{
   6879	igc_disable_tx_ring_hw(ring);
   6880	igc_clean_tx_ring(ring);
   6881}
   6882
   6883void igc_enable_tx_ring(struct igc_ring *ring)
   6884{
   6885	struct igc_adapter *adapter = ring->q_vector->adapter;
   6886
   6887	igc_configure_tx_ring(adapter, ring);
   6888}
   6889
   6890/**
   6891 * igc_init_module - Driver Registration Routine
   6892 *
   6893 * igc_init_module is the first routine called when the driver is
   6894 * loaded. All it does is register with the PCI subsystem.
   6895 */
   6896static int __init igc_init_module(void)
   6897{
   6898	int ret;
   6899
   6900	pr_info("%s\n", igc_driver_string);
   6901	pr_info("%s\n", igc_copyright);
   6902
   6903	ret = pci_register_driver(&igc_driver);
   6904	return ret;
   6905}
   6906
   6907module_init(igc_init_module);
   6908
   6909/**
   6910 * igc_exit_module - Driver Exit Cleanup Routine
   6911 *
   6912 * igc_exit_module is called just before the driver is removed
   6913 * from memory.
   6914 */
   6915static void __exit igc_exit_module(void)
   6916{
   6917	pci_unregister_driver(&igc_driver);
   6918}
   6919
   6920module_exit(igc_exit_module);
   6921/* igc_main.c */