cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

ixgbe_main.c (321397B)


      1// SPDX-License-Identifier: GPL-2.0
      2/* Copyright(c) 1999 - 2018 Intel Corporation. */
      3
      4#include <linux/types.h>
      5#include <linux/module.h>
      6#include <linux/pci.h>
      7#include <linux/netdevice.h>
      8#include <linux/vmalloc.h>
      9#include <linux/string.h>
     10#include <linux/in.h>
     11#include <linux/interrupt.h>
     12#include <linux/ip.h>
     13#include <linux/tcp.h>
     14#include <linux/sctp.h>
     15#include <linux/pkt_sched.h>
     16#include <linux/ipv6.h>
     17#include <linux/slab.h>
     18#include <net/checksum.h>
     19#include <net/ip6_checksum.h>
     20#include <linux/etherdevice.h>
     21#include <linux/ethtool.h>
     22#include <linux/if.h>
     23#include <linux/if_vlan.h>
     24#include <linux/if_macvlan.h>
     25#include <linux/if_bridge.h>
     26#include <linux/prefetch.h>
     27#include <linux/bpf.h>
     28#include <linux/bpf_trace.h>
     29#include <linux/atomic.h>
     30#include <linux/numa.h>
     31#include <generated/utsrelease.h>
     32#include <scsi/fc/fc_fcoe.h>
     33#include <net/udp_tunnel.h>
     34#include <net/pkt_cls.h>
     35#include <net/tc_act/tc_gact.h>
     36#include <net/tc_act/tc_mirred.h>
     37#include <net/vxlan.h>
     38#include <net/mpls.h>
     39#include <net/xdp_sock_drv.h>
     40#include <net/xfrm.h>
     41
     42#include "ixgbe.h"
     43#include "ixgbe_common.h"
     44#include "ixgbe_dcb_82599.h"
     45#include "ixgbe_phy.h"
     46#include "ixgbe_sriov.h"
     47#include "ixgbe_model.h"
     48#include "ixgbe_txrx_common.h"
     49
     50char ixgbe_driver_name[] = "ixgbe";
     51static const char ixgbe_driver_string[] =
     52			      "Intel(R) 10 Gigabit PCI Express Network Driver";
     53#ifdef IXGBE_FCOE
     54char ixgbe_default_device_descr[] =
     55			      "Intel(R) 10 Gigabit Network Connection";
     56#else
     57static char ixgbe_default_device_descr[] =
     58			      "Intel(R) 10 Gigabit Network Connection";
     59#endif
     60static const char ixgbe_copyright[] =
     61				"Copyright (c) 1999-2016 Intel Corporation.";
     62
     63static const char ixgbe_overheat_msg[] = "Network adapter has been stopped because it has over heated. Restart the computer. If the problem persists, power off the system and replace the adapter";
     64
     65static const struct ixgbe_info *ixgbe_info_tbl[] = {
     66	[board_82598]		= &ixgbe_82598_info,
     67	[board_82599]		= &ixgbe_82599_info,
     68	[board_X540]		= &ixgbe_X540_info,
     69	[board_X550]		= &ixgbe_X550_info,
     70	[board_X550EM_x]	= &ixgbe_X550EM_x_info,
     71	[board_x550em_x_fw]	= &ixgbe_x550em_x_fw_info,
     72	[board_x550em_a]	= &ixgbe_x550em_a_info,
     73	[board_x550em_a_fw]	= &ixgbe_x550em_a_fw_info,
     74};
     75
     76/* ixgbe_pci_tbl - PCI Device ID Table
     77 *
     78 * Wildcard entries (PCI_ANY_ID) should come last
     79 * Last entry must be all 0s
     80 *
     81 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
     82 *   Class, Class Mask, private data (not used) }
     83 */
     84static const struct pci_device_id ixgbe_pci_tbl[] = {
     85	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598), board_82598 },
     86	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AF_DUAL_PORT), board_82598 },
     87	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AF_SINGLE_PORT), board_82598 },
     88	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AT), board_82598 },
     89	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AT2), board_82598 },
     90	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_CX4), board_82598 },
     91	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_CX4_DUAL_PORT), board_82598 },
     92	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_DA_DUAL_PORT), board_82598 },
     93	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM), board_82598 },
     94	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_XF_LR), board_82598 },
     95	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_SFP_LOM), board_82598 },
     96	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_BX), board_82598 },
     97	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KX4), board_82599 },
     98	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_XAUI_LOM), board_82599 },
     99	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KR), board_82599 },
    100	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP), board_82599 },
    101	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_EM), board_82599 },
    102	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KX4_MEZZ), board_82599 },
    103	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_CX4), board_82599 },
    104	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_BACKPLANE_FCOE), board_82599 },
    105	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_FCOE), board_82599 },
    106	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_T3_LOM), board_82599 },
    107	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_COMBO_BACKPLANE), board_82599 },
    108	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X540T), board_X540 },
    109	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_SF2), board_82599 },
    110	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_LS), board_82599 },
    111	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_QSFP_SF_QP), board_82599 },
    112	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599EN_SFP), board_82599 },
    113	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_SF_QP), board_82599 },
    114	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X540T1), board_X540 },
    115	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550T), board_X550},
    116	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550T1), board_X550},
    117	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_KX4), board_X550EM_x},
    118	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_XFI), board_X550EM_x},
    119	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_KR), board_X550EM_x},
    120	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_10G_T), board_X550EM_x},
    121	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_SFP), board_X550EM_x},
    122	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_1G_T), board_x550em_x_fw},
    123	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_KR), board_x550em_a },
    124	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_KR_L), board_x550em_a },
    125	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SFP_N), board_x550em_a },
    126	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SGMII), board_x550em_a },
    127	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SGMII_L), board_x550em_a },
    128	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_10G_T), board_x550em_a},
    129	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SFP), board_x550em_a },
    130	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_1G_T), board_x550em_a_fw },
    131	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_1G_T_L), board_x550em_a_fw },
    132	/* required last entry */
    133	{0, }
    134};
    135MODULE_DEVICE_TABLE(pci, ixgbe_pci_tbl);
    136
    137#ifdef CONFIG_IXGBE_DCA
    138static int ixgbe_notify_dca(struct notifier_block *, unsigned long event,
    139			    void *p);
    140static struct notifier_block dca_notifier = {
    141	.notifier_call = ixgbe_notify_dca,
    142	.next          = NULL,
    143	.priority      = 0
    144};
    145#endif
    146
    147#ifdef CONFIG_PCI_IOV
    148static unsigned int max_vfs;
    149module_param(max_vfs, uint, 0);
    150MODULE_PARM_DESC(max_vfs,
    151		 "Maximum number of virtual functions to allocate per physical function - default is zero and maximum value is 63. (Deprecated)");
    152#endif /* CONFIG_PCI_IOV */
    153
    154static bool allow_unsupported_sfp;
    155module_param(allow_unsupported_sfp, bool, 0);
    156MODULE_PARM_DESC(allow_unsupported_sfp,
    157		 "Allow unsupported and untested SFP+ modules on 82599-based adapters");
    158
    159#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
    160static int debug = -1;
    161module_param(debug, int, 0);
    162MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
    163
    164MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
    165MODULE_DESCRIPTION("Intel(R) 10 Gigabit PCI Express Network Driver");
    166MODULE_LICENSE("GPL v2");
    167
    168DEFINE_STATIC_KEY_FALSE(ixgbe_xdp_locking_key);
    169EXPORT_SYMBOL(ixgbe_xdp_locking_key);
    170
    171static struct workqueue_struct *ixgbe_wq;
    172
    173static bool ixgbe_check_cfg_remove(struct ixgbe_hw *hw, struct pci_dev *pdev);
    174static void ixgbe_watchdog_link_is_down(struct ixgbe_adapter *);
    175
    176static const struct net_device_ops ixgbe_netdev_ops;
    177
    178static bool netif_is_ixgbe(struct net_device *dev)
    179{
    180	return dev && (dev->netdev_ops == &ixgbe_netdev_ops);
    181}
    182
    183static int ixgbe_read_pci_cfg_word_parent(struct ixgbe_adapter *adapter,
    184					  u32 reg, u16 *value)
    185{
    186	struct pci_dev *parent_dev;
    187	struct pci_bus *parent_bus;
    188
    189	parent_bus = adapter->pdev->bus->parent;
    190	if (!parent_bus)
    191		return -1;
    192
    193	parent_dev = parent_bus->self;
    194	if (!parent_dev)
    195		return -1;
    196
    197	if (!pci_is_pcie(parent_dev))
    198		return -1;
    199
    200	pcie_capability_read_word(parent_dev, reg, value);
    201	if (*value == IXGBE_FAILED_READ_CFG_WORD &&
    202	    ixgbe_check_cfg_remove(&adapter->hw, parent_dev))
    203		return -1;
    204	return 0;
    205}
    206
    207static s32 ixgbe_get_parent_bus_info(struct ixgbe_adapter *adapter)
    208{
    209	struct ixgbe_hw *hw = &adapter->hw;
    210	u16 link_status = 0;
    211	int err;
    212
    213	hw->bus.type = ixgbe_bus_type_pci_express;
    214
    215	/* Get the negotiated link width and speed from PCI config space of the
    216	 * parent, as this device is behind a switch
    217	 */
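	/* offset 18 (0x12) is PCI_EXP_LNKSTA, the PCIe Link Status register */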
    218	err = ixgbe_read_pci_cfg_word_parent(adapter, 18, &link_status);
    219
    220	/* assume caller will handle error case */
    221	if (err)
    222		return err;
    223
    224	hw->bus.width = ixgbe_convert_bus_width(link_status);
    225	hw->bus.speed = ixgbe_convert_bus_speed(link_status);
    226
    227	return 0;
    228}
    229
    230/**
    231 * ixgbe_pcie_from_parent - Determine whether PCIe info should come from parent
    232 * @hw: hw specific details
    233 *
    234 * This function is used by probe to determine whether a device's PCI-Express
    235 * bandwidth details should be gathered from the parent bus instead of from the
    236 * device. Used to ensure that various locations all have the correct device ID
    237 * checks.
    238 */
    239static inline bool ixgbe_pcie_from_parent(struct ixgbe_hw *hw)
    240{
    241	switch (hw->device_id) {
    242	case IXGBE_DEV_ID_82599_SFP_SF_QP:
    243	case IXGBE_DEV_ID_82599_QSFP_SF_QP:
    244		return true;
    245	default:
    246		return false;
    247	}
    248}
    249
    250static void ixgbe_check_minimum_link(struct ixgbe_adapter *adapter,
    251				     int expected_gts)
    252{
    253	struct ixgbe_hw *hw = &adapter->hw;
    254	struct pci_dev *pdev;
    255
    256	/* Some devices are not connected over PCIe and thus do not negotiate
    257	 * speed. These devices do not have valid bus info, and thus any report
    258	 * we generate may not be correct.
    259	 */
    260	if (hw->bus.type == ixgbe_bus_type_internal)
    261		return;
    262
    263	/* determine whether to use the parent device */
    264	if (ixgbe_pcie_from_parent(&adapter->hw))
    265		pdev = adapter->pdev->bus->parent->self;
    266	else
    267		pdev = adapter->pdev;
    268
    269	pcie_print_link_status(pdev);
    270}
    271
    272static void ixgbe_service_event_schedule(struct ixgbe_adapter *adapter)
    273{
    274	if (!test_bit(__IXGBE_DOWN, &adapter->state) &&
    275	    !test_bit(__IXGBE_REMOVING, &adapter->state) &&
    276	    !test_and_set_bit(__IXGBE_SERVICE_SCHED, &adapter->state))
    277		queue_work(ixgbe_wq, &adapter->service_task);
    278}
    279
    280static void ixgbe_remove_adapter(struct ixgbe_hw *hw)
    281{
    282	struct ixgbe_adapter *adapter = hw->back;
    283
    284	if (!hw->hw_addr)
    285		return;
    286	hw->hw_addr = NULL;
    287	e_dev_err("Adapter removed\n");
    288	if (test_bit(__IXGBE_SERVICE_INITED, &adapter->state))
    289		ixgbe_service_event_schedule(adapter);
    290}
    291
    292static u32 ixgbe_check_remove(struct ixgbe_hw *hw, u32 reg)
    293{
    294	u8 __iomem *reg_addr;
    295	u32 value;
    296	int i;
    297
    298	reg_addr = READ_ONCE(hw->hw_addr);
    299	if (ixgbe_removed(reg_addr))
    300		return IXGBE_FAILED_READ_REG;
    301
    302	/* Register read of 0xFFFFFFF can indicate the adapter has been removed,
    303	 * so perform several status register reads to determine if the adapter
    304	 * has been removed.
    305	 */
    306	for (i = 0; i < IXGBE_FAILED_READ_RETRIES; i++) {
    307		value = readl(reg_addr + IXGBE_STATUS);
    308		if (value != IXGBE_FAILED_READ_REG)
    309			break;
    310		mdelay(3);
    311	}
    312
    313	if (value == IXGBE_FAILED_READ_REG)
    314		ixgbe_remove_adapter(hw);
    315	else
    316		value = readl(reg_addr + reg);
    317	return value;
    318}
    319
    320/**
    321 * ixgbe_read_reg - Read from device register
    322 * @hw: hw specific details
    323 * @reg: offset of register to read
    324 *
    325 * Returns : value read or IXGBE_FAILED_READ_REG if removed
    326 *
    327 * This function is used to read device registers. It checks for device
    328 * removal by confirming any read that returns all ones by checking the
    329 * status register value for all ones. This function avoids reading from
    330 * the hardware if a removal was previously detected in which case it
    331 * returns IXGBE_FAILED_READ_REG (all ones).
    332 */
    333u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg)
    334{
    335	u8 __iomem *reg_addr = READ_ONCE(hw->hw_addr);
    336	u32 value;
    337
    338	if (ixgbe_removed(reg_addr))
    339		return IXGBE_FAILED_READ_REG;
    340	if (unlikely(hw->phy.nw_mng_if_sel &
    341		     IXGBE_NW_MNG_IF_SEL_SGMII_ENABLE)) {
    342		struct ixgbe_adapter *adapter;
    343		int i;
    344
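		/* Poll the SGMII busy flag for up to 200 * 5 us = 1 ms so
		 * that outstanding register writes issued over the SGMII
		 * management interface have completed before reading @reg.
		 */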
    345		for (i = 0; i < 200; ++i) {
    346			value = readl(reg_addr + IXGBE_MAC_SGMII_BUSY);
    347			if (likely(!value))
    348				goto writes_completed;
    349			if (value == IXGBE_FAILED_READ_REG) {
    350				ixgbe_remove_adapter(hw);
    351				return IXGBE_FAILED_READ_REG;
    352			}
    353			udelay(5);
    354		}
    355
    356		adapter = hw->back;
    357		e_warn(hw, "register writes incomplete %08x\n", value);
    358	}
    359
    360writes_completed:
    361	value = readl(reg_addr + reg);
    362	if (unlikely(value == IXGBE_FAILED_READ_REG))
    363		value = ixgbe_check_remove(hw, reg);
    364	return value;
    365}
    366
    367static bool ixgbe_check_cfg_remove(struct ixgbe_hw *hw, struct pci_dev *pdev)
    368{
    369	u16 value;
    370
    371	pci_read_config_word(pdev, PCI_VENDOR_ID, &value);
    372	if (value == IXGBE_FAILED_READ_CFG_WORD) {
    373		ixgbe_remove_adapter(hw);
    374		return true;
    375	}
    376	return false;
    377}
    378
    379u16 ixgbe_read_pci_cfg_word(struct ixgbe_hw *hw, u32 reg)
    380{
    381	struct ixgbe_adapter *adapter = hw->back;
    382	u16 value;
    383
    384	if (ixgbe_removed(hw->hw_addr))
    385		return IXGBE_FAILED_READ_CFG_WORD;
    386	pci_read_config_word(adapter->pdev, reg, &value);
    387	if (value == IXGBE_FAILED_READ_CFG_WORD &&
    388	    ixgbe_check_cfg_remove(hw, adapter->pdev))
    389		return IXGBE_FAILED_READ_CFG_WORD;
    390	return value;
    391}
    392
    393#ifdef CONFIG_PCI_IOV
    394static u32 ixgbe_read_pci_cfg_dword(struct ixgbe_hw *hw, u32 reg)
    395{
    396	struct ixgbe_adapter *adapter = hw->back;
    397	u32 value;
    398
    399	if (ixgbe_removed(hw->hw_addr))
    400		return IXGBE_FAILED_READ_CFG_DWORD;
    401	pci_read_config_dword(adapter->pdev, reg, &value);
    402	if (value == IXGBE_FAILED_READ_CFG_DWORD &&
    403	    ixgbe_check_cfg_remove(hw, adapter->pdev))
    404		return IXGBE_FAILED_READ_CFG_DWORD;
    405	return value;
    406}
    407#endif /* CONFIG_PCI_IOV */
    408
    409void ixgbe_write_pci_cfg_word(struct ixgbe_hw *hw, u32 reg, u16 value)
    410{
    411	struct ixgbe_adapter *adapter = hw->back;
    412
    413	if (ixgbe_removed(hw->hw_addr))
    414		return;
    415	pci_write_config_word(adapter->pdev, reg, value);
    416}
    417
    418static void ixgbe_service_event_complete(struct ixgbe_adapter *adapter)
    419{
    420	BUG_ON(!test_bit(__IXGBE_SERVICE_SCHED, &adapter->state));
    421
    422	/* flush memory to make sure state is correct before next watchdog */
    423	smp_mb__before_atomic();
    424	clear_bit(__IXGBE_SERVICE_SCHED, &adapter->state);
    425}
    426
    427struct ixgbe_reg_info {
    428	u32 ofs;
    429	char *name;
    430};
    431
    432static const struct ixgbe_reg_info ixgbe_reg_info_tbl[] = {
    433
    434	/* General Registers */
    435	{IXGBE_CTRL, "CTRL"},
    436	{IXGBE_STATUS, "STATUS"},
    437	{IXGBE_CTRL_EXT, "CTRL_EXT"},
    438
    439	/* Interrupt Registers */
    440	{IXGBE_EICR, "EICR"},
    441
    442	/* RX Registers */
    443	{IXGBE_SRRCTL(0), "SRRCTL"},
    444	{IXGBE_DCA_RXCTRL(0), "DRXCTL"},
    445	{IXGBE_RDLEN(0), "RDLEN"},
    446	{IXGBE_RDH(0), "RDH"},
    447	{IXGBE_RDT(0), "RDT"},
    448	{IXGBE_RXDCTL(0), "RXDCTL"},
    449	{IXGBE_RDBAL(0), "RDBAL"},
    450	{IXGBE_RDBAH(0), "RDBAH"},
    451
    452	/* TX Registers */
    453	{IXGBE_TDBAL(0), "TDBAL"},
    454	{IXGBE_TDBAH(0), "TDBAH"},
    455	{IXGBE_TDLEN(0), "TDLEN"},
    456	{IXGBE_TDH(0), "TDH"},
    457	{IXGBE_TDT(0), "TDT"},
    458	{IXGBE_TXDCTL(0), "TXDCTL"},
    459
    460	/* List Terminator */
    461	{ .name = NULL }
    462};
    463
    464
    465/*
    466 * ixgbe_regdump - register printout routine
    467 */
    468static void ixgbe_regdump(struct ixgbe_hw *hw, struct ixgbe_reg_info *reginfo)
    469{
    470	int i;
    471	char rname[16];
    472	u32 regs[64];
    473
    474	switch (reginfo->ofs) {
    475	case IXGBE_SRRCTL(0):
    476		for (i = 0; i < 64; i++)
    477			regs[i] = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
    478		break;
    479	case IXGBE_DCA_RXCTRL(0):
    480		for (i = 0; i < 64; i++)
    481			regs[i] = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i));
    482		break;
    483	case IXGBE_RDLEN(0):
    484		for (i = 0; i < 64; i++)
    485			regs[i] = IXGBE_READ_REG(hw, IXGBE_RDLEN(i));
    486		break;
    487	case IXGBE_RDH(0):
    488		for (i = 0; i < 64; i++)
    489			regs[i] = IXGBE_READ_REG(hw, IXGBE_RDH(i));
    490		break;
    491	case IXGBE_RDT(0):
    492		for (i = 0; i < 64; i++)
    493			regs[i] = IXGBE_READ_REG(hw, IXGBE_RDT(i));
    494		break;
    495	case IXGBE_RXDCTL(0):
    496		for (i = 0; i < 64; i++)
    497			regs[i] = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
    498		break;
    499	case IXGBE_RDBAL(0):
    500		for (i = 0; i < 64; i++)
    501			regs[i] = IXGBE_READ_REG(hw, IXGBE_RDBAL(i));
    502		break;
    503	case IXGBE_RDBAH(0):
    504		for (i = 0; i < 64; i++)
    505			regs[i] = IXGBE_READ_REG(hw, IXGBE_RDBAH(i));
    506		break;
    507	case IXGBE_TDBAL(0):
    508		for (i = 0; i < 64; i++)
    509			regs[i] = IXGBE_READ_REG(hw, IXGBE_TDBAL(i));
    510		break;
    511	case IXGBE_TDBAH(0):
    512		for (i = 0; i < 64; i++)
    513			regs[i] = IXGBE_READ_REG(hw, IXGBE_TDBAH(i));
    514		break;
    515	case IXGBE_TDLEN(0):
    516		for (i = 0; i < 64; i++)
    517			regs[i] = IXGBE_READ_REG(hw, IXGBE_TDLEN(i));
    518		break;
    519	case IXGBE_TDH(0):
    520		for (i = 0; i < 64; i++)
    521			regs[i] = IXGBE_READ_REG(hw, IXGBE_TDH(i));
    522		break;
    523	case IXGBE_TDT(0):
    524		for (i = 0; i < 64; i++)
    525			regs[i] = IXGBE_READ_REG(hw, IXGBE_TDT(i));
    526		break;
    527	case IXGBE_TXDCTL(0):
    528		for (i = 0; i < 64; i++)
    529			regs[i] = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
    530		break;
    531	default:
    532		pr_info("%-15s %08x\n",
    533			reginfo->name, IXGBE_READ_REG(hw, reginfo->ofs));
    534		return;
    535	}
    536
    537	i = 0;
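	/* Print the 64 values read above, eight per line, labelled with the
	 * covered queue range, e.g. "RXDCTL[0-7]".
	 */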
    538	while (i < 64) {
    539		int j;
    540		char buf[9 * 8 + 1];
    541		char *p = buf;
    542
    543		snprintf(rname, 16, "%s[%d-%d]", reginfo->name, i, i + 7);
    544		for (j = 0; j < 8; j++)
    545			p += sprintf(p, " %08x", regs[i++]);
    546		pr_err("%-15s%s\n", rname, buf);
    547	}
    548
    549}
    550
    551static void ixgbe_print_buffer(struct ixgbe_ring *ring, int n)
    552{
    553	struct ixgbe_tx_buffer *tx_buffer;
    554
    555	tx_buffer = &ring->tx_buffer_info[ring->next_to_clean];
    556	pr_info(" %5d %5X %5X %016llX %08X %p %016llX\n",
    557		n, ring->next_to_use, ring->next_to_clean,
    558		(u64)dma_unmap_addr(tx_buffer, dma),
    559		dma_unmap_len(tx_buffer, len),
    560		tx_buffer->next_to_watch,
    561		(u64)tx_buffer->time_stamp);
    562}
    563
    564/*
    565 * ixgbe_dump - Print registers, tx-rings and rx-rings
    566 */
    567static void ixgbe_dump(struct ixgbe_adapter *adapter)
    568{
    569	struct net_device *netdev = adapter->netdev;
    570	struct ixgbe_hw *hw = &adapter->hw;
    571	struct ixgbe_reg_info *reginfo;
    572	int n = 0;
    573	struct ixgbe_ring *ring;
    574	struct ixgbe_tx_buffer *tx_buffer;
    575	union ixgbe_adv_tx_desc *tx_desc;
    576	struct my_u0 { u64 a; u64 b; } *u0;
    577	struct ixgbe_ring *rx_ring;
    578	union ixgbe_adv_rx_desc *rx_desc;
    579	struct ixgbe_rx_buffer *rx_buffer_info;
    580	int i = 0;
    581
    582	if (!netif_msg_hw(adapter))
    583		return;
    584
    585	/* Print netdevice Info */
    586	if (netdev) {
    587		dev_info(&adapter->pdev->dev, "Net device Info\n");
    588		pr_info("Device Name     state            "
    589			"trans_start\n");
    590		pr_info("%-15s %016lX %016lX\n",
    591			netdev->name,
    592			netdev->state,
    593			dev_trans_start(netdev));
    594	}
    595
    596	/* Print Registers */
    597	dev_info(&adapter->pdev->dev, "Register Dump\n");
    598	pr_info(" Register Name   Value\n");
    599	for (reginfo = (struct ixgbe_reg_info *)ixgbe_reg_info_tbl;
    600	     reginfo->name; reginfo++) {
    601		ixgbe_regdump(hw, reginfo);
    602	}
    603
    604	/* Print TX Ring Summary */
    605	if (!netdev || !netif_running(netdev))
    606		return;
    607
    608	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
    609	pr_info(" %s     %s              %s        %s\n",
    610		"Queue [NTU] [NTC] [bi(ntc)->dma  ]",
    611		"leng", "ntw", "timestamp");
    612	for (n = 0; n < adapter->num_tx_queues; n++) {
    613		ring = adapter->tx_ring[n];
    614		ixgbe_print_buffer(ring, n);
    615	}
    616
    617	for (n = 0; n < adapter->num_xdp_queues; n++) {
    618		ring = adapter->xdp_ring[n];
    619		ixgbe_print_buffer(ring, n);
    620	}
    621
    622	/* Print TX Rings */
    623	if (!netif_msg_tx_done(adapter))
    624		goto rx_ring_summary;
    625
    626	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
    627
    628	/* Transmit Descriptor Formats
    629	 *
    630	 * 82598 Advanced Transmit Descriptor
    631	 *   +--------------------------------------------------------------+
    632	 * 0 |         Buffer Address [63:0]                                |
    633	 *   +--------------------------------------------------------------+
    634	 * 8 |  PAYLEN  | POPTS  | IDX | STA | DCMD  |DTYP |  RSV |  DTALEN |
    635	 *   +--------------------------------------------------------------+
    636	 *   63       46 45    40 39 36 35 32 31   24 23 20 19              0
    637	 *
    638	 * 82598 Advanced Transmit Descriptor (Write-Back Format)
    639	 *   +--------------------------------------------------------------+
    640	 * 0 |                          RSV [63:0]                          |
    641	 *   +--------------------------------------------------------------+
    642	 * 8 |            RSV           |  STA  |          NXTSEQ           |
    643	 *   +--------------------------------------------------------------+
    644	 *   63                       36 35   32 31                         0
    645	 *
    646	 * 82599+ Advanced Transmit Descriptor
    647	 *   +--------------------------------------------------------------+
    648	 * 0 |         Buffer Address [63:0]                                |
    649	 *   +--------------------------------------------------------------+
    650	 * 8 |PAYLEN  |POPTS|CC|IDX  |STA  |DCMD  |DTYP |MAC  |RSV  |DTALEN |
    651	 *   +--------------------------------------------------------------+
    652	 *   63     46 45 40 39 38 36 35 32 31  24 23 20 19 18 17 16 15     0
    653	 *
    654	 * 82599+ Advanced Transmit Descriptor (Write-Back Format)
    655	 *   +--------------------------------------------------------------+
    656	 * 0 |                          RSV [63:0]                          |
    657	 *   +--------------------------------------------------------------+
    658	 * 8 |            RSV           |  STA  |           RSV             |
    659	 *   +--------------------------------------------------------------+
    660	 *   63                       36 35   32 31                         0
    661	 */
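	/* The dump below casts each 16-byte descriptor to struct my_u0, so
	 * u0->a is the first quadword (byte offset 0) and u0->b the second
	 * (byte offset 8) of the layouts shown above.
	 */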
    662
    663	for (n = 0; n < adapter->num_tx_queues; n++) {
    664		ring = adapter->tx_ring[n];
    665		pr_info("------------------------------------\n");
    666		pr_info("TX QUEUE INDEX = %d\n", ring->queue_index);
    667		pr_info("------------------------------------\n");
    668		pr_info("%s%s    %s              %s        %s          %s\n",
    669			"T [desc]     [address 63:0  ] ",
    670			"[PlPOIdStDDt Ln] [bi->dma       ] ",
    671			"leng", "ntw", "timestamp", "bi->skb");
    672
    673		for (i = 0; ring->desc && (i < ring->count); i++) {
    674			tx_desc = IXGBE_TX_DESC(ring, i);
    675			tx_buffer = &ring->tx_buffer_info[i];
    676			u0 = (struct my_u0 *)tx_desc;
    677			if (dma_unmap_len(tx_buffer, len) > 0) {
    678				const char *ring_desc;
    679
    680				if (i == ring->next_to_use &&
    681				    i == ring->next_to_clean)
    682					ring_desc = " NTC/U";
    683				else if (i == ring->next_to_use)
    684					ring_desc = " NTU";
    685				else if (i == ring->next_to_clean)
    686					ring_desc = " NTC";
    687				else
    688					ring_desc = "";
    689				pr_info("T [0x%03X]    %016llX %016llX %016llX %08X %p %016llX %p%s",
    690					i,
    691					le64_to_cpu((__force __le64)u0->a),
    692					le64_to_cpu((__force __le64)u0->b),
    693					(u64)dma_unmap_addr(tx_buffer, dma),
    694					dma_unmap_len(tx_buffer, len),
    695					tx_buffer->next_to_watch,
    696					(u64)tx_buffer->time_stamp,
    697					tx_buffer->skb,
    698					ring_desc);
    699
    700				if (netif_msg_pktdata(adapter) &&
    701				    tx_buffer->skb)
    702					print_hex_dump(KERN_INFO, "",
    703						DUMP_PREFIX_ADDRESS, 16, 1,
    704						tx_buffer->skb->data,
    705						dma_unmap_len(tx_buffer, len),
    706						true);
    707			}
    708		}
    709	}
    710
    711	/* Print RX Rings Summary */
    712rx_ring_summary:
    713	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
    714	pr_info("Queue [NTU] [NTC]\n");
    715	for (n = 0; n < adapter->num_rx_queues; n++) {
    716		rx_ring = adapter->rx_ring[n];
    717		pr_info("%5d %5X %5X\n",
    718			n, rx_ring->next_to_use, rx_ring->next_to_clean);
    719	}
    720
    721	/* Print RX Rings */
    722	if (!netif_msg_rx_status(adapter))
    723		return;
    724
    725	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
    726
    727	/* Receive Descriptor Formats
    728	 *
    729	 * 82598 Advanced Receive Descriptor (Read) Format
    730	 *    63                                           1        0
    731	 *    +-----------------------------------------------------+
    732	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
    733	 *    +----------------------------------------------+------+
    734	 *  8 |       Header Buffer Address [63:1]           |  DD  |
    735	 *    +-----------------------------------------------------+
    736	 *
    737	 *
    738	 * 82598 Advanced Receive Descriptor (Write-Back) Format
    739	 *
    740	 *   63       48 47    32 31  30      21 20 16 15   4 3     0
    741	 *   +------------------------------------------------------+
    742	 * 0 |       RSS Hash /  |SPH| HDR_LEN  | RSV |Packet|  RSS |
    743	 *   | Packet   | IP     |   |          |     | Type | Type |
    744	 *   | Checksum | Ident  |   |          |     |      |      |
    745	 *   +------------------------------------------------------+
    746	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
    747	 *   +------------------------------------------------------+
    748	 *   63       48 47    32 31            20 19               0
    749	 *
    750	 * 82599+ Advanced Receive Descriptor (Read) Format
    751	 *    63                                           1        0
    752	 *    +-----------------------------------------------------+
    753	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
    754	 *    +----------------------------------------------+------+
    755	 *  8 |       Header Buffer Address [63:1]           |  DD  |
    756	 *    +-----------------------------------------------------+
    757	 *
    758	 *
    759	 * 82599+ Advanced Receive Descriptor (Write-Back) Format
    760	 *
    761	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
    762	 *   +------------------------------------------------------+
    763	 * 0 |RSS / Frag Checksum|SPH| HDR_LEN  |RSC- |Packet|  RSS |
    764	 *   |/ RTT / PCoE_PARAM |   |          | CNT | Type | Type |
    765	 *   |/ Flow Dir Flt ID  |   |          |     |      |      |
    766	 *   +------------------------------------------------------+
    767	 * 8 | VLAN Tag | Length |Extended Error| Xtnd Status/NEXTP |
    768	 *   +------------------------------------------------------+
    769	 *   63       48 47    32 31          20 19                 0
    770	 */
    771
    772	for (n = 0; n < adapter->num_rx_queues; n++) {
    773		rx_ring = adapter->rx_ring[n];
    774		pr_info("------------------------------------\n");
    775		pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
    776		pr_info("------------------------------------\n");
    777		pr_info("%s%s%s\n",
    778			"R  [desc]      [ PktBuf     A0] ",
    779			"[  HeadBuf   DD] [bi->dma       ] [bi->skb       ] ",
    780			"<-- Adv Rx Read format");
    781		pr_info("%s%s%s\n",
    782			"RWB[desc]      [PcsmIpSHl PtRs] ",
    783			"[vl er S cks ln] ---------------- [bi->skb       ] ",
    784			"<-- Adv Rx Write-Back format");
    785
    786		for (i = 0; i < rx_ring->count; i++) {
    787			const char *ring_desc;
    788
    789			if (i == rx_ring->next_to_use)
    790				ring_desc = " NTU";
    791			else if (i == rx_ring->next_to_clean)
    792				ring_desc = " NTC";
    793			else
    794				ring_desc = "";
    795
    796			rx_buffer_info = &rx_ring->rx_buffer_info[i];
    797			rx_desc = IXGBE_RX_DESC(rx_ring, i);
    798			u0 = (struct my_u0 *)rx_desc;
    799			if (rx_desc->wb.upper.length) {
    800				/* Descriptor Done */
    801				pr_info("RWB[0x%03X]     %016llX %016llX ---------------- %p%s\n",
    802					i,
    803					le64_to_cpu((__force __le64)u0->a),
    804					le64_to_cpu((__force __le64)u0->b),
    805					rx_buffer_info->skb,
    806					ring_desc);
    807			} else {
    808				pr_info("R  [0x%03X]     %016llX %016llX %016llX %p%s\n",
    809					i,
    810					le64_to_cpu((__force __le64)u0->a),
    811					le64_to_cpu((__force __le64)u0->b),
    812					(u64)rx_buffer_info->dma,
    813					rx_buffer_info->skb,
    814					ring_desc);
    815
    816				if (netif_msg_pktdata(adapter) &&
    817				    rx_buffer_info->dma) {
    818					print_hex_dump(KERN_INFO, "",
    819					   DUMP_PREFIX_ADDRESS, 16, 1,
    820					   page_address(rx_buffer_info->page) +
    821						    rx_buffer_info->page_offset,
    822					   ixgbe_rx_bufsz(rx_ring), true);
    823				}
    824			}
    825		}
    826	}
    827}
    828
    829static void ixgbe_release_hw_control(struct ixgbe_adapter *adapter)
    830{
    831	u32 ctrl_ext;
    832
    833	/* Let firmware take over control of h/w */
    834	ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
    835	IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT,
    836			ctrl_ext & ~IXGBE_CTRL_EXT_DRV_LOAD);
    837}
    838
    839static void ixgbe_get_hw_control(struct ixgbe_adapter *adapter)
    840{
    841	u32 ctrl_ext;
    842
    843	/* Let firmware know the driver has taken over */
    844	ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
    845	IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT,
    846			ctrl_ext | IXGBE_CTRL_EXT_DRV_LOAD);
    847}
    848
    849/**
    850 * ixgbe_set_ivar - set the IVAR registers, mapping interrupt causes to vectors
    851 * @adapter: pointer to adapter struct
    852 * @direction: 0 for Rx, 1 for Tx, -1 for other causes
    853 * @queue: queue to map the corresponding interrupt to
    854 * @msix_vector: the vector to map to the corresponding queue
    855 *
    856 */
    857static void ixgbe_set_ivar(struct ixgbe_adapter *adapter, s8 direction,
    858			   u8 queue, u8 msix_vector)
    859{
    860	u32 ivar, index;
    861	struct ixgbe_hw *hw = &adapter->hw;
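	/* Each 32-bit IVAR register holds four 8-bit vector entries.  On
	 * 82598 the entry lives in IVAR(((direction * 64 + queue) >> 2) & 0x1F)
	 * at byte (queue & 3); on 82599 and newer each IVAR covers two
	 * queues, e.g. the Tx cause of queue 5 (direction 1) lands in
	 * IVAR(2) at bit offset 16 * (5 & 1) + 8 * 1 = 24.
	 */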
    862	switch (hw->mac.type) {
    863	case ixgbe_mac_82598EB:
    864		msix_vector |= IXGBE_IVAR_ALLOC_VAL;
    865		if (direction == -1)
    866			direction = 0;
    867		index = (((direction * 64) + queue) >> 2) & 0x1F;
    868		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
    869		ivar &= ~(0xFF << (8 * (queue & 0x3)));
    870		ivar |= (msix_vector << (8 * (queue & 0x3)));
    871		IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
    872		break;
    873	case ixgbe_mac_82599EB:
    874	case ixgbe_mac_X540:
    875	case ixgbe_mac_X550:
    876	case ixgbe_mac_X550EM_x:
    877	case ixgbe_mac_x550em_a:
    878		if (direction == -1) {
    879			/* other causes */
    880			msix_vector |= IXGBE_IVAR_ALLOC_VAL;
    881			index = ((queue & 1) * 8);
    882			ivar = IXGBE_READ_REG(&adapter->hw, IXGBE_IVAR_MISC);
    883			ivar &= ~(0xFF << index);
    884			ivar |= (msix_vector << index);
    885			IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR_MISC, ivar);
    886			break;
    887		} else {
    888			/* tx or rx causes */
    889			msix_vector |= IXGBE_IVAR_ALLOC_VAL;
    890			index = ((16 * (queue & 1)) + (8 * direction));
    891			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(queue >> 1));
    892			ivar &= ~(0xFF << index);
    893			ivar |= (msix_vector << index);
    894			IXGBE_WRITE_REG(hw, IXGBE_IVAR(queue >> 1), ivar);
    895			break;
    896		}
    897	default:
    898		break;
    899	}
    900}
    901
    902void ixgbe_irq_rearm_queues(struct ixgbe_adapter *adapter,
    903			    u64 qmask)
    904{
    905	u32 mask;
    906
    907	switch (adapter->hw.mac.type) {
    908	case ixgbe_mac_82598EB:
    909		mask = (IXGBE_EIMS_RTX_QUEUE & qmask);
    910		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask);
    911		break;
    912	case ixgbe_mac_82599EB:
    913	case ixgbe_mac_X540:
    914	case ixgbe_mac_X550:
    915	case ixgbe_mac_X550EM_x:
    916	case ixgbe_mac_x550em_a:
    917		mask = (qmask & 0xFFFFFFFF);
    918		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask);
    919		mask = (qmask >> 32);
    920		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(1), mask);
    921		break;
    922	default:
    923		break;
    924	}
    925}
    926
    927static void ixgbe_update_xoff_rx_lfc(struct ixgbe_adapter *adapter)
    928{
    929	struct ixgbe_hw *hw = &adapter->hw;
    930	struct ixgbe_hw_stats *hwstats = &adapter->stats;
    931	int i;
    932	u32 data;
    933
    934	if ((hw->fc.current_mode != ixgbe_fc_full) &&
    935	    (hw->fc.current_mode != ixgbe_fc_rx_pause))
    936		return;
    937
    938	switch (hw->mac.type) {
    939	case ixgbe_mac_82598EB:
    940		data = IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
    941		break;
    942	default:
    943		data = IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
    944	}
    945	hwstats->lxoffrxc += data;
    946
    947	/* refill credits (no tx hang) if we received xoff */
    948	if (!data)
    949		return;
    950
    951	for (i = 0; i < adapter->num_tx_queues; i++)
    952		clear_bit(__IXGBE_HANG_CHECK_ARMED,
    953			  &adapter->tx_ring[i]->state);
    954
    955	for (i = 0; i < adapter->num_xdp_queues; i++)
    956		clear_bit(__IXGBE_HANG_CHECK_ARMED,
    957			  &adapter->xdp_ring[i]->state);
    958}
    959
    960static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter)
    961{
    962	struct ixgbe_hw *hw = &adapter->hw;
    963	struct ixgbe_hw_stats *hwstats = &adapter->stats;
    964	u32 xoff[8] = {0};
    965	u8 tc;
    966	int i;
    967	bool pfc_en = adapter->dcb_cfg.pfc_mode_enable;
    968
    969	if (adapter->ixgbe_ieee_pfc)
    970		pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en);
    971
    972	if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED) || !pfc_en) {
    973		ixgbe_update_xoff_rx_lfc(adapter);
    974		return;
    975	}
    976
    977	/* update stats for each tc, only valid with PFC enabled */
    978	for (i = 0; i < MAX_TX_PACKET_BUFFERS; i++) {
    979		u32 pxoffrxc;
    980
    981		switch (hw->mac.type) {
    982		case ixgbe_mac_82598EB:
    983			pxoffrxc = IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
    984			break;
    985		default:
    986			pxoffrxc = IXGBE_READ_REG(hw, IXGBE_PXOFFRXCNT(i));
    987		}
    988		hwstats->pxoffrxc[i] += pxoffrxc;
    989		/* Get the TC for given UP */
    990		tc = netdev_get_prio_tc_map(adapter->netdev, i);
    991		xoff[tc] += pxoffrxc;
    992	}
    993
    994	/* disarm tx queues that have received xoff frames */
    995	for (i = 0; i < adapter->num_tx_queues; i++) {
    996		struct ixgbe_ring *tx_ring = adapter->tx_ring[i];
    997
    998		tc = tx_ring->dcb_tc;
    999		if (xoff[tc])
   1000			clear_bit(__IXGBE_HANG_CHECK_ARMED, &tx_ring->state);
   1001	}
   1002
   1003	for (i = 0; i < adapter->num_xdp_queues; i++) {
   1004		struct ixgbe_ring *xdp_ring = adapter->xdp_ring[i];
   1005
   1006		tc = xdp_ring->dcb_tc;
   1007		if (xoff[tc])
   1008			clear_bit(__IXGBE_HANG_CHECK_ARMED, &xdp_ring->state);
   1009	}
   1010}
   1011
   1012static u64 ixgbe_get_tx_completed(struct ixgbe_ring *ring)
   1013{
   1014	return ring->stats.packets;
   1015}
   1016
   1017static u64 ixgbe_get_tx_pending(struct ixgbe_ring *ring)
   1018{
   1019	unsigned int head, tail;
   1020
   1021	head = ring->next_to_clean;
   1022	tail = ring->next_to_use;
   1023
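	/* Number of descriptors still owned by hardware, accounting for ring
	 * wrap, e.g. with count = 512, head = 500 and tail = 10 there are
	 * (10 + 512) - 500 = 22 descriptors pending.
	 */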
   1024	return ((head <= tail) ? tail : tail + ring->count) - head;
   1025}
   1026
   1027static inline bool ixgbe_check_tx_hang(struct ixgbe_ring *tx_ring)
   1028{
   1029	u32 tx_done = ixgbe_get_tx_completed(tx_ring);
   1030	u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
   1031	u32 tx_pending = ixgbe_get_tx_pending(tx_ring);
   1032
   1033	clear_check_for_tx_hang(tx_ring);
   1034
   1035	/*
   1036	 * Check for a hung queue, but be thorough. This verifies
   1037	 * that a transmit has been completed since the previous
   1038	 * check AND there is at least one packet pending. The
   1039	 * ARMED bit is set to indicate a potential hang. The
   1040	 * bit is cleared if a pause frame is received to remove
   1041	 * false hang detection due to PFC or 802.3x frames. By
   1042	 * requiring this to fail twice we avoid races with
   1043	 * pfc clearing the ARMED bit and conditions where we
   1044	 * run the check_tx_hang logic with a transmit completion
   1045	 * pending but without time to complete it yet.
   1046	 */
   1047	if (tx_done_old == tx_done && tx_pending)
   1048		/* make sure it is true for two checks in a row */
   1049		return test_and_set_bit(__IXGBE_HANG_CHECK_ARMED,
   1050					&tx_ring->state);
   1051	/* update completed stats and continue */
   1052	tx_ring->tx_stats.tx_done_old = tx_done;
   1053	/* reset the countdown */
   1054	clear_bit(__IXGBE_HANG_CHECK_ARMED, &tx_ring->state);
   1055
   1056	return false;
   1057}
   1058
   1059/**
   1060 * ixgbe_tx_timeout_reset - initiate reset due to Tx timeout
   1061 * @adapter: driver private struct
   1062 **/
   1063static void ixgbe_tx_timeout_reset(struct ixgbe_adapter *adapter)
   1064{
   1065
   1066	/* Do the reset outside of interrupt context */
   1067	if (!test_bit(__IXGBE_DOWN, &adapter->state)) {
   1068		set_bit(__IXGBE_RESET_REQUESTED, &adapter->state);
   1069		e_warn(drv, "initiating reset due to tx timeout\n");
   1070		ixgbe_service_event_schedule(adapter);
   1071	}
   1072}
   1073
   1074/**
   1075 * ixgbe_tx_maxrate - callback to set the maximum per-queue bitrate
   1076 * @netdev: network interface device structure
   1077 * @queue_index: Tx queue to set
   1078 * @maxrate: desired maximum transmit bitrate
   1079 **/
   1080static int ixgbe_tx_maxrate(struct net_device *netdev,
   1081			    int queue_index, u32 maxrate)
   1082{
   1083	struct ixgbe_adapter *adapter = netdev_priv(netdev);
   1084	struct ixgbe_hw *hw = &adapter->hw;
   1085	u32 bcnrc_val = ixgbe_link_mbps(adapter);
   1086
   1087	if (!maxrate)
   1088		return 0;
   1089
   1090	/* Calculate the rate factor values to set */
   1091	bcnrc_val <<= IXGBE_RTTBCNRC_RF_INT_SHIFT;
   1092	bcnrc_val /= maxrate;
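	/* bcnrc_val now holds link_speed / maxrate as a fixed-point rate
	 * factor with IXGBE_RTTBCNRC_RF_INT_SHIFT fractional bits; e.g. a
	 * 10000 Mb/s link limited to 1000 Mb/s gives a factor of 10.0.
	 */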
   1093
   1094	/* clear everything but the rate factor */
   1095	bcnrc_val &= IXGBE_RTTBCNRC_RF_INT_MASK |
   1096	IXGBE_RTTBCNRC_RF_DEC_MASK;
   1097
   1098	/* enable the rate scheduler */
   1099	bcnrc_val |= IXGBE_RTTBCNRC_RS_ENA;
   1100
   1101	IXGBE_WRITE_REG(hw, IXGBE_RTTDQSEL, queue_index);
   1102	IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRC, bcnrc_val);
   1103
   1104	return 0;
   1105}
   1106
   1107/**
   1108 * ixgbe_clean_tx_irq - Reclaim resources after transmit completes
   1109 * @q_vector: structure containing interrupt and ring information
   1110 * @tx_ring: tx ring to clean
   1111 * @napi_budget: Used to determine if we are in netpoll
   1112 **/
   1113static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
   1114			       struct ixgbe_ring *tx_ring, int napi_budget)
   1115{
   1116	struct ixgbe_adapter *adapter = q_vector->adapter;
   1117	struct ixgbe_tx_buffer *tx_buffer;
   1118	union ixgbe_adv_tx_desc *tx_desc;
   1119	unsigned int total_bytes = 0, total_packets = 0, total_ipsec = 0;
   1120	unsigned int budget = q_vector->tx.work_limit;
   1121	unsigned int i = tx_ring->next_to_clean;
   1122
   1123	if (test_bit(__IXGBE_DOWN, &adapter->state))
   1124		return true;
   1125
   1126	tx_buffer = &tx_ring->tx_buffer_info[i];
   1127	tx_desc = IXGBE_TX_DESC(tx_ring, i);
   1128	i -= tx_ring->count;
   1129
   1130	do {
   1131		union ixgbe_adv_tx_desc *eop_desc = tx_buffer->next_to_watch;
   1132
   1133		/* if next_to_watch is not set then there is no work pending */
   1134		if (!eop_desc)
   1135			break;
   1136
   1137		/* prevent any other reads prior to eop_desc */
   1138		smp_rmb();
   1139
   1140		/* if DD is not set pending work has not been completed */
   1141		if (!(eop_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)))
   1142			break;
   1143
   1144		/* clear next_to_watch to prevent false hangs */
   1145		tx_buffer->next_to_watch = NULL;
   1146
   1147		/* update the statistics for this packet */
   1148		total_bytes += tx_buffer->bytecount;
   1149		total_packets += tx_buffer->gso_segs;
   1150		if (tx_buffer->tx_flags & IXGBE_TX_FLAGS_IPSEC)
   1151			total_ipsec++;
   1152
   1153		/* free the skb */
   1154		if (ring_is_xdp(tx_ring))
   1155			xdp_return_frame(tx_buffer->xdpf);
   1156		else
   1157			napi_consume_skb(tx_buffer->skb, napi_budget);
   1158
   1159		/* unmap skb header data */
   1160		dma_unmap_single(tx_ring->dev,
   1161				 dma_unmap_addr(tx_buffer, dma),
   1162				 dma_unmap_len(tx_buffer, len),
   1163				 DMA_TO_DEVICE);
   1164
   1165		/* clear tx_buffer data */
   1166		dma_unmap_len_set(tx_buffer, len, 0);
   1167
   1168		/* unmap remaining buffers */
   1169		while (tx_desc != eop_desc) {
   1170			tx_buffer++;
   1171			tx_desc++;
   1172			i++;
   1173			if (unlikely(!i)) {
   1174				i -= tx_ring->count;
   1175				tx_buffer = tx_ring->tx_buffer_info;
   1176				tx_desc = IXGBE_TX_DESC(tx_ring, 0);
   1177			}
   1178
   1179			/* unmap any remaining paged data */
   1180			if (dma_unmap_len(tx_buffer, len)) {
   1181				dma_unmap_page(tx_ring->dev,
   1182					       dma_unmap_addr(tx_buffer, dma),
   1183					       dma_unmap_len(tx_buffer, len),
   1184					       DMA_TO_DEVICE);
   1185				dma_unmap_len_set(tx_buffer, len, 0);
   1186			}
   1187		}
   1188
   1189		/* move us one more past the eop_desc for start of next pkt */
   1190		tx_buffer++;
   1191		tx_desc++;
   1192		i++;
   1193		if (unlikely(!i)) {
   1194			i -= tx_ring->count;
   1195			tx_buffer = tx_ring->tx_buffer_info;
   1196			tx_desc = IXGBE_TX_DESC(tx_ring, 0);
   1197		}
   1198
   1199		/* issue prefetch for next Tx descriptor */
   1200		prefetch(tx_desc);
   1201
   1202		/* update budget accounting */
   1203		budget--;
   1204	} while (likely(budget));
   1205
   1206	i += tx_ring->count;
   1207	tx_ring->next_to_clean = i;
   1208	u64_stats_update_begin(&tx_ring->syncp);
   1209	tx_ring->stats.bytes += total_bytes;
   1210	tx_ring->stats.packets += total_packets;
   1211	u64_stats_update_end(&tx_ring->syncp);
   1212	q_vector->tx.total_bytes += total_bytes;
   1213	q_vector->tx.total_packets += total_packets;
   1214	adapter->tx_ipsec += total_ipsec;
   1215
   1216	if (check_for_tx_hang(tx_ring) && ixgbe_check_tx_hang(tx_ring)) {
   1217		/* schedule immediate reset if we believe we hung */
   1218		struct ixgbe_hw *hw = &adapter->hw;
   1219		e_err(drv, "Detected Tx Unit Hang %s\n"
   1220			"  Tx Queue             <%d>\n"
   1221			"  TDH, TDT             <%x>, <%x>\n"
   1222			"  next_to_use          <%x>\n"
   1223			"  next_to_clean        <%x>\n"
   1224			"tx_buffer_info[next_to_clean]\n"
   1225			"  time_stamp           <%lx>\n"
   1226			"  jiffies              <%lx>\n",
   1227			ring_is_xdp(tx_ring) ? "(XDP)" : "",
   1228			tx_ring->queue_index,
   1229			IXGBE_READ_REG(hw, IXGBE_TDH(tx_ring->reg_idx)),
   1230			IXGBE_READ_REG(hw, IXGBE_TDT(tx_ring->reg_idx)),
   1231			tx_ring->next_to_use, i,
   1232			tx_ring->tx_buffer_info[i].time_stamp, jiffies);
   1233
   1234		if (!ring_is_xdp(tx_ring))
   1235			netif_stop_subqueue(tx_ring->netdev,
   1236					    tx_ring->queue_index);
   1237
   1238		e_info(probe,
   1239		       "tx hang %d detected on queue %d, resetting adapter\n",
   1240			adapter->tx_timeout_count + 1, tx_ring->queue_index);
   1241
   1242		/* schedule immediate reset if we believe we hung */
   1243		ixgbe_tx_timeout_reset(adapter);
   1244
   1245		/* the adapter is about to reset, no point in enabling stuff */
   1246		return true;
   1247	}
   1248
   1249	if (ring_is_xdp(tx_ring))
   1250		return !!budget;
   1251
   1252	netdev_tx_completed_queue(txring_txq(tx_ring),
   1253				  total_packets, total_bytes);
   1254
   1255#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
   1256	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
   1257		     (ixgbe_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) {
   1258		/* Make sure that anybody stopping the queue after this
   1259		 * sees the new next_to_clean.
   1260		 */
   1261		smp_mb();
   1262		if (__netif_subqueue_stopped(tx_ring->netdev,
   1263					     tx_ring->queue_index)
   1264		    && !test_bit(__IXGBE_DOWN, &adapter->state)) {
   1265			netif_wake_subqueue(tx_ring->netdev,
   1266					    tx_ring->queue_index);
   1267			++tx_ring->tx_stats.restart_queue;
   1268		}
   1269	}
   1270
   1271	return !!budget;
   1272}
   1273
   1274#ifdef CONFIG_IXGBE_DCA
   1275static void ixgbe_update_tx_dca(struct ixgbe_adapter *adapter,
   1276				struct ixgbe_ring *tx_ring,
   1277				int cpu)
   1278{
   1279	struct ixgbe_hw *hw = &adapter->hw;
   1280	u32 txctrl = 0;
   1281	u16 reg_offset;
   1282
   1283	if (adapter->flags & IXGBE_FLAG_DCA_ENABLED)
   1284		txctrl = dca3_get_tag(tx_ring->dev, cpu);
   1285
   1286	switch (hw->mac.type) {
   1287	case ixgbe_mac_82598EB:
   1288		reg_offset = IXGBE_DCA_TXCTRL(tx_ring->reg_idx);
   1289		break;
   1290	case ixgbe_mac_82599EB:
   1291	case ixgbe_mac_X540:
   1292		reg_offset = IXGBE_DCA_TXCTRL_82599(tx_ring->reg_idx);
   1293		txctrl <<= IXGBE_DCA_TXCTRL_CPUID_SHIFT_82599;
   1294		break;
   1295	default:
   1296		/* for unknown hardware do not write register */
   1297		return;
   1298	}
   1299
   1300	/*
   1301	 * We can enable relaxed ordering for reads, but not writes when
   1302	 * DCA is enabled.  This is due to a known issue in some chipsets
   1303	 * which will cause the DCA tag to be cleared.
   1304	 */
   1305	txctrl |= IXGBE_DCA_TXCTRL_DESC_RRO_EN |
   1306		  IXGBE_DCA_TXCTRL_DATA_RRO_EN |
   1307		  IXGBE_DCA_TXCTRL_DESC_DCA_EN;
   1308
   1309	IXGBE_WRITE_REG(hw, reg_offset, txctrl);
   1310}
   1311
   1312static void ixgbe_update_rx_dca(struct ixgbe_adapter *adapter,
   1313				struct ixgbe_ring *rx_ring,
   1314				int cpu)
   1315{
   1316	struct ixgbe_hw *hw = &adapter->hw;
   1317	u32 rxctrl = 0;
   1318	u8 reg_idx = rx_ring->reg_idx;
   1319
   1320	if (adapter->flags & IXGBE_FLAG_DCA_ENABLED)
   1321		rxctrl = dca3_get_tag(rx_ring->dev, cpu);
   1322
   1323	switch (hw->mac.type) {
   1324	case ixgbe_mac_82599EB:
   1325	case ixgbe_mac_X540:
   1326		rxctrl <<= IXGBE_DCA_RXCTRL_CPUID_SHIFT_82599;
   1327		break;
   1328	default:
   1329		break;
   1330	}
   1331
   1332	/*
   1333	 * We can enable relaxed ordering for reads, but not writes when
   1334	 * DCA is enabled.  This is due to a known issue in some chipsets
   1335	 * which will cause the DCA tag to be cleared.
   1336	 */
   1337	rxctrl |= IXGBE_DCA_RXCTRL_DESC_RRO_EN |
   1338		  IXGBE_DCA_RXCTRL_DATA_DCA_EN |
   1339		  IXGBE_DCA_RXCTRL_DESC_DCA_EN;
   1340
   1341	IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(reg_idx), rxctrl);
   1342}
   1343
   1344static void ixgbe_update_dca(struct ixgbe_q_vector *q_vector)
   1345{
   1346	struct ixgbe_adapter *adapter = q_vector->adapter;
   1347	struct ixgbe_ring *ring;
   1348	int cpu = get_cpu();
   1349
   1350	if (q_vector->cpu == cpu)
   1351		goto out_no_update;
   1352
   1353	ixgbe_for_each_ring(ring, q_vector->tx)
   1354		ixgbe_update_tx_dca(adapter, ring, cpu);
   1355
   1356	ixgbe_for_each_ring(ring, q_vector->rx)
   1357		ixgbe_update_rx_dca(adapter, ring, cpu);
   1358
   1359	q_vector->cpu = cpu;
   1360out_no_update:
   1361	put_cpu();
   1362}
   1363
   1364static void ixgbe_setup_dca(struct ixgbe_adapter *adapter)
   1365{
   1366	int i;
   1367
   1368	/* always use CB2 mode, difference is masked in the CB driver */
   1369	if (adapter->flags & IXGBE_FLAG_DCA_ENABLED)
   1370		IXGBE_WRITE_REG(&adapter->hw, IXGBE_DCA_CTRL,
   1371				IXGBE_DCA_CTRL_DCA_MODE_CB2);
   1372	else
   1373		IXGBE_WRITE_REG(&adapter->hw, IXGBE_DCA_CTRL,
   1374				IXGBE_DCA_CTRL_DCA_DISABLE);
   1375
   1376	for (i = 0; i < adapter->num_q_vectors; i++) {
   1377		adapter->q_vector[i]->cpu = -1;
   1378		ixgbe_update_dca(adapter->q_vector[i]);
   1379	}
   1380}
   1381
   1382static int __ixgbe_notify_dca(struct device *dev, void *data)
   1383{
   1384	struct ixgbe_adapter *adapter = dev_get_drvdata(dev);
   1385	unsigned long event = *(unsigned long *)data;
   1386
   1387	if (!(adapter->flags & IXGBE_FLAG_DCA_CAPABLE))
   1388		return 0;
   1389
   1390	switch (event) {
   1391	case DCA_PROVIDER_ADD:
   1392		/* if we're already enabled, don't do it again */
   1393		if (adapter->flags & IXGBE_FLAG_DCA_ENABLED)
   1394			break;
   1395		if (dca_add_requester(dev) == 0) {
   1396			adapter->flags |= IXGBE_FLAG_DCA_ENABLED;
   1397			IXGBE_WRITE_REG(&adapter->hw, IXGBE_DCA_CTRL,
   1398					IXGBE_DCA_CTRL_DCA_MODE_CB2);
   1399			break;
   1400		}
   1401		fallthrough; /* DCA is disabled. */
   1402	case DCA_PROVIDER_REMOVE:
   1403		if (adapter->flags & IXGBE_FLAG_DCA_ENABLED) {
   1404			dca_remove_requester(dev);
   1405			adapter->flags &= ~IXGBE_FLAG_DCA_ENABLED;
   1406			IXGBE_WRITE_REG(&adapter->hw, IXGBE_DCA_CTRL,
   1407					IXGBE_DCA_CTRL_DCA_DISABLE);
   1408		}
   1409		break;
   1410	}
   1411
   1412	return 0;
   1413}
   1414
   1415#endif /* CONFIG_IXGBE_DCA */
   1416
   1417#define IXGBE_RSS_L4_TYPES_MASK \
   1418	((1ul << IXGBE_RXDADV_RSSTYPE_IPV4_TCP) | \
   1419	 (1ul << IXGBE_RXDADV_RSSTYPE_IPV4_UDP) | \
   1420	 (1ul << IXGBE_RXDADV_RSSTYPE_IPV6_TCP) | \
   1421	 (1ul << IXGBE_RXDADV_RSSTYPE_IPV6_UDP))
   1422
   1423static inline void ixgbe_rx_hash(struct ixgbe_ring *ring,
   1424				 union ixgbe_adv_rx_desc *rx_desc,
   1425				 struct sk_buff *skb)
   1426{
   1427	u16 rss_type;
   1428
   1429	if (!(ring->netdev->features & NETIF_F_RXHASH))
   1430		return;
   1431
   1432	rss_type = le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.pkt_info) &
   1433		   IXGBE_RXDADV_RSSTYPE_MASK;
   1434
   1435	if (!rss_type)
   1436		return;
   1437
   1438	skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
   1439		     (IXGBE_RSS_L4_TYPES_MASK & (1ul << rss_type)) ?
   1440		     PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
   1441}
   1442
   1443#ifdef IXGBE_FCOE
   1444/**
   1445 * ixgbe_rx_is_fcoe - check the rx desc for incoming pkt type
   1446 * @ring: structure containing ring specific data
   1447 * @rx_desc: advanced rx descriptor
   1448 *
   1449 * Returns : true if it is FCoE pkt
   1450 */
   1451static inline bool ixgbe_rx_is_fcoe(struct ixgbe_ring *ring,
   1452				    union ixgbe_adv_rx_desc *rx_desc)
   1453{
   1454	__le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info;
   1455
   1456	return test_bit(__IXGBE_RX_FCOE, &ring->state) &&
   1457	       ((pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_ETQF_MASK)) ==
   1458		(cpu_to_le16(IXGBE_ETQF_FILTER_FCOE <<
   1459			     IXGBE_RXDADV_PKTTYPE_ETQF_SHIFT)));
   1460}
   1461
   1462#endif /* IXGBE_FCOE */
   1463/**
   1464 * ixgbe_rx_checksum - indicate in skb if hw indicated a good cksum
   1465 * @ring: structure containing ring specific data
   1466 * @rx_desc: current Rx descriptor being processed
   1467 * @skb: skb currently being received and modified
   1468 **/
   1469static inline void ixgbe_rx_checksum(struct ixgbe_ring *ring,
   1470				     union ixgbe_adv_rx_desc *rx_desc,
   1471				     struct sk_buff *skb)
   1472{
   1473	__le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info;
   1474	bool encap_pkt = false;
   1475
   1476	skb_checksum_none_assert(skb);
   1477
   1478	/* Rx csum disabled */
   1479	if (!(ring->netdev->features & NETIF_F_RXCSUM))
   1480		return;
   1481
   1482	/* check for VXLAN and Geneve packets */
   1483	if (pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_VXLAN)) {
   1484		encap_pkt = true;
   1485		skb->encapsulation = 1;
   1486	}
   1487
   1488	/* if IP and error */
   1489	if (ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_IPCS) &&
   1490	    ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_ERR_IPE)) {
   1491		ring->rx_stats.csum_err++;
   1492		return;
   1493	}
   1494
   1495	if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_L4CS))
   1496		return;
   1497
   1498	if (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_ERR_TCPE)) {
   1499		/*
   1500		 * 82599 errata, UDP frames with a 0 checksum can be marked as
   1501		 * checksum errors.
   1502		 */
   1503		if ((pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_UDP)) &&
   1504		    test_bit(__IXGBE_RX_CSUM_UDP_ZERO_ERR, &ring->state))
   1505			return;
   1506
   1507		ring->rx_stats.csum_err++;
   1508		return;
   1509	}
   1510
   1511	/* It must be a TCP or UDP packet with a valid checksum */
   1512	skb->ip_summed = CHECKSUM_UNNECESSARY;
   1513	if (encap_pkt) {
   1514		if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_OUTERIPCS))
   1515			return;
   1516
   1517		if (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_ERR_OUTERIPER)) {
   1518			skb->ip_summed = CHECKSUM_NONE;
   1519			return;
   1520		}
   1521		/* If we checked the outer header let the stack know */
   1522		skb->csum_level = 1;
   1523	}
   1524}
   1525
   1526static unsigned int ixgbe_rx_offset(struct ixgbe_ring *rx_ring)
   1527{
   1528	return ring_uses_build_skb(rx_ring) ? IXGBE_SKB_PAD : 0;
   1529}
   1530
   1531static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring,
   1532				    struct ixgbe_rx_buffer *bi)
   1533{
   1534	struct page *page = bi->page;
   1535	dma_addr_t dma;
   1536
   1537	/* since we are recycling buffers we should seldom need to alloc */
   1538	if (likely(page))
   1539		return true;
   1540
   1541	/* alloc new page for storage */
   1542	page = dev_alloc_pages(ixgbe_rx_pg_order(rx_ring));
   1543	if (unlikely(!page)) {
   1544		rx_ring->rx_stats.alloc_rx_page_failed++;
   1545		return false;
   1546	}
   1547
   1548	/* map page for use */
   1549	dma = dma_map_page_attrs(rx_ring->dev, page, 0,
   1550				 ixgbe_rx_pg_size(rx_ring),
   1551				 DMA_FROM_DEVICE,
   1552				 IXGBE_RX_DMA_ATTR);
   1553
   1554	/*
   1555	 * if mapping failed free memory back to system since
   1556	 * there isn't much point in holding memory we can't use
   1557	 */
   1558	if (dma_mapping_error(rx_ring->dev, dma)) {
   1559		__free_pages(page, ixgbe_rx_pg_order(rx_ring));
   1560
   1561		rx_ring->rx_stats.alloc_rx_page_failed++;
   1562		return false;
   1563	}
   1564
   1565	bi->dma = dma;
   1566	bi->page = page;
   1567	bi->page_offset = rx_ring->rx_offset;
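        	/* take a whole batch of page references up front; per-buffer use
        	 * is tracked in pagecnt_bias so recycling a buffer only touches
        	 * this local counter instead of the atomic page refcount
        	 */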
   1568	page_ref_add(page, USHRT_MAX - 1);
   1569	bi->pagecnt_bias = USHRT_MAX;
   1570	rx_ring->rx_stats.alloc_rx_page++;
   1571
   1572	return true;
   1573}
   1574
   1575/**
   1576 * ixgbe_alloc_rx_buffers - Replace used receive buffers
   1577 * @rx_ring: ring to place buffers on
   1578 * @cleaned_count: number of buffers to replace
   1579 **/
   1580void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count)
   1581{
   1582	union ixgbe_adv_rx_desc *rx_desc;
   1583	struct ixgbe_rx_buffer *bi;
   1584	u16 i = rx_ring->next_to_use;
   1585	u16 bufsz;
   1586
   1587	/* nothing to do */
   1588	if (!cleaned_count)
   1589		return;
   1590
   1591	rx_desc = IXGBE_RX_DESC(rx_ring, i);
   1592	bi = &rx_ring->rx_buffer_info[i];
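        	/* bias i by -count so the end-of-ring wrap test in the loop below
        	 * reduces to a cheap !i check; the real index is restored after
        	 * the loop
        	 */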
   1593	i -= rx_ring->count;
   1594
   1595	bufsz = ixgbe_rx_bufsz(rx_ring);
   1596
   1597	do {
   1598		if (!ixgbe_alloc_mapped_page(rx_ring, bi))
   1599			break;
   1600
   1601		/* sync the buffer for use by the device */
   1602		dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
   1603						 bi->page_offset, bufsz,
   1604						 DMA_FROM_DEVICE);
   1605
   1606		/*
   1607		 * Refresh the desc even if buffer_addrs didn't change
   1608		 * because each write-back erases this info.
   1609		 */
   1610		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
   1611
   1612		rx_desc++;
   1613		bi++;
   1614		i++;
   1615		if (unlikely(!i)) {
   1616			rx_desc = IXGBE_RX_DESC(rx_ring, 0);
   1617			bi = rx_ring->rx_buffer_info;
   1618			i -= rx_ring->count;
   1619		}
   1620
   1621		/* clear the length for the next_to_use descriptor */
   1622		rx_desc->wb.upper.length = 0;
   1623
   1624		cleaned_count--;
   1625	} while (cleaned_count);
   1626
   1627	i += rx_ring->count;
   1628
   1629	if (rx_ring->next_to_use != i) {
   1630		rx_ring->next_to_use = i;
   1631
   1632		/* update next to alloc since we have filled the ring */
   1633		rx_ring->next_to_alloc = i;
   1634
   1635		/* Force memory writes to complete before letting h/w
   1636		 * know there are new descriptors to fetch.  (Only
   1637		 * applicable for weak-ordered memory model archs,
   1638		 * such as IA-64).
   1639		 */
   1640		wmb();
   1641		writel(i, rx_ring->tail);
   1642	}
   1643}
   1644
   1645static void ixgbe_set_rsc_gso_size(struct ixgbe_ring *ring,
   1646				   struct sk_buff *skb)
   1647{
   1648	u16 hdr_len = skb_headlen(skb);
   1649
   1650	/* set gso_size to avoid messing up TCP MSS */
   1651	skb_shinfo(skb)->gso_size = DIV_ROUND_UP((skb->len - hdr_len),
   1652						 IXGBE_CB(skb)->append_cnt);
   1653	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
   1654}
   1655
   1656static void ixgbe_update_rsc_stats(struct ixgbe_ring *rx_ring,
   1657				   struct sk_buff *skb)
   1658{
   1659	/* if append_cnt is 0 then frame is not RSC */
   1660	if (!IXGBE_CB(skb)->append_cnt)
   1661		return;
   1662
   1663	rx_ring->rx_stats.rsc_count += IXGBE_CB(skb)->append_cnt;
   1664	rx_ring->rx_stats.rsc_flush++;
   1665
   1666	ixgbe_set_rsc_gso_size(rx_ring, skb);
   1667
   1668	/* gso_size is computed using append_cnt so always clear it last */
   1669	IXGBE_CB(skb)->append_cnt = 0;
   1670}
   1671
   1672/**
   1673 * ixgbe_process_skb_fields - Populate skb header fields from Rx descriptor
   1674 * @rx_ring: rx descriptor ring packet is being transacted on
   1675 * @rx_desc: pointer to the EOP Rx descriptor
   1676 * @skb: pointer to current skb being populated
   1677 *
   1678 * This function checks the ring, descriptor, and packet information in
   1679 * order to populate the hash, checksum, VLAN, timestamp, protocol, and
   1680 * other fields within the skb.
   1681 **/
   1682void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring,
   1683			      union ixgbe_adv_rx_desc *rx_desc,
   1684			      struct sk_buff *skb)
   1685{
   1686	struct net_device *dev = rx_ring->netdev;
   1687	u32 flags = rx_ring->q_vector->adapter->flags;
   1688
   1689	ixgbe_update_rsc_stats(rx_ring, skb);
   1690
   1691	ixgbe_rx_hash(rx_ring, rx_desc, skb);
   1692
   1693	ixgbe_rx_checksum(rx_ring, rx_desc, skb);
   1694
   1695	if (unlikely(flags & IXGBE_FLAG_RX_HWTSTAMP_ENABLED))
   1696		ixgbe_ptp_rx_hwtstamp(rx_ring, rx_desc, skb);
   1697
   1698	if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
   1699	    ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) {
   1700		u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
   1701		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
   1702	}
   1703
   1704	if (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_SECP))
   1705		ixgbe_ipsec_rx(rx_ring, rx_desc, skb);
   1706
   1707	/* record Rx queue, or update MACVLAN statistics */
   1708	if (netif_is_ixgbe(dev))
   1709		skb_record_rx_queue(skb, rx_ring->queue_index);
   1710	else
   1711		macvlan_count_rx(netdev_priv(dev), skb->len + ETH_HLEN, true,
   1712				 false);
   1713
   1714	skb->protocol = eth_type_trans(skb, dev);
   1715}
   1716
   1717void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector,
   1718		  struct sk_buff *skb)
   1719{
   1720	napi_gro_receive(&q_vector->napi, skb);
   1721}
   1722
   1723/**
   1724 * ixgbe_is_non_eop - process handling of non-EOP buffers
   1725 * @rx_ring: Rx ring being processed
   1726 * @rx_desc: Rx descriptor for current buffer
   1727 * @skb: Current socket buffer containing buffer in progress
   1728 *
   1729 * This function updates next to clean.  If the buffer is an EOP buffer
   1730 * this function exits returning false, otherwise it will place the
   1731 * sk_buff in the next buffer to be chained and return true indicating
   1732 * that this is in fact a non-EOP buffer.
   1733 **/
   1734static bool ixgbe_is_non_eop(struct ixgbe_ring *rx_ring,
   1735			     union ixgbe_adv_rx_desc *rx_desc,
   1736			     struct sk_buff *skb)
   1737{
   1738	u32 ntc = rx_ring->next_to_clean + 1;
   1739
   1740	/* fetch, update, and store next to clean */
   1741	ntc = (ntc < rx_ring->count) ? ntc : 0;
   1742	rx_ring->next_to_clean = ntc;
   1743
   1744	prefetch(IXGBE_RX_DESC(rx_ring, ntc));
   1745
   1746	/* update RSC append count if present */
   1747	if (ring_is_rsc_enabled(rx_ring)) {
   1748		__le32 rsc_enabled = rx_desc->wb.lower.lo_dword.data &
   1749				     cpu_to_le32(IXGBE_RXDADV_RSCCNT_MASK);
   1750
   1751		if (unlikely(rsc_enabled)) {
   1752			u32 rsc_cnt = le32_to_cpu(rsc_enabled);
   1753
   1754			rsc_cnt >>= IXGBE_RXDADV_RSCCNT_SHIFT;
   1755			IXGBE_CB(skb)->append_cnt += rsc_cnt - 1;
   1756
   1757			/* update ntc based on RSC value */
   1758			ntc = le32_to_cpu(rx_desc->wb.upper.status_error);
   1759			ntc &= IXGBE_RXDADV_NEXTP_MASK;
   1760			ntc >>= IXGBE_RXDADV_NEXTP_SHIFT;
   1761		}
   1762	}
   1763
   1764	/* if we are the last buffer then there is nothing else to do */
   1765	if (likely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)))
   1766		return false;
   1767
   1768	/* place skb in next buffer to be received */
   1769	rx_ring->rx_buffer_info[ntc].skb = skb;
   1770	rx_ring->rx_stats.non_eop_descs++;
   1771
   1772	return true;
   1773}
   1774
   1775/**
   1776 * ixgbe_pull_tail - ixgbe specific version of skb_pull_tail
   1777 * @rx_ring: rx descriptor ring packet is being transacted on
   1778 * @skb: pointer to current skb being adjusted
   1779 *
   1780 * This function is an ixgbe specific version of __pskb_pull_tail.  The
   1781 * main difference between this version and the original function is that
   1782 * this function can make several assumptions about the state of things
   1783 * that allow for significant optimizations versus the standard function.
   1784 * As a result we can do things like drop a frag and maintain an accurate
   1785 * truesize for the skb.
   1786 */
   1787static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring,
   1788			    struct sk_buff *skb)
   1789{
   1790	skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
   1791	unsigned char *va;
   1792	unsigned int pull_len;
   1793
   1794	/*
   1795	 * it is valid to use page_address instead of kmap since we are
    1796	 * working with pages allocated out of the lowmem pool per
   1797	 * alloc_page(GFP_ATOMIC)
   1798	 */
   1799	va = skb_frag_address(frag);
   1800
   1801	/*
   1802	 * we need the header to contain the greater of either ETH_HLEN or
   1803	 * 60 bytes if the skb->len is less than 60 for skb_pad.
   1804	 */
   1805	pull_len = eth_get_headlen(skb->dev, va, IXGBE_RX_HDR_SIZE);
   1806
   1807	/* align pull length to size of long to optimize memcpy performance */
   1808	skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
   1809
   1810	/* update all of the pointers */
   1811	skb_frag_size_sub(frag, pull_len);
   1812	skb_frag_off_add(frag, pull_len);
   1813	skb->data_len -= pull_len;
   1814	skb->tail += pull_len;
   1815}
   1816
   1817/**
   1818 * ixgbe_dma_sync_frag - perform DMA sync for first frag of SKB
   1819 * @rx_ring: rx descriptor ring packet is being transacted on
   1820 * @skb: pointer to current skb being updated
   1821 *
   1822 * This function provides a basic DMA sync up for the first fragment of an
   1823 * skb.  The reason for doing this is that the first fragment cannot be
    1824 * unmapped until we have reached the end-of-packet descriptor for a buffer
   1825 * chain.
   1826 */
   1827static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring,
   1828				struct sk_buff *skb)
   1829{
   1830	if (ring_uses_build_skb(rx_ring)) {
   1831		unsigned long mask = (unsigned long)ixgbe_rx_pg_size(rx_ring) - 1;
   1832		unsigned long offset = (unsigned long)(skb->data) & mask;
   1833
   1834		dma_sync_single_range_for_cpu(rx_ring->dev,
   1835					      IXGBE_CB(skb)->dma,
   1836					      offset,
   1837					      skb_headlen(skb),
   1838					      DMA_FROM_DEVICE);
   1839	} else {
   1840		skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
   1841
   1842		dma_sync_single_range_for_cpu(rx_ring->dev,
   1843					      IXGBE_CB(skb)->dma,
   1844					      skb_frag_off(frag),
   1845					      skb_frag_size(frag),
   1846					      DMA_FROM_DEVICE);
   1847	}
   1848
   1849	/* If the page was released, just unmap it. */
   1850	if (unlikely(IXGBE_CB(skb)->page_released)) {
   1851		dma_unmap_page_attrs(rx_ring->dev, IXGBE_CB(skb)->dma,
   1852				     ixgbe_rx_pg_size(rx_ring),
   1853				     DMA_FROM_DEVICE,
   1854				     IXGBE_RX_DMA_ATTR);
   1855	}
   1856}
   1857
   1858/**
   1859 * ixgbe_cleanup_headers - Correct corrupted or empty headers
   1860 * @rx_ring: rx descriptor ring packet is being transacted on
   1861 * @rx_desc: pointer to the EOP Rx descriptor
   1862 * @skb: pointer to current skb being fixed
   1863 *
    1864 * Check if the skb is valid: in the XDP case it will be an error pointer.
    1865 * Return true in this case to abort processing and advance to the next
    1866 * descriptor.
   1867 *
   1868 * Check for corrupted packet headers caused by senders on the local L2
   1869 * embedded NIC switch not setting up their Tx Descriptors right.  These
   1870 * should be very rare.
   1871 *
   1872 * Also address the case where we are pulling data in on pages only
   1873 * and as such no data is present in the skb header.
   1874 *
   1875 * In addition if skb is not at least 60 bytes we need to pad it so that
   1876 * it is large enough to qualify as a valid Ethernet frame.
   1877 *
   1878 * Returns true if an error was encountered and skb was freed.
   1879 **/
   1880bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring,
   1881			   union ixgbe_adv_rx_desc *rx_desc,
   1882			   struct sk_buff *skb)
   1883{
   1884	struct net_device *netdev = rx_ring->netdev;
   1885
   1886	/* XDP packets use error pointer so abort at this point */
   1887	if (IS_ERR(skb))
   1888		return true;
   1889
   1890	/* Verify netdev is present, and that packet does not have any
   1891	 * errors that would be unacceptable to the netdev.
   1892	 */
   1893	if (!netdev ||
   1894	    (unlikely(ixgbe_test_staterr(rx_desc,
   1895					 IXGBE_RXDADV_ERR_FRAME_ERR_MASK) &&
   1896	     !(netdev->features & NETIF_F_RXALL)))) {
   1897		dev_kfree_skb_any(skb);
   1898		return true;
   1899	}
   1900
   1901	/* place header in linear portion of buffer */
   1902	if (!skb_headlen(skb))
   1903		ixgbe_pull_tail(rx_ring, skb);
   1904
   1905#ifdef IXGBE_FCOE
   1906	/* do not attempt to pad FCoE Frames as this will disrupt DDP */
   1907	if (ixgbe_rx_is_fcoe(rx_ring, rx_desc))
   1908		return false;
   1909
   1910#endif
   1911	/* if eth_skb_pad returns an error the skb was freed */
   1912	if (eth_skb_pad(skb))
   1913		return true;
   1914
   1915	return false;
   1916}
   1917
   1918/**
   1919 * ixgbe_reuse_rx_page - page flip buffer and store it back on the ring
   1920 * @rx_ring: rx descriptor ring to store buffers on
   1921 * @old_buff: donor buffer to have page reused
   1922 *
   1923 * Synchronizes page for reuse by the adapter
   1924 **/
   1925static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring,
   1926				struct ixgbe_rx_buffer *old_buff)
   1927{
   1928	struct ixgbe_rx_buffer *new_buff;
   1929	u16 nta = rx_ring->next_to_alloc;
   1930
   1931	new_buff = &rx_ring->rx_buffer_info[nta];
   1932
   1933	/* update, and store next to alloc */
   1934	nta++;
   1935	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
   1936
   1937	/* Transfer page from old buffer to new buffer.
   1938	 * Move each member individually to avoid possible store
   1939	 * forwarding stalls and unnecessary copy of skb.
   1940	 */
   1941	new_buff->dma		= old_buff->dma;
   1942	new_buff->page		= old_buff->page;
   1943	new_buff->page_offset	= old_buff->page_offset;
   1944	new_buff->pagecnt_bias	= old_buff->pagecnt_bias;
   1945}
   1946
   1947static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer,
   1948				    int rx_buffer_pgcnt)
   1949{
   1950	unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
   1951	struct page *page = rx_buffer->page;
   1952
   1953	/* avoid re-using remote and pfmemalloc pages */
   1954	if (!dev_page_is_reusable(page))
   1955		return false;
   1956
   1957#if (PAGE_SIZE < 8192)
    1958	/* if we are the only owner of the page we can reuse it */
   1959	if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1))
   1960		return false;
   1961#else
   1962	/* The last offset is a bit aggressive in that we assume the
   1963	 * worst case of FCoE being enabled and using a 3K buffer.
   1964	 * However this should have minimal impact as the 1K extra is
   1965	 * still less than one buffer in size.
   1966	 */
   1967#define IXGBE_LAST_OFFSET \
   1968	(SKB_WITH_OVERHEAD(PAGE_SIZE) - IXGBE_RXBUFFER_3K)
   1969	if (rx_buffer->page_offset > IXGBE_LAST_OFFSET)
   1970		return false;
   1971#endif
   1972
   1973	/* If we have drained the page fragment pool we need to update
   1974	 * the pagecnt_bias and page count so that we fully restock the
   1975	 * number of references the driver holds.
   1976	 */
   1977	if (unlikely(pagecnt_bias == 1)) {
   1978		page_ref_add(page, USHRT_MAX - 1);
   1979		rx_buffer->pagecnt_bias = USHRT_MAX;
   1980	}
   1981
   1982	return true;
   1983}
   1984
   1985/**
   1986 * ixgbe_add_rx_frag - Add contents of Rx buffer to sk_buff
   1987 * @rx_ring: rx descriptor ring to transact packets on
   1988 * @rx_buffer: buffer containing page to add
   1989 * @skb: sk_buff to place the data into
   1990 * @size: size of data in rx_buffer
   1991 *
   1992 * This function will add the data contained in rx_buffer->page to the skb.
   1993 * This is done either through a direct copy if the data in the buffer is
   1994 * less than the skb header size, otherwise it will just attach the page as
   1995 * a frag to the skb.
   1996 *
    1997 * The function will then update the page offset so that the buffer can
    1998 * be reused by the adapter.
   1999 **/
   2000static void ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring,
   2001			      struct ixgbe_rx_buffer *rx_buffer,
   2002			      struct sk_buff *skb,
   2003			      unsigned int size)
   2004{
   2005#if (PAGE_SIZE < 8192)
   2006	unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2;
   2007#else
   2008	unsigned int truesize = rx_ring->rx_offset ?
   2009				SKB_DATA_ALIGN(rx_ring->rx_offset + size) :
   2010				SKB_DATA_ALIGN(size);
   2011#endif
   2012	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
   2013			rx_buffer->page_offset, size, truesize);
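        	/* on 4K pages the buffer is split in half, so XOR-ing with truesize
        	 * flips to the unused half; on larger pages simply advance past the
        	 * data just consumed
        	 */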
   2014#if (PAGE_SIZE < 8192)
   2015	rx_buffer->page_offset ^= truesize;
   2016#else
   2017	rx_buffer->page_offset += truesize;
   2018#endif
   2019}
   2020
   2021static struct ixgbe_rx_buffer *ixgbe_get_rx_buffer(struct ixgbe_ring *rx_ring,
   2022						   union ixgbe_adv_rx_desc *rx_desc,
   2023						   struct sk_buff **skb,
   2024						   const unsigned int size,
   2025						   int *rx_buffer_pgcnt)
   2026{
   2027	struct ixgbe_rx_buffer *rx_buffer;
   2028
   2029	rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
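        	/* page_count() is only sampled for 4K pages, where reuse requires
        	 * the driver to be the sole owner; on larger pages reuse is bounded
        	 * by page_offset instead (see ixgbe_can_reuse_rx_page)
        	 */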
   2030	*rx_buffer_pgcnt =
   2031#if (PAGE_SIZE < 8192)
   2032		page_count(rx_buffer->page);
   2033#else
   2034		0;
   2035#endif
   2036	prefetchw(rx_buffer->page);
   2037	*skb = rx_buffer->skb;
   2038
   2039	/* Delay unmapping of the first packet. It carries the header
    2040	 * information; HW may still access the header after the writeback.
   2041	 * Only unmap it when EOP is reached
   2042	 */
   2043	if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)) {
   2044		if (!*skb)
   2045			goto skip_sync;
   2046	} else {
   2047		if (*skb)
   2048			ixgbe_dma_sync_frag(rx_ring, *skb);
   2049	}
   2050
   2051	/* we are reusing so sync this buffer for CPU use */
   2052	dma_sync_single_range_for_cpu(rx_ring->dev,
   2053				      rx_buffer->dma,
   2054				      rx_buffer->page_offset,
   2055				      size,
   2056				      DMA_FROM_DEVICE);
   2057skip_sync:
   2058	rx_buffer->pagecnt_bias--;
   2059
   2060	return rx_buffer;
   2061}
   2062
   2063static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring,
   2064				struct ixgbe_rx_buffer *rx_buffer,
   2065				struct sk_buff *skb,
   2066				int rx_buffer_pgcnt)
   2067{
   2068	if (ixgbe_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) {
   2069		/* hand second half of page back to the ring */
   2070		ixgbe_reuse_rx_page(rx_ring, rx_buffer);
   2071	} else {
   2072		if (!IS_ERR(skb) && IXGBE_CB(skb)->dma == rx_buffer->dma) {
   2073			/* the page has been released from the ring */
   2074			IXGBE_CB(skb)->page_released = true;
   2075		} else {
   2076			/* we are not reusing the buffer so unmap it */
   2077			dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
   2078					     ixgbe_rx_pg_size(rx_ring),
   2079					     DMA_FROM_DEVICE,
   2080					     IXGBE_RX_DMA_ATTR);
   2081		}
   2082		__page_frag_cache_drain(rx_buffer->page,
   2083					rx_buffer->pagecnt_bias);
   2084	}
   2085
   2086	/* clear contents of rx_buffer */
   2087	rx_buffer->page = NULL;
   2088	rx_buffer->skb = NULL;
   2089}
   2090
   2091static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
   2092					   struct ixgbe_rx_buffer *rx_buffer,
   2093					   struct xdp_buff *xdp,
   2094					   union ixgbe_adv_rx_desc *rx_desc)
   2095{
   2096	unsigned int size = xdp->data_end - xdp->data;
   2097#if (PAGE_SIZE < 8192)
   2098	unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2;
   2099#else
   2100	unsigned int truesize = SKB_DATA_ALIGN(xdp->data_end -
   2101					       xdp->data_hard_start);
   2102#endif
   2103	struct sk_buff *skb;
   2104
   2105	/* prefetch first cache line of first page */
   2106	net_prefetch(xdp->data);
   2107
   2108	/* Note, we get here by enabling legacy-rx via:
   2109	 *
   2110	 *    ethtool --set-priv-flags <dev> legacy-rx on
   2111	 *
   2112	 * In this mode, we currently get 0 extra XDP headroom as
   2113	 * opposed to having legacy-rx off, where we process XDP
   2114	 * packets going to stack via ixgbe_build_skb(). The latter
   2115	 * provides us currently with 192 bytes of headroom.
   2116	 *
   2117	 * For ixgbe_construct_skb() mode it means that the
   2118	 * xdp->data_meta will always point to xdp->data, since
   2119	 * the helper cannot expand the head. Should this ever
   2120	 * change in future for legacy-rx mode on, then lets also
   2121	 * add xdp->data_meta handling here.
   2122	 */
   2123
   2124	/* allocate a skb to store the frags */
   2125	skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBE_RX_HDR_SIZE);
   2126	if (unlikely(!skb))
   2127		return NULL;
   2128
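        	/* large frames keep the page attached as a frag (and defer the
        	 * unmap until EOP); small frames are copied into the linear area
        	 * and the page reference is handed back via pagecnt_bias
        	 */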
   2129	if (size > IXGBE_RX_HDR_SIZE) {
   2130		if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))
   2131			IXGBE_CB(skb)->dma = rx_buffer->dma;
   2132
   2133		skb_add_rx_frag(skb, 0, rx_buffer->page,
   2134				xdp->data - page_address(rx_buffer->page),
   2135				size, truesize);
   2136#if (PAGE_SIZE < 8192)
   2137		rx_buffer->page_offset ^= truesize;
   2138#else
   2139		rx_buffer->page_offset += truesize;
   2140#endif
   2141	} else {
   2142		memcpy(__skb_put(skb, size),
   2143		       xdp->data, ALIGN(size, sizeof(long)));
   2144		rx_buffer->pagecnt_bias++;
   2145	}
   2146
   2147	return skb;
   2148}
   2149
   2150static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
   2151				       struct ixgbe_rx_buffer *rx_buffer,
   2152				       struct xdp_buff *xdp,
   2153				       union ixgbe_adv_rx_desc *rx_desc)
   2154{
   2155	unsigned int metasize = xdp->data - xdp->data_meta;
   2156#if (PAGE_SIZE < 8192)
   2157	unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2;
   2158#else
   2159	unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
   2160				SKB_DATA_ALIGN(xdp->data_end -
   2161					       xdp->data_hard_start);
   2162#endif
   2163	struct sk_buff *skb;
   2164
    2165	/* Prefetch first cache line of first page. If xdp->data_meta
    2166	 * is unused, this points exactly to xdp->data, otherwise we
    2167	 * likely have a consumer accessing the first few bytes of meta
    2168	 * data, and then the actual data.
   2169	 */
   2170	net_prefetch(xdp->data_meta);
   2171
    2172	/* build an skb around the page buffer */
   2173	skb = napi_build_skb(xdp->data_hard_start, truesize);
   2174	if (unlikely(!skb))
   2175		return NULL;
   2176
   2177	/* update pointers within the skb to store the data */
   2178	skb_reserve(skb, xdp->data - xdp->data_hard_start);
   2179	__skb_put(skb, xdp->data_end - xdp->data);
   2180	if (metasize)
   2181		skb_metadata_set(skb, metasize);
   2182
   2183	/* record DMA address if this is the start of a chain of buffers */
   2184	if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))
   2185		IXGBE_CB(skb)->dma = rx_buffer->dma;
   2186
   2187	/* update buffer offset */
   2188#if (PAGE_SIZE < 8192)
   2189	rx_buffer->page_offset ^= truesize;
   2190#else
   2191	rx_buffer->page_offset += truesize;
   2192#endif
   2193
   2194	return skb;
   2195}
   2196
   2197static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
   2198				     struct ixgbe_ring *rx_ring,
   2199				     struct xdp_buff *xdp)
   2200{
   2201	int err, result = IXGBE_XDP_PASS;
   2202	struct bpf_prog *xdp_prog;
   2203	struct ixgbe_ring *ring;
   2204	struct xdp_frame *xdpf;
   2205	u32 act;
   2206
   2207	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
   2208
   2209	if (!xdp_prog)
   2210		goto xdp_out;
   2211
   2212	prefetchw(xdp->data_hard_start); /* xdp_frame write */
   2213
   2214	act = bpf_prog_run_xdp(xdp_prog, xdp);
   2215	switch (act) {
   2216	case XDP_PASS:
   2217		break;
   2218	case XDP_TX:
   2219		xdpf = xdp_convert_buff_to_frame(xdp);
   2220		if (unlikely(!xdpf))
   2221			goto out_failure;
   2222		ring = ixgbe_determine_xdp_ring(adapter);
   2223		if (static_branch_unlikely(&ixgbe_xdp_locking_key))
   2224			spin_lock(&ring->tx_lock);
   2225		result = ixgbe_xmit_xdp_ring(ring, xdpf);
   2226		if (static_branch_unlikely(&ixgbe_xdp_locking_key))
   2227			spin_unlock(&ring->tx_lock);
   2228		if (result == IXGBE_XDP_CONSUMED)
   2229			goto out_failure;
   2230		break;
   2231	case XDP_REDIRECT:
   2232		err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
   2233		if (err)
   2234			goto out_failure;
   2235		result = IXGBE_XDP_REDIR;
   2236		break;
   2237	default:
   2238		bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
   2239		fallthrough;
   2240	case XDP_ABORTED:
   2241out_failure:
   2242		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
   2243		fallthrough; /* handle aborts by dropping packet */
   2244	case XDP_DROP:
   2245		result = IXGBE_XDP_CONSUMED;
   2246		break;
   2247	}
   2248xdp_out:
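        	/* the verdict is returned as an ERR_PTR: IXGBE_XDP_PASS is 0, so a
        	 * pass (or missing program) yields NULL and the caller falls
        	 * through to normal skb construction
        	 */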
   2249	return ERR_PTR(-result);
   2250}
   2251
   2252static unsigned int ixgbe_rx_frame_truesize(struct ixgbe_ring *rx_ring,
   2253					    unsigned int size)
   2254{
   2255	unsigned int truesize;
   2256
   2257#if (PAGE_SIZE < 8192)
   2258	truesize = ixgbe_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */
   2259#else
   2260	truesize = rx_ring->rx_offset ?
   2261		SKB_DATA_ALIGN(rx_ring->rx_offset + size) +
   2262		SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
   2263		SKB_DATA_ALIGN(size);
   2264#endif
   2265	return truesize;
   2266}
   2267
   2268static void ixgbe_rx_buffer_flip(struct ixgbe_ring *rx_ring,
   2269				 struct ixgbe_rx_buffer *rx_buffer,
   2270				 unsigned int size)
   2271{
   2272	unsigned int truesize = ixgbe_rx_frame_truesize(rx_ring, size);
   2273#if (PAGE_SIZE < 8192)
   2274	rx_buffer->page_offset ^= truesize;
   2275#else
   2276	rx_buffer->page_offset += truesize;
   2277#endif
   2278}
   2279
   2280/**
   2281 * ixgbe_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
   2282 * @q_vector: structure containing interrupt and ring information
   2283 * @rx_ring: rx descriptor ring to transact packets on
   2284 * @budget: Total limit on number of packets to process
   2285 *
   2286 * This function provides a "bounce buffer" approach to Rx interrupt
    2287 * processing.  The advantage of this is that on systems that have
    2288 * expensive overhead for IOMMU access this provides a means of avoiding
    2289 * it by maintaining the mapping of the page to the system.
   2290 *
   2291 * Returns amount of work completed
   2292 **/
   2293static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
   2294			       struct ixgbe_ring *rx_ring,
   2295			       const int budget)
   2296{
   2297	unsigned int total_rx_bytes = 0, total_rx_packets = 0, frame_sz = 0;
   2298	struct ixgbe_adapter *adapter = q_vector->adapter;
   2299#ifdef IXGBE_FCOE
   2300	int ddp_bytes;
   2301	unsigned int mss = 0;
   2302#endif /* IXGBE_FCOE */
   2303	u16 cleaned_count = ixgbe_desc_unused(rx_ring);
   2304	unsigned int offset = rx_ring->rx_offset;
   2305	unsigned int xdp_xmit = 0;
   2306	struct xdp_buff xdp;
   2307
    2308	/* Frame size depends on rx_ring setup when PAGE_SIZE=4K */
   2309#if (PAGE_SIZE < 8192)
   2310	frame_sz = ixgbe_rx_frame_truesize(rx_ring, 0);
   2311#endif
   2312	xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq);
   2313
   2314	while (likely(total_rx_packets < budget)) {
   2315		union ixgbe_adv_rx_desc *rx_desc;
   2316		struct ixgbe_rx_buffer *rx_buffer;
   2317		struct sk_buff *skb;
   2318		int rx_buffer_pgcnt;
   2319		unsigned int size;
   2320
   2321		/* return some buffers to hardware, one at a time is too slow */
   2322		if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) {
   2323			ixgbe_alloc_rx_buffers(rx_ring, cleaned_count);
   2324			cleaned_count = 0;
   2325		}
   2326
   2327		rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean);
   2328		size = le16_to_cpu(rx_desc->wb.upper.length);
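        		/* a zero length means the descriptor has not been written
        		 * back yet; the length is cleared when the buffer is allocated
        		 */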
   2329		if (!size)
   2330			break;
   2331
   2332		/* This memory barrier is needed to keep us from reading
   2333		 * any other fields out of the rx_desc until we know the
   2334		 * descriptor has been written back
   2335		 */
   2336		dma_rmb();
   2337
   2338		rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size, &rx_buffer_pgcnt);
   2339
   2340		/* retrieve a buffer from the ring */
   2341		if (!skb) {
   2342			unsigned char *hard_start;
   2343
   2344			hard_start = page_address(rx_buffer->page) +
   2345				     rx_buffer->page_offset - offset;
   2346			xdp_prepare_buff(&xdp, hard_start, offset, size, true);
   2347			xdp_buff_clear_frags_flag(&xdp);
   2348#if (PAGE_SIZE > 4096)
    2349			/* At larger PAGE_SIZE, frame_sz depends on the frame length */
   2350			xdp.frame_sz = ixgbe_rx_frame_truesize(rx_ring, size);
   2351#endif
   2352			skb = ixgbe_run_xdp(adapter, rx_ring, &xdp);
   2353		}
   2354
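        		/* an ERR_PTR skb means XDP handled the frame; accumulate TX and
        		 * REDIRECT verdicts in xdp_xmit so tail bumps and flushes can be
        		 * batched after the loop
        		 */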
   2355		if (IS_ERR(skb)) {
   2356			unsigned int xdp_res = -PTR_ERR(skb);
   2357
   2358			if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) {
   2359				xdp_xmit |= xdp_res;
   2360				ixgbe_rx_buffer_flip(rx_ring, rx_buffer, size);
   2361			} else {
   2362				rx_buffer->pagecnt_bias++;
   2363			}
   2364			total_rx_packets++;
   2365			total_rx_bytes += size;
   2366		} else if (skb) {
   2367			ixgbe_add_rx_frag(rx_ring, rx_buffer, skb, size);
   2368		} else if (ring_uses_build_skb(rx_ring)) {
   2369			skb = ixgbe_build_skb(rx_ring, rx_buffer,
   2370					      &xdp, rx_desc);
   2371		} else {
   2372			skb = ixgbe_construct_skb(rx_ring, rx_buffer,
   2373						  &xdp, rx_desc);
   2374		}
   2375
   2376		/* exit if we failed to retrieve a buffer */
   2377		if (!skb) {
   2378			rx_ring->rx_stats.alloc_rx_buff_failed++;
   2379			rx_buffer->pagecnt_bias++;
   2380			break;
   2381		}
   2382
   2383		ixgbe_put_rx_buffer(rx_ring, rx_buffer, skb, rx_buffer_pgcnt);
   2384		cleaned_count++;
   2385
   2386		/* place incomplete frames back on ring for completion */
   2387		if (ixgbe_is_non_eop(rx_ring, rx_desc, skb))
   2388			continue;
   2389
   2390		/* verify the packet layout is correct */
   2391		if (ixgbe_cleanup_headers(rx_ring, rx_desc, skb))
   2392			continue;
   2393
   2394		/* probably a little skewed due to removing CRC */
   2395		total_rx_bytes += skb->len;
   2396
   2397		/* populate checksum, timestamp, VLAN, and protocol */
   2398		ixgbe_process_skb_fields(rx_ring, rx_desc, skb);
   2399
   2400#ifdef IXGBE_FCOE
   2401		/* if ddp, not passing to ULD unless for FCP_RSP or error */
   2402		if (ixgbe_rx_is_fcoe(rx_ring, rx_desc)) {
   2403			ddp_bytes = ixgbe_fcoe_ddp(adapter, rx_desc, skb);
   2404			/* include DDPed FCoE data */
   2405			if (ddp_bytes > 0) {
   2406				if (!mss) {
   2407					mss = rx_ring->netdev->mtu -
   2408						sizeof(struct fcoe_hdr) -
   2409						sizeof(struct fc_frame_header) -
   2410						sizeof(struct fcoe_crc_eof);
   2411					if (mss > 512)
   2412						mss &= ~511;
   2413				}
   2414				total_rx_bytes += ddp_bytes;
   2415				total_rx_packets += DIV_ROUND_UP(ddp_bytes,
   2416								 mss);
   2417			}
   2418			if (!ddp_bytes) {
   2419				dev_kfree_skb_any(skb);
   2420				continue;
   2421			}
   2422		}
   2423
   2424#endif /* IXGBE_FCOE */
   2425		ixgbe_rx_skb(q_vector, skb);
   2426
   2427		/* update budget accounting */
   2428		total_rx_packets++;
   2429	}
   2430
   2431	if (xdp_xmit & IXGBE_XDP_REDIR)
   2432		xdp_do_flush_map();
   2433
   2434	if (xdp_xmit & IXGBE_XDP_TX) {
   2435		struct ixgbe_ring *ring = ixgbe_determine_xdp_ring(adapter);
   2436
   2437		ixgbe_xdp_ring_update_tail_locked(ring);
   2438	}
   2439
   2440	u64_stats_update_begin(&rx_ring->syncp);
   2441	rx_ring->stats.packets += total_rx_packets;
   2442	rx_ring->stats.bytes += total_rx_bytes;
   2443	u64_stats_update_end(&rx_ring->syncp);
   2444	q_vector->rx.total_packets += total_rx_packets;
   2445	q_vector->rx.total_bytes += total_rx_bytes;
   2446
   2447	return total_rx_packets;
   2448}
   2449
   2450/**
   2451 * ixgbe_configure_msix - Configure MSI-X hardware
   2452 * @adapter: board private structure
   2453 *
   2454 * ixgbe_configure_msix sets up the hardware to properly generate MSI-X
   2455 * interrupts.
   2456 **/
   2457static void ixgbe_configure_msix(struct ixgbe_adapter *adapter)
   2458{
   2459	struct ixgbe_q_vector *q_vector;
   2460	int v_idx;
   2461	u32 mask;
   2462
   2463	/* Populate MSIX to EITR Select */
   2464	if (adapter->num_vfs > 32) {
   2465		u32 eitrsel = BIT(adapter->num_vfs - 32) - 1;
   2466		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITRSEL, eitrsel);
   2467	}
   2468
   2469	/*
   2470	 * Populate the IVAR table and set the ITR values to the
   2471	 * corresponding register.
   2472	 */
   2473	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
   2474		struct ixgbe_ring *ring;
   2475		q_vector = adapter->q_vector[v_idx];
   2476
   2477		ixgbe_for_each_ring(ring, q_vector->rx)
   2478			ixgbe_set_ivar(adapter, 0, ring->reg_idx, v_idx);
   2479
   2480		ixgbe_for_each_ring(ring, q_vector->tx)
   2481			ixgbe_set_ivar(adapter, 1, ring->reg_idx, v_idx);
   2482
   2483		ixgbe_write_eitr(q_vector);
   2484	}
   2485
   2486	switch (adapter->hw.mac.type) {
   2487	case ixgbe_mac_82598EB:
   2488		ixgbe_set_ivar(adapter, -1, IXGBE_IVAR_OTHER_CAUSES_INDEX,
   2489			       v_idx);
   2490		break;
   2491	case ixgbe_mac_82599EB:
   2492	case ixgbe_mac_X540:
   2493	case ixgbe_mac_X550:
   2494	case ixgbe_mac_X550EM_x:
   2495	case ixgbe_mac_x550em_a:
   2496		ixgbe_set_ivar(adapter, -1, 1, v_idx);
   2497		break;
   2498	default:
   2499		break;
   2500	}
   2501	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(v_idx), 1950);
   2502
   2503	/* set up to autoclear timer, and the vectors */
   2504	mask = IXGBE_EIMS_ENABLE_MASK;
   2505	mask &= ~(IXGBE_EIMS_OTHER |
   2506		  IXGBE_EIMS_MAILBOX |
   2507		  IXGBE_EIMS_LSC);
   2508
   2509	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, mask);
   2510}
   2511
   2512/**
   2513 * ixgbe_update_itr - update the dynamic ITR value based on statistics
   2514 * @q_vector: structure containing interrupt and ring information
   2515 * @ring_container: structure containing ring performance data
   2516 *
   2517 *      Stores a new ITR value based on packets and byte
   2518 *      counts during the last interrupt.  The advantage of per interrupt
   2519 *      computation is faster updates and more accurate ITR for the current
   2520 *      traffic pattern.  Constants in this function were computed
   2521 *      based on theoretical maximum wire speed and thresholds were set based
   2522 *      on testing data as well as attempting to minimize response time
   2523 *      while increasing bulk throughput.
   2524 **/
   2525static void ixgbe_update_itr(struct ixgbe_q_vector *q_vector,
   2526			     struct ixgbe_ring_container *ring_container)
   2527{
   2528	unsigned int itr = IXGBE_ITR_ADAPTIVE_MIN_USECS |
   2529			   IXGBE_ITR_ADAPTIVE_LATENCY;
   2530	unsigned int avg_wire_size, packets, bytes;
   2531	unsigned long next_update = jiffies;
   2532
   2533	/* If we don't have any rings just leave ourselves set for maximum
   2534	 * possible latency so we take ourselves out of the equation.
   2535	 */
   2536	if (!ring_container->ring)
   2537		return;
   2538
   2539	/* If we didn't update within up to 1 - 2 jiffies we can assume
   2540	 * that either packets are coming in so slow there hasn't been
   2541	 * any work, or that there is so much work that NAPI is dealing
   2542	 * with interrupt moderation and we don't need to do anything.
   2543	 */
   2544	if (time_after(next_update, ring_container->next_update))
   2545		goto clear_counts;
   2546
   2547	packets = ring_container->total_packets;
   2548
   2549	/* We have no packets to actually measure against. This means
   2550	 * either one of the other queues on this vector is active or
   2551	 * we are a Tx queue doing TSO with too high of an interrupt rate.
   2552	 *
   2553	 * When this occurs just tick up our delay by the minimum value
   2554	 * and hope that this extra delay will prevent us from being called
   2555	 * without any work on our queue.
   2556	 */
   2557	if (!packets) {
   2558		itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC;
   2559		if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS)
   2560			itr = IXGBE_ITR_ADAPTIVE_MAX_USECS;
   2561		itr += ring_container->itr & IXGBE_ITR_ADAPTIVE_LATENCY;
   2562		goto clear_counts;
   2563	}
   2564
   2565	bytes = ring_container->total_bytes;
   2566
   2567	/* If packets are less than 4 or bytes are less than 9000 assume
   2568	 * insufficient data to use bulk rate limiting approach. We are
   2569	 * likely latency driven.
   2570	 */
   2571	if (packets < 4 && bytes < 9000) {
   2572		itr = IXGBE_ITR_ADAPTIVE_LATENCY;
   2573		goto adjust_by_size;
   2574	}
   2575
   2576	/* Between 4 and 48 we can assume that our current interrupt delay
   2577	 * is only slightly too low. As such we should increase it by a small
   2578	 * fixed amount.
   2579	 */
   2580	if (packets < 48) {
   2581		itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC;
   2582		if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS)
   2583			itr = IXGBE_ITR_ADAPTIVE_MAX_USECS;
   2584		goto clear_counts;
   2585	}
   2586
   2587	/* Between 48 and 96 is our "goldilocks" zone where we are working
   2588	 * out "just right". Just report that our current ITR is good for us.
   2589	 */
   2590	if (packets < 96) {
   2591		itr = q_vector->itr >> 2;
   2592		goto clear_counts;
   2593	}
   2594
   2595	/* If packet count is 96 or greater we are likely looking at a slight
   2596	 * overrun of the delay we want. Try halving our delay to see if that
   2597	 * will cut the number of packets in half per interrupt.
   2598	 */
   2599	if (packets < 256) {
   2600		itr = q_vector->itr >> 3;
   2601		if (itr < IXGBE_ITR_ADAPTIVE_MIN_USECS)
   2602			itr = IXGBE_ITR_ADAPTIVE_MIN_USECS;
   2603		goto clear_counts;
   2604	}
   2605
   2606	/* The paths below assume we are dealing with a bulk ITR since number
   2607	 * of packets is 256 or greater. We are just going to have to compute
   2608	 * a value and try to bring the count under control, though for smaller
   2609	 * packet sizes there isn't much we can do as NAPI polling will likely
   2610	 * be kicking in sooner rather than later.
   2611	 */
   2612	itr = IXGBE_ITR_ADAPTIVE_BULK;
   2613
   2614adjust_by_size:
   2615	/* If packet counts are 256 or greater we can assume we have a gross
   2616	 * overestimation of what the rate should be. Instead of trying to fine
   2617	 * tune it just use the formula below to try and dial in an exact value
    2618	 * given the current packet size of the frame.
   2619	 */
   2620	avg_wire_size = bytes / packets;
   2621
   2622	/* The following is a crude approximation of:
   2623	 *  wmem_default / (size + overhead) = desired_pkts_per_int
   2624	 *  rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
   2625	 *  (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
   2626	 *
   2627	 * Assuming wmem_default is 212992 and overhead is 640 bytes per
   2628	 * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
   2629	 * formula down to
   2630	 *
   2631	 *  (170 * (size + 24)) / (size + 640) = ITR
   2632	 *
   2633	 * We first do some math on the packet size and then finally bitshift
   2634	 * by 8 after rounding up. We also have to account for PCIe link speed
   2635	 * difference as ITR scales based on this.
   2636	 */
   2637	if (avg_wire_size <= 60) {
   2638		/* Start at 50k ints/sec */
   2639		avg_wire_size = 5120;
   2640	} else if (avg_wire_size <= 316) {
   2641		/* 50K ints/sec to 16K ints/sec */
   2642		avg_wire_size *= 40;
   2643		avg_wire_size += 2720;
   2644	} else if (avg_wire_size <= 1084) {
   2645		/* 16K ints/sec to 9.2K ints/sec */
   2646		avg_wire_size *= 15;
   2647		avg_wire_size += 11452;
   2648	} else if (avg_wire_size < 1968) {
   2649		/* 9.2K ints/sec to 8K ints/sec */
   2650		avg_wire_size *= 5;
   2651		avg_wire_size += 22420;
   2652	} else {
   2653		/* plateau at a limit of 8K ints/sec */
   2654		avg_wire_size = 32256;
   2655	}
   2656
    2657	/* If we are in low latency mode, halve our delay, which doubles the rate
    2658	 * to somewhere between 100K and 16K ints/sec
   2659	 */
   2660	if (itr & IXGBE_ITR_ADAPTIVE_LATENCY)
   2661		avg_wire_size >>= 1;
   2662
   2663	/* Resultant value is 256 times larger than it needs to be. This
   2664	 * gives us room to adjust the value as needed to either increase
   2665	 * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
   2666	 *
   2667	 * Use addition as we have already recorded the new latency flag
   2668	 * for the ITR value.
   2669	 */
   2670	switch (q_vector->adapter->link_speed) {
   2671	case IXGBE_LINK_SPEED_10GB_FULL:
   2672	case IXGBE_LINK_SPEED_100_FULL:
   2673	default:
   2674		itr += DIV_ROUND_UP(avg_wire_size,
   2675				    IXGBE_ITR_ADAPTIVE_MIN_INC * 256) *
   2676		       IXGBE_ITR_ADAPTIVE_MIN_INC;
   2677		break;
   2678	case IXGBE_LINK_SPEED_2_5GB_FULL:
   2679	case IXGBE_LINK_SPEED_1GB_FULL:
   2680	case IXGBE_LINK_SPEED_10_FULL:
   2681		if (avg_wire_size > 8064)
   2682			avg_wire_size = 8064;
   2683		itr += DIV_ROUND_UP(avg_wire_size,
   2684				    IXGBE_ITR_ADAPTIVE_MIN_INC * 64) *
   2685		       IXGBE_ITR_ADAPTIVE_MIN_INC;
   2686		break;
   2687	}
   2688
   2689clear_counts:
   2690	/* write back value */
   2691	ring_container->itr = itr;
   2692
   2693	/* next update should occur within next jiffy */
   2694	ring_container->next_update = next_update + 1;
   2695
   2696	ring_container->total_bytes = 0;
   2697	ring_container->total_packets = 0;
   2698}
   2699
   2700/**
   2701 * ixgbe_write_eitr - write EITR register in hardware specific way
   2702 * @q_vector: structure containing interrupt and ring information
   2703 *
   2704 * This function is made to be called by ethtool and by the driver
   2705 * when it needs to update EITR registers at runtime.  Hardware
   2706 * specific quirks/differences are taken care of here.
   2707 */
   2708void ixgbe_write_eitr(struct ixgbe_q_vector *q_vector)
   2709{
   2710	struct ixgbe_adapter *adapter = q_vector->adapter;
   2711	struct ixgbe_hw *hw = &adapter->hw;
   2712	int v_idx = q_vector->v_idx;
   2713	u32 itr_reg = q_vector->itr & IXGBE_MAX_EITR;
   2714
   2715	switch (adapter->hw.mac.type) {
   2716	case ixgbe_mac_82598EB:
   2717		/* must write high and low 16 bits to reset counter */
   2718		itr_reg |= (itr_reg << 16);
   2719		break;
   2720	case ixgbe_mac_82599EB:
   2721	case ixgbe_mac_X540:
   2722	case ixgbe_mac_X550:
   2723	case ixgbe_mac_X550EM_x:
   2724	case ixgbe_mac_x550em_a:
   2725		/*
    2726		 * set the WDIS bit so this write does not clear the timer bits
    2727		 * and cause an immediate assertion of the interrupt
   2728		 */
   2729		itr_reg |= IXGBE_EITR_CNT_WDIS;
   2730		break;
   2731	default:
   2732		break;
   2733	}
   2734	IXGBE_WRITE_REG(hw, IXGBE_EITR(v_idx), itr_reg);
   2735}
   2736
   2737static void ixgbe_set_itr(struct ixgbe_q_vector *q_vector)
   2738{
   2739	u32 new_itr;
   2740
   2741	ixgbe_update_itr(q_vector, &q_vector->tx);
   2742	ixgbe_update_itr(q_vector, &q_vector->rx);
   2743
   2744	/* use the smallest value of new ITR delay calculations */
   2745	new_itr = min(q_vector->rx.itr, q_vector->tx.itr);
   2746
   2747	/* Clear latency flag if set, shift into correct position */
   2748	new_itr &= ~IXGBE_ITR_ADAPTIVE_LATENCY;
   2749	new_itr <<= 2;
   2750
   2751	if (new_itr != q_vector->itr) {
   2752		/* save the algorithm value here */
   2753		q_vector->itr = new_itr;
   2754
   2755		ixgbe_write_eitr(q_vector);
   2756	}
   2757}
   2758
   2759/**
   2760 * ixgbe_check_overtemp_subtask - check for over temperature
   2761 * @adapter: pointer to adapter
   2762 **/
   2763static void ixgbe_check_overtemp_subtask(struct ixgbe_adapter *adapter)
   2764{
   2765	struct ixgbe_hw *hw = &adapter->hw;
   2766	u32 eicr = adapter->interrupt_event;
   2767	s32 rc;
   2768
   2769	if (test_bit(__IXGBE_DOWN, &adapter->state))
   2770		return;
   2771
   2772	if (!(adapter->flags2 & IXGBE_FLAG2_TEMP_SENSOR_EVENT))
   2773		return;
   2774
   2775	adapter->flags2 &= ~IXGBE_FLAG2_TEMP_SENSOR_EVENT;
   2776
   2777	switch (hw->device_id) {
   2778	case IXGBE_DEV_ID_82599_T3_LOM:
   2779		/*
    2780		 * Since the warning interrupt is for both ports
    2781		 * we don't have to check if:
    2782		 *  - This interrupt wasn't for our port.
    2783		 * We may also have missed the interrupt, so always
    2784		 * check whether we got an LSC.
   2785		 */
   2786		if (!(eicr & IXGBE_EICR_GPI_SDP0_8259X) &&
   2787		    !(eicr & IXGBE_EICR_LSC))
   2788			return;
   2789
   2790		if (!(eicr & IXGBE_EICR_LSC) && hw->mac.ops.check_link) {
   2791			u32 speed;
   2792			bool link_up = false;
   2793
   2794			hw->mac.ops.check_link(hw, &speed, &link_up, false);
   2795
   2796			if (link_up)
   2797				return;
   2798		}
   2799
   2800		/* Check if this is not due to overtemp */
   2801		if (hw->phy.ops.check_overtemp(hw) != IXGBE_ERR_OVERTEMP)
   2802			return;
   2803
   2804		break;
   2805	case IXGBE_DEV_ID_X550EM_A_1G_T:
   2806	case IXGBE_DEV_ID_X550EM_A_1G_T_L:
   2807		rc = hw->phy.ops.check_overtemp(hw);
   2808		if (rc != IXGBE_ERR_OVERTEMP)
   2809			return;
   2810		break;
   2811	default:
   2812		if (adapter->hw.mac.type >= ixgbe_mac_X540)
   2813			return;
   2814		if (!(eicr & IXGBE_EICR_GPI_SDP0(hw)))
   2815			return;
   2816		break;
   2817	}
   2818	e_crit(drv, "%s\n", ixgbe_overheat_msg);
   2819
   2820	adapter->interrupt_event = 0;
   2821}
   2822
   2823static void ixgbe_check_fan_failure(struct ixgbe_adapter *adapter, u32 eicr)
   2824{
   2825	struct ixgbe_hw *hw = &adapter->hw;
   2826
   2827	if ((adapter->flags & IXGBE_FLAG_FAN_FAIL_CAPABLE) &&
   2828	    (eicr & IXGBE_EICR_GPI_SDP1(hw))) {
   2829		e_crit(probe, "Fan has stopped, replace the adapter\n");
   2830		/* write to clear the interrupt */
   2831		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1(hw));
   2832	}
   2833}
   2834
   2835static void ixgbe_check_overtemp_event(struct ixgbe_adapter *adapter, u32 eicr)
   2836{
   2837	struct ixgbe_hw *hw = &adapter->hw;
   2838
   2839	if (!(adapter->flags2 & IXGBE_FLAG2_TEMP_SENSOR_CAPABLE))
   2840		return;
   2841
   2842	switch (adapter->hw.mac.type) {
   2843	case ixgbe_mac_82599EB:
   2844		/*
    2845		 * Need to check link state, so complete the overtemp check
    2846		 * in the service task
   2847		 */
   2848		if (((eicr & IXGBE_EICR_GPI_SDP0(hw)) ||
   2849		     (eicr & IXGBE_EICR_LSC)) &&
   2850		    (!test_bit(__IXGBE_DOWN, &adapter->state))) {
   2851			adapter->interrupt_event = eicr;
   2852			adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_EVENT;
   2853			ixgbe_service_event_schedule(adapter);
   2854			return;
   2855		}
   2856		return;
   2857	case ixgbe_mac_x550em_a:
   2858		if (eicr & IXGBE_EICR_GPI_SDP0_X550EM_a) {
   2859			adapter->interrupt_event = eicr;
   2860			adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_EVENT;
   2861			ixgbe_service_event_schedule(adapter);
   2862			IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC,
   2863					IXGBE_EICR_GPI_SDP0_X550EM_a);
   2864			IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICR,
   2865					IXGBE_EICR_GPI_SDP0_X550EM_a);
   2866		}
   2867		return;
   2868	case ixgbe_mac_X550:
   2869	case ixgbe_mac_X540:
   2870		if (!(eicr & IXGBE_EICR_TS))
   2871			return;
   2872		break;
   2873	default:
   2874		return;
   2875	}
   2876
   2877	e_crit(drv, "%s\n", ixgbe_overheat_msg);
   2878}
   2879
   2880static inline bool ixgbe_is_sfp(struct ixgbe_hw *hw)
   2881{
   2882	switch (hw->mac.type) {
   2883	case ixgbe_mac_82598EB:
   2884		if (hw->phy.type == ixgbe_phy_nl)
   2885			return true;
   2886		return false;
   2887	case ixgbe_mac_82599EB:
   2888	case ixgbe_mac_X550EM_x:
   2889	case ixgbe_mac_x550em_a:
   2890		switch (hw->mac.ops.get_media_type(hw)) {
   2891		case ixgbe_media_type_fiber:
   2892		case ixgbe_media_type_fiber_qsfp:
   2893			return true;
   2894		default:
   2895			return false;
   2896		}
   2897	default:
   2898		return false;
   2899	}
   2900}
   2901
   2902static void ixgbe_check_sfp_event(struct ixgbe_adapter *adapter, u32 eicr)
   2903{
   2904	struct ixgbe_hw *hw = &adapter->hw;
   2905	u32 eicr_mask = IXGBE_EICR_GPI_SDP2(hw);
   2906
   2907	if (!ixgbe_is_sfp(hw))
   2908		return;
   2909
    2910	/* Later MACs use a different SDP */
   2911	if (hw->mac.type >= ixgbe_mac_X540)
   2912		eicr_mask = IXGBE_EICR_GPI_SDP0_X540;
   2913
   2914	if (eicr & eicr_mask) {
   2915		/* Clear the interrupt */
   2916		IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr_mask);
   2917		if (!test_bit(__IXGBE_DOWN, &adapter->state)) {
   2918			adapter->flags2 |= IXGBE_FLAG2_SFP_NEEDS_RESET;
   2919			adapter->sfp_poll_time = 0;
   2920			ixgbe_service_event_schedule(adapter);
   2921		}
   2922	}
   2923
   2924	if (adapter->hw.mac.type == ixgbe_mac_82599EB &&
   2925	    (eicr & IXGBE_EICR_GPI_SDP1(hw))) {
   2926		/* Clear the interrupt */
   2927		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1(hw));
   2928		if (!test_bit(__IXGBE_DOWN, &adapter->state)) {
   2929			adapter->flags |= IXGBE_FLAG_NEED_LINK_CONFIG;
   2930			ixgbe_service_event_schedule(adapter);
   2931		}
   2932	}
   2933}
   2934
   2935static void ixgbe_check_lsc(struct ixgbe_adapter *adapter)
   2936{
   2937	struct ixgbe_hw *hw = &adapter->hw;
   2938
   2939	adapter->lsc_int++;
   2940	adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE;
   2941	adapter->link_check_timeout = jiffies;
   2942	if (!test_bit(__IXGBE_DOWN, &adapter->state)) {
   2943		IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_LSC);
   2944		IXGBE_WRITE_FLUSH(hw);
   2945		ixgbe_service_event_schedule(adapter);
   2946	}
   2947}
   2948
   2949static inline void ixgbe_irq_enable_queues(struct ixgbe_adapter *adapter,
   2950					   u64 qmask)
   2951{
   2952	u32 mask;
   2953	struct ixgbe_hw *hw = &adapter->hw;
   2954
   2955	switch (hw->mac.type) {
   2956	case ixgbe_mac_82598EB:
   2957		mask = (IXGBE_EIMS_RTX_QUEUE & qmask);
   2958		IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
   2959		break;
   2960	case ixgbe_mac_82599EB:
   2961	case ixgbe_mac_X540:
   2962	case ixgbe_mac_X550:
   2963	case ixgbe_mac_X550EM_x:
   2964	case ixgbe_mac_x550em_a:
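        		/* the 64-bit queue mask is split across the two EIMS_EX
        		 * registers on these MACs
        		 */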
   2965		mask = (qmask & 0xFFFFFFFF);
   2966		if (mask)
   2967			IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
   2968		mask = (qmask >> 32);
   2969		if (mask)
   2970			IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
   2971		break;
   2972	default:
   2973		break;
   2974	}
   2975	/* skip the flush */
   2976}
   2977
   2978/**
   2979 * ixgbe_irq_enable - Enable default interrupt generation settings
   2980 * @adapter: board private structure
   2981 * @queues: enable irqs for queues
   2982 * @flush: flush register write
   2983 **/
   2984static inline void ixgbe_irq_enable(struct ixgbe_adapter *adapter, bool queues,
   2985				    bool flush)
   2986{
   2987	struct ixgbe_hw *hw = &adapter->hw;
   2988	u32 mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
   2989
   2990	/* don't reenable LSC while waiting for link */
   2991	if (adapter->flags & IXGBE_FLAG_NEED_LINK_UPDATE)
   2992		mask &= ~IXGBE_EIMS_LSC;
   2993
   2994	if (adapter->flags2 & IXGBE_FLAG2_TEMP_SENSOR_CAPABLE)
   2995		switch (adapter->hw.mac.type) {
   2996		case ixgbe_mac_82599EB:
   2997			mask |= IXGBE_EIMS_GPI_SDP0(hw);
   2998			break;
   2999		case ixgbe_mac_X540:
   3000		case ixgbe_mac_X550:
   3001		case ixgbe_mac_X550EM_x:
   3002		case ixgbe_mac_x550em_a:
   3003			mask |= IXGBE_EIMS_TS;
   3004			break;
   3005		default:
   3006			break;
   3007		}
   3008	if (adapter->flags & IXGBE_FLAG_FAN_FAIL_CAPABLE)
   3009		mask |= IXGBE_EIMS_GPI_SDP1(hw);
   3010	switch (adapter->hw.mac.type) {
   3011	case ixgbe_mac_82599EB:
   3012		mask |= IXGBE_EIMS_GPI_SDP1(hw);
   3013		mask |= IXGBE_EIMS_GPI_SDP2(hw);
   3014		fallthrough;
   3015	case ixgbe_mac_X540:
   3016	case ixgbe_mac_X550:
   3017	case ixgbe_mac_X550EM_x:
   3018	case ixgbe_mac_x550em_a:
   3019		if (adapter->hw.device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
   3020		    adapter->hw.device_id == IXGBE_DEV_ID_X550EM_A_SFP ||
   3021		    adapter->hw.device_id == IXGBE_DEV_ID_X550EM_A_SFP_N)
   3022			mask |= IXGBE_EIMS_GPI_SDP0(&adapter->hw);
   3023		if (adapter->hw.phy.type == ixgbe_phy_x550em_ext_t)
   3024			mask |= IXGBE_EICR_GPI_SDP0_X540;
   3025		mask |= IXGBE_EIMS_ECC;
   3026		mask |= IXGBE_EIMS_MAILBOX;
   3027		break;
   3028	default:
   3029		break;
   3030	}
   3031
   3032	if ((adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) &&
   3033	    !(adapter->flags2 & IXGBE_FLAG2_FDIR_REQUIRES_REINIT))
   3034		mask |= IXGBE_EIMS_FLOW_DIR;
   3035
   3036	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, mask);
   3037	if (queues)
   3038		ixgbe_irq_enable_queues(adapter, ~0);
   3039	if (flush)
   3040		IXGBE_WRITE_FLUSH(&adapter->hw);
   3041}
   3042
   3043static irqreturn_t ixgbe_msix_other(int irq, void *data)
   3044{
   3045	struct ixgbe_adapter *adapter = data;
   3046	struct ixgbe_hw *hw = &adapter->hw;
   3047	u32 eicr;
   3048
   3049	/*
   3050	 * Workaround for Silicon errata.  Use clear-by-write instead
   3051	 * of clear-by-read.  Reading with EICS will return the
    3052	 * interrupt causes without clearing, which will later be done
   3053	 * with the write to EICR.
   3054	 */
   3055	eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
   3056
    3057	/* The lower 16 bits of the EICR register are for the queue interrupts
   3058	 * which should be masked here in order to not accidentally clear them if
   3059	 * the bits are high when ixgbe_msix_other is called. There is a race
   3060	 * condition otherwise which results in possible performance loss
   3061	 * especially if the ixgbe_msix_other interrupt is triggering
   3062	 * consistently (as it would when PPS is turned on for the X540 device)
   3063	 */
   3064	eicr &= 0xFFFF0000;
   3065
   3066	IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr);
   3067
   3068	if (eicr & IXGBE_EICR_LSC)
   3069		ixgbe_check_lsc(adapter);
   3070
   3071	if (eicr & IXGBE_EICR_MAILBOX)
   3072		ixgbe_msg_task(adapter);
   3073
   3074	switch (hw->mac.type) {
   3075	case ixgbe_mac_82599EB:
   3076	case ixgbe_mac_X540:
   3077	case ixgbe_mac_X550:
   3078	case ixgbe_mac_X550EM_x:
   3079	case ixgbe_mac_x550em_a:
   3080		if (hw->phy.type == ixgbe_phy_x550em_ext_t &&
   3081		    (eicr & IXGBE_EICR_GPI_SDP0_X540)) {
   3082			adapter->flags2 |= IXGBE_FLAG2_PHY_INTERRUPT;
   3083			ixgbe_service_event_schedule(adapter);
   3084			IXGBE_WRITE_REG(hw, IXGBE_EICR,
   3085					IXGBE_EICR_GPI_SDP0_X540);
   3086		}
   3087		if (eicr & IXGBE_EICR_ECC) {
   3088			e_info(link, "Received ECC Err, initiating reset\n");
   3089			set_bit(__IXGBE_RESET_REQUESTED, &adapter->state);
   3090			ixgbe_service_event_schedule(adapter);
   3091			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
   3092		}
   3093		/* Handle Flow Director Full threshold interrupt */
   3094		if (eicr & IXGBE_EICR_FLOW_DIR) {
   3095			int reinit_count = 0;
   3096			int i;
   3097			for (i = 0; i < adapter->num_tx_queues; i++) {
   3098				struct ixgbe_ring *ring = adapter->tx_ring[i];
   3099				if (test_and_clear_bit(__IXGBE_TX_FDIR_INIT_DONE,
   3100						       &ring->state))
   3101					reinit_count++;
   3102			}
   3103			if (reinit_count) {
   3104				/* no more flow director interrupts until after init */
   3105				IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_FLOW_DIR);
   3106				adapter->flags2 |= IXGBE_FLAG2_FDIR_REQUIRES_REINIT;
   3107				ixgbe_service_event_schedule(adapter);
   3108			}
   3109		}
   3110		ixgbe_check_sfp_event(adapter, eicr);
   3111		ixgbe_check_overtemp_event(adapter, eicr);
   3112		break;
   3113	default:
   3114		break;
   3115	}
   3116
   3117	ixgbe_check_fan_failure(adapter, eicr);
   3118
   3119	if (unlikely(eicr & IXGBE_EICR_TIMESYNC))
   3120		ixgbe_ptp_check_pps_event(adapter);
   3121
   3122	/* re-enable the original interrupt state, no lsc, no queues */
   3123	if (!test_bit(__IXGBE_DOWN, &adapter->state))
   3124		ixgbe_irq_enable(adapter, false, false);
   3125
   3126	return IRQ_HANDLED;
   3127}
   3128
   3129static irqreturn_t ixgbe_msix_clean_rings(int irq, void *data)
   3130{
   3131	struct ixgbe_q_vector *q_vector = data;
   3132
   3133	/* EIAM disabled interrupts (on this vector) for us */
   3134
   3135	if (q_vector->rx.ring || q_vector->tx.ring)
   3136		napi_schedule_irqoff(&q_vector->napi);
   3137
   3138	return IRQ_HANDLED;
   3139}
   3140
   3141/**
   3142 * ixgbe_poll - NAPI Rx polling callback
   3143 * @napi: structure for representing this polling device
   3144 * @budget: how many packets driver is allowed to clean
   3145 *
   3146 * This function is used for legacy and MSI, NAPI mode
   3147 **/
   3148int ixgbe_poll(struct napi_struct *napi, int budget)
   3149{
   3150	struct ixgbe_q_vector *q_vector =
   3151				container_of(napi, struct ixgbe_q_vector, napi);
   3152	struct ixgbe_adapter *adapter = q_vector->adapter;
   3153	struct ixgbe_ring *ring;
   3154	int per_ring_budget, work_done = 0;
   3155	bool clean_complete = true;
   3156
   3157#ifdef CONFIG_IXGBE_DCA
   3158	if (adapter->flags & IXGBE_FLAG_DCA_ENABLED)
   3159		ixgbe_update_dca(q_vector);
   3160#endif
   3161
   3162	ixgbe_for_each_ring(ring, q_vector->tx) {
   3163		bool wd = ring->xsk_pool ?
   3164			  ixgbe_clean_xdp_tx_irq(q_vector, ring, budget) :
   3165			  ixgbe_clean_tx_irq(q_vector, ring, budget);
   3166
   3167		if (!wd)
   3168			clean_complete = false;
   3169	}
   3170
   3171	/* Exit if we are called by netpoll */
   3172	if (budget <= 0)
   3173		return budget;
   3174
   3175	/* attempt to distribute budget to each queue fairly, but don't allow
   3176	 * the budget to go below 1 because we'll exit polling */
   3177	if (q_vector->rx.count > 1)
   3178		per_ring_budget = max(budget/q_vector->rx.count, 1);
   3179	else
   3180		per_ring_budget = budget;
   3181
   3182	ixgbe_for_each_ring(ring, q_vector->rx) {
   3183		int cleaned = ring->xsk_pool ?
   3184			      ixgbe_clean_rx_irq_zc(q_vector, ring,
   3185						    per_ring_budget) :
   3186			      ixgbe_clean_rx_irq(q_vector, ring,
   3187						 per_ring_budget);
   3188
   3189		work_done += cleaned;
   3190		if (cleaned >= per_ring_budget)
   3191			clean_complete = false;
   3192	}
   3193
   3194	/* If all work not completed, return budget and keep polling */
   3195	if (!clean_complete)
   3196		return budget;
   3197
   3198	/* all work done, exit the polling mode */
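        	/* napi_complete_done() returns false while busy polling owns
        	 * this vector, in which case the queue interrupt must not be
        	 * re-enabled yet
        	 */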
   3199	if (likely(napi_complete_done(napi, work_done))) {
   3200		if (adapter->rx_itr_setting & 1)
   3201			ixgbe_set_itr(q_vector);
   3202		if (!test_bit(__IXGBE_DOWN, &adapter->state))
   3203			ixgbe_irq_enable_queues(adapter,
   3204						BIT_ULL(q_vector->v_idx));
   3205	}
   3206
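        	/* return strictly less than the full budget so the NAPI core
        	 * does not treat a completed poll as still having work pending
        	 */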
   3207	return min(work_done, budget - 1);
   3208}
   3209
   3210/**
   3211 * ixgbe_request_msix_irqs - Initialize MSI-X interrupts
   3212 * @adapter: board private structure
   3213 *
   3214 * ixgbe_request_msix_irqs allocates MSI-X vectors and requests
   3215 * interrupts from the kernel.
   3216 **/
   3217static int ixgbe_request_msix_irqs(struct ixgbe_adapter *adapter)
   3218{
   3219	struct net_device *netdev = adapter->netdev;
   3220	unsigned int ri = 0, ti = 0;
   3221	int vector, err;
   3222
   3223	for (vector = 0; vector < adapter->num_q_vectors; vector++) {
   3224		struct ixgbe_q_vector *q_vector = adapter->q_vector[vector];
   3225		struct msix_entry *entry = &adapter->msix_entries[vector];
   3226
   3227		if (q_vector->tx.ring && q_vector->rx.ring) {
   3228			snprintf(q_vector->name, sizeof(q_vector->name),
   3229				 "%s-TxRx-%u", netdev->name, ri++);
   3230			ti++;
   3231		} else if (q_vector->rx.ring) {
   3232			snprintf(q_vector->name, sizeof(q_vector->name),
   3233				 "%s-rx-%u", netdev->name, ri++);
   3234		} else if (q_vector->tx.ring) {
   3235			snprintf(q_vector->name, sizeof(q_vector->name),
   3236				 "%s-tx-%u", netdev->name, ti++);
   3237		} else {
   3238			/* skip this unused q_vector */
   3239			continue;
   3240		}
   3241		err = request_irq(entry->vector, &ixgbe_msix_clean_rings, 0,
   3242				  q_vector->name, q_vector);
   3243		if (err) {
    3244			e_err(probe, "request_irq failed for MSIX interrupt: "
    3245			      "Error %d\n", err);
   3246			goto free_queue_irqs;
   3247		}
   3248		/* If Flow Director is enabled, set interrupt affinity */
   3249		if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) {
   3250			/* assign the mask for this irq */
   3251			irq_update_affinity_hint(entry->vector,
   3252						 &q_vector->affinity_mask);
   3253		}
   3254	}
   3255
   3256	err = request_irq(adapter->msix_entries[vector].vector,
   3257			  ixgbe_msix_other, 0, netdev->name, adapter);
   3258	if (err) {
   3259		e_err(probe, "request_irq for msix_other failed: %d\n", err);
   3260		goto free_queue_irqs;
   3261	}
   3262
   3263	return 0;
   3264
   3265free_queue_irqs:
   3266	while (vector) {
   3267		vector--;
   3268		irq_update_affinity_hint(adapter->msix_entries[vector].vector,
   3269					 NULL);
   3270		free_irq(adapter->msix_entries[vector].vector,
   3271			 adapter->q_vector[vector]);
   3272	}
   3273	adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED;
   3274	pci_disable_msix(adapter->pdev);
   3275	kfree(adapter->msix_entries);
   3276	adapter->msix_entries = NULL;
   3277	return err;
   3278}
   3279
   3280/**
   3281 * ixgbe_intr - legacy mode Interrupt Handler
   3282 * @irq: interrupt number
   3283 * @data: pointer to a network interface device structure
   3284 **/
   3285static irqreturn_t ixgbe_intr(int irq, void *data)
   3286{
   3287	struct ixgbe_adapter *adapter = data;
   3288	struct ixgbe_hw *hw = &adapter->hw;
   3289	struct ixgbe_q_vector *q_vector = adapter->q_vector[0];
   3290	u32 eicr;
   3291
   3292	/*
   3293	 * Workaround for silicon errata #26 on 82598.  Mask the interrupt
   3294	 * before the read of EICR.
   3295	 */
   3296	IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_IRQ_CLEAR_MASK);
   3297
    3298	/* for NAPI, we use EIAM to auto-mask the tx/rx interrupt bits on read,
    3299	 * therefore no explicit interrupt disable is necessary */
   3300	eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
   3301	if (!eicr) {
    3302		/*
    3303		 * shared interrupt alert!
    3304		 * make sure interrupts are enabled because the read will
    3305		 * have disabled interrupts due to EIAM.
    3306		 * Also finish the workaround for the silicon errata on 82598:
    3307		 * unmask the interrupt that we masked before the EICR read.
    3308		 */
   3309		if (!test_bit(__IXGBE_DOWN, &adapter->state))
   3310			ixgbe_irq_enable(adapter, true, true);
   3311		return IRQ_NONE;	/* Not our interrupt */
   3312	}
   3313
   3314	if (eicr & IXGBE_EICR_LSC)
   3315		ixgbe_check_lsc(adapter);
   3316
   3317	switch (hw->mac.type) {
   3318	case ixgbe_mac_82599EB:
   3319		ixgbe_check_sfp_event(adapter, eicr);
   3320		fallthrough;
   3321	case ixgbe_mac_X540:
   3322	case ixgbe_mac_X550:
   3323	case ixgbe_mac_X550EM_x:
   3324	case ixgbe_mac_x550em_a:
   3325		if (eicr & IXGBE_EICR_ECC) {
   3326			e_info(link, "Received ECC Err, initiating reset\n");
   3327			set_bit(__IXGBE_RESET_REQUESTED, &adapter->state);
   3328			ixgbe_service_event_schedule(adapter);
   3329			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
   3330		}
   3331		ixgbe_check_overtemp_event(adapter, eicr);
   3332		break;
   3333	default:
   3334		break;
   3335	}
   3336
   3337	ixgbe_check_fan_failure(adapter, eicr);
   3338	if (unlikely(eicr & IXGBE_EICR_TIMESYNC))
   3339		ixgbe_ptp_check_pps_event(adapter);
   3340
   3341	/* would disable interrupts here but EIAM disabled it */
   3342	napi_schedule_irqoff(&q_vector->napi);
   3343
   3344	/*
   3345	 * re-enable link(maybe) and non-queue interrupts, no flush.
   3346	 * ixgbe_poll will re-enable the queue interrupts
   3347	 */
   3348	if (!test_bit(__IXGBE_DOWN, &adapter->state))
   3349		ixgbe_irq_enable(adapter, false, false);
   3350
   3351	return IRQ_HANDLED;
   3352}
   3353
   3354/**
   3355 * ixgbe_request_irq - initialize interrupts
   3356 * @adapter: board private structure
   3357 *
   3358 * Attempts to configure interrupts using the best available
   3359 * capabilities of the hardware and kernel.
   3360 **/
   3361static int ixgbe_request_irq(struct ixgbe_adapter *adapter)
   3362{
   3363	struct net_device *netdev = adapter->netdev;
   3364	int err;
   3365
   3366	if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED)
   3367		err = ixgbe_request_msix_irqs(adapter);
   3368	else if (adapter->flags & IXGBE_FLAG_MSI_ENABLED)
   3369		err = request_irq(adapter->pdev->irq, ixgbe_intr, 0,
   3370				  netdev->name, adapter);
   3371	else
   3372		err = request_irq(adapter->pdev->irq, ixgbe_intr, IRQF_SHARED,
   3373				  netdev->name, adapter);
   3374
   3375	if (err)
   3376		e_err(probe, "request_irq failed, Error %d\n", err);
   3377
   3378	return err;
   3379}
   3380
   3381static void ixgbe_free_irq(struct ixgbe_adapter *adapter)
   3382{
   3383	int vector;
   3384
   3385	if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) {
   3386		free_irq(adapter->pdev->irq, adapter);
   3387		return;
   3388	}
   3389
   3390	if (!adapter->msix_entries)
   3391		return;
   3392
   3393	for (vector = 0; vector < adapter->num_q_vectors; vector++) {
   3394		struct ixgbe_q_vector *q_vector = adapter->q_vector[vector];
   3395		struct msix_entry *entry = &adapter->msix_entries[vector];
   3396
   3397		/* free only the irqs that were actually requested */
   3398		if (!q_vector->rx.ring && !q_vector->tx.ring)
   3399			continue;
   3400
   3401		/* clear the affinity_mask in the IRQ descriptor */
   3402		irq_update_affinity_hint(entry->vector, NULL);
   3403
   3404		free_irq(entry->vector, q_vector);
   3405	}
   3406
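        	/* vector now indexes the extra "other" MSI-X entry, which was
        	 * requested with the adapter itself as its dev_id
        	 */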
   3407	free_irq(adapter->msix_entries[vector].vector, adapter);
   3408}
   3409
   3410/**
   3411 * ixgbe_irq_disable - Mask off interrupt generation on the NIC
   3412 * @adapter: board private structure
   3413 **/
   3414static inline void ixgbe_irq_disable(struct ixgbe_adapter *adapter)
   3415{
   3416	switch (adapter->hw.mac.type) {
   3417	case ixgbe_mac_82598EB:
   3418		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
   3419		break;
   3420	case ixgbe_mac_82599EB:
   3421	case ixgbe_mac_X540:
   3422	case ixgbe_mac_X550:
   3423	case ixgbe_mac_X550EM_x:
   3424	case ixgbe_mac_x550em_a:
   3425		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
   3426		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
   3427		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
   3428		break;
   3429	default:
   3430		break;
   3431	}
   3432	IXGBE_WRITE_FLUSH(&adapter->hw);
   3433	if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) {
   3434		int vector;
   3435
   3436		for (vector = 0; vector < adapter->num_q_vectors; vector++)
   3437			synchronize_irq(adapter->msix_entries[vector].vector);
   3438
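        		/* also wait for the trailing misc/other MSI-X vector */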
   3439		synchronize_irq(adapter->msix_entries[vector++].vector);
   3440	} else {
   3441		synchronize_irq(adapter->pdev->irq);
   3442	}
   3443}
   3444
   3445/**
   3446 * ixgbe_configure_msi_and_legacy - Initialize PIN (INTA...) and MSI interrupts
   3447 * @adapter: board private structure
   3448 *
   3449 **/
   3450static void ixgbe_configure_msi_and_legacy(struct ixgbe_adapter *adapter)
   3451{
   3452	struct ixgbe_q_vector *q_vector = adapter->q_vector[0];
   3453
   3454	ixgbe_write_eitr(q_vector);
   3455
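        	/* map Rx queue 0 (direction 0) and Tx queue 0 (direction 1) to
        	 * MSI/legacy vector 0
        	 */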
   3456	ixgbe_set_ivar(adapter, 0, 0, 0);
   3457	ixgbe_set_ivar(adapter, 1, 0, 0);
   3458
   3459	e_info(hw, "Legacy interrupt IVAR setup done\n");
   3460}
   3461
   3462/**
   3463 * ixgbe_configure_tx_ring - Configure 8259x Tx ring after Reset
   3464 * @adapter: board private structure
   3465 * @ring: structure containing ring specific data
   3466 *
   3467 * Configure the Tx descriptor ring after a reset.
   3468 **/
   3469void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter,
   3470			     struct ixgbe_ring *ring)
   3471{
   3472	struct ixgbe_hw *hw = &adapter->hw;
   3473	u64 tdba = ring->dma;
   3474	int wait_loop = 10;
   3475	u32 txdctl = IXGBE_TXDCTL_ENABLE;
   3476	u8 reg_idx = ring->reg_idx;
   3477
   3478	ring->xsk_pool = NULL;
   3479	if (ring_is_xdp(ring))
   3480		ring->xsk_pool = ixgbe_xsk_pool(adapter, ring);
   3481
   3482	/* disable queue to avoid issues while updating state */
   3483	IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), 0);
   3484	IXGBE_WRITE_FLUSH(hw);
   3485
   3486	IXGBE_WRITE_REG(hw, IXGBE_TDBAL(reg_idx),
   3487			(tdba & DMA_BIT_MASK(32)));
   3488	IXGBE_WRITE_REG(hw, IXGBE_TDBAH(reg_idx), (tdba >> 32));
   3489	IXGBE_WRITE_REG(hw, IXGBE_TDLEN(reg_idx),
   3490			ring->count * sizeof(union ixgbe_adv_tx_desc));
   3491	IXGBE_WRITE_REG(hw, IXGBE_TDH(reg_idx), 0);
   3492	IXGBE_WRITE_REG(hw, IXGBE_TDT(reg_idx), 0);
   3493	ring->tail = adapter->io_addr + IXGBE_TDT(reg_idx);
   3494
   3495	/*
    3496	 * set WTHRESH to encourage burst writeback; it should not be set
   3497	 * higher than 1 when:
   3498	 * - ITR is 0 as it could cause false TX hangs
   3499	 * - ITR is set to > 100k int/sec and BQL is enabled
   3500	 *
   3501	 * In order to avoid issues WTHRESH + PTHRESH should always be equal
   3502	 * to or less than the number of on chip descriptors, which is
   3503	 * currently 40.
   3504	 */
   3505	if (!ring->q_vector || (ring->q_vector->itr < IXGBE_100K_ITR))
   3506		txdctl |= 1u << 16;	/* WTHRESH = 1 */
   3507	else
   3508		txdctl |= 8u << 16;	/* WTHRESH = 8 */
   3509
   3510	/*
   3511	 * Setting PTHRESH to 32 both improves performance
   3512	 * and avoids a TX hang with DFP enabled
   3513	 */
   3514	txdctl |= (1u << 8) |	/* HTHRESH = 1 */
   3515		   32;		/* PTHRESH = 32 */
   3516
   3517	/* reinitialize flowdirector state */
   3518	if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) {
   3519		ring->atr_sample_rate = adapter->atr_sample_rate;
   3520		ring->atr_count = 0;
   3521		set_bit(__IXGBE_TX_FDIR_INIT_DONE, &ring->state);
   3522	} else {
   3523		ring->atr_sample_rate = 0;
   3524	}
   3525
   3526	/* initialize XPS */
   3527	if (!test_and_set_bit(__IXGBE_TX_XPS_INIT_DONE, &ring->state)) {
   3528		struct ixgbe_q_vector *q_vector = ring->q_vector;
   3529
   3530		if (q_vector)
   3531			netif_set_xps_queue(ring->netdev,
   3532					    &q_vector->affinity_mask,
   3533					    ring->queue_index);
   3534	}
   3535
   3536	clear_bit(__IXGBE_HANG_CHECK_ARMED, &ring->state);
   3537
   3538	/* reinitialize tx_buffer_info */
   3539	memset(ring->tx_buffer_info, 0,
   3540	       sizeof(struct ixgbe_tx_buffer) * ring->count);
   3541
   3542	/* enable queue */
   3543	IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), txdctl);
   3544
   3545	/* TXDCTL.EN will return 0 on 82598 if link is down, so skip it */
   3546	if (hw->mac.type == ixgbe_mac_82598EB &&
   3547	    !(IXGBE_READ_REG(hw, IXGBE_LINKS) & IXGBE_LINKS_UP))
   3548		return;
   3549
   3550	/* poll to verify queue is enabled */
   3551	do {
   3552		usleep_range(1000, 2000);
   3553		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(reg_idx));
   3554	} while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE));
   3555	if (!wait_loop)
   3556		hw_dbg(hw, "Could not enable Tx Queue %d\n", reg_idx);
   3557}
   3558
   3559static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter)
   3560{
   3561	struct ixgbe_hw *hw = &adapter->hw;
   3562	u32 rttdcs, mtqc;
   3563	u8 tcs = adapter->hw_tcs;
   3564
   3565	if (hw->mac.type == ixgbe_mac_82598EB)
   3566		return;
   3567
   3568	/* disable the arbiter while setting MTQC */
   3569	rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
   3570	rttdcs |= IXGBE_RTTDCS_ARBDIS;
   3571	IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
   3572
   3573	/* set transmit pool layout */
   3574	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
   3575		mtqc = IXGBE_MTQC_VT_ENA;
   3576		if (tcs > 4)
   3577			mtqc |= IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
   3578		else if (tcs > 1)
   3579			mtqc |= IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
   3580		else if (adapter->ring_feature[RING_F_VMDQ].mask ==
   3581			 IXGBE_82599_VMDQ_4Q_MASK)
   3582			mtqc |= IXGBE_MTQC_32VF;
   3583		else
   3584			mtqc |= IXGBE_MTQC_64VF;
   3585	} else {
   3586		if (tcs > 4) {
   3587			mtqc = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
   3588		} else if (tcs > 1) {
   3589			mtqc = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
   3590		} else {
   3591			u8 max_txq = adapter->num_tx_queues +
   3592				adapter->num_xdp_queues;
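        			/* the single packet buffer layout (64Q_1PB) only
        			 * provides 64 Tx queues; use the 4 TC layout when
        			 * Tx + XDP queues exceed that
        			 */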
   3593			if (max_txq > 63)
   3594				mtqc = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
   3595			else
   3596				mtqc = IXGBE_MTQC_64Q_1PB;
   3597		}
   3598	}
   3599
   3600	IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
   3601
   3602	/* Enable Security TX Buffer IFG for multiple pb */
   3603	if (tcs) {
   3604		u32 sectx = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
   3605		sectx |= IXGBE_SECTX_DCB;
   3606		IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, sectx);
   3607	}
   3608
   3609	/* re-enable the arbiter */
   3610	rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
   3611	IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
   3612}
   3613
   3614/**
   3615 * ixgbe_configure_tx - Configure 8259x Transmit Unit after Reset
   3616 * @adapter: board private structure
   3617 *
   3618 * Configure the Tx unit of the MAC after a reset.
   3619 **/
   3620static void ixgbe_configure_tx(struct ixgbe_adapter *adapter)
   3621{
   3622	struct ixgbe_hw *hw = &adapter->hw;
   3623	u32 dmatxctl;
   3624	u32 i;
   3625
   3626	ixgbe_setup_mtqc(adapter);
   3627
   3628	if (hw->mac.type != ixgbe_mac_82598EB) {
   3629		/* DMATXCTL.EN must be before Tx queues are enabled */
   3630		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
   3631		dmatxctl |= IXGBE_DMATXCTL_TE;
   3632		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
   3633	}
   3634
   3635	/* Setup the HW Tx Head and Tail descriptor pointers */
   3636	for (i = 0; i < adapter->num_tx_queues; i++)
   3637		ixgbe_configure_tx_ring(adapter, adapter->tx_ring[i]);
   3638	for (i = 0; i < adapter->num_xdp_queues; i++)
   3639		ixgbe_configure_tx_ring(adapter, adapter->xdp_ring[i]);
   3640}
   3641
   3642static void ixgbe_enable_rx_drop(struct ixgbe_adapter *adapter,
   3643				 struct ixgbe_ring *ring)
   3644{
   3645	struct ixgbe_hw *hw = &adapter->hw;
   3646	u8 reg_idx = ring->reg_idx;
   3647	u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(reg_idx));
   3648
   3649	srrctl |= IXGBE_SRRCTL_DROP_EN;
   3650
   3651	IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(reg_idx), srrctl);
   3652}
   3653
   3654static void ixgbe_disable_rx_drop(struct ixgbe_adapter *adapter,
   3655				  struct ixgbe_ring *ring)
   3656{
   3657	struct ixgbe_hw *hw = &adapter->hw;
   3658	u8 reg_idx = ring->reg_idx;
   3659	u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(reg_idx));
   3660
   3661	srrctl &= ~IXGBE_SRRCTL_DROP_EN;
   3662
   3663	IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(reg_idx), srrctl);
   3664}
   3665
   3666#ifdef CONFIG_IXGBE_DCB
   3667void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter)
   3668#else
   3669static void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter)
   3670#endif
   3671{
   3672	int i;
   3673	bool pfc_en = adapter->dcb_cfg.pfc_mode_enable;
   3674
   3675	if (adapter->ixgbe_ieee_pfc)
   3676		pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en);
   3677
   3678	/*
   3679	 * We should set the drop enable bit if:
   3680	 *  SR-IOV is enabled
   3681	 *   or
   3682	 *  Number of Rx queues > 1 and flow control is disabled
   3683	 *
   3684	 *  This allows us to avoid head of line blocking for security
   3685	 *  and performance reasons.
   3686	 */
   3687	if (adapter->num_vfs || (adapter->num_rx_queues > 1 &&
   3688	    !(adapter->hw.fc.current_mode & ixgbe_fc_tx_pause) && !pfc_en)) {
   3689		for (i = 0; i < adapter->num_rx_queues; i++)
   3690			ixgbe_enable_rx_drop(adapter, adapter->rx_ring[i]);
   3691	} else {
   3692		for (i = 0; i < adapter->num_rx_queues; i++)
   3693			ixgbe_disable_rx_drop(adapter, adapter->rx_ring[i]);
   3694	}
   3695}
   3696
   3697#define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
   3698
   3699static void ixgbe_configure_srrctl(struct ixgbe_adapter *adapter,
   3700				   struct ixgbe_ring *rx_ring)
   3701{
   3702	struct ixgbe_hw *hw = &adapter->hw;
   3703	u32 srrctl;
   3704	u8 reg_idx = rx_ring->reg_idx;
   3705
   3706	if (hw->mac.type == ixgbe_mac_82598EB) {
   3707		u16 mask = adapter->ring_feature[RING_F_RSS].mask;
   3708
   3709		/*
   3710		 * if VMDq is not active we must program one srrctl register
   3711		 * per RSS queue since we have enabled RDRXCTL.MVMEN
   3712		 */
   3713		reg_idx &= mask;
   3714	}
   3715
   3716	/* configure header buffer length, needed for RSC */
   3717	srrctl = IXGBE_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT;
   3718
   3719	/* configure the packet buffer length */
   3720	if (rx_ring->xsk_pool) {
   3721		u32 xsk_buf_len = xsk_pool_get_rx_frame_size(rx_ring->xsk_pool);
   3722
    3723		/* If the MAC supports setting RXDCTL.RLPML, the
   3724		 * SRRCTL[n].BSIZEPKT is set to PAGE_SIZE and
   3725		 * RXDCTL.RLPML is set to the actual UMEM buffer
   3726		 * size. If not, then we are stuck with a 1k buffer
   3727		 * size resolution. In this case frames larger than
   3728		 * the UMEM buffer size viewed in a 1k resolution will
   3729		 * be dropped.
   3730		 */
   3731		if (hw->mac.type != ixgbe_mac_82599EB)
   3732			srrctl |= PAGE_SIZE >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
   3733		else
   3734			srrctl |= xsk_buf_len >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
   3735	} else if (test_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state)) {
   3736		srrctl |= IXGBE_RXBUFFER_3K >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
   3737	} else {
   3738		srrctl |= IXGBE_RXBUFFER_2K >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
   3739	}
   3740
   3741	/* configure descriptor type */
   3742	srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
   3743
   3744	IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(reg_idx), srrctl);
   3745}
   3746
   3747/**
   3748 * ixgbe_rss_indir_tbl_entries - Return RSS indirection table entries
   3749 * @adapter: device handle
   3750 *
   3751 *  - 82598/82599/X540:     128
   3752 *  - X550(non-SRIOV mode): 512
   3753 *  - X550(SRIOV mode):     64
   3754 */
   3755u32 ixgbe_rss_indir_tbl_entries(struct ixgbe_adapter *adapter)
   3756{
   3757	if (adapter->hw.mac.type < ixgbe_mac_X550)
   3758		return 128;
   3759	else if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
   3760		return 64;
   3761	else
   3762		return 512;
   3763}
   3764
   3765/**
   3766 * ixgbe_store_key - Write the RSS key to HW
   3767 * @adapter: device handle
   3768 *
   3769 * Write the RSS key stored in adapter.rss_key to HW.
   3770 */
   3771void ixgbe_store_key(struct ixgbe_adapter *adapter)
   3772{
   3773	struct ixgbe_hw *hw = &adapter->hw;
   3774	int i;
   3775
   3776	for (i = 0; i < 10; i++)
   3777		IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), adapter->rss_key[i]);
   3778}
   3779
   3780/**
   3781 * ixgbe_init_rss_key - Initialize adapter RSS key
   3782 * @adapter: device handle
   3783 *
   3784 * Allocates and initializes the RSS key if it is not allocated.
   3785 **/
   3786static inline int ixgbe_init_rss_key(struct ixgbe_adapter *adapter)
   3787{
   3788	u32 *rss_key;
   3789
   3790	if (!adapter->rss_key) {
   3791		rss_key = kzalloc(IXGBE_RSS_KEY_SIZE, GFP_KERNEL);
   3792		if (unlikely(!rss_key))
   3793			return -ENOMEM;
   3794
   3795		netdev_rss_key_fill(rss_key, IXGBE_RSS_KEY_SIZE);
   3796		adapter->rss_key = rss_key;
   3797	}
   3798
   3799	return 0;
   3800}
   3801
   3802/**
   3803 * ixgbe_store_reta - Write the RETA table to HW
   3804 * @adapter: device handle
   3805 *
   3806 * Write the RSS redirection table stored in adapter.rss_indir_tbl[] to HW.
   3807 */
   3808void ixgbe_store_reta(struct ixgbe_adapter *adapter)
   3809{
   3810	u32 i, reta_entries = ixgbe_rss_indir_tbl_entries(adapter);
   3811	struct ixgbe_hw *hw = &adapter->hw;
   3812	u32 reta = 0;
   3813	u32 indices_multi;
   3814	u8 *indir_tbl = adapter->rss_indir_tbl;
   3815
   3816	/* Fill out the redirection table as follows:
    3817	 *  - 82598:      8 bit wide entries containing a pair of 4 bit RSS
   3818	 *    indices.
   3819	 *  - 82599/X540: 8 bit wide entries containing 4 bit RSS index
   3820	 *  - X550:       8 bit wide entries containing 6 bit RSS index
   3821	 */
   3822	if (adapter->hw.mac.type == ixgbe_mac_82598EB)
   3823		indices_multi = 0x11;
   3824	else
   3825		indices_multi = 0x1;
   3826
   3827	/* Write redirection table to HW */
   3828	for (i = 0; i < reta_entries; i++) {
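        		/* pack four byte-wide entries per 32-bit register; on
        		 * 82598 the 0x11 multiplier replicates the 4 bit index
        		 * into both nibbles of the byte
        		 */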
   3829		reta |= indices_multi * indir_tbl[i] << (i & 0x3) * 8;
   3830		if ((i & 3) == 3) {
   3831			if (i < 128)
   3832				IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
   3833			else
   3834				IXGBE_WRITE_REG(hw, IXGBE_ERETA((i >> 2) - 32),
   3835						reta);
   3836			reta = 0;
   3837		}
   3838	}
   3839}
   3840
   3841/**
   3842 * ixgbe_store_vfreta - Write the RETA table to HW (x550 devices in SRIOV mode)
   3843 * @adapter: device handle
   3844 *
   3845 * Write the RSS redirection table stored in adapter.rss_indir_tbl[] to HW.
   3846 */
   3847static void ixgbe_store_vfreta(struct ixgbe_adapter *adapter)
   3848{
   3849	u32 i, reta_entries = ixgbe_rss_indir_tbl_entries(adapter);
   3850	struct ixgbe_hw *hw = &adapter->hw;
   3851	u32 vfreta = 0;
   3852
   3853	/* Write redirection table to HW */
   3854	for (i = 0; i < reta_entries; i++) {
   3855		u16 pool = adapter->num_rx_pools;
   3856
   3857		vfreta |= (u32)adapter->rss_indir_tbl[i] << (i & 0x3) * 8;
   3858		if ((i & 3) != 3)
   3859			continue;
   3860
   3861		while (pool--)
   3862			IXGBE_WRITE_REG(hw,
   3863					IXGBE_PFVFRETA(i >> 2, VMDQ_P(pool)),
   3864					vfreta);
   3865		vfreta = 0;
   3866	}
   3867}
   3868
   3869static void ixgbe_setup_reta(struct ixgbe_adapter *adapter)
   3870{
   3871	u32 i, j;
   3872	u32 reta_entries = ixgbe_rss_indir_tbl_entries(adapter);
   3873	u16 rss_i = adapter->ring_feature[RING_F_RSS].indices;
   3874
   3875	/* Program table for at least 4 queues w/ SR-IOV so that VFs can
   3876	 * make full use of any rings they may have.  We will use the
   3877	 * PSRTYPE register to control how many rings we use within the PF.
   3878	 */
   3879	if ((adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) && (rss_i < 4))
   3880		rss_i = 4;
   3881
   3882	/* Fill out hash function seeds */
   3883	ixgbe_store_key(adapter);
   3884
   3885	/* Fill out redirection table */
   3886	memset(adapter->rss_indir_tbl, 0, sizeof(adapter->rss_indir_tbl));
   3887
   3888	for (i = 0, j = 0; i < reta_entries; i++, j++) {
   3889		if (j == rss_i)
   3890			j = 0;
   3891
   3892		adapter->rss_indir_tbl[i] = j;
   3893	}
   3894
   3895	ixgbe_store_reta(adapter);
   3896}
   3897
   3898static void ixgbe_setup_vfreta(struct ixgbe_adapter *adapter)
   3899{
   3900	struct ixgbe_hw *hw = &adapter->hw;
   3901	u16 rss_i = adapter->ring_feature[RING_F_RSS].indices;
   3902	int i, j;
   3903
   3904	/* Fill out hash function seeds */
   3905	for (i = 0; i < 10; i++) {
   3906		u16 pool = adapter->num_rx_pools;
   3907
   3908		while (pool--)
   3909			IXGBE_WRITE_REG(hw,
   3910					IXGBE_PFVFRSSRK(i, VMDQ_P(pool)),
   3911					*(adapter->rss_key + i));
   3912	}
   3913
   3914	/* Fill out the redirection table */
   3915	for (i = 0, j = 0; i < 64; i++, j++) {
   3916		if (j == rss_i)
   3917			j = 0;
   3918
   3919		adapter->rss_indir_tbl[i] = j;
   3920	}
   3921
   3922	ixgbe_store_vfreta(adapter);
   3923}
   3924
   3925static void ixgbe_setup_mrqc(struct ixgbe_adapter *adapter)
   3926{
   3927	struct ixgbe_hw *hw = &adapter->hw;
   3928	u32 mrqc = 0, rss_field = 0, vfmrqc = 0;
   3929	u32 rxcsum;
   3930
    3931	/* Disable checksum indication in the descriptor (PCSD); this enables the RSS hash */
   3932	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
   3933	rxcsum |= IXGBE_RXCSUM_PCSD;
   3934	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
   3935
   3936	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
   3937		if (adapter->ring_feature[RING_F_RSS].mask)
   3938			mrqc = IXGBE_MRQC_RSSEN;
   3939	} else {
   3940		u8 tcs = adapter->hw_tcs;
   3941
   3942		if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
   3943			if (tcs > 4)
   3944				mrqc = IXGBE_MRQC_VMDQRT8TCEN;	/* 8 TCs */
   3945			else if (tcs > 1)
   3946				mrqc = IXGBE_MRQC_VMDQRT4TCEN;	/* 4 TCs */
   3947			else if (adapter->ring_feature[RING_F_VMDQ].mask ==
   3948				 IXGBE_82599_VMDQ_4Q_MASK)
   3949				mrqc = IXGBE_MRQC_VMDQRSS32EN;
   3950			else
   3951				mrqc = IXGBE_MRQC_VMDQRSS64EN;
   3952
    3953			/* Enable L3/L4 for Tx Switched packets only for X550;
   3954			 * older devices do not support this feature
   3955			 */
   3956			if (hw->mac.type >= ixgbe_mac_X550)
   3957				mrqc |= IXGBE_MRQC_L3L4TXSWEN;
   3958		} else {
   3959			if (tcs > 4)
   3960				mrqc = IXGBE_MRQC_RTRSS8TCEN;
   3961			else if (tcs > 1)
   3962				mrqc = IXGBE_MRQC_RTRSS4TCEN;
   3963			else
   3964				mrqc = IXGBE_MRQC_RSSEN;
   3965		}
   3966	}
   3967
   3968	/* Perform hash on these packet types */
   3969	rss_field |= IXGBE_MRQC_RSS_FIELD_IPV4 |
   3970		     IXGBE_MRQC_RSS_FIELD_IPV4_TCP |
   3971		     IXGBE_MRQC_RSS_FIELD_IPV6 |
   3972		     IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
   3973
   3974	if (adapter->flags2 & IXGBE_FLAG2_RSS_FIELD_IPV4_UDP)
   3975		rss_field |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
   3976	if (adapter->flags2 & IXGBE_FLAG2_RSS_FIELD_IPV6_UDP)
   3977		rss_field |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
   3978
   3979	if ((hw->mac.type >= ixgbe_mac_X550) &&
   3980	    (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) {
   3981		u16 pool = adapter->num_rx_pools;
   3982
   3983		/* Enable VF RSS mode */
   3984		mrqc |= IXGBE_MRQC_MULTIPLE_RSS;
   3985		IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
   3986
   3987		/* Setup RSS through the VF registers */
   3988		ixgbe_setup_vfreta(adapter);
   3989		vfmrqc = IXGBE_MRQC_RSSEN;
   3990		vfmrqc |= rss_field;
   3991
   3992		while (pool--)
   3993			IXGBE_WRITE_REG(hw,
   3994					IXGBE_PFVFMRQC(VMDQ_P(pool)),
   3995					vfmrqc);
   3996	} else {
   3997		ixgbe_setup_reta(adapter);
   3998		mrqc |= rss_field;
   3999		IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
   4000	}
   4001}
   4002
   4003/**
   4004 * ixgbe_configure_rscctl - enable RSC for the indicated ring
   4005 * @adapter: address of board private structure
   4006 * @ring: structure containing ring specific data
   4007 **/
   4008static void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter,
   4009				   struct ixgbe_ring *ring)
   4010{
   4011	struct ixgbe_hw *hw = &adapter->hw;
   4012	u32 rscctrl;
   4013	u8 reg_idx = ring->reg_idx;
   4014
   4015	if (!ring_is_rsc_enabled(ring))
   4016		return;
   4017
   4018	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(reg_idx));
   4019	rscctrl |= IXGBE_RSCCTL_RSCEN;
   4020	/*
   4021	 * we must limit the number of descriptors so that the
   4022	 * total size of max desc * buf_len is not greater
   4023	 * than 65536
   4024	 */
   4025	rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   4026	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(reg_idx), rscctrl);
   4027}
   4028
   4029#define IXGBE_MAX_RX_DESC_POLL 10
   4030static void ixgbe_rx_desc_queue_enable(struct ixgbe_adapter *adapter,
   4031				       struct ixgbe_ring *ring)
   4032{
   4033	struct ixgbe_hw *hw = &adapter->hw;
   4034	int wait_loop = IXGBE_MAX_RX_DESC_POLL;
   4035	u32 rxdctl;
   4036	u8 reg_idx = ring->reg_idx;
   4037
   4038	if (ixgbe_removed(hw->hw_addr))
   4039		return;
   4040	/* RXDCTL.EN will return 0 on 82598 if link is down, so skip it */
   4041	if (hw->mac.type == ixgbe_mac_82598EB &&
   4042	    !(IXGBE_READ_REG(hw, IXGBE_LINKS) & IXGBE_LINKS_UP))
   4043		return;
   4044
   4045	do {
   4046		usleep_range(1000, 2000);
   4047		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
   4048	} while (--wait_loop && !(rxdctl & IXGBE_RXDCTL_ENABLE));
   4049
   4050	if (!wait_loop) {
   4051		e_err(drv, "RXDCTL.ENABLE on Rx queue %d not set within "
   4052		      "the polling period\n", reg_idx);
   4053	}
   4054}
   4055
   4056void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
   4057			     struct ixgbe_ring *ring)
   4058{
   4059	struct ixgbe_hw *hw = &adapter->hw;
   4060	union ixgbe_adv_rx_desc *rx_desc;
   4061	u64 rdba = ring->dma;
   4062	u32 rxdctl;
   4063	u8 reg_idx = ring->reg_idx;
   4064
   4065	xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
   4066	ring->xsk_pool = ixgbe_xsk_pool(adapter, ring);
   4067	if (ring->xsk_pool) {
   4068		WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
   4069						   MEM_TYPE_XSK_BUFF_POOL,
   4070						   NULL));
   4071		xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
   4072	} else {
   4073		WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
   4074						   MEM_TYPE_PAGE_SHARED, NULL));
   4075	}
   4076
   4077	/* disable queue to avoid use of these values while updating state */
   4078	rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
   4079	rxdctl &= ~IXGBE_RXDCTL_ENABLE;
   4080
   4081	/* write value back with RXDCTL.ENABLE bit cleared */
   4082	IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
   4083	IXGBE_WRITE_FLUSH(hw);
   4084
   4085	IXGBE_WRITE_REG(hw, IXGBE_RDBAL(reg_idx), (rdba & DMA_BIT_MASK(32)));
   4086	IXGBE_WRITE_REG(hw, IXGBE_RDBAH(reg_idx), (rdba >> 32));
   4087	IXGBE_WRITE_REG(hw, IXGBE_RDLEN(reg_idx),
   4088			ring->count * sizeof(union ixgbe_adv_rx_desc));
   4089	/* Force flushing of IXGBE_RDLEN to prevent MDD */
   4090	IXGBE_WRITE_FLUSH(hw);
   4091
   4092	IXGBE_WRITE_REG(hw, IXGBE_RDH(reg_idx), 0);
   4093	IXGBE_WRITE_REG(hw, IXGBE_RDT(reg_idx), 0);
   4094	ring->tail = adapter->io_addr + IXGBE_RDT(reg_idx);
   4095
   4096	ixgbe_configure_srrctl(adapter, ring);
   4097	ixgbe_configure_rscctl(adapter, ring);
   4098
   4099	if (hw->mac.type == ixgbe_mac_82598EB) {
   4100		/*
   4101		 * enable cache line friendly hardware writes:
    4102		 * PTHRESH=32 descriptors (half the internal cache);
    4103		 * this also removes the ugly rx_no_buffer_count increment
   4104		 * HTHRESH=4 descriptors (to minimize latency on fetch)
   4105		 * WTHRESH=8 burst writeback up to two cache lines
   4106		 */
   4107		rxdctl &= ~0x3FFFFF;
   4108		rxdctl |=  0x080420;
   4109#if (PAGE_SIZE < 8192)
   4110	/* RXDCTL.RLPML does not work on 82599 */
   4111	} else if (hw->mac.type != ixgbe_mac_82599EB) {
   4112		rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK |
   4113			    IXGBE_RXDCTL_RLPML_EN);
   4114
   4115		/* Limit the maximum frame size so we don't overrun the skb.
   4116		 * This can happen in SRIOV mode when the MTU of the VF is
   4117		 * higher than the MTU of the PF.
   4118		 */
   4119		if (ring_uses_build_skb(ring) &&
   4120		    !test_bit(__IXGBE_RX_3K_BUFFER, &ring->state))
   4121			rxdctl |= IXGBE_MAX_2K_FRAME_BUILD_SKB |
   4122				  IXGBE_RXDCTL_RLPML_EN;
   4123#endif
   4124	}
   4125
   4126	ring->rx_offset = ixgbe_rx_offset(ring);
   4127
   4128	if (ring->xsk_pool && hw->mac.type != ixgbe_mac_82599EB) {
   4129		u32 xsk_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool);
   4130
   4131		rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK |
   4132			    IXGBE_RXDCTL_RLPML_EN);
   4133		rxdctl |= xsk_buf_len | IXGBE_RXDCTL_RLPML_EN;
   4134
   4135		ring->rx_buf_len = xsk_buf_len;
   4136	}
   4137
   4138	/* initialize rx_buffer_info */
   4139	memset(ring->rx_buffer_info, 0,
   4140	       sizeof(struct ixgbe_rx_buffer) * ring->count);
   4141
   4142	/* initialize Rx descriptor 0 */
   4143	rx_desc = IXGBE_RX_DESC(ring, 0);
   4144	rx_desc->wb.upper.length = 0;
   4145
   4146	/* enable receive descriptor ring */
   4147	rxdctl |= IXGBE_RXDCTL_ENABLE;
   4148	IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
   4149
   4150	ixgbe_rx_desc_queue_enable(adapter, ring);
   4151	if (ring->xsk_pool)
   4152		ixgbe_alloc_rx_buffers_zc(ring, ixgbe_desc_unused(ring));
   4153	else
   4154		ixgbe_alloc_rx_buffers(ring, ixgbe_desc_unused(ring));
   4155}
   4156
   4157static void ixgbe_setup_psrtype(struct ixgbe_adapter *adapter)
   4158{
   4159	struct ixgbe_hw *hw = &adapter->hw;
   4160	int rss_i = adapter->ring_feature[RING_F_RSS].indices;
   4161	u16 pool = adapter->num_rx_pools;
   4162
   4163	/* PSRTYPE must be initialized in non 82598 adapters */
   4164	u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
   4165		      IXGBE_PSRTYPE_UDPHDR |
   4166		      IXGBE_PSRTYPE_IPV4HDR |
   4167		      IXGBE_PSRTYPE_L2HDR |
   4168		      IXGBE_PSRTYPE_IPV6HDR;
   4169
   4170	if (hw->mac.type == ixgbe_mac_82598EB)
   4171		return;
   4172
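        	/* the field at bit 29 selects how many RSS queues are used per
        	 * pool: 2 -> 4 queues, 1 -> 2 queues
        	 */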
   4173	if (rss_i > 3)
   4174		psrtype |= 2u << 29;
   4175	else if (rss_i > 1)
   4176		psrtype |= 1u << 29;
   4177
   4178	while (pool--)
   4179		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(VMDQ_P(pool)), psrtype);
   4180}
   4181
   4182static void ixgbe_configure_virtualization(struct ixgbe_adapter *adapter)
   4183{
   4184	struct ixgbe_hw *hw = &adapter->hw;
   4185	u16 pool = adapter->num_rx_pools;
   4186	u32 reg_offset, vf_shift, vmolr;
   4187	u32 gcr_ext, vmdctl;
   4188	int i;
   4189
   4190	if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
   4191		return;
   4192
   4193	vmdctl = IXGBE_READ_REG(hw, IXGBE_VT_CTL);
   4194	vmdctl |= IXGBE_VMD_CTL_VMDQ_EN;
   4195	vmdctl &= ~IXGBE_VT_CTL_POOL_MASK;
   4196	vmdctl |= VMDQ_P(0) << IXGBE_VT_CTL_POOL_SHIFT;
   4197	vmdctl |= IXGBE_VT_CTL_REPLEN;
   4198	IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vmdctl);
   4199
   4200	/* accept untagged packets until a vlan tag is
   4201	 * specifically set for the VMDQ queue/pool
   4202	 */
   4203	vmolr = IXGBE_VMOLR_AUPE;
   4204	while (pool--)
   4205		IXGBE_WRITE_REG(hw, IXGBE_VMOLR(VMDQ_P(pool)), vmolr);
   4206
   4207	vf_shift = VMDQ_P(0) % 32;
   4208	reg_offset = (VMDQ_P(0) >= 32) ? 1 : 0;
   4209
   4210	/* Enable only the PF's pool for Tx/Rx */
   4211	IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset), GENMASK(31, vf_shift));
   4212	IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset ^ 1), reg_offset - 1);
   4213	IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset), GENMASK(31, vf_shift));
   4214	IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset ^ 1), reg_offset - 1);
   4215	if (adapter->bridge_mode == BRIDGE_MODE_VEB)
   4216		IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
   4217
   4218	/* Map PF MAC address in RAR Entry 0 to first pool following VFs */
   4219	hw->mac.ops.set_vmdq(hw, 0, VMDQ_P(0));
   4220
   4221	/* clear VLAN promisc flag so VFTA will be updated if necessary */
   4222	adapter->flags2 &= ~IXGBE_FLAG2_VLAN_PROMISC;
   4223
   4224	/*
   4225	 * Set up VF register offsets for selected VT Mode,
   4226	 * i.e. 32 or 64 VFs for SR-IOV
   4227	 */
   4228	switch (adapter->ring_feature[RING_F_VMDQ].mask) {
   4229	case IXGBE_82599_VMDQ_8Q_MASK:
   4230		gcr_ext = IXGBE_GCR_EXT_VT_MODE_16;
   4231		break;
   4232	case IXGBE_82599_VMDQ_4Q_MASK:
   4233		gcr_ext = IXGBE_GCR_EXT_VT_MODE_32;
   4234		break;
   4235	default:
   4236		gcr_ext = IXGBE_GCR_EXT_VT_MODE_64;
   4237		break;
   4238	}
   4239
   4240	IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, gcr_ext);
   4241
   4242	for (i = 0; i < adapter->num_vfs; i++) {
   4243		/* configure spoof checking */
   4244		ixgbe_ndo_set_vf_spoofchk(adapter->netdev, i,
   4245					  adapter->vfinfo[i].spoofchk_enabled);
   4246
   4247		/* Enable/Disable RSS query feature  */
   4248		ixgbe_ndo_set_vf_rss_query_en(adapter->netdev, i,
   4249					  adapter->vfinfo[i].rss_query_enabled);
   4250	}
   4251}
   4252
   4253static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter)
   4254{
   4255	struct ixgbe_hw *hw = &adapter->hw;
   4256	struct net_device *netdev = adapter->netdev;
   4257	int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
   4258	struct ixgbe_ring *rx_ring;
   4259	int i;
   4260	u32 mhadd, hlreg0;
   4261
   4262#ifdef IXGBE_FCOE
   4263	/* adjust max frame to be able to do baby jumbo for FCoE */
   4264	if ((adapter->flags & IXGBE_FLAG_FCOE_ENABLED) &&
   4265	    (max_frame < IXGBE_FCOE_JUMBO_FRAME_SIZE))
   4266		max_frame = IXGBE_FCOE_JUMBO_FRAME_SIZE;
   4267
   4268#endif /* IXGBE_FCOE */
   4269
   4270	/* adjust max frame to be at least the size of a standard frame */
   4271	if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
   4272		max_frame = (ETH_FRAME_LEN + ETH_FCS_LEN);
   4273
   4274	mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
   4275	if (max_frame != (mhadd >> IXGBE_MHADD_MFS_SHIFT)) {
   4276		mhadd &= ~IXGBE_MHADD_MFS_MASK;
   4277		mhadd |= max_frame << IXGBE_MHADD_MFS_SHIFT;
   4278
   4279		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
   4280	}
   4281
   4282	hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
   4283	/* set jumbo enable since MHADD.MFS is keeping size locked at max_frame */
   4284	hlreg0 |= IXGBE_HLREG0_JUMBOEN;
   4285	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
   4286
   4287	/*
   4288	 * Setup the HW Rx Head and Tail Descriptor Pointers and
   4289	 * the Base and Length of the Rx Descriptor Ring
   4290	 */
   4291	for (i = 0; i < adapter->num_rx_queues; i++) {
   4292		rx_ring = adapter->rx_ring[i];
   4293
   4294		clear_ring_rsc_enabled(rx_ring);
   4295		clear_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state);
   4296		clear_bit(__IXGBE_RX_BUILD_SKB_ENABLED, &rx_ring->state);
   4297
   4298		if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)
   4299			set_ring_rsc_enabled(rx_ring);
   4300
   4301		if (test_bit(__IXGBE_RX_FCOE, &rx_ring->state))
   4302			set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state);
   4303
   4304		if (adapter->flags2 & IXGBE_FLAG2_RX_LEGACY)
   4305			continue;
   4306
   4307		set_bit(__IXGBE_RX_BUILD_SKB_ENABLED, &rx_ring->state);
   4308
   4309#if (PAGE_SIZE < 8192)
   4310		if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)
   4311			set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state);
   4312
   4313		if (IXGBE_2K_TOO_SMALL_WITH_PADDING ||
   4314		    (max_frame > (ETH_FRAME_LEN + ETH_FCS_LEN)))
   4315			set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state);
   4316#endif
   4317	}
   4318}
   4319
   4320static void ixgbe_setup_rdrxctl(struct ixgbe_adapter *adapter)
   4321{
   4322	struct ixgbe_hw *hw = &adapter->hw;
   4323	u32 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   4324
   4325	switch (hw->mac.type) {
   4326	case ixgbe_mac_82598EB:
   4327		/*
   4328		 * For VMDq support of different descriptor types or
   4329		 * buffer sizes through the use of multiple SRRCTL
   4330		 * registers, RDRXCTL.MVMEN must be set to 1
   4331		 *
   4332		 * also, the manual doesn't mention it clearly but DCA hints
   4333		 * will only use queue 0's tags unless this bit is set.  Side
   4334		 * effects of setting this bit are only that SRRCTL must be
   4335		 * fully programmed [0..15]
   4336		 */
   4337		rdrxctl |= IXGBE_RDRXCTL_MVMEN;
   4338		break;
   4339	case ixgbe_mac_X550:
   4340	case ixgbe_mac_X550EM_x:
   4341	case ixgbe_mac_x550em_a:
   4342		if (adapter->num_vfs)
   4343			rdrxctl |= IXGBE_RDRXCTL_PSP;
   4344		fallthrough;
   4345	case ixgbe_mac_82599EB:
   4346	case ixgbe_mac_X540:
   4347		/* Disable RSC for ACK packets */
   4348		IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   4349		   (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   4350		rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   4351		/* hardware requires some bits to be set by default */
   4352		rdrxctl |= (IXGBE_RDRXCTL_RSCACKC | IXGBE_RDRXCTL_FCOE_WRFIX);
   4353		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   4354		break;
   4355	default:
   4356		/* We should do nothing since we don't know this hardware */
   4357		return;
   4358	}
   4359
   4360	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   4361}
   4362
   4363/**
   4364 * ixgbe_configure_rx - Configure 8259x Receive Unit after Reset
   4365 * @adapter: board private structure
   4366 *
   4367 * Configure the Rx unit of the MAC after a reset.
   4368 **/
   4369static void ixgbe_configure_rx(struct ixgbe_adapter *adapter)
   4370{
   4371	struct ixgbe_hw *hw = &adapter->hw;
   4372	int i;
   4373	u32 rxctrl, rfctl;
   4374
   4375	/* disable receives while setting up the descriptors */
   4376	hw->mac.ops.disable_rx(hw);
   4377
   4378	ixgbe_setup_psrtype(adapter);
   4379	ixgbe_setup_rdrxctl(adapter);
   4380
   4381	/* RSC Setup */
   4382	rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
   4383	rfctl &= ~IXGBE_RFCTL_RSC_DIS;
   4384	if (!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED))
   4385		rfctl |= IXGBE_RFCTL_RSC_DIS;
   4386
   4387	/* disable NFS filtering */
   4388	rfctl |= (IXGBE_RFCTL_NFSW_DIS | IXGBE_RFCTL_NFSR_DIS);
   4389	IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
   4390
   4391	/* Program registers for the distribution of queues */
   4392	ixgbe_setup_mrqc(adapter);
   4393
   4394	/* set_rx_buffer_len must be called before ring initialization */
   4395	ixgbe_set_rx_buffer_len(adapter);
   4396
   4397	/*
   4398	 * Setup the HW Rx Head and Tail Descriptor Pointers and
   4399	 * the Base and Length of the Rx Descriptor Ring
   4400	 */
   4401	for (i = 0; i < adapter->num_rx_queues; i++)
   4402		ixgbe_configure_rx_ring(adapter, adapter->rx_ring[i]);
   4403
   4404	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
   4405	/* disable drop enable for 82598 parts */
   4406	if (hw->mac.type == ixgbe_mac_82598EB)
   4407		rxctrl |= IXGBE_RXCTRL_DMBYPS;
   4408
   4409	/* enable all receives */
   4410	rxctrl |= IXGBE_RXCTRL_RXEN;
   4411	hw->mac.ops.enable_rx_dma(hw, rxctrl);
   4412}
   4413
   4414static int ixgbe_vlan_rx_add_vid(struct net_device *netdev,
   4415				 __be16 proto, u16 vid)
   4416{
   4417	struct ixgbe_adapter *adapter = netdev_priv(netdev);
   4418	struct ixgbe_hw *hw = &adapter->hw;
   4419
   4420	/* add VID to filter table */
   4421	if (!vid || !(adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC))
   4422		hw->mac.ops.set_vfta(&adapter->hw, vid, VMDQ_P(0), true, !!vid);
   4423
   4424	set_bit(vid, adapter->active_vlans);
   4425
   4426	return 0;
   4427}
   4428
   4429static int ixgbe_find_vlvf_entry(struct ixgbe_hw *hw, u32 vlan)
   4430{
   4431	u32 vlvf;
   4432	int idx;
   4433
   4434	/* short cut the special case */
   4435	if (vlan == 0)
   4436		return 0;
   4437
   4438	/* Search for the vlan id in the VLVF entries */
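        	/* index 0 doubles as the "not found" return value, so only
        	 * entries IXGBE_VLVF_ENTRIES - 1 down to 1 are scanned
        	 */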
   4439	for (idx = IXGBE_VLVF_ENTRIES; --idx;) {
   4440		vlvf = IXGBE_READ_REG(hw, IXGBE_VLVF(idx));
   4441		if ((vlvf & VLAN_VID_MASK) == vlan)
   4442			break;
   4443	}
   4444
   4445	return idx;
   4446}
   4447
   4448void ixgbe_update_pf_promisc_vlvf(struct ixgbe_adapter *adapter, u32 vid)
   4449{
   4450	struct ixgbe_hw *hw = &adapter->hw;
   4451	u32 bits, word;
   4452	int idx;
   4453
   4454	idx = ixgbe_find_vlvf_entry(hw, vid);
   4455	if (!idx)
   4456		return;
   4457
   4458	/* See if any other pools are set for this VLAN filter
   4459	 * entry other than the PF.
   4460	 */
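        	/* each VLVF entry owns a pair of VLVFB pool-enable words; pick
        	 * the word holding the PF pool bit and mask that bit off
        	 */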
   4461	word = idx * 2 + (VMDQ_P(0) / 32);
   4462	bits = ~BIT(VMDQ_P(0) % 32);
   4463	bits &= IXGBE_READ_REG(hw, IXGBE_VLVFB(word));
   4464
   4465	/* Disable the filter so this falls into the default pool. */
   4466	if (!bits && !IXGBE_READ_REG(hw, IXGBE_VLVFB(word ^ 1))) {
   4467		if (!(adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC))
   4468			IXGBE_WRITE_REG(hw, IXGBE_VLVFB(word), 0);
   4469		IXGBE_WRITE_REG(hw, IXGBE_VLVF(idx), 0);
   4470	}
   4471}
   4472
   4473static int ixgbe_vlan_rx_kill_vid(struct net_device *netdev,
   4474				  __be16 proto, u16 vid)
   4475{
   4476	struct ixgbe_adapter *adapter = netdev_priv(netdev);
   4477	struct ixgbe_hw *hw = &adapter->hw;
   4478
   4479	/* remove VID from filter table */
   4480	if (vid && !(adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC))
   4481		hw->mac.ops.set_vfta(hw, vid, VMDQ_P(0), false, true);
   4482
   4483	clear_bit(vid, adapter->active_vlans);
   4484
   4485	return 0;
   4486}
   4487
   4488/**
   4489 * ixgbe_vlan_strip_disable - helper to disable hw vlan stripping
   4490 * @adapter: driver data
   4491 */
   4492static void ixgbe_vlan_strip_disable(struct ixgbe_adapter *adapter)
   4493{
   4494	struct ixgbe_hw *hw = &adapter->hw;
   4495	u32 vlnctrl;
   4496	int i, j;
   4497
   4498	switch (hw->mac.type) {
   4499	case ixgbe_mac_82598EB:
   4500		vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
   4501		vlnctrl &= ~IXGBE_VLNCTRL_VME;
   4502		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
   4503		break;
   4504	case ixgbe_mac_82599EB:
   4505	case ixgbe_mac_X540:
   4506	case ixgbe_mac_X550:
   4507	case ixgbe_mac_X550EM_x:
   4508	case ixgbe_mac_x550em_a:
   4509		for (i = 0; i < adapter->num_rx_queues; i++) {
   4510			struct ixgbe_ring *ring = adapter->rx_ring[i];
   4511
   4512			if (!netif_is_ixgbe(ring->netdev))
   4513				continue;
   4514
   4515			j = ring->reg_idx;
   4516			vlnctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(j));
   4517			vlnctrl &= ~IXGBE_RXDCTL_VME;
   4518			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(j), vlnctrl);
   4519		}
   4520		break;
   4521	default:
   4522		break;
   4523	}
   4524}
   4525
   4526/**
   4527 * ixgbe_vlan_strip_enable - helper to enable hw vlan stripping
   4528 * @adapter: driver data
   4529 */
   4530static void ixgbe_vlan_strip_enable(struct ixgbe_adapter *adapter)
   4531{
   4532	struct ixgbe_hw *hw = &adapter->hw;
   4533	u32 vlnctrl;
   4534	int i, j;
   4535
   4536	switch (hw->mac.type) {
   4537	case ixgbe_mac_82598EB:
   4538		vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
   4539		vlnctrl |= IXGBE_VLNCTRL_VME;
   4540		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
   4541		break;
   4542	case ixgbe_mac_82599EB:
   4543	case ixgbe_mac_X540:
   4544	case ixgbe_mac_X550:
   4545	case ixgbe_mac_X550EM_x:
   4546	case ixgbe_mac_x550em_a:
   4547		for (i = 0; i < adapter->num_rx_queues; i++) {
   4548			struct ixgbe_ring *ring = adapter->rx_ring[i];
   4549
   4550			if (!netif_is_ixgbe(ring->netdev))
   4551				continue;
   4552
   4553			j = ring->reg_idx;
   4554			vlnctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(j));
   4555			vlnctrl |= IXGBE_RXDCTL_VME;
   4556			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(j), vlnctrl);
   4557		}
   4558		break;
   4559	default:
   4560		break;
   4561	}
   4562}
   4563
   4564static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter)
   4565{
   4566	struct ixgbe_hw *hw = &adapter->hw;
   4567	u32 vlnctrl, i;
   4568
   4569	vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
   4570
   4571	if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) {
    4572		/* For VMDq and SR-IOV we must leave VLAN filtering enabled */
   4573		vlnctrl |= IXGBE_VLNCTRL_VFE;
   4574		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
   4575	} else {
   4576		vlnctrl &= ~IXGBE_VLNCTRL_VFE;
   4577		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
   4578		return;
   4579	}
   4580
   4581	/* Nothing to do for 82598 */
   4582	if (hw->mac.type == ixgbe_mac_82598EB)
   4583		return;
   4584
   4585	/* We are already in VLAN promisc, nothing to do */
   4586	if (adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC)
   4587		return;
   4588
   4589	/* Set flag so we don't redo unnecessary work */
   4590	adapter->flags2 |= IXGBE_FLAG2_VLAN_PROMISC;
   4591
   4592	/* Add PF to all active pools */
   4593	for (i = IXGBE_VLVF_ENTRIES; --i;) {
   4594		u32 reg_offset = IXGBE_VLVFB(i * 2 + VMDQ_P(0) / 32);
   4595		u32 vlvfb = IXGBE_READ_REG(hw, reg_offset);
   4596
   4597		vlvfb |= BIT(VMDQ_P(0) % 32);
   4598		IXGBE_WRITE_REG(hw, reg_offset, vlvfb);
   4599	}
   4600
   4601	/* Set all bits in the VLAN filter table array */
   4602	for (i = hw->mac.vft_size; i--;)
   4603		IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), ~0U);
   4604}
   4605
   4606#define VFTA_BLOCK_SIZE 8
   4607static void ixgbe_scrub_vfta(struct ixgbe_adapter *adapter, u32 vfta_offset)
   4608{
   4609	struct ixgbe_hw *hw = &adapter->hw;
   4610	u32 vfta[VFTA_BLOCK_SIZE] = { 0 };
   4611	u32 vid_start = vfta_offset * 32;
   4612	u32 vid_end = vid_start + (VFTA_BLOCK_SIZE * 32);
   4613	u32 i, vid, word, bits;
   4614
   4615	for (i = IXGBE_VLVF_ENTRIES; --i;) {
   4616		u32 vlvf = IXGBE_READ_REG(hw, IXGBE_VLVF(i));
   4617
   4618		/* pull VLAN ID from VLVF */
   4619		vid = vlvf & VLAN_VID_MASK;
   4620
    4621		/* only concern ourselves with a certain range */
   4622		if (vid < vid_start || vid >= vid_end)
   4623			continue;
   4624
   4625		if (vlvf) {
   4626			/* record VLAN ID in VFTA */
   4627			vfta[(vid - vid_start) / 32] |= BIT(vid % 32);
   4628
   4629			/* if PF is part of this then continue */
   4630			if (test_bit(vid, adapter->active_vlans))
   4631				continue;
   4632		}
   4633
   4634		/* remove PF from the pool */
   4635		word = i * 2 + VMDQ_P(0) / 32;
   4636		bits = ~BIT(VMDQ_P(0) % 32);
   4637		bits &= IXGBE_READ_REG(hw, IXGBE_VLVFB(word));
   4638		IXGBE_WRITE_REG(hw, IXGBE_VLVFB(word), bits);
   4639	}
   4640
   4641	/* extract values from active_vlans and write back to VFTA */
   4642	for (i = VFTA_BLOCK_SIZE; i--;) {
   4643		vid = (vfta_offset + i) * 32;
   4644		word = vid / BITS_PER_LONG;
   4645		bits = vid % BITS_PER_LONG;
   4646
   4647		vfta[i] |= adapter->active_vlans[word] >> bits;
   4648
   4649		IXGBE_WRITE_REG(hw, IXGBE_VFTA(vfta_offset + i), vfta[i]);
   4650	}
   4651}
   4652
   4653static void ixgbe_vlan_promisc_disable(struct ixgbe_adapter *adapter)
   4654{
   4655	struct ixgbe_hw *hw = &adapter->hw;
   4656	u32 vlnctrl, i;
   4657
   4658	/* Set VLAN filtering to enabled */
   4659	vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
   4660	vlnctrl |= IXGBE_VLNCTRL_VFE;
   4661	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
   4662
   4663	if (!(adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) ||
   4664	    hw->mac.type == ixgbe_mac_82598EB)
   4665		return;
   4666
   4667	/* We are not in VLAN promisc, nothing to do */
   4668	if (!(adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC))
   4669		return;
   4670
   4671	/* Set flag so we don't redo unnecessary work */
   4672	adapter->flags2 &= ~IXGBE_FLAG2_VLAN_PROMISC;
   4673
   4674	for (i = 0; i < hw->mac.vft_size; i += VFTA_BLOCK_SIZE)
   4675		ixgbe_scrub_vfta(adapter, i);
   4676}
   4677
   4678static void ixgbe_restore_vlan(struct ixgbe_adapter *adapter)
   4679{
   4680	u16 vid = 1;
   4681
   4682	ixgbe_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), 0);
   4683
   4684	for_each_set_bit_from(vid, adapter->active_vlans, VLAN_N_VID)
   4685		ixgbe_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid);
   4686}
   4687
   4688/**
   4689 * ixgbe_write_mc_addr_list - write multicast addresses to MTA
   4690 * @netdev: network interface device structure
   4691 *
   4692 * Writes multicast address list to the MTA hash table.
   4693 * Returns: -ENOMEM on failure
   4694 *                0 on no addresses written
   4695 *                X on writing X addresses to MTA
   4696 **/
   4697static int ixgbe_write_mc_addr_list(struct net_device *netdev)
   4698{
   4699	struct ixgbe_adapter *adapter = netdev_priv(netdev);
   4700	struct ixgbe_hw *hw = &adapter->hw;
   4701
   4702	if (!netif_running(netdev))
   4703		return 0;
   4704
   4705	if (hw->mac.ops.update_mc_addr_list)
   4706		hw->mac.ops.update_mc_addr_list(hw, netdev);
   4707	else
   4708		return -ENOMEM;
   4709
   4710#ifdef CONFIG_PCI_IOV
   4711	ixgbe_restore_vf_multicasts(adapter);
   4712#endif
   4713
   4714	return netdev_mc_count(netdev);
   4715}
   4716
   4717#ifdef CONFIG_PCI_IOV
   4718void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter)
   4719{
   4720	struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
   4721	struct ixgbe_hw *hw = &adapter->hw;
   4722	int i;
   4723
   4724	for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) {
   4725		mac_table->state &= ~IXGBE_MAC_STATE_MODIFIED;
   4726
   4727		if (mac_table->state & IXGBE_MAC_STATE_IN_USE)
   4728			hw->mac.ops.set_rar(hw, i,
   4729					    mac_table->addr,
   4730					    mac_table->pool,
   4731					    IXGBE_RAH_AV);
   4732		else
   4733			hw->mac.ops.clear_rar(hw, i);
   4734	}
   4735}
   4736
   4737#endif
   4738static void ixgbe_sync_mac_table(struct ixgbe_adapter *adapter)
   4739{
   4740	struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
   4741	struct ixgbe_hw *hw = &adapter->hw;
   4742	int i;
   4743
   4744	for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) {
   4745		if (!(mac_table->state & IXGBE_MAC_STATE_MODIFIED))
   4746			continue;
   4747
   4748		mac_table->state &= ~IXGBE_MAC_STATE_MODIFIED;
   4749
   4750		if (mac_table->state & IXGBE_MAC_STATE_IN_USE)
   4751			hw->mac.ops.set_rar(hw, i,
   4752					    mac_table->addr,
   4753					    mac_table->pool,
   4754					    IXGBE_RAH_AV);
   4755		else
   4756			hw->mac.ops.clear_rar(hw, i);
   4757	}
   4758}
   4759
   4760static void ixgbe_flush_sw_mac_table(struct ixgbe_adapter *adapter)
   4761{
   4762	struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
   4763	struct ixgbe_hw *hw = &adapter->hw;
   4764	int i;
   4765
   4766	for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) {
   4767		mac_table->state |= IXGBE_MAC_STATE_MODIFIED;
   4768		mac_table->state &= ~IXGBE_MAC_STATE_IN_USE;
   4769	}
   4770
   4771	ixgbe_sync_mac_table(adapter);
   4772}
   4773
   4774static int ixgbe_available_rars(struct ixgbe_adapter *adapter, u16 pool)
   4775{
   4776	struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
   4777	struct ixgbe_hw *hw = &adapter->hw;
   4778	int i, count = 0;
   4779
   4780	for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) {
   4781		/* do not count default RAR as available */
   4782		if (mac_table->state & IXGBE_MAC_STATE_DEFAULT)
   4783			continue;
   4784
   4785		/* only count unused and addresses that belong to us */
   4786		if (mac_table->state & IXGBE_MAC_STATE_IN_USE) {
   4787			if (mac_table->pool != pool)
   4788				continue;
   4789		}
   4790
   4791		count++;
   4792	}
   4793
   4794	return count;
   4795}
   4796
   4797/* this function destroys the first RAR entry */
   4798static void ixgbe_mac_set_default_filter(struct ixgbe_adapter *adapter)
   4799{
   4800	struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
   4801	struct ixgbe_hw *hw = &adapter->hw;
   4802
   4803	memcpy(&mac_table->addr, hw->mac.addr, ETH_ALEN);
   4804	mac_table->pool = VMDQ_P(0);
   4805
   4806	mac_table->state = IXGBE_MAC_STATE_DEFAULT | IXGBE_MAC_STATE_IN_USE;
   4807
   4808	hw->mac.ops.set_rar(hw, 0, mac_table->addr, mac_table->pool,
   4809			    IXGBE_RAH_AV);
   4810}
   4811
   4812int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter,
   4813			 const u8 *addr, u16 pool)
   4814{
   4815	struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
   4816	struct ixgbe_hw *hw = &adapter->hw;
   4817	int i;
   4818
   4819	if (is_zero_ether_addr(addr))
   4820		return -EINVAL;
   4821
   4822	for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) {
   4823		if (mac_table->state & IXGBE_MAC_STATE_IN_USE)
   4824			continue;
   4825
   4826		ether_addr_copy(mac_table->addr, addr);
   4827		mac_table->pool = pool;
   4828
   4829		mac_table->state |= IXGBE_MAC_STATE_MODIFIED |
   4830				    IXGBE_MAC_STATE_IN_USE;
   4831
   4832		ixgbe_sync_mac_table(adapter);
   4833
   4834		return i;
   4835	}
   4836
   4837	return -ENOMEM;
   4838}
   4839
   4840int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter,
   4841			 const u8 *addr, u16 pool)
   4842{
   4843	struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
   4844	struct ixgbe_hw *hw = &adapter->hw;
   4845	int i;
   4846
   4847	if (is_zero_ether_addr(addr))
   4848		return -EINVAL;
   4849
   4850	/* search table for addr, if found clear IN_USE flag and sync */
   4851	for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) {
   4852		/* we can only delete an entry if it is in use */
   4853		if (!(mac_table->state & IXGBE_MAC_STATE_IN_USE))
   4854			continue;
   4855		/* we only care about entries that belong to the given pool */
   4856		if (mac_table->pool != pool)
   4857			continue;
   4858		/* we only care about a specific MAC address */
   4859		if (!ether_addr_equal(addr, mac_table->addr))
   4860			continue;
   4861
   4862		mac_table->state |= IXGBE_MAC_STATE_MODIFIED;
   4863		mac_table->state &= ~IXGBE_MAC_STATE_IN_USE;
   4864
   4865		ixgbe_sync_mac_table(adapter);
   4866
   4867		return 0;
   4868	}
   4869
   4870	return -ENOMEM;
   4871}
   4872
   4873static int ixgbe_uc_sync(struct net_device *netdev, const unsigned char *addr)
   4874{
   4875	struct ixgbe_adapter *adapter = netdev_priv(netdev);
   4876	int ret;
   4877
   4878	ret = ixgbe_add_mac_filter(adapter, addr, VMDQ_P(0));
   4879
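        	/* ixgbe_add_mac_filter() returns the RAR index (>= 0) on success
        	 * or a negative errno; the address sync framework treats any
        	 * non-zero return as failure, so clamp positive indexes to 0.
        	 */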
   4880	return min_t(int, ret, 0);
   4881}
   4882
   4883static int ixgbe_uc_unsync(struct net_device *netdev, const unsigned char *addr)
   4884{
   4885	struct ixgbe_adapter *adapter = netdev_priv(netdev);
   4886
   4887	ixgbe_del_mac_filter(adapter, addr, VMDQ_P(0));
   4888
   4889	return 0;
   4890}
   4891
   4892/**
   4893 * ixgbe_set_rx_mode - Unicast, Multicast and Promiscuous mode set
   4894 * @netdev: network interface device structure
   4895 *
    4896	 * The set_rx_mode entry point is called whenever the unicast/multicast
   4897 * address list or the network interface flags are updated.  This routine is
   4898 * responsible for configuring the hardware for proper unicast, multicast and
   4899 * promiscuous mode.
   4900 **/
   4901void ixgbe_set_rx_mode(struct net_device *netdev)
   4902{
   4903	struct ixgbe_adapter *adapter = netdev_priv(netdev);
   4904	struct ixgbe_hw *hw = &adapter->hw;
   4905	u32 fctrl, vmolr = IXGBE_VMOLR_BAM | IXGBE_VMOLR_AUPE;
   4906	netdev_features_t features = netdev->features;
   4907	int count;
   4908
   4909	/* Check for Promiscuous and All Multicast modes */
   4910	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
   4911
   4912	/* set all bits that we expect to always be set */
   4913	fctrl &= ~IXGBE_FCTRL_SBP; /* disable store-bad-packets */
   4914	fctrl |= IXGBE_FCTRL_BAM;
   4915	fctrl |= IXGBE_FCTRL_DPF; /* discard pause frames when FC enabled */
   4916	fctrl |= IXGBE_FCTRL_PMCF;
   4917
   4918	/* clear the bits we are changing the status of */
   4919	fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   4920	if (netdev->flags & IFF_PROMISC) {
   4921		hw->addr_ctrl.user_set_promisc = true;
   4922		fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   4923		vmolr |= IXGBE_VMOLR_MPE;
   4924		features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
   4925	} else {
   4926		if (netdev->flags & IFF_ALLMULTI) {
   4927			fctrl |= IXGBE_FCTRL_MPE;
   4928			vmolr |= IXGBE_VMOLR_MPE;
   4929		}
   4930		hw->addr_ctrl.user_set_promisc = false;
   4931	}
   4932
   4933	/*
   4934	 * Write addresses to available RAR registers, if there is not
   4935	 * sufficient space to store all the addresses then enable
   4936	 * unicast promiscuous mode
   4937	 */
   4938	if (__dev_uc_sync(netdev, ixgbe_uc_sync, ixgbe_uc_unsync)) {
   4939		fctrl |= IXGBE_FCTRL_UPE;
   4940		vmolr |= IXGBE_VMOLR_ROPE;
   4941	}
   4942
   4943	/* Write addresses to the MTA, if the attempt fails
   4944	 * then we should just turn on promiscuous mode so
   4945	 * that we can at least receive multicast traffic
   4946	 */
   4947	count = ixgbe_write_mc_addr_list(netdev);
   4948	if (count < 0) {
   4949		fctrl |= IXGBE_FCTRL_MPE;
   4950		vmolr |= IXGBE_VMOLR_MPE;
   4951	} else if (count) {
   4952		vmolr |= IXGBE_VMOLR_ROMPE;
   4953	}
   4954
   4955	if (hw->mac.type != ixgbe_mac_82598EB) {
   4956		vmolr |= IXGBE_READ_REG(hw, IXGBE_VMOLR(VMDQ_P(0))) &
   4957			 ~(IXGBE_VMOLR_MPE | IXGBE_VMOLR_ROMPE |
   4958			   IXGBE_VMOLR_ROPE);
   4959		IXGBE_WRITE_REG(hw, IXGBE_VMOLR(VMDQ_P(0)), vmolr);
   4960	}
   4961
   4962	/* This is useful for sniffing bad packets. */
   4963	if (features & NETIF_F_RXALL) {
   4964		/* UPE and MPE will be handled by normal PROMISC logic
    4965		 * in ixgbe_set_rx_mode */
   4966		fctrl |= (IXGBE_FCTRL_SBP | /* Receive bad packets */
   4967			  IXGBE_FCTRL_BAM | /* RX All Bcast Pkts */
   4968			  IXGBE_FCTRL_PMCF); /* RX All MAC Ctrl Pkts */
   4969
   4970		fctrl &= ~(IXGBE_FCTRL_DPF);
   4971		/* NOTE:  VLAN filtering is disabled by setting PROMISC */
   4972	}
   4973
   4974	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
   4975
   4976	if (features & NETIF_F_HW_VLAN_CTAG_RX)
   4977		ixgbe_vlan_strip_enable(adapter);
   4978	else
   4979		ixgbe_vlan_strip_disable(adapter);
   4980
   4981	if (features & NETIF_F_HW_VLAN_CTAG_FILTER)
   4982		ixgbe_vlan_promisc_disable(adapter);
   4983	else
   4984		ixgbe_vlan_promisc_enable(adapter);
   4985}
   4986
   4987static void ixgbe_napi_enable_all(struct ixgbe_adapter *adapter)
   4988{
   4989	int q_idx;
   4990
   4991	for (q_idx = 0; q_idx < adapter->num_q_vectors; q_idx++)
   4992		napi_enable(&adapter->q_vector[q_idx]->napi);
   4993}
   4994
   4995static void ixgbe_napi_disable_all(struct ixgbe_adapter *adapter)
   4996{
   4997	int q_idx;
   4998
   4999	for (q_idx = 0; q_idx < adapter->num_q_vectors; q_idx++)
   5000		napi_disable(&adapter->q_vector[q_idx]->napi);
   5001}
   5002
   5003static int ixgbe_udp_tunnel_sync(struct net_device *dev, unsigned int table)
   5004{
   5005	struct ixgbe_adapter *adapter = netdev_priv(dev);
   5006	struct ixgbe_hw *hw = &adapter->hw;
   5007	struct udp_tunnel_info ti;
   5008
   5009	udp_tunnel_nic_get_port(dev, table, 0, &ti);
   5010	if (ti.type == UDP_TUNNEL_TYPE_VXLAN)
   5011		adapter->vxlan_port = ti.port;
   5012	else
   5013		adapter->geneve_port = ti.port;
   5014
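        	/* Both offloaded UDP ports share one register: the VXLAN port
        	 * sits in the low half of VXLANCTRL and the GENEVE port is
        	 * shifted into its own field in the upper half.
        	 */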
   5015	IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL,
   5016			ntohs(adapter->vxlan_port) |
   5017			ntohs(adapter->geneve_port) <<
   5018				IXGBE_VXLANCTRL_GENEVE_UDPPORT_SHIFT);
   5019	return 0;
   5020}
   5021
   5022static const struct udp_tunnel_nic_info ixgbe_udp_tunnels_x550 = {
   5023	.sync_table	= ixgbe_udp_tunnel_sync,
   5024	.flags		= UDP_TUNNEL_NIC_INFO_IPV4_ONLY,
   5025	.tables		= {
   5026		{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN,  },
   5027	},
   5028};
   5029
   5030static const struct udp_tunnel_nic_info ixgbe_udp_tunnels_x550em_a = {
   5031	.sync_table	= ixgbe_udp_tunnel_sync,
   5032	.flags		= UDP_TUNNEL_NIC_INFO_IPV4_ONLY,
   5033	.tables		= {
   5034		{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN,  },
   5035		{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE, },
   5036	},
   5037};
   5038
   5039#ifdef CONFIG_IXGBE_DCB
   5040/**
   5041 * ixgbe_configure_dcb - Configure DCB hardware
   5042 * @adapter: ixgbe adapter struct
   5043 *
   5044 * This is called by the driver on open to configure the DCB hardware.
   5045 * This is also called by the gennetlink interface when reconfiguring
   5046 * the DCB state.
   5047 */
   5048static void ixgbe_configure_dcb(struct ixgbe_adapter *adapter)
   5049{
   5050	struct ixgbe_hw *hw = &adapter->hw;
   5051	int max_frame = adapter->netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
   5052
   5053	if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED)) {
   5054		if (hw->mac.type == ixgbe_mac_82598EB)
   5055			netif_set_tso_max_size(adapter->netdev, 65536);
   5056		return;
   5057	}
   5058
   5059	if (hw->mac.type == ixgbe_mac_82598EB)
   5060		netif_set_tso_max_size(adapter->netdev, 32768);
   5061
   5062#ifdef IXGBE_FCOE
   5063	if (adapter->netdev->features & NETIF_F_FCOE_MTU)
   5064		max_frame = max(max_frame, IXGBE_FCOE_JUMBO_FRAME_SIZE);
   5065#endif
   5066
   5067	/* reconfigure the hardware */
   5068	if (adapter->dcbx_cap & DCB_CAP_DCBX_VER_CEE) {
   5069		ixgbe_dcb_calculate_tc_credits(hw, &adapter->dcb_cfg, max_frame,
   5070						DCB_TX_CONFIG);
   5071		ixgbe_dcb_calculate_tc_credits(hw, &adapter->dcb_cfg, max_frame,
   5072						DCB_RX_CONFIG);
   5073		ixgbe_dcb_hw_config(hw, &adapter->dcb_cfg);
   5074	} else if (adapter->ixgbe_ieee_ets && adapter->ixgbe_ieee_pfc) {
   5075		ixgbe_dcb_hw_ets(&adapter->hw,
   5076				 adapter->ixgbe_ieee_ets,
   5077				 max_frame);
   5078		ixgbe_dcb_hw_pfc_config(&adapter->hw,
   5079					adapter->ixgbe_ieee_pfc->pfc_en,
   5080					adapter->ixgbe_ieee_ets->prio_tc);
   5081	}
   5082
   5083	/* Enable RSS Hash per TC */
   5084	if (hw->mac.type != ixgbe_mac_82598EB) {
   5085		u32 msb = 0;
   5086		u16 rss_i = adapter->ring_feature[RING_F_RSS].indices - 1;
   5087
   5088		while (rss_i) {
   5089			msb++;
   5090			rss_i >>= 1;
   5091		}
   5092
   5093		/* write msb to all 8 TCs in one write */
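        		/* e.g. 16 RSS indices -> rss_i starts at 15 -> msb = 4,
        		 * and 4 * 0x11111111 = 0x44444444 places 4 in every
        		 * per-TC nibble of RQTC.
        		 */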
   5094		IXGBE_WRITE_REG(hw, IXGBE_RQTC, msb * 0x11111111);
   5095	}
   5096}
   5097#endif
   5098
   5099/* Additional bittime to account for IXGBE framing */
   5100#define IXGBE_ETH_FRAMING 20
   5101
   5102/**
   5103 * ixgbe_hpbthresh - calculate high water mark for flow control
   5104 *
   5105 * @adapter: board private structure to calculate for
   5106 * @pb: packet buffer to calculate
   5107 */
   5108static int ixgbe_hpbthresh(struct ixgbe_adapter *adapter, int pb)
   5109{
   5110	struct ixgbe_hw *hw = &adapter->hw;
   5111	struct net_device *dev = adapter->netdev;
   5112	int link, tc, kb, marker;
   5113	u32 dv_id, rx_pba;
   5114
   5115	/* Calculate max LAN frame size */
   5116	tc = link = dev->mtu + ETH_HLEN + ETH_FCS_LEN + IXGBE_ETH_FRAMING;
   5117
   5118#ifdef IXGBE_FCOE
   5119	/* FCoE traffic class uses FCOE jumbo frames */
   5120	if ((dev->features & NETIF_F_FCOE_MTU) &&
   5121	    (tc < IXGBE_FCOE_JUMBO_FRAME_SIZE) &&
   5122	    (pb == ixgbe_fcoe_get_tc(adapter)))
   5123		tc = IXGBE_FCOE_JUMBO_FRAME_SIZE;
   5124#endif
   5125
   5126	/* Calculate delay value for device */
   5127	switch (hw->mac.type) {
   5128	case ixgbe_mac_X540:
   5129	case ixgbe_mac_X550:
   5130	case ixgbe_mac_X550EM_x:
   5131	case ixgbe_mac_x550em_a:
   5132		dv_id = IXGBE_DV_X540(link, tc);
   5133		break;
   5134	default:
   5135		dv_id = IXGBE_DV(link, tc);
   5136		break;
   5137	}
   5138
   5139	/* Loopback switch introduces additional latency */
   5140	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
   5141		dv_id += IXGBE_B2BT(tc);
   5142
    5143	/* Delay value is calculated in bit times, convert to KB */
   5144	kb = IXGBE_BT2KB(dv_id);
   5145	rx_pba = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(pb)) >> 10;
   5146
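        	/* high water mark: whatever is left of the packet buffer after
        	 * reserving room for the worst-case delay
        	 */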
   5147	marker = rx_pba - kb;
   5148
   5149	/* It is possible that the packet buffer is not large enough
   5150	 * to provide required headroom. In this case throw an error
    5151	 * to user and do the best we can.
   5152	 */
   5153	if (marker < 0) {
    5154		e_warn(drv, "Packet Buffer(%i) can not provide enough "
    5155			    "headroom to support flow control. "
    5156			    "Decrease MTU or number of traffic classes\n", pb);
   5157		marker = tc + 1;
   5158	}
   5159
   5160	return marker;
   5161}
   5162
   5163/**
    5164 * ixgbe_lpbthresh - calculate low water mark for flow control
   5165 *
   5166 * @adapter: board private structure to calculate for
   5167 * @pb: packet buffer to calculate
   5168 */
   5169static int ixgbe_lpbthresh(struct ixgbe_adapter *adapter, int pb)
   5170{
   5171	struct ixgbe_hw *hw = &adapter->hw;
   5172	struct net_device *dev = adapter->netdev;
   5173	int tc;
   5174	u32 dv_id;
   5175
   5176	/* Calculate max LAN frame size */
   5177	tc = dev->mtu + ETH_HLEN + ETH_FCS_LEN;
   5178
   5179#ifdef IXGBE_FCOE
   5180	/* FCoE traffic class uses FCOE jumbo frames */
   5181	if ((dev->features & NETIF_F_FCOE_MTU) &&
   5182	    (tc < IXGBE_FCOE_JUMBO_FRAME_SIZE) &&
   5183	    (pb == netdev_get_prio_tc_map(dev, adapter->fcoe.up)))
   5184		tc = IXGBE_FCOE_JUMBO_FRAME_SIZE;
   5185#endif
   5186
   5187	/* Calculate delay value for device */
   5188	switch (hw->mac.type) {
   5189	case ixgbe_mac_X540:
   5190	case ixgbe_mac_X550:
   5191	case ixgbe_mac_X550EM_x:
   5192	case ixgbe_mac_x550em_a:
   5193		dv_id = IXGBE_LOW_DV_X540(tc);
   5194		break;
   5195	default:
   5196		dv_id = IXGBE_LOW_DV(tc);
   5197		break;
   5198	}
   5199
    5200	/* Delay value is calculated in bit times, convert to KB */
   5201	return IXGBE_BT2KB(dv_id);
   5202}
   5203
   5204/*
    5205 * ixgbe_pbthresh_setup - calculate and setup high and low water marks
   5206 */
   5207static void ixgbe_pbthresh_setup(struct ixgbe_adapter *adapter)
   5208{
   5209	struct ixgbe_hw *hw = &adapter->hw;
   5210	int num_tc = adapter->hw_tcs;
   5211	int i;
   5212
   5213	if (!num_tc)
   5214		num_tc = 1;
   5215
   5216	for (i = 0; i < num_tc; i++) {
   5217		hw->fc.high_water[i] = ixgbe_hpbthresh(adapter, i);
   5218		hw->fc.low_water[i] = ixgbe_lpbthresh(adapter, i);
   5219
   5220		/* Low water marks must not be larger than high water marks */
   5221		if (hw->fc.low_water[i] > hw->fc.high_water[i])
   5222			hw->fc.low_water[i] = 0;
   5223	}
   5224
   5225	for (; i < MAX_TRAFFIC_CLASS; i++)
   5226		hw->fc.high_water[i] = 0;
   5227}
   5228
   5229static void ixgbe_configure_pb(struct ixgbe_adapter *adapter)
   5230{
   5231	struct ixgbe_hw *hw = &adapter->hw;
   5232	int hdrm;
   5233	u8 tc = adapter->hw_tcs;
   5234
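        	/* Flow Director filter tables are carved out of the Rx packet
        	 * buffer, so reserve headroom for them (32 scaled up by the
        	 * fdir_pballoc setting); otherwise no headroom is needed.
        	 */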
   5235	if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE ||
   5236	    adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)
   5237		hdrm = 32 << adapter->fdir_pballoc;
   5238	else
   5239		hdrm = 0;
   5240
   5241	hw->mac.ops.set_rxpba(hw, tc, hdrm, PBA_STRATEGY_EQUAL);
   5242	ixgbe_pbthresh_setup(adapter);
   5243}
   5244
   5245static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter)
   5246{
   5247	struct ixgbe_hw *hw = &adapter->hw;
   5248	struct hlist_node *node2;
   5249	struct ixgbe_fdir_filter *filter;
   5250	u8 queue;
   5251
   5252	spin_lock(&adapter->fdir_perfect_lock);
   5253
   5254	if (!hlist_empty(&adapter->fdir_filter_list))
   5255		ixgbe_fdir_set_input_mask_82599(hw, &adapter->fdir_mask);
   5256
   5257	hlist_for_each_entry_safe(filter, node2,
   5258				  &adapter->fdir_filter_list, fdir_node) {
   5259		if (filter->action == IXGBE_FDIR_DROP_QUEUE) {
   5260			queue = IXGBE_FDIR_DROP_QUEUE;
   5261		} else {
   5262			u32 ring = ethtool_get_flow_spec_ring(filter->action);
   5263			u8 vf = ethtool_get_flow_spec_ring_vf(filter->action);
   5264
   5265			if (!vf && (ring >= adapter->num_rx_queues)) {
   5266				e_err(drv, "FDIR restore failed without VF, ring: %u\n",
   5267				      ring);
   5268				continue;
   5269			} else if (vf &&
   5270				   ((vf > adapter->num_vfs) ||
   5271				     ring >= adapter->num_rx_queues_per_pool)) {
   5272				e_err(drv, "FDIR restore failed with VF, vf: %hhu, ring: %u\n",
   5273				      vf, ring);
   5274				continue;
   5275			}
   5276
   5277			/* Map the ring onto the absolute queue index */
   5278			if (!vf)
   5279				queue = adapter->rx_ring[ring]->reg_idx;
   5280			else
   5281				queue = ((vf - 1) *
   5282					adapter->num_rx_queues_per_pool) + ring;
   5283		}
   5284
   5285		ixgbe_fdir_write_perfect_filter_82599(hw,
   5286				&filter->filter, filter->sw_idx, queue);
   5287	}
   5288
   5289	spin_unlock(&adapter->fdir_perfect_lock);
   5290}
   5291
   5292/**
   5293 * ixgbe_clean_rx_ring - Free Rx Buffers per Queue
   5294 * @rx_ring: ring to free buffers from
   5295 **/
   5296static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring)
   5297{
   5298	u16 i = rx_ring->next_to_clean;
   5299	struct ixgbe_rx_buffer *rx_buffer = &rx_ring->rx_buffer_info[i];
   5300
   5301	if (rx_ring->xsk_pool) {
   5302		ixgbe_xsk_clean_rx_ring(rx_ring);
   5303		goto skip_free;
   5304	}
   5305
   5306	/* Free all the Rx ring sk_buffs */
   5307	while (i != rx_ring->next_to_alloc) {
   5308		if (rx_buffer->skb) {
   5309			struct sk_buff *skb = rx_buffer->skb;
   5310			if (IXGBE_CB(skb)->page_released)
   5311				dma_unmap_page_attrs(rx_ring->dev,
   5312						     IXGBE_CB(skb)->dma,
   5313						     ixgbe_rx_pg_size(rx_ring),
   5314						     DMA_FROM_DEVICE,
   5315						     IXGBE_RX_DMA_ATTR);
   5316			dev_kfree_skb(skb);
   5317		}
   5318
   5319		/* Invalidate cache lines that may have been written to by
   5320		 * device so that we avoid corrupting memory.
   5321		 */
   5322		dma_sync_single_range_for_cpu(rx_ring->dev,
   5323					      rx_buffer->dma,
   5324					      rx_buffer->page_offset,
   5325					      ixgbe_rx_bufsz(rx_ring),
   5326					      DMA_FROM_DEVICE);
   5327
   5328		/* free resources associated with mapping */
   5329		dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
   5330				     ixgbe_rx_pg_size(rx_ring),
   5331				     DMA_FROM_DEVICE,
   5332				     IXGBE_RX_DMA_ATTR);
   5333		__page_frag_cache_drain(rx_buffer->page,
   5334					rx_buffer->pagecnt_bias);
   5335
   5336		i++;
   5337		rx_buffer++;
   5338		if (i == rx_ring->count) {
   5339			i = 0;
   5340			rx_buffer = rx_ring->rx_buffer_info;
   5341		}
   5342	}
   5343
   5344skip_free:
   5345	rx_ring->next_to_alloc = 0;
   5346	rx_ring->next_to_clean = 0;
   5347	rx_ring->next_to_use = 0;
   5348}
   5349
   5350static int ixgbe_fwd_ring_up(struct ixgbe_adapter *adapter,
   5351			     struct ixgbe_fwd_adapter *accel)
   5352{
   5353	u16 rss_i = adapter->ring_feature[RING_F_RSS].indices;
   5354	int num_tc = netdev_get_num_tc(adapter->netdev);
   5355	struct net_device *vdev = accel->netdev;
   5356	int i, baseq, err;
   5357
   5358	baseq = accel->pool * adapter->num_rx_queues_per_pool;
   5359	netdev_dbg(vdev, "pool %i:%i queues %i:%i\n",
   5360		   accel->pool, adapter->num_rx_pools,
   5361		   baseq, baseq + adapter->num_rx_queues_per_pool);
   5362
   5363	accel->rx_base_queue = baseq;
   5364	accel->tx_base_queue = baseq;
   5365
   5366	/* record configuration for macvlan interface in vdev */
   5367	for (i = 0; i < num_tc; i++)
   5368		netdev_bind_sb_channel_queue(adapter->netdev, vdev,
   5369					     i, rss_i, baseq + (rss_i * i));
   5370
   5371	for (i = 0; i < adapter->num_rx_queues_per_pool; i++)
   5372		adapter->rx_ring[baseq + i]->netdev = vdev;
   5373
   5374	/* Guarantee all rings are updated before we update the
   5375	 * MAC address filter.
   5376	 */
   5377	wmb();
   5378
   5379	/* ixgbe_add_mac_filter will return an index if it succeeds, so we
   5380	 * need to only treat it as an error value if it is negative.
   5381	 */
   5382	err = ixgbe_add_mac_filter(adapter, vdev->dev_addr,
   5383				   VMDQ_P(accel->pool));
   5384	if (err >= 0)
   5385		return 0;
   5386
   5387	/* if we cannot add the MAC rule then disable the offload */
   5388	macvlan_release_l2fw_offload(vdev);
   5389
   5390	for (i = 0; i < adapter->num_rx_queues_per_pool; i++)
   5391		adapter->rx_ring[baseq + i]->netdev = NULL;
   5392
   5393	netdev_err(vdev, "L2FW offload disabled due to L2 filter error\n");
   5394
   5395	/* unbind the queues and drop the subordinate channel config */
   5396	netdev_unbind_sb_channel(adapter->netdev, vdev);
   5397	netdev_set_sb_channel(vdev, 0);
   5398
   5399	clear_bit(accel->pool, adapter->fwd_bitmask);
   5400	kfree(accel);
   5401
   5402	return err;
   5403}
   5404
   5405static int ixgbe_macvlan_up(struct net_device *vdev,
   5406			    struct netdev_nested_priv *priv)
   5407{
   5408	struct ixgbe_adapter *adapter = (struct ixgbe_adapter *)priv->data;
   5409	struct ixgbe_fwd_adapter *accel;
   5410
   5411	if (!netif_is_macvlan(vdev))
   5412		return 0;
   5413
   5414	accel = macvlan_accel_priv(vdev);
   5415	if (!accel)
   5416		return 0;
   5417
   5418	ixgbe_fwd_ring_up(adapter, accel);
   5419
   5420	return 0;
   5421}
   5422
   5423static void ixgbe_configure_dfwd(struct ixgbe_adapter *adapter)
   5424{
   5425	struct netdev_nested_priv priv = {
   5426		.data = (void *)adapter,
   5427	};
   5428
   5429	netdev_walk_all_upper_dev_rcu(adapter->netdev,
   5430				      ixgbe_macvlan_up, &priv);
   5431}
   5432
   5433static void ixgbe_configure(struct ixgbe_adapter *adapter)
   5434{
   5435	struct ixgbe_hw *hw = &adapter->hw;
   5436
   5437	ixgbe_configure_pb(adapter);
   5438#ifdef CONFIG_IXGBE_DCB
   5439	ixgbe_configure_dcb(adapter);
   5440#endif
   5441	/*
   5442	 * We must restore virtualization before VLANs or else
   5443	 * the VLVF registers will not be populated
   5444	 */
   5445	ixgbe_configure_virtualization(adapter);
   5446
   5447	ixgbe_set_rx_mode(adapter->netdev);
   5448	ixgbe_restore_vlan(adapter);
   5449	ixgbe_ipsec_restore(adapter);
   5450
   5451	switch (hw->mac.type) {
   5452	case ixgbe_mac_82599EB:
   5453	case ixgbe_mac_X540:
   5454		hw->mac.ops.disable_rx_buff(hw);
   5455		break;
   5456	default:
   5457		break;
   5458	}
   5459
   5460	if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) {
   5461		ixgbe_init_fdir_signature_82599(&adapter->hw,
   5462						adapter->fdir_pballoc);
   5463	} else if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) {
   5464		ixgbe_init_fdir_perfect_82599(&adapter->hw,
   5465					      adapter->fdir_pballoc);
   5466		ixgbe_fdir_filter_restore(adapter);
   5467	}
   5468
   5469	switch (hw->mac.type) {
   5470	case ixgbe_mac_82599EB:
   5471	case ixgbe_mac_X540:
   5472		hw->mac.ops.enable_rx_buff(hw);
   5473		break;
   5474	default:
   5475		break;
   5476	}
   5477
   5478#ifdef CONFIG_IXGBE_DCA
   5479	/* configure DCA */
   5480	if (adapter->flags & IXGBE_FLAG_DCA_CAPABLE)
   5481		ixgbe_setup_dca(adapter);
   5482#endif /* CONFIG_IXGBE_DCA */
   5483
   5484#ifdef IXGBE_FCOE
   5485	/* configure FCoE L2 filters, redirection table, and Rx control */
   5486	ixgbe_configure_fcoe(adapter);
   5487
   5488#endif /* IXGBE_FCOE */
   5489	ixgbe_configure_tx(adapter);
   5490	ixgbe_configure_rx(adapter);
   5491	ixgbe_configure_dfwd(adapter);
   5492}
   5493
   5494/**
   5495 * ixgbe_sfp_link_config - set up SFP+ link
   5496 * @adapter: pointer to private adapter struct
   5497 **/
   5498static void ixgbe_sfp_link_config(struct ixgbe_adapter *adapter)
   5499{
   5500	/*
   5501	 * We are assuming the worst case scenario here, and that
   5502	 * is that an SFP was inserted/removed after the reset
   5503	 * but before SFP detection was enabled.  As such the best
    5504	 * solution is to just start searching as soon as we start up.
   5505	 */
   5506	if (adapter->hw.mac.type == ixgbe_mac_82598EB)
   5507		adapter->flags2 |= IXGBE_FLAG2_SEARCH_FOR_SFP;
   5508
   5509	adapter->flags2 |= IXGBE_FLAG2_SFP_NEEDS_RESET;
   5510	adapter->sfp_poll_time = 0;
   5511}
   5512
   5513/**
   5514 * ixgbe_non_sfp_link_config - set up non-SFP+ link
   5515 * @hw: pointer to private hardware struct
   5516 *
   5517 * Returns 0 on success, negative on failure
   5518 **/
   5519static int ixgbe_non_sfp_link_config(struct ixgbe_hw *hw)
   5520{
   5521	u32 speed;
   5522	bool autoneg, link_up = false;
   5523	int ret = IXGBE_ERR_LINK_SETUP;
   5524
   5525	if (hw->mac.ops.check_link)
   5526		ret = hw->mac.ops.check_link(hw, &speed, &link_up, false);
   5527
   5528	if (ret)
   5529		return ret;
   5530
   5531	speed = hw->phy.autoneg_advertised;
   5532	if (!speed && hw->mac.ops.get_link_capabilities) {
   5533		ret = hw->mac.ops.get_link_capabilities(hw, &speed,
   5534							&autoneg);
   5535		/* remove NBASE-T speeds from default autonegotiation
   5536		 * to accommodate broken network switches in the field
   5537		 * which cannot cope with advertised NBASE-T speeds
   5538		 */
   5539		speed &= ~(IXGBE_LINK_SPEED_5GB_FULL |
   5540			   IXGBE_LINK_SPEED_2_5GB_FULL);
   5541	}
   5542
   5543	if (ret)
   5544		return ret;
   5545
   5546	if (hw->mac.ops.setup_link)
   5547		ret = hw->mac.ops.setup_link(hw, speed, link_up);
   5548
   5549	return ret;
   5550}
   5551
   5552static void ixgbe_setup_gpie(struct ixgbe_adapter *adapter)
   5553{
   5554	struct ixgbe_hw *hw = &adapter->hw;
   5555	u32 gpie = 0;
   5556
   5557	if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) {
   5558		gpie = IXGBE_GPIE_MSIX_MODE | IXGBE_GPIE_PBA_SUPPORT |
   5559		       IXGBE_GPIE_OCD;
   5560		gpie |= IXGBE_GPIE_EIAME;
   5561		/*
   5562		 * use EIAM to auto-mask when MSI-X interrupt is asserted
   5563		 * this saves a register write for every interrupt
   5564		 */
   5565		switch (hw->mac.type) {
   5566		case ixgbe_mac_82598EB:
   5567			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
   5568			break;
   5569		case ixgbe_mac_82599EB:
   5570		case ixgbe_mac_X540:
   5571		case ixgbe_mac_X550:
   5572		case ixgbe_mac_X550EM_x:
   5573		case ixgbe_mac_x550em_a:
   5574		default:
   5575			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
   5576			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
   5577			break;
   5578		}
   5579	} else {
   5580		/* legacy interrupts, use EIAM to auto-mask when reading EICR,
    5581		 * specifically only auto-mask Tx and Rx interrupts */
   5582		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
   5583	}
   5584
   5585	/* XXX: to interrupt immediately for EICS writes, enable this */
   5586	/* gpie |= IXGBE_GPIE_EIMEN; */
   5587
   5588	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
   5589		gpie &= ~IXGBE_GPIE_VTMODE_MASK;
   5590
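        		/* VTMODE tells the hardware how the queues are split
        		 * among VMDq pools: 8 queues per pool -> 16 pools,
        		 * 4 queues per pool -> 32 pools, otherwise 64 pools.
        		 */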
   5591		switch (adapter->ring_feature[RING_F_VMDQ].mask) {
   5592		case IXGBE_82599_VMDQ_8Q_MASK:
   5593			gpie |= IXGBE_GPIE_VTMODE_16;
   5594			break;
   5595		case IXGBE_82599_VMDQ_4Q_MASK:
   5596			gpie |= IXGBE_GPIE_VTMODE_32;
   5597			break;
   5598		default:
   5599			gpie |= IXGBE_GPIE_VTMODE_64;
   5600			break;
   5601		}
   5602	}
   5603
   5604	/* Enable Thermal over heat sensor interrupt */
   5605	if (adapter->flags2 & IXGBE_FLAG2_TEMP_SENSOR_CAPABLE) {
   5606		switch (adapter->hw.mac.type) {
   5607		case ixgbe_mac_82599EB:
   5608			gpie |= IXGBE_SDP0_GPIEN_8259X;
   5609			break;
   5610		default:
   5611			break;
   5612		}
   5613	}
   5614
   5615	/* Enable fan failure interrupt */
   5616	if (adapter->flags & IXGBE_FLAG_FAN_FAIL_CAPABLE)
   5617		gpie |= IXGBE_SDP1_GPIEN(hw);
   5618
   5619	switch (hw->mac.type) {
   5620	case ixgbe_mac_82599EB:
   5621		gpie |= IXGBE_SDP1_GPIEN_8259X | IXGBE_SDP2_GPIEN_8259X;
   5622		break;
   5623	case ixgbe_mac_X550EM_x:
   5624	case ixgbe_mac_x550em_a:
   5625		gpie |= IXGBE_SDP0_GPIEN_X540;
   5626		break;
   5627	default:
   5628		break;
   5629	}
   5630
   5631	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
   5632}
   5633
   5634static void ixgbe_up_complete(struct ixgbe_adapter *adapter)
   5635{
   5636	struct ixgbe_hw *hw = &adapter->hw;
   5637	int err;
   5638	u32 ctrl_ext;
   5639
   5640	ixgbe_get_hw_control(adapter);
   5641	ixgbe_setup_gpie(adapter);
   5642
   5643	if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED)
   5644		ixgbe_configure_msix(adapter);
   5645	else
   5646		ixgbe_configure_msi_and_legacy(adapter);
   5647
   5648	/* enable the optics for 82599 SFP+ fiber */
   5649	if (hw->mac.ops.enable_tx_laser)
   5650		hw->mac.ops.enable_tx_laser(hw);
   5651
   5652	if (hw->phy.ops.set_phy_power)
   5653		hw->phy.ops.set_phy_power(hw, true);
   5654
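        	/* make sure the configuration above is visible before other
        	 * contexts can observe that __IXGBE_DOWN has been cleared
        	 */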
   5655	smp_mb__before_atomic();
   5656	clear_bit(__IXGBE_DOWN, &adapter->state);
   5657	ixgbe_napi_enable_all(adapter);
   5658
   5659	if (ixgbe_is_sfp(hw)) {
   5660		ixgbe_sfp_link_config(adapter);
   5661	} else {
   5662		err = ixgbe_non_sfp_link_config(hw);
   5663		if (err)
   5664			e_err(probe, "link_config FAILED %d\n", err);
   5665	}
   5666
   5667	/* clear any pending interrupts, may auto mask */
   5668	IXGBE_READ_REG(hw, IXGBE_EICR);
   5669	ixgbe_irq_enable(adapter, true, true);
   5670
   5671	/*
   5672	 * If this adapter has a fan, check to see if we had a failure
   5673	 * before we enabled the interrupt.
   5674	 */
   5675	if (adapter->flags & IXGBE_FLAG_FAN_FAIL_CAPABLE) {
   5676		u32 esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
   5677		if (esdp & IXGBE_ESDP_SDP1)
   5678			e_crit(drv, "Fan has stopped, replace the adapter\n");
   5679	}
   5680
    5681	/* bring the link up in the watchdog; this could race with our first
   5682	 * link up interrupt but shouldn't be a problem */
   5683	adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE;
   5684	adapter->link_check_timeout = jiffies;
   5685	mod_timer(&adapter->service_timer, jiffies);
   5686
   5687	/* Set PF Reset Done bit so PF/VF Mail Ops can work */
   5688	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
   5689	ctrl_ext |= IXGBE_CTRL_EXT_PFRSTD;
   5690	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
   5691
    5692	/* update Rx/Tx settings for all active VFs */
   5693	ixgbe_set_all_vfs(adapter);
   5694}
   5695
   5696void ixgbe_reinit_locked(struct ixgbe_adapter *adapter)
   5697{
   5698	/* put off any impending NetWatchDogTimeout */
   5699	netif_trans_update(adapter->netdev);
   5700
   5701	while (test_and_set_bit(__IXGBE_RESETTING, &adapter->state))
   5702		usleep_range(1000, 2000);
   5703	if (adapter->hw.phy.type == ixgbe_phy_fw)
   5704		ixgbe_watchdog_link_is_down(adapter);
   5705	ixgbe_down(adapter);
   5706	/*
   5707	 * If SR-IOV enabled then wait a bit before bringing the adapter
   5708	 * back up to give the VFs time to respond to the reset.  The
   5709	 * two second wait is based upon the watchdog timer cycle in
   5710	 * the VF driver.
   5711	 */
   5712	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
   5713		msleep(2000);
   5714	ixgbe_up(adapter);
   5715	clear_bit(__IXGBE_RESETTING, &adapter->state);
   5716}
   5717
   5718void ixgbe_up(struct ixgbe_adapter *adapter)
   5719{
   5720	/* hardware has been reset, we need to reload some things */
   5721	ixgbe_configure(adapter);
   5722
   5723	ixgbe_up_complete(adapter);
   5724}
   5725
   5726static unsigned long ixgbe_get_completion_timeout(struct ixgbe_adapter *adapter)
   5727{
   5728	u16 devctl2;
   5729
   5730	pcie_capability_read_word(adapter->pdev, PCI_EXP_DEVCTL2, &devctl2);
   5731
   5732	switch (devctl2 & IXGBE_PCIDEVCTRL2_TIMEO_MASK) {
   5733	case IXGBE_PCIDEVCTRL2_17_34s:
   5734	case IXGBE_PCIDEVCTRL2_4_8s:
   5735		/* For now we cap the upper limit on delay to 2 seconds
    5736		 * as we end up going up to 34 seconds of delay in the
    5737		 * worst-case timeout value.
   5738		 */
   5739	case IXGBE_PCIDEVCTRL2_1_2s:
   5740		return 2000000ul;	/* 2.0 s */
   5741	case IXGBE_PCIDEVCTRL2_260_520ms:
   5742		return 520000ul;	/* 520 ms */
   5743	case IXGBE_PCIDEVCTRL2_65_130ms:
   5744		return 130000ul;	/* 130 ms */
   5745	case IXGBE_PCIDEVCTRL2_16_32ms:
   5746		return 32000ul;		/* 32 ms */
   5747	case IXGBE_PCIDEVCTRL2_1_2ms:
   5748		return 2000ul;		/* 2 ms */
   5749	case IXGBE_PCIDEVCTRL2_50_100us:
   5750		return 100ul;		/* 100 us */
   5751	case IXGBE_PCIDEVCTRL2_16_32ms_def:
   5752		return 32000ul;		/* 32 ms */
   5753	default:
   5754		break;
   5755	}
   5756
    5757	/* We shouldn't need to hit this path, but just in case default to
    5758	 * 32ms as though completion timeout is not supported.
   5759	 */
   5760	return 32000ul;
   5761}
   5762
   5763void ixgbe_disable_rx(struct ixgbe_adapter *adapter)
   5764{
   5765	unsigned long wait_delay, delay_interval;
   5766	struct ixgbe_hw *hw = &adapter->hw;
   5767	int i, wait_loop;
   5768	u32 rxdctl;
   5769
   5770	/* disable receives */
   5771	hw->mac.ops.disable_rx(hw);
   5772
   5773	if (ixgbe_removed(hw->hw_addr))
   5774		return;
   5775
   5776	/* disable all enabled Rx queues */
   5777	for (i = 0; i < adapter->num_rx_queues; i++) {
   5778		struct ixgbe_ring *ring = adapter->rx_ring[i];
   5779		u8 reg_idx = ring->reg_idx;
   5780
   5781		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
   5782		rxdctl &= ~IXGBE_RXDCTL_ENABLE;
   5783		rxdctl |= IXGBE_RXDCTL_SWFLSH;
   5784
   5785		/* write value back with RXDCTL.ENABLE bit cleared */
   5786		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
   5787	}
   5788
   5789	/* RXDCTL.EN may not change on 82598 if link is down, so skip it */
   5790	if (hw->mac.type == ixgbe_mac_82598EB &&
   5791	    !(IXGBE_READ_REG(hw, IXGBE_LINKS) & IXGBE_LINKS_UP))
   5792		return;
   5793
   5794	/* Determine our minimum delay interval. We will increase this value
   5795	 * with each subsequent test. This way if the device returns quickly
    5796	 * we should spend as little time as possible waiting; however, as
   5797	 * the time increases we will wait for larger periods of time.
   5798	 *
   5799	 * The trick here is that we increase the interval using the
   5800	 * following pattern: 1x 3x 5x 7x 9x 11x 13x 15x 17x 19x. The result
   5801	 * of that wait is that it totals up to 100x whatever interval we
   5802	 * choose. Since our minimum wait is 100us we can just divide the
   5803	 * total timeout by 100 to get our minimum delay interval.
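        	 * (The ten odd multiples 1 + 3 + ... + 19 sum to exactly 100,
        	 * which is why dividing the total timeout by 100 works out.)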
   5804	 */
   5805	delay_interval = ixgbe_get_completion_timeout(adapter) / 100;
   5806
   5807	wait_loop = IXGBE_MAX_RX_DESC_POLL;
   5808	wait_delay = delay_interval;
   5809
   5810	while (wait_loop--) {
   5811		usleep_range(wait_delay, wait_delay + 10);
   5812		wait_delay += delay_interval * 2;
   5813		rxdctl = 0;
   5814
   5815		/* OR together the reading of all the active RXDCTL registers,
   5816		 * and then test the result. We need the disable to complete
   5817		 * before we start freeing the memory and invalidating the
   5818		 * DMA mappings.
   5819		 */
   5820		for (i = 0; i < adapter->num_rx_queues; i++) {
   5821			struct ixgbe_ring *ring = adapter->rx_ring[i];
   5822			u8 reg_idx = ring->reg_idx;
   5823
   5824			rxdctl |= IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
   5825		}
   5826
   5827		if (!(rxdctl & IXGBE_RXDCTL_ENABLE))
   5828			return;
   5829	}
   5830
   5831	e_err(drv,
   5832	      "RXDCTL.ENABLE for one or more queues not cleared within the polling period\n");
   5833}
   5834
   5835void ixgbe_disable_tx(struct ixgbe_adapter *adapter)
   5836{
   5837	unsigned long wait_delay, delay_interval;
   5838	struct ixgbe_hw *hw = &adapter->hw;
   5839	int i, wait_loop;
   5840	u32 txdctl;
   5841
   5842	if (ixgbe_removed(hw->hw_addr))
   5843		return;
   5844
   5845	/* disable all enabled Tx queues */
   5846	for (i = 0; i < adapter->num_tx_queues; i++) {
   5847		struct ixgbe_ring *ring = adapter->tx_ring[i];
   5848		u8 reg_idx = ring->reg_idx;
   5849
   5850		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), IXGBE_TXDCTL_SWFLSH);
   5851	}
   5852
   5853	/* disable all enabled XDP Tx queues */
   5854	for (i = 0; i < adapter->num_xdp_queues; i++) {
   5855		struct ixgbe_ring *ring = adapter->xdp_ring[i];
   5856		u8 reg_idx = ring->reg_idx;
   5857
   5858		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), IXGBE_TXDCTL_SWFLSH);
   5859	}
   5860
   5861	/* If the link is not up there shouldn't be much in the way of
   5862	 * pending transactions. Those that are left will be flushed out
   5863	 * when the reset logic goes through the flush sequence to clean out
   5864	 * the pending Tx transactions.
   5865	 */
   5866	if (!(IXGBE_READ_REG(hw, IXGBE_LINKS) & IXGBE_LINKS_UP))
   5867		goto dma_engine_disable;
   5868
   5869	/* Determine our minimum delay interval. We will increase this value
   5870	 * with each subsequent test. This way if the device returns quickly
    5871	 * we should spend as little time as possible waiting; however, as
   5872	 * the time increases we will wait for larger periods of time.
   5873	 *
   5874	 * The trick here is that we increase the interval using the
   5875	 * following pattern: 1x 3x 5x 7x 9x 11x 13x 15x 17x 19x. The result
   5876	 * of that wait is that it totals up to 100x whatever interval we
   5877	 * choose. Since our minimum wait is 100us we can just divide the
   5878	 * total timeout by 100 to get our minimum delay interval.
   5879	 */
   5880	delay_interval = ixgbe_get_completion_timeout(adapter) / 100;
   5881
   5882	wait_loop = IXGBE_MAX_RX_DESC_POLL;
   5883	wait_delay = delay_interval;
   5884
   5885	while (wait_loop--) {
   5886		usleep_range(wait_delay, wait_delay + 10);
   5887		wait_delay += delay_interval * 2;
   5888		txdctl = 0;
   5889
   5890		/* OR together the reading of all the active TXDCTL registers,
   5891		 * and then test the result. We need the disable to complete
   5892		 * before we start freeing the memory and invalidating the
   5893		 * DMA mappings.
   5894		 */
   5895		for (i = 0; i < adapter->num_tx_queues; i++) {
   5896			struct ixgbe_ring *ring = adapter->tx_ring[i];
   5897			u8 reg_idx = ring->reg_idx;
   5898
   5899			txdctl |= IXGBE_READ_REG(hw, IXGBE_TXDCTL(reg_idx));
   5900		}
   5901		for (i = 0; i < adapter->num_xdp_queues; i++) {
   5902			struct ixgbe_ring *ring = adapter->xdp_ring[i];
   5903			u8 reg_idx = ring->reg_idx;
   5904
   5905			txdctl |= IXGBE_READ_REG(hw, IXGBE_TXDCTL(reg_idx));
   5906		}
   5907
   5908		if (!(txdctl & IXGBE_TXDCTL_ENABLE))
   5909			goto dma_engine_disable;
   5910	}
   5911
   5912	e_err(drv,
   5913	      "TXDCTL.ENABLE for one or more queues not cleared within the polling period\n");
   5914
   5915dma_engine_disable:
   5916	/* Disable the Tx DMA engine on 82599 and later MAC */
   5917	switch (hw->mac.type) {
   5918	case ixgbe_mac_82599EB:
   5919	case ixgbe_mac_X540:
   5920	case ixgbe_mac_X550:
   5921	case ixgbe_mac_X550EM_x:
   5922	case ixgbe_mac_x550em_a:
   5923		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL,
   5924				(IXGBE_READ_REG(hw, IXGBE_DMATXCTL) &
   5925				 ~IXGBE_DMATXCTL_TE));
   5926		fallthrough;
   5927	default:
   5928		break;
   5929	}
   5930}
   5931
   5932void ixgbe_reset(struct ixgbe_adapter *adapter)
   5933{
   5934	struct ixgbe_hw *hw = &adapter->hw;
   5935	struct net_device *netdev = adapter->netdev;
   5936	int err;
   5937
   5938	if (ixgbe_removed(hw->hw_addr))
   5939		return;
   5940	/* lock SFP init bit to prevent race conditions with the watchdog */
   5941	while (test_and_set_bit(__IXGBE_IN_SFP_INIT, &adapter->state))
   5942		usleep_range(1000, 2000);
   5943
   5944	/* clear all SFP and link config related flags while holding SFP_INIT */
   5945	adapter->flags2 &= ~(IXGBE_FLAG2_SEARCH_FOR_SFP |
   5946			     IXGBE_FLAG2_SFP_NEEDS_RESET);
   5947	adapter->flags &= ~IXGBE_FLAG_NEED_LINK_CONFIG;
   5948
   5949	err = hw->mac.ops.init_hw(hw);
   5950	switch (err) {
   5951	case 0:
   5952	case IXGBE_ERR_SFP_NOT_PRESENT:
   5953	case IXGBE_ERR_SFP_NOT_SUPPORTED:
   5954		break;
   5955	case IXGBE_ERR_PRIMARY_REQUESTS_PENDING:
   5956		e_dev_err("primary disable timed out\n");
   5957		break;
   5958	case IXGBE_ERR_EEPROM_VERSION:
   5959		/* We are running on a pre-production device, log a warning */
   5960		e_dev_warn("This device is a pre-production adapter/LOM. "
   5961			   "Please be aware there may be issues associated with "
   5962			   "your hardware.  If you are experiencing problems "
   5963			   "please contact your Intel or hardware "
   5964			   "representative who provided you with this "
   5965			   "hardware.\n");
   5966		break;
   5967	default:
   5968		e_dev_err("Hardware Error: %d\n", err);
   5969	}
   5970
   5971	clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state);
   5972
   5973	/* flush entries out of MAC table */
   5974	ixgbe_flush_sw_mac_table(adapter);
   5975	__dev_uc_unsync(netdev, NULL);
   5976
   5977	/* do not flush user set addresses */
   5978	ixgbe_mac_set_default_filter(adapter);
   5979
   5980	/* update SAN MAC vmdq pool selection */
   5981	if (hw->mac.san_mac_rar_index)
   5982		hw->mac.ops.set_vmdq_san_mac(hw, VMDQ_P(0));
   5983
   5984	if (test_bit(__IXGBE_PTP_RUNNING, &adapter->state))
   5985		ixgbe_ptp_reset(adapter);
   5986
   5987	if (hw->phy.ops.set_phy_power) {
   5988		if (!netif_running(adapter->netdev) && !adapter->wol)
   5989			hw->phy.ops.set_phy_power(hw, false);
   5990		else
   5991			hw->phy.ops.set_phy_power(hw, true);
   5992	}
   5993}
   5994
   5995/**
   5996 * ixgbe_clean_tx_ring - Free Tx Buffers
   5997 * @tx_ring: ring to be cleaned
   5998 **/
   5999static void ixgbe_clean_tx_ring(struct ixgbe_ring *tx_ring)
   6000{
   6001	u16 i = tx_ring->next_to_clean;
   6002	struct ixgbe_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
   6003
   6004	if (tx_ring->xsk_pool) {
   6005		ixgbe_xsk_clean_tx_ring(tx_ring);
   6006		goto out;
   6007	}
   6008
   6009	while (i != tx_ring->next_to_use) {
   6010		union ixgbe_adv_tx_desc *eop_desc, *tx_desc;
   6011
   6012		/* Free all the Tx ring sk_buffs */
   6013		if (ring_is_xdp(tx_ring))
   6014			xdp_return_frame(tx_buffer->xdpf);
   6015		else
   6016			dev_kfree_skb_any(tx_buffer->skb);
   6017
   6018		/* unmap skb header data */
   6019		dma_unmap_single(tx_ring->dev,
   6020				 dma_unmap_addr(tx_buffer, dma),
   6021				 dma_unmap_len(tx_buffer, len),
   6022				 DMA_TO_DEVICE);
   6023
   6024		/* check for eop_desc to determine the end of the packet */
   6025		eop_desc = tx_buffer->next_to_watch;
   6026		tx_desc = IXGBE_TX_DESC(tx_ring, i);
   6027
   6028		/* unmap remaining buffers */
   6029		while (tx_desc != eop_desc) {
   6030			tx_buffer++;
   6031			tx_desc++;
   6032			i++;
   6033			if (unlikely(i == tx_ring->count)) {
   6034				i = 0;
   6035				tx_buffer = tx_ring->tx_buffer_info;
   6036				tx_desc = IXGBE_TX_DESC(tx_ring, 0);
   6037			}
   6038
   6039			/* unmap any remaining paged data */
   6040			if (dma_unmap_len(tx_buffer, len))
   6041				dma_unmap_page(tx_ring->dev,
   6042					       dma_unmap_addr(tx_buffer, dma),
   6043					       dma_unmap_len(tx_buffer, len),
   6044					       DMA_TO_DEVICE);
   6045		}
   6046
   6047		/* move us one more past the eop_desc for start of next pkt */
   6048		tx_buffer++;
   6049		i++;
   6050		if (unlikely(i == tx_ring->count)) {
   6051			i = 0;
   6052			tx_buffer = tx_ring->tx_buffer_info;
   6053		}
   6054	}
   6055
   6056	/* reset BQL for queue */
   6057	if (!ring_is_xdp(tx_ring))
   6058		netdev_tx_reset_queue(txring_txq(tx_ring));
   6059
   6060out:
   6061	/* reset next_to_use and next_to_clean */
   6062	tx_ring->next_to_use = 0;
   6063	tx_ring->next_to_clean = 0;
   6064}
   6065
   6066/**
   6067 * ixgbe_clean_all_rx_rings - Free Rx Buffers for all queues
   6068 * @adapter: board private structure
   6069 **/
   6070static void ixgbe_clean_all_rx_rings(struct ixgbe_adapter *adapter)
   6071{
   6072	int i;
   6073
   6074	for (i = 0; i < adapter->num_rx_queues; i++)
   6075		ixgbe_clean_rx_ring(adapter->rx_ring[i]);
   6076}
   6077
   6078/**
   6079 * ixgbe_clean_all_tx_rings - Free Tx Buffers for all queues
   6080 * @adapter: board private structure
   6081 **/
   6082static void ixgbe_clean_all_tx_rings(struct ixgbe_adapter *adapter)
   6083{
   6084	int i;
   6085
   6086	for (i = 0; i < adapter->num_tx_queues; i++)
   6087		ixgbe_clean_tx_ring(adapter->tx_ring[i]);
   6088	for (i = 0; i < adapter->num_xdp_queues; i++)
   6089		ixgbe_clean_tx_ring(adapter->xdp_ring[i]);
   6090}
   6091
   6092static void ixgbe_fdir_filter_exit(struct ixgbe_adapter *adapter)
   6093{
   6094	struct hlist_node *node2;
   6095	struct ixgbe_fdir_filter *filter;
   6096
   6097	spin_lock(&adapter->fdir_perfect_lock);
   6098
   6099	hlist_for_each_entry_safe(filter, node2,
   6100				  &adapter->fdir_filter_list, fdir_node) {
   6101		hlist_del(&filter->fdir_node);
   6102		kfree(filter);
   6103	}
   6104	adapter->fdir_filter_count = 0;
   6105
   6106	spin_unlock(&adapter->fdir_perfect_lock);
   6107}
   6108
   6109void ixgbe_down(struct ixgbe_adapter *adapter)
   6110{
   6111	struct net_device *netdev = adapter->netdev;
   6112	struct ixgbe_hw *hw = &adapter->hw;
   6113	int i;
   6114
   6115	/* signal that we are down to the interrupt handler */
   6116	if (test_and_set_bit(__IXGBE_DOWN, &adapter->state))
   6117		return; /* do nothing if already down */
   6118
    6119	/* Shut off Tx traffic coming in from the stack */
   6120	netif_tx_stop_all_queues(netdev);
   6121
   6122	/* call carrier off first to avoid false dev_watchdog timeouts */
   6123	netif_carrier_off(netdev);
   6124	netif_tx_disable(netdev);
   6125
   6126	/* Disable Rx */
   6127	ixgbe_disable_rx(adapter);
   6128
   6129	/* synchronize_rcu() needed for pending XDP buffers to drain */
   6130	if (adapter->xdp_ring[0])
   6131		synchronize_rcu();
   6132
   6133	ixgbe_irq_disable(adapter);
   6134
   6135	ixgbe_napi_disable_all(adapter);
   6136
   6137	clear_bit(__IXGBE_RESET_REQUESTED, &adapter->state);
   6138	adapter->flags2 &= ~IXGBE_FLAG2_FDIR_REQUIRES_REINIT;
   6139	adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE;
   6140
   6141	del_timer_sync(&adapter->service_timer);
   6142
   6143	if (adapter->num_vfs) {
   6144		/* Clear EITR Select mapping */
   6145		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITRSEL, 0);
   6146
   6147		/* Mark all the VFs as inactive */
   6148		for (i = 0 ; i < adapter->num_vfs; i++)
   6149			adapter->vfinfo[i].clear_to_send = false;
   6150
    6151		/* update Rx/Tx settings for all active VFs */
   6152		ixgbe_set_all_vfs(adapter);
   6153	}
   6154
   6155	/* disable transmits in the hardware now that interrupts are off */
   6156	ixgbe_disable_tx(adapter);
   6157
   6158	if (!pci_channel_offline(adapter->pdev))
   6159		ixgbe_reset(adapter);
   6160
   6161	/* power down the optics for 82599 SFP+ fiber */
   6162	if (hw->mac.ops.disable_tx_laser)
   6163		hw->mac.ops.disable_tx_laser(hw);
   6164
   6165	ixgbe_clean_all_tx_rings(adapter);
   6166	ixgbe_clean_all_rx_rings(adapter);
   6167}
   6168
   6169/**
   6170 * ixgbe_set_eee_capable - helper function to determine EEE support on X550
   6171 * @adapter: board private structure
   6172 */
   6173static void ixgbe_set_eee_capable(struct ixgbe_adapter *adapter)
   6174{
   6175	struct ixgbe_hw *hw = &adapter->hw;
   6176
   6177	switch (hw->device_id) {
   6178	case IXGBE_DEV_ID_X550EM_A_1G_T:
   6179	case IXGBE_DEV_ID_X550EM_A_1G_T_L:
   6180		if (!hw->phy.eee_speeds_supported)
   6181			break;
   6182		adapter->flags2 |= IXGBE_FLAG2_EEE_CAPABLE;
   6183		if (!hw->phy.eee_speeds_advertised)
   6184			break;
   6185		adapter->flags2 |= IXGBE_FLAG2_EEE_ENABLED;
   6186		break;
   6187	default:
   6188		adapter->flags2 &= ~IXGBE_FLAG2_EEE_CAPABLE;
   6189		adapter->flags2 &= ~IXGBE_FLAG2_EEE_ENABLED;
   6190		break;
   6191	}
   6192}
   6193
   6194/**
   6195 * ixgbe_tx_timeout - Respond to a Tx Hang
   6196 * @netdev: network interface device structure
   6197 * @txqueue: queue number that timed out
   6198 **/
   6199static void ixgbe_tx_timeout(struct net_device *netdev, unsigned int __always_unused txqueue)
   6200{
   6201	struct ixgbe_adapter *adapter = netdev_priv(netdev);
   6202
   6203	/* Do the reset outside of interrupt context */
   6204	ixgbe_tx_timeout_reset(adapter);
   6205}
   6206
   6207#ifdef CONFIG_IXGBE_DCB
   6208static void ixgbe_init_dcb(struct ixgbe_adapter *adapter)
   6209{
   6210	struct ixgbe_hw *hw = &adapter->hw;
   6211	struct tc_configuration *tc;
   6212	int j;
   6213
   6214	switch (hw->mac.type) {
   6215	case ixgbe_mac_82598EB:
   6216	case ixgbe_mac_82599EB:
   6217		adapter->dcb_cfg.num_tcs.pg_tcs = MAX_TRAFFIC_CLASS;
   6218		adapter->dcb_cfg.num_tcs.pfc_tcs = MAX_TRAFFIC_CLASS;
   6219		break;
   6220	case ixgbe_mac_X540:
   6221	case ixgbe_mac_X550:
   6222		adapter->dcb_cfg.num_tcs.pg_tcs = X540_TRAFFIC_CLASS;
   6223		adapter->dcb_cfg.num_tcs.pfc_tcs = X540_TRAFFIC_CLASS;
   6224		break;
   6225	case ixgbe_mac_X550EM_x:
   6226	case ixgbe_mac_x550em_a:
   6227	default:
   6228		adapter->dcb_cfg.num_tcs.pg_tcs = DEF_TRAFFIC_CLASS;
   6229		adapter->dcb_cfg.num_tcs.pfc_tcs = DEF_TRAFFIC_CLASS;
   6230		break;
   6231	}
   6232
   6233	/* Configure DCB traffic classes */
   6234	for (j = 0; j < MAX_TRAFFIC_CLASS; j++) {
   6235		tc = &adapter->dcb_cfg.tc_config[j];
   6236		tc->path[DCB_TX_CONFIG].bwg_id = 0;
   6237		tc->path[DCB_TX_CONFIG].bwg_percent = 12 + (j & 1);
   6238		tc->path[DCB_RX_CONFIG].bwg_id = 0;
   6239		tc->path[DCB_RX_CONFIG].bwg_percent = 12 + (j & 1);
   6240		tc->dcb_pfc = pfc_disabled;
   6241	}
   6242
   6243	/* Initialize default user to priority mapping, UPx->TC0 */
   6244	tc = &adapter->dcb_cfg.tc_config[0];
   6245	tc->path[DCB_TX_CONFIG].up_to_tc_bitmap = 0xFF;
   6246	tc->path[DCB_RX_CONFIG].up_to_tc_bitmap = 0xFF;
   6247
   6248	adapter->dcb_cfg.bw_percentage[DCB_TX_CONFIG][0] = 100;
   6249	adapter->dcb_cfg.bw_percentage[DCB_RX_CONFIG][0] = 100;
   6250	adapter->dcb_cfg.pfc_mode_enable = false;
   6251	adapter->dcb_set_bitmap = 0x00;
   6252	if (adapter->flags & IXGBE_FLAG_DCB_CAPABLE)
   6253		adapter->dcbx_cap = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_CEE;
   6254	memcpy(&adapter->temp_dcb_cfg, &adapter->dcb_cfg,
   6255	       sizeof(adapter->temp_dcb_cfg));
   6256}
   6257#endif
   6258
   6259/**
   6260 * ixgbe_sw_init - Initialize general software structures (struct ixgbe_adapter)
   6261 * @adapter: board private structure to initialize
   6262 * @ii: pointer to ixgbe_info for device
   6263 *
   6264 * ixgbe_sw_init initializes the Adapter private data structure.
   6265 * Fields are initialized based on PCI device information and
   6266 * OS network device settings (MTU size).
   6267 **/
   6268static int ixgbe_sw_init(struct ixgbe_adapter *adapter,
   6269			 const struct ixgbe_info *ii)
   6270{
   6271	struct ixgbe_hw *hw = &adapter->hw;
   6272	struct pci_dev *pdev = adapter->pdev;
   6273	unsigned int rss, fdir;
   6274	u32 fwsm;
   6275	int i;
   6276
   6277	/* PCI config space info */
   6278
   6279	hw->vendor_id = pdev->vendor;
   6280	hw->device_id = pdev->device;
   6281	hw->revision_id = pdev->revision;
   6282	hw->subsystem_vendor_id = pdev->subsystem_vendor;
   6283	hw->subsystem_device_id = pdev->subsystem_device;
   6284
   6285	/* get_invariants needs the device IDs */
   6286	ii->get_invariants(hw);
   6287
   6288	/* Set common capability flags and settings */
   6289	rss = min_t(int, ixgbe_max_rss_indices(adapter), num_online_cpus());
   6290	adapter->ring_feature[RING_F_RSS].limit = rss;
   6291	adapter->flags2 |= IXGBE_FLAG2_RSC_CAPABLE;
   6292	adapter->max_q_vectors = MAX_Q_VECTORS_82599;
   6293	adapter->atr_sample_rate = 20;
   6294	fdir = min_t(int, IXGBE_MAX_FDIR_INDICES, num_online_cpus());
   6295	adapter->ring_feature[RING_F_FDIR].limit = fdir;
   6296	adapter->fdir_pballoc = IXGBE_FDIR_PBALLOC_64K;
   6297	adapter->ring_feature[RING_F_VMDQ].limit = 1;
   6298#ifdef CONFIG_IXGBE_DCA
   6299	adapter->flags |= IXGBE_FLAG_DCA_CAPABLE;
   6300#endif
   6301#ifdef CONFIG_IXGBE_DCB
   6302	adapter->flags |= IXGBE_FLAG_DCB_CAPABLE;
   6303	adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
   6304#endif
   6305#ifdef IXGBE_FCOE
   6306	adapter->flags |= IXGBE_FLAG_FCOE_CAPABLE;
   6307	adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED;
   6308#ifdef CONFIG_IXGBE_DCB
   6309	/* Default traffic class to use for FCoE */
   6310	adapter->fcoe.up = IXGBE_FCOE_DEFTC;
   6311#endif /* CONFIG_IXGBE_DCB */
   6312#endif /* IXGBE_FCOE */
   6313
   6314	/* initialize static ixgbe jump table entries */
   6315	adapter->jump_tables[0] = kzalloc(sizeof(*adapter->jump_tables[0]),
   6316					  GFP_KERNEL);
   6317	if (!adapter->jump_tables[0])
   6318		return -ENOMEM;
   6319	adapter->jump_tables[0]->mat = ixgbe_ipv4_fields;
   6320
   6321	for (i = 1; i < IXGBE_MAX_LINK_HANDLE; i++)
   6322		adapter->jump_tables[i] = NULL;
   6323
   6324	adapter->mac_table = kcalloc(hw->mac.num_rar_entries,
   6325				     sizeof(struct ixgbe_mac_addr),
   6326				     GFP_KERNEL);
   6327	if (!adapter->mac_table)
   6328		return -ENOMEM;
   6329
   6330	if (ixgbe_init_rss_key(adapter))
   6331		return -ENOMEM;
   6332
   6333	adapter->af_xdp_zc_qps = bitmap_zalloc(IXGBE_MAX_XDP_QS, GFP_KERNEL);
   6334	if (!adapter->af_xdp_zc_qps)
   6335		return -ENOMEM;
   6336
   6337	/* Set MAC specific capability flags and exceptions */
   6338	switch (hw->mac.type) {
   6339	case ixgbe_mac_82598EB:
   6340		adapter->flags2 &= ~IXGBE_FLAG2_RSC_CAPABLE;
   6341
   6342		if (hw->device_id == IXGBE_DEV_ID_82598AT)
   6343			adapter->flags |= IXGBE_FLAG_FAN_FAIL_CAPABLE;
   6344
   6345		adapter->max_q_vectors = MAX_Q_VECTORS_82598;
   6346		adapter->ring_feature[RING_F_FDIR].limit = 0;
   6347		adapter->atr_sample_rate = 0;
   6348		adapter->fdir_pballoc = 0;
   6349#ifdef IXGBE_FCOE
   6350		adapter->flags &= ~IXGBE_FLAG_FCOE_CAPABLE;
   6351		adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED;
   6352#ifdef CONFIG_IXGBE_DCB
   6353		adapter->fcoe.up = 0;
    6354#endif /* CONFIG_IXGBE_DCB */
   6355#endif /* IXGBE_FCOE */
   6356		break;
   6357	case ixgbe_mac_82599EB:
   6358		if (hw->device_id == IXGBE_DEV_ID_82599_T3_LOM)
   6359			adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE;
   6360		break;
   6361	case ixgbe_mac_X540:
   6362		fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM(hw));
   6363		if (fwsm & IXGBE_FWSM_TS_ENABLED)
   6364			adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE;
   6365		break;
   6366	case ixgbe_mac_x550em_a:
   6367		switch (hw->device_id) {
   6368		case IXGBE_DEV_ID_X550EM_A_1G_T:
   6369		case IXGBE_DEV_ID_X550EM_A_1G_T_L:
   6370			adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE;
   6371			break;
   6372		default:
   6373			break;
   6374		}
   6375		fallthrough;
   6376	case ixgbe_mac_X550EM_x:
   6377#ifdef CONFIG_IXGBE_DCB
   6378		adapter->flags &= ~IXGBE_FLAG_DCB_CAPABLE;
   6379#endif
   6380#ifdef IXGBE_FCOE
   6381		adapter->flags &= ~IXGBE_FLAG_FCOE_CAPABLE;
   6382#ifdef CONFIG_IXGBE_DCB
   6383		adapter->fcoe.up = 0;
    6384#endif /* CONFIG_IXGBE_DCB */
   6385#endif /* IXGBE_FCOE */
   6386		fallthrough;
   6387	case ixgbe_mac_X550:
   6388		if (hw->mac.type == ixgbe_mac_X550)
   6389			adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE;
   6390#ifdef CONFIG_IXGBE_DCA
   6391		adapter->flags &= ~IXGBE_FLAG_DCA_CAPABLE;
   6392#endif
   6393		break;
   6394	default:
   6395		break;
   6396	}
   6397
   6398#ifdef IXGBE_FCOE
   6399	/* FCoE support exists, always init the FCoE lock */
   6400	spin_lock_init(&adapter->fcoe.lock);
   6401
   6402#endif
   6403	/* n-tuple support exists, always init our spinlock */
   6404	spin_lock_init(&adapter->fdir_perfect_lock);
   6405
   6406#ifdef CONFIG_IXGBE_DCB
   6407	ixgbe_init_dcb(adapter);
   6408#endif
   6409	ixgbe_init_ipsec_offload(adapter);
   6410
   6411	/* default flow control settings */
   6412	hw->fc.requested_mode = ixgbe_fc_full;
   6413	hw->fc.current_mode = ixgbe_fc_full;	/* init for ethtool output */
   6414	ixgbe_pbthresh_setup(adapter);
   6415	hw->fc.pause_time = IXGBE_DEFAULT_FCPAUSE;
   6416	hw->fc.send_xon = true;
   6417	hw->fc.disable_fc_autoneg = ixgbe_device_supports_autoneg_fc(hw);
   6418
   6419#ifdef CONFIG_PCI_IOV
   6420	if (max_vfs > 0)
   6421		e_dev_warn("Enabling SR-IOV VFs using the max_vfs module parameter is deprecated - please use the pci sysfs interface instead.\n");
   6422
   6423	/* assign number of SR-IOV VFs */
   6424	if (hw->mac.type != ixgbe_mac_82598EB) {
   6425		if (max_vfs > IXGBE_MAX_VFS_DRV_LIMIT) {
   6426			max_vfs = 0;
   6427			e_dev_warn("max_vfs parameter out of range. Not assigning any SR-IOV VFs\n");
   6428		}
   6429	}
   6430#endif /* CONFIG_PCI_IOV */
   6431
   6432	/* enable itr by default in dynamic mode */
   6433	adapter->rx_itr_setting = 1;
   6434	adapter->tx_itr_setting = 1;
   6435
   6436	/* set default ring sizes */
   6437	adapter->tx_ring_count = IXGBE_DEFAULT_TXD;
   6438	adapter->rx_ring_count = IXGBE_DEFAULT_RXD;
   6439
   6440	/* set default work limits */
   6441	adapter->tx_work_limit = IXGBE_DEFAULT_TX_WORK;
   6442
   6443	/* initialize eeprom parameters */
   6444	if (ixgbe_init_eeprom_params_generic(hw)) {
   6445		e_dev_err("EEPROM initialization failed\n");
   6446		return -EIO;
   6447	}
   6448
   6449	/* PF holds first pool slot */
   6450	set_bit(0, adapter->fwd_bitmask);
   6451	set_bit(__IXGBE_DOWN, &adapter->state);
   6452
   6453	return 0;
   6454}
   6455
   6456/**
   6457 * ixgbe_setup_tx_resources - allocate Tx resources (Descriptors)
   6458 * @tx_ring:    tx descriptor ring (for a specific queue) to setup
   6459 *
   6460 * Return 0 on success, negative on failure
   6461 **/
   6462int ixgbe_setup_tx_resources(struct ixgbe_ring *tx_ring)
   6463{
   6464	struct device *dev = tx_ring->dev;
   6465	int orig_node = dev_to_node(dev);
   6466	int ring_node = NUMA_NO_NODE;
   6467	int size;
   6468
   6469	size = sizeof(struct ixgbe_tx_buffer) * tx_ring->count;
   6470
   6471	if (tx_ring->q_vector)
   6472		ring_node = tx_ring->q_vector->numa_node;
   6473
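       	/* Allocate the software buffer array on the ring's NUMA node when
       	 * possible; fall back to any node rather than failing outright.
       	 */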
   6474	tx_ring->tx_buffer_info = vmalloc_node(size, ring_node);
   6475	if (!tx_ring->tx_buffer_info)
   6476		tx_ring->tx_buffer_info = vmalloc(size);
   6477	if (!tx_ring->tx_buffer_info)
   6478		goto err;
   6479
   6480	/* round up to nearest 4K */
   6481	tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc);
   6482	tx_ring->size = ALIGN(tx_ring->size, 4096);
   6483
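       	/* Temporarily point the device at the ring's node so the descriptor
       	 * ring is allocated there; restore the original node and retry
       	 * without the hint if that allocation fails.
       	 */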
   6484	set_dev_node(dev, ring_node);
   6485	tx_ring->desc = dma_alloc_coherent(dev,
   6486					   tx_ring->size,
   6487					   &tx_ring->dma,
   6488					   GFP_KERNEL);
   6489	set_dev_node(dev, orig_node);
   6490	if (!tx_ring->desc)
   6491		tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
   6492						   &tx_ring->dma, GFP_KERNEL);
   6493	if (!tx_ring->desc)
   6494		goto err;
   6495
   6496	tx_ring->next_to_use = 0;
   6497	tx_ring->next_to_clean = 0;
   6498	return 0;
   6499
   6500err:
   6501	vfree(tx_ring->tx_buffer_info);
   6502	tx_ring->tx_buffer_info = NULL;
   6503	dev_err(dev, "Unable to allocate memory for the Tx descriptor ring\n");
   6504	return -ENOMEM;
   6505}
   6506
   6507/**
   6508 * ixgbe_setup_all_tx_resources - allocate all queues Tx resources
   6509 * @adapter: board private structure
   6510 *
   6511 * If this function returns with an error, then it's possible one or
   6512 * more of the rings is populated (while the rest are not).  It is the
   6513 * caller's duty to clean those orphaned rings.
   6514 *
   6515 * Return 0 on success, negative on failure
   6516 **/
   6517static int ixgbe_setup_all_tx_resources(struct ixgbe_adapter *adapter)
   6518{
   6519	int i, j = 0, err = 0;
   6520
   6521	for (i = 0; i < adapter->num_tx_queues; i++) {
   6522		err = ixgbe_setup_tx_resources(adapter->tx_ring[i]);
   6523		if (!err)
   6524			continue;
   6525
   6526		e_err(probe, "Allocation for Tx Queue %u failed\n", i);
   6527		goto err_setup_tx;
   6528	}
   6529	for (j = 0; j < adapter->num_xdp_queues; j++) {
   6530		err = ixgbe_setup_tx_resources(adapter->xdp_ring[j]);
   6531		if (!err)
   6532			continue;
   6533
   6534		e_err(probe, "Allocation for Tx Queue %u failed\n", j);
   6535		goto err_setup_tx;
   6536	}
   6537
   6538	return 0;
   6539err_setup_tx:
   6540	/* rewind the index freeing the rings as we go */
   6541	while (j--)
   6542		ixgbe_free_tx_resources(adapter->xdp_ring[j]);
   6543	while (i--)
   6544		ixgbe_free_tx_resources(adapter->tx_ring[i]);
   6545	return err;
   6546}
   6547
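       /* Return the napi_id used when registering the XDP Rx queue info,
        * or 0 if the ring has no q_vector attached.
        */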
   6548static int ixgbe_rx_napi_id(struct ixgbe_ring *rx_ring)
   6549{
   6550	struct ixgbe_q_vector *q_vector = rx_ring->q_vector;
   6551
   6552	return q_vector ? q_vector->napi.napi_id : 0;
   6553}
   6554
   6555/**
   6556 * ixgbe_setup_rx_resources - allocate Rx resources (Descriptors)
   6557 * @adapter: pointer to ixgbe_adapter
   6558 * @rx_ring:    rx descriptor ring (for a specific queue) to setup
   6559 *
   6560 * Returns 0 on success, negative on failure
   6561 **/
   6562int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
   6563			     struct ixgbe_ring *rx_ring)
   6564{
   6565	struct device *dev = rx_ring->dev;
   6566	int orig_node = dev_to_node(dev);
   6567	int ring_node = NUMA_NO_NODE;
   6568	int size;
   6569
   6570	size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count;
   6571
   6572	if (rx_ring->q_vector)
   6573		ring_node = rx_ring->q_vector->numa_node;
   6574
   6575	rx_ring->rx_buffer_info = vmalloc_node(size, ring_node);
   6576	if (!rx_ring->rx_buffer_info)
   6577		rx_ring->rx_buffer_info = vmalloc(size);
   6578	if (!rx_ring->rx_buffer_info)
   6579		goto err;
   6580
   6581	/* Round up to nearest 4K */
   6582	rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc);
   6583	rx_ring->size = ALIGN(rx_ring->size, 4096);
   6584
   6585	set_dev_node(dev, ring_node);
   6586	rx_ring->desc = dma_alloc_coherent(dev,
   6587					   rx_ring->size,
   6588					   &rx_ring->dma,
   6589					   GFP_KERNEL);
   6590	set_dev_node(dev, orig_node);
   6591	if (!rx_ring->desc)
   6592		rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
   6593						   &rx_ring->dma, GFP_KERNEL);
   6594	if (!rx_ring->desc)
   6595		goto err;
   6596
   6597	rx_ring->next_to_clean = 0;
   6598	rx_ring->next_to_use = 0;
   6599
   6600	/* XDP RX-queue info */
   6601	if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev,
   6602			     rx_ring->queue_index, ixgbe_rx_napi_id(rx_ring)) < 0)
   6603		goto err;
   6604
   6605	rx_ring->xdp_prog = adapter->xdp_prog;
   6606
   6607	return 0;
   6608err:
   6609	vfree(rx_ring->rx_buffer_info);
   6610	rx_ring->rx_buffer_info = NULL;
   6611	dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n");
   6612	return -ENOMEM;
   6613}
   6614
   6615/**
   6616 * ixgbe_setup_all_rx_resources - allocate all queues Rx resources
   6617 * @adapter: board private structure
   6618 *
   6619 * If this function returns with an error, then it's possible one or
   6620 * more of the rings is populated (while the rest are not).  It is the
   6621 * caller's duty to clean those orphaned rings.
   6622 *
   6623 * Return 0 on success, negative on failure
   6624 **/
   6625static int ixgbe_setup_all_rx_resources(struct ixgbe_adapter *adapter)
   6626{
   6627	int i, err = 0;
   6628
   6629	for (i = 0; i < adapter->num_rx_queues; i++) {
   6630		err = ixgbe_setup_rx_resources(adapter, adapter->rx_ring[i]);
   6631		if (!err)
   6632			continue;
   6633
   6634		e_err(probe, "Allocation for Rx Queue %u failed\n", i);
   6635		goto err_setup_rx;
   6636	}
   6637
   6638#ifdef IXGBE_FCOE
   6639	err = ixgbe_setup_fcoe_ddp_resources(adapter);
   6640	if (!err)
   6641#endif
   6642		return 0;
   6643err_setup_rx:
   6644	/* rewind the index freeing the rings as we go */
   6645	while (i--)
   6646		ixgbe_free_rx_resources(adapter->rx_ring[i]);
   6647	return err;
   6648}
   6649
   6650/**
   6651 * ixgbe_free_tx_resources - Free Tx Resources per Queue
   6652 * @tx_ring: Tx descriptor ring for a specific queue
   6653 *
   6654 * Free all transmit software resources
   6655 **/
   6656void ixgbe_free_tx_resources(struct ixgbe_ring *tx_ring)
   6657{
   6658	ixgbe_clean_tx_ring(tx_ring);
   6659
   6660	vfree(tx_ring->tx_buffer_info);
   6661	tx_ring->tx_buffer_info = NULL;
   6662
   6663	/* if not set, then don't free */
   6664	if (!tx_ring->desc)
   6665		return;
   6666
   6667	dma_free_coherent(tx_ring->dev, tx_ring->size,
   6668			  tx_ring->desc, tx_ring->dma);
   6669
   6670	tx_ring->desc = NULL;
   6671}
   6672
   6673/**
   6674 * ixgbe_free_all_tx_resources - Free Tx Resources for All Queues
   6675 * @adapter: board private structure
   6676 *
   6677 * Free all transmit software resources
   6678 **/
   6679static void ixgbe_free_all_tx_resources(struct ixgbe_adapter *adapter)
   6680{
   6681	int i;
   6682
   6683	for (i = 0; i < adapter->num_tx_queues; i++)
   6684		if (adapter->tx_ring[i]->desc)
   6685			ixgbe_free_tx_resources(adapter->tx_ring[i]);
   6686	for (i = 0; i < adapter->num_xdp_queues; i++)
   6687		if (adapter->xdp_ring[i]->desc)
   6688			ixgbe_free_tx_resources(adapter->xdp_ring[i]);
   6689}
   6690
   6691/**
   6692 * ixgbe_free_rx_resources - Free Rx Resources
   6693 * @rx_ring: ring to clean the resources from
   6694 *
   6695 * Free all receive software resources
   6696 **/
   6697void ixgbe_free_rx_resources(struct ixgbe_ring *rx_ring)
   6698{
   6699	ixgbe_clean_rx_ring(rx_ring);
   6700
   6701	rx_ring->xdp_prog = NULL;
   6702	xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
   6703	vfree(rx_ring->rx_buffer_info);
   6704	rx_ring->rx_buffer_info = NULL;
   6705
   6706	/* if not set, then don't free */
   6707	if (!rx_ring->desc)
   6708		return;
   6709
   6710	dma_free_coherent(rx_ring->dev, rx_ring->size,
   6711			  rx_ring->desc, rx_ring->dma);
   6712
   6713	rx_ring->desc = NULL;
   6714}
   6715
   6716/**
   6717 * ixgbe_free_all_rx_resources - Free Rx Resources for All Queues
   6718 * @adapter: board private structure
   6719 *
   6720 * Free all receive software resources
   6721 **/
   6722static void ixgbe_free_all_rx_resources(struct ixgbe_adapter *adapter)
   6723{
   6724	int i;
   6725
   6726#ifdef IXGBE_FCOE
   6727	ixgbe_free_fcoe_ddp_resources(adapter);
   6728
   6729#endif
   6730	for (i = 0; i < adapter->num_rx_queues; i++)
   6731		if (adapter->rx_ring[i]->desc)
   6732			ixgbe_free_rx_resources(adapter->rx_ring[i]);
   6733}
   6734
   6735/**
   6736 * ixgbe_change_mtu - Change the Maximum Transfer Unit
   6737 * @netdev: network interface device structure
   6738 * @new_mtu: new value for maximum frame size
   6739 *
   6740 * Returns 0 on success, negative on failure
   6741 **/
   6742static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu)
   6743{
   6744	struct ixgbe_adapter *adapter = netdev_priv(netdev);
   6745
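       	/* With an XDP program attached, each frame must fit in a single Rx
       	 * buffer, so refuse any MTU whose frame size exceeds the per-ring
       	 * buffer size.
       	 */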
   6746	if (adapter->xdp_prog) {
   6747		int new_frame_size = new_mtu + ETH_HLEN + ETH_FCS_LEN +
   6748				     VLAN_HLEN;
   6749		int i;
   6750
   6751		for (i = 0; i < adapter->num_rx_queues; i++) {
   6752			struct ixgbe_ring *ring = adapter->rx_ring[i];
   6753
   6754			if (new_frame_size > ixgbe_rx_bufsz(ring)) {
   6755				e_warn(probe, "Requested MTU size is not supported with XDP\n");
   6756				return -EINVAL;
   6757			}
   6758		}
   6759	}
   6760
   6761	/*
   6762	 * For 82599EB we cannot allow legacy VFs to enable their receive
   6763	 * paths when MTU greater than 1500 is configured.  So display a
   6764	 * warning that legacy VFs will be disabled.
   6765	 */
   6766	if ((adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) &&
   6767	    (adapter->hw.mac.type == ixgbe_mac_82599EB) &&
   6768	    (new_mtu > ETH_DATA_LEN))
   6769		e_warn(probe, "Setting MTU > 1500 will disable legacy VFs\n");
   6770
   6771	netdev_dbg(netdev, "changing MTU from %d to %d\n",
   6772		   netdev->mtu, new_mtu);
   6773
   6774	/* must set new MTU before calling down or up */
   6775	netdev->mtu = new_mtu;
   6776
   6777	if (netif_running(netdev))
   6778		ixgbe_reinit_locked(adapter);
   6779
   6780	return 0;
   6781}
   6782
   6783/**
   6784 * ixgbe_open - Called when a network interface is made active
   6785 * @netdev: network interface device structure
   6786 *
   6787 * Returns 0 on success, negative value on failure
   6788 *
   6789 * The open entry point is called when a network interface is made
   6790 * active by the system (IFF_UP).  At this point all resources needed
   6791 * for transmit and receive operations are allocated, the interrupt
   6792 * handler is registered with the OS, the watchdog timer is started,
   6793 * and the stack is notified that the interface is ready.
   6794 **/
   6795int ixgbe_open(struct net_device *netdev)
   6796{
   6797	struct ixgbe_adapter *adapter = netdev_priv(netdev);
   6798	struct ixgbe_hw *hw = &adapter->hw;
   6799	int err, queues;
   6800
   6801	/* disallow open during test */
   6802	if (test_bit(__IXGBE_TESTING, &adapter->state))
   6803		return -EBUSY;
   6804
   6805	netif_carrier_off(netdev);
   6806
   6807	/* allocate transmit descriptors */
   6808	err = ixgbe_setup_all_tx_resources(adapter);
   6809	if (err)
   6810		goto err_setup_tx;
   6811
   6812	/* allocate receive descriptors */
   6813	err = ixgbe_setup_all_rx_resources(adapter);
   6814	if (err)
   6815		goto err_setup_rx;
   6816
   6817	ixgbe_configure(adapter);
   6818
   6819	err = ixgbe_request_irq(adapter);
   6820	if (err)
   6821		goto err_req_irq;
   6822
   6823	/* Notify the stack of the actual queue counts. */
   6824	queues = adapter->num_tx_queues;
   6825	err = netif_set_real_num_tx_queues(netdev, queues);
   6826	if (err)
   6827		goto err_set_queues;
   6828
   6829	queues = adapter->num_rx_queues;
   6830	err = netif_set_real_num_rx_queues(netdev, queues);
   6831	if (err)
   6832		goto err_set_queues;
   6833
   6834	ixgbe_ptp_init(adapter);
   6835
   6836	ixgbe_up_complete(adapter);
   6837
   6838	udp_tunnel_nic_reset_ntf(netdev);
   6839
   6840	return 0;
   6841
   6842err_set_queues:
   6843	ixgbe_free_irq(adapter);
   6844err_req_irq:
   6845	ixgbe_free_all_rx_resources(adapter);
   6846	if (hw->phy.ops.set_phy_power && !adapter->wol)
   6847		hw->phy.ops.set_phy_power(&adapter->hw, false);
   6848err_setup_rx:
   6849	ixgbe_free_all_tx_resources(adapter);
   6850err_setup_tx:
   6851	ixgbe_reset(adapter);
   6852
   6853	return err;
   6854}
   6855
   6856static void ixgbe_close_suspend(struct ixgbe_adapter *adapter)
   6857{
   6858	ixgbe_ptp_suspend(adapter);
   6859
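       	/* If the PHY supports low power link up (LPLU), enter it while the
       	 * interface is brought down and keep the PHY from being reset in
       	 * the process.
       	 */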
   6860	if (adapter->hw.phy.ops.enter_lplu) {
   6861		adapter->hw.phy.reset_disable = true;
   6862		ixgbe_down(adapter);
   6863		adapter->hw.phy.ops.enter_lplu(&adapter->hw);
   6864		adapter->hw.phy.reset_disable = false;
   6865	} else {
   6866		ixgbe_down(adapter);
   6867	}
   6868
   6869	ixgbe_free_irq(adapter);
   6870
   6871	ixgbe_free_all_tx_resources(adapter);
   6872	ixgbe_free_all_rx_resources(adapter);
   6873}
   6874
   6875/**
   6876 * ixgbe_close - Disables a network interface
   6877 * @netdev: network interface device structure
   6878 *
   6879 * Returns 0, this is not allowed to fail
   6880 *
   6881 * The close entry point is called when an interface is de-activated
   6882 * by the OS.  The hardware is still under the drivers control, but
   6883 * needs to be disabled.  A global MAC reset is issued to stop the
   6884 * hardware, and all transmit and receive resources are freed.
   6885 **/
   6886int ixgbe_close(struct net_device *netdev)
   6887{
   6888	struct ixgbe_adapter *adapter = netdev_priv(netdev);
   6889
   6890	ixgbe_ptp_stop(adapter);
   6891
   6892	if (netif_device_present(netdev))
   6893		ixgbe_close_suspend(adapter);
   6894
   6895	ixgbe_fdir_filter_exit(adapter);
   6896
   6897	ixgbe_release_hw_control(adapter);
   6898
   6899	return 0;
   6900}
   6901
   6902static int __maybe_unused ixgbe_resume(struct device *dev_d)
   6903{
   6904	struct pci_dev *pdev = to_pci_dev(dev_d);
   6905	struct ixgbe_adapter *adapter = pci_get_drvdata(pdev);
   6906	struct net_device *netdev = adapter->netdev;
   6907	u32 err;
   6908
   6909	adapter->hw.hw_addr = adapter->io_addr;
   6910
   6911	err = pci_enable_device_mem(pdev);
   6912	if (err) {
   6913		e_dev_err("Cannot enable PCI device from suspend\n");
   6914		return err;
   6915	}
   6916	smp_mb__before_atomic();
   6917	clear_bit(__IXGBE_DISABLED, &adapter->state);
   6918	pci_set_master(pdev);
   6919
   6920	device_wakeup_disable(dev_d);
   6921
   6922	ixgbe_reset(adapter);
   6923
   6924	IXGBE_WRITE_REG(&adapter->hw, IXGBE_WUS, ~0);
   6925
   6926	rtnl_lock();
   6927	err = ixgbe_init_interrupt_scheme(adapter);
   6928	if (!err && netif_running(netdev))
   6929		err = ixgbe_open(netdev);
   6930
   6932	if (!err)
   6933		netif_device_attach(netdev);
   6934	rtnl_unlock();
   6935
   6936	return err;
   6937}
   6938
   6939static int __ixgbe_shutdown(struct pci_dev *pdev, bool *enable_wake)
   6940{
   6941	struct ixgbe_adapter *adapter = pci_get_drvdata(pdev);
   6942	struct net_device *netdev = adapter->netdev;
   6943	struct ixgbe_hw *hw = &adapter->hw;
   6944	u32 ctrl;
   6945	u32 wufc = adapter->wol;
   6946
   6947	rtnl_lock();
   6948	netif_device_detach(netdev);
   6949
   6950	if (netif_running(netdev))
   6951		ixgbe_close_suspend(adapter);
   6952
   6953	ixgbe_clear_interrupt_scheme(adapter);
   6954	rtnl_unlock();
   6955
   6956	if (hw->mac.ops.stop_link_on_d3)
   6957		hw->mac.ops.stop_link_on_d3(hw);
   6958
   6959	if (wufc) {
   6960		u32 fctrl;
   6961
   6962		ixgbe_set_rx_mode(netdev);
   6963
   6964		/* enable the optics for 82599 SFP+ fiber as we can WoL */
   6965		if (hw->mac.ops.enable_tx_laser)
   6966			hw->mac.ops.enable_tx_laser(hw);
   6967
   6968		/* enable the reception of multicast packets */
   6969		fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
   6970		fctrl |= IXGBE_FCTRL_MPE;
   6971		IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
   6972
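       		/* disable PCIe master access (GIO master disable) before
       		 * arming the wake-up filter
       		 */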
   6973		ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
   6974		ctrl |= IXGBE_CTRL_GIO_DIS;
   6975		IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl);
   6976
   6977		IXGBE_WRITE_REG(hw, IXGBE_WUFC, wufc);
   6978	} else {
   6979		IXGBE_WRITE_REG(hw, IXGBE_WUC, 0);
   6980		IXGBE_WRITE_REG(hw, IXGBE_WUFC, 0);
   6981	}
   6982
   6983	switch (hw->mac.type) {
   6984	case ixgbe_mac_82598EB:
   6985		pci_wake_from_d3(pdev, false);
   6986		break;
   6987	case ixgbe_mac_82599EB:
   6988	case ixgbe_mac_X540:
   6989	case ixgbe_mac_X550:
   6990	case ixgbe_mac_X550EM_x:
   6991	case ixgbe_mac_x550em_a:
   6992		pci_wake_from_d3(pdev, !!wufc);
   6993		break;
   6994	default:
   6995		break;
   6996	}
   6997
   6998	*enable_wake = !!wufc;
   6999	if (hw->phy.ops.set_phy_power && !*enable_wake)
   7000		hw->phy.ops.set_phy_power(hw, false);
   7001
   7002	ixgbe_release_hw_control(adapter);
   7003
   7004	if (!test_and_set_bit(__IXGBE_DISABLED, &adapter->state))
   7005		pci_disable_device(pdev);
   7006
   7007	return 0;
   7008}
   7009
   7010static int __maybe_unused ixgbe_suspend(struct device *dev_d)
   7011{
   7012	struct pci_dev *pdev = to_pci_dev(dev_d);
   7013	int retval;
   7014	bool wake;
   7015
   7016	retval = __ixgbe_shutdown(pdev, &wake);
   7017
   7018	device_set_wakeup_enable(dev_d, wake);
   7019
   7020	return retval;
   7021}
   7022
   7023static void ixgbe_shutdown(struct pci_dev *pdev)
   7024{
   7025	bool wake;
   7026
   7027	__ixgbe_shutdown(pdev, &wake);
   7028
   7029	if (system_state == SYSTEM_POWER_OFF) {
   7030		pci_wake_from_d3(pdev, wake);
   7031		pci_set_power_state(pdev, PCI_D3hot);
   7032	}
   7033}
   7034
   7035/**
   7036 * ixgbe_update_stats - Update the board statistics counters.
   7037 * @adapter: board private structure
   7038 **/
   7039void ixgbe_update_stats(struct ixgbe_adapter *adapter)
   7040{
   7041	struct net_device *netdev = adapter->netdev;
   7042	struct ixgbe_hw *hw = &adapter->hw;
   7043	struct ixgbe_hw_stats *hwstats = &adapter->stats;
   7044	u64 total_mpc = 0;
   7045	u32 i, missed_rx = 0, mpc, bprc, lxon, lxoff, xon_off_tot;
   7046	u64 non_eop_descs = 0, restart_queue = 0, tx_busy = 0;
   7047	u64 alloc_rx_page_failed = 0, alloc_rx_buff_failed = 0;
   7048	u64 alloc_rx_page = 0;
   7049	u64 bytes = 0, packets = 0, hw_csum_rx_error = 0;
   7050
   7051	if (test_bit(__IXGBE_DOWN, &adapter->state) ||
   7052	    test_bit(__IXGBE_RESETTING, &adapter->state))
   7053		return;
   7054
   7055	if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) {
   7056		u64 rsc_count = 0;
   7057		u64 rsc_flush = 0;
   7058		for (i = 0; i < adapter->num_rx_queues; i++) {
   7059			rsc_count += adapter->rx_ring[i]->rx_stats.rsc_count;
   7060			rsc_flush += adapter->rx_ring[i]->rx_stats.rsc_flush;
   7061		}
   7062		adapter->rsc_total_count = rsc_count;
   7063		adapter->rsc_total_flush = rsc_flush;
   7064	}
   7065
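       	/* aggregate the software per-ring Rx counters into the adapter
       	 * totals and the netdev stats
       	 */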
   7066	for (i = 0; i < adapter->num_rx_queues; i++) {
   7067		struct ixgbe_ring *rx_ring = READ_ONCE(adapter->rx_ring[i]);
   7068
   7069		if (!rx_ring)
   7070			continue;
   7071		non_eop_descs += rx_ring->rx_stats.non_eop_descs;
   7072		alloc_rx_page += rx_ring->rx_stats.alloc_rx_page;
   7073		alloc_rx_page_failed += rx_ring->rx_stats.alloc_rx_page_failed;
   7074		alloc_rx_buff_failed += rx_ring->rx_stats.alloc_rx_buff_failed;
   7075		hw_csum_rx_error += rx_ring->rx_stats.csum_err;
   7076		bytes += rx_ring->stats.bytes;
   7077		packets += rx_ring->stats.packets;
   7078	}
   7079	adapter->non_eop_descs = non_eop_descs;
   7080	adapter->alloc_rx_page = alloc_rx_page;
   7081	adapter->alloc_rx_page_failed = alloc_rx_page_failed;
   7082	adapter->alloc_rx_buff_failed = alloc_rx_buff_failed;
   7083	adapter->hw_csum_rx_error = hw_csum_rx_error;
   7084	netdev->stats.rx_bytes = bytes;
   7085	netdev->stats.rx_packets = packets;
   7086
   7087	bytes = 0;
   7088	packets = 0;
   7089	/* gather some stats to the adapter struct that are per queue */
   7090	for (i = 0; i < adapter->num_tx_queues; i++) {
   7091		struct ixgbe_ring *tx_ring = READ_ONCE(adapter->tx_ring[i]);
   7092
   7093		if (!tx_ring)
   7094			continue;
   7095		restart_queue += tx_ring->tx_stats.restart_queue;
   7096		tx_busy += tx_ring->tx_stats.tx_busy;
   7097		bytes += tx_ring->stats.bytes;
   7098		packets += tx_ring->stats.packets;
   7099	}
   7100	for (i = 0; i < adapter->num_xdp_queues; i++) {
   7101		struct ixgbe_ring *xdp_ring = READ_ONCE(adapter->xdp_ring[i]);
   7102
   7103		if (!xdp_ring)
   7104			continue;
   7105		restart_queue += xdp_ring->tx_stats.restart_queue;
   7106		tx_busy += xdp_ring->tx_stats.tx_busy;
   7107		bytes += xdp_ring->stats.bytes;
   7108		packets += xdp_ring->stats.packets;
   7109	}
   7110	adapter->restart_queue = restart_queue;
   7111	adapter->tx_busy = tx_busy;
   7112	netdev->stats.tx_bytes = bytes;
   7113	netdev->stats.tx_packets = packets;
   7114
   7115	hwstats->crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
   7116
   7117	/* 8 register reads */
   7118	for (i = 0; i < 8; i++) {
   7119		/* for packet buffers not used, the register should read 0 */
   7120		mpc = IXGBE_READ_REG(hw, IXGBE_MPC(i));
   7121		missed_rx += mpc;
   7122		hwstats->mpc[i] += mpc;
   7123		total_mpc += hwstats->mpc[i];
   7124		hwstats->pxontxc[i] += IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
   7125		hwstats->pxofftxc[i] += IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
   7126		switch (hw->mac.type) {
   7127		case ixgbe_mac_82598EB:
   7128			hwstats->rnbc[i] += IXGBE_READ_REG(hw, IXGBE_RNBC(i));
   7129			hwstats->qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC(i));
   7130			hwstats->qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC(i));
   7131			hwstats->pxonrxc[i] +=
   7132				IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
   7133			break;
   7134		case ixgbe_mac_82599EB:
   7135		case ixgbe_mac_X540:
   7136		case ixgbe_mac_X550:
   7137		case ixgbe_mac_X550EM_x:
   7138		case ixgbe_mac_x550em_a:
   7139			hwstats->pxonrxc[i] +=
   7140				IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
   7141			break;
   7142		default:
   7143			break;
   7144		}
   7145	}
   7146
   7147	/* 16 register reads */
   7148	for (i = 0; i < 16; i++) {
   7149		hwstats->qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
   7150		hwstats->qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
   7151		if ((hw->mac.type == ixgbe_mac_82599EB) ||
   7152		    (hw->mac.type == ixgbe_mac_X540) ||
   7153		    (hw->mac.type == ixgbe_mac_X550) ||
   7154		    (hw->mac.type == ixgbe_mac_X550EM_x) ||
   7155		    (hw->mac.type == ixgbe_mac_x550em_a)) {
   7156			hwstats->qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC_L(i));
   7157			IXGBE_READ_REG(hw, IXGBE_QBTC_H(i)); /* to clear */
   7158			hwstats->qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC_L(i));
   7159			IXGBE_READ_REG(hw, IXGBE_QBRC_H(i)); /* to clear */
   7160		}
   7161	}
   7162
   7163	hwstats->gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
   7164	/* work around hardware counting issue */
   7165	hwstats->gprc -= missed_rx;
   7166
   7167	ixgbe_update_xoff_received(adapter);
   7168
   7169	/* 82598 hardware only has a 32 bit counter in the high register */
   7170	switch (hw->mac.type) {
   7171	case ixgbe_mac_82598EB:
   7172		hwstats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
   7173		hwstats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
   7174		hwstats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
   7175		hwstats->tor += IXGBE_READ_REG(hw, IXGBE_TORH);
   7176		break;
   7177	case ixgbe_mac_X540:
   7178	case ixgbe_mac_X550:
   7179	case ixgbe_mac_X550EM_x:
   7180	case ixgbe_mac_x550em_a:
   7181		/* OS2BMC stats are X540 and later */
   7182		hwstats->o2bgptc += IXGBE_READ_REG(hw, IXGBE_O2BGPTC);
   7183		hwstats->o2bspc += IXGBE_READ_REG(hw, IXGBE_O2BSPC);
   7184		hwstats->b2ospc += IXGBE_READ_REG(hw, IXGBE_B2OSPC);
   7185		hwstats->b2ogprc += IXGBE_READ_REG(hw, IXGBE_B2OGPRC);
   7186		fallthrough;
   7187	case ixgbe_mac_82599EB:
   7188		for (i = 0; i < 16; i++)
   7189			adapter->hw_rx_no_dma_resources +=
   7190					     IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
   7191		hwstats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCL);
   7192		IXGBE_READ_REG(hw, IXGBE_GORCH); /* to clear */
   7193		hwstats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL);
   7194		IXGBE_READ_REG(hw, IXGBE_GOTCH); /* to clear */
   7195		hwstats->tor += IXGBE_READ_REG(hw, IXGBE_TORL);
   7196		IXGBE_READ_REG(hw, IXGBE_TORH); /* to clear */
   7197		hwstats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
   7198		hwstats->fdirmatch += IXGBE_READ_REG(hw, IXGBE_FDIRMATCH);
   7199		hwstats->fdirmiss += IXGBE_READ_REG(hw, IXGBE_FDIRMISS);
   7200#ifdef IXGBE_FCOE
   7201		hwstats->fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
   7202		hwstats->fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
   7203		hwstats->fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
   7204		hwstats->fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
   7205		hwstats->fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
   7206		hwstats->fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
   7207		/* Add up per-CPU counters for total DDP allocation failures */
   7208		if (adapter->fcoe.ddp_pool) {
   7209			struct ixgbe_fcoe *fcoe = &adapter->fcoe;
   7210			struct ixgbe_fcoe_ddp_pool *ddp_pool;
   7211			unsigned int cpu;
   7212			u64 noddp = 0, noddp_ext_buff = 0;
   7213			for_each_possible_cpu(cpu) {
   7214				ddp_pool = per_cpu_ptr(fcoe->ddp_pool, cpu);
   7215				noddp += ddp_pool->noddp;
   7216				noddp_ext_buff += ddp_pool->noddp_ext_buff;
   7217			}
   7218			hwstats->fcoe_noddp = noddp;
   7219			hwstats->fcoe_noddp_ext_buff = noddp_ext_buff;
   7220		}
   7221#endif /* IXGBE_FCOE */
   7222		break;
   7223	default:
   7224		break;
   7225	}
   7226	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
   7227	hwstats->bprc += bprc;
   7228	hwstats->mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
   7229	if (hw->mac.type == ixgbe_mac_82598EB)
   7230		hwstats->mprc -= bprc;
   7231	hwstats->roc += IXGBE_READ_REG(hw, IXGBE_ROC);
   7232	hwstats->prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
   7233	hwstats->prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
   7234	hwstats->prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
   7235	hwstats->prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
   7236	hwstats->prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
   7237	hwstats->prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
   7238	hwstats->rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
   7239	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
   7240	hwstats->lxontxc += lxon;
   7241	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
   7242	hwstats->lxofftxc += lxoff;
   7243	hwstats->gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
   7244	hwstats->mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
   7245	/*
   7246	 * 82598 errata - tx of flow control packets is included in tx counters
   7247	 */
   7248	xon_off_tot = lxon + lxoff;
   7249	hwstats->gptc -= xon_off_tot;
   7250	hwstats->mptc -= xon_off_tot;
   7251	hwstats->gotc -= (xon_off_tot * (ETH_ZLEN + ETH_FCS_LEN));
   7252	hwstats->ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
   7253	hwstats->rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
   7254	hwstats->rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
   7255	hwstats->tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
   7256	hwstats->ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
   7257	hwstats->ptc64 -= xon_off_tot;
   7258	hwstats->ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
   7259	hwstats->ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
   7260	hwstats->ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
   7261	hwstats->ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
   7262	hwstats->ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
   7263	hwstats->bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
   7264
   7265	/* Fill out the OS statistics structure */
   7266	netdev->stats.multicast = hwstats->mprc;
   7267
   7268	/* Rx Errors */
   7269	netdev->stats.rx_errors = hwstats->crcerrs + hwstats->rlec;
   7270	netdev->stats.rx_dropped = 0;
   7271	netdev->stats.rx_length_errors = hwstats->rlec;
   7272	netdev->stats.rx_crc_errors = hwstats->crcerrs;
   7273	netdev->stats.rx_missed_errors = total_mpc;
   7274}
   7275
   7276/**
   7277 * ixgbe_fdir_reinit_subtask - worker thread to reinit FDIR filter table
   7278 * @adapter: pointer to the device adapter structure
   7279 **/
   7280static void ixgbe_fdir_reinit_subtask(struct ixgbe_adapter *adapter)
   7281{
   7282	struct ixgbe_hw *hw = &adapter->hw;
   7283	int i;
   7284
   7285	if (!(adapter->flags2 & IXGBE_FLAG2_FDIR_REQUIRES_REINIT))
   7286		return;
   7287
   7288	adapter->flags2 &= ~IXGBE_FLAG2_FDIR_REQUIRES_REINIT;
   7289
   7290	/* if interface is down do nothing */
   7291	if (test_bit(__IXGBE_DOWN, &adapter->state))
   7292		return;
   7293
   7294	/* do nothing if we are not using signature filters */
   7295	if (!(adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE))
   7296		return;
   7297
   7298	adapter->fdir_overflow++;
   7299
   7300	if (ixgbe_reinit_fdir_tables_82599(hw) == 0) {
   7301		for (i = 0; i < adapter->num_tx_queues; i++)
   7302			set_bit(__IXGBE_TX_FDIR_INIT_DONE,
   7303				&(adapter->tx_ring[i]->state));
   7304		for (i = 0; i < adapter->num_xdp_queues; i++)
   7305			set_bit(__IXGBE_TX_FDIR_INIT_DONE,
   7306				&adapter->xdp_ring[i]->state);
   7307		/* re-enable flow director interrupts */
   7308		IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
   7309	} else {
   7310		e_err(probe, "failed to finish FDIR re-initialization, ignored adding FDIR ATR filters\n");
   7312	}
   7313}
   7314
   7315/**
   7316 * ixgbe_check_hang_subtask - check for hung queues and dropped interrupts
   7317 * @adapter: pointer to the device adapter structure
   7318 *
   7319 * This function serves two purposes.  First, it strobes the interrupt lines
   7320 * to make certain interrupts are occurring.  Second, it sets the bits
   7321 * needed to check for TX hangs.  As a result we can immediately
   7322 * determine if a hang has occurred.
   7323 */
   7324static void ixgbe_check_hang_subtask(struct ixgbe_adapter *adapter)
   7325{
   7326	struct ixgbe_hw *hw = &adapter->hw;
   7327	u64 eics = 0;
   7328	int i;
   7329
   7330	/* If we're down, removing or resetting, just bail */
   7331	if (test_bit(__IXGBE_DOWN, &adapter->state) ||
   7332	    test_bit(__IXGBE_REMOVING, &adapter->state) ||
   7333	    test_bit(__IXGBE_RESETTING, &adapter->state))
   7334		return;
   7335
   7336	/* Force detection of hung controller */
   7337	if (netif_carrier_ok(adapter->netdev)) {
   7338		for (i = 0; i < adapter->num_tx_queues; i++)
   7339			set_check_for_tx_hang(adapter->tx_ring[i]);
   7340		for (i = 0; i < adapter->num_xdp_queues; i++)
   7341			set_check_for_tx_hang(adapter->xdp_ring[i]);
   7342	}
   7343
   7344	if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) {
   7345		/*
   7346		 * for legacy and MSI interrupts don't set any bits
   7347		 * that are enabled for EIAM, because this operation
   7348		 * would set *both* EIMS and EICS for any bit in EIAM
   7349		 */
   7350		IXGBE_WRITE_REG(hw, IXGBE_EICS,
   7351			(IXGBE_EICS_TCP_TIMER | IXGBE_EICS_OTHER));
   7352	} else {
   7353		/* get one bit for every active tx/rx interrupt vector */
   7354		for (i = 0; i < adapter->num_q_vectors; i++) {
   7355			struct ixgbe_q_vector *qv = adapter->q_vector[i];
   7356			if (qv->rx.ring || qv->tx.ring)
   7357				eics |= BIT_ULL(i);
   7358		}
   7359	}
   7360
   7361	/* Cause software interrupt to ensure rings are cleaned */
   7362	ixgbe_irq_rearm_queues(adapter, eics);
   7363}
   7364
   7365/**
   7366 * ixgbe_watchdog_update_link - update the link status
   7367 * @adapter: pointer to the device adapter structure
   7368 **/
   7369static void ixgbe_watchdog_update_link(struct ixgbe_adapter *adapter)
   7370{
   7371	struct ixgbe_hw *hw = &adapter->hw;
   7372	u32 link_speed = adapter->link_speed;
   7373	bool link_up = adapter->link_up;
   7374	bool pfc_en = adapter->dcb_cfg.pfc_mode_enable;
   7375
   7376	if (!(adapter->flags & IXGBE_FLAG_NEED_LINK_UPDATE))
   7377		return;
   7378
   7379	if (hw->mac.ops.check_link) {
   7380		hw->mac.ops.check_link(hw, &link_speed, &link_up, false);
   7381	} else {
   7382		/* always assume link is up, if no check link function */
   7383		link_speed = IXGBE_LINK_SPEED_10GB_FULL;
   7384		link_up = true;
   7385	}
   7386
   7387	if (adapter->ixgbe_ieee_pfc)
   7388		pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en);
   7389
   7390	if (link_up && !((adapter->flags & IXGBE_FLAG_DCB_ENABLED) && pfc_en)) {
   7391		hw->mac.ops.fc_enable(hw);
   7392		ixgbe_set_rx_drop_en(adapter);
   7393	}
   7394
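       	/* Once link is up, or the link-check window has expired, stop
       	 * forcing link updates and re-enable the LSC interrupt.
       	 */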
   7395	if (link_up ||
   7396	    time_after(jiffies, (adapter->link_check_timeout +
   7397				 IXGBE_TRY_LINK_TIMEOUT))) {
   7398		adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE;
   7399		IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMC_LSC);
   7400		IXGBE_WRITE_FLUSH(hw);
   7401	}
   7402
   7403	adapter->link_up = link_up;
   7404	adapter->link_speed = link_speed;
   7405}
   7406
   7407static void ixgbe_update_default_up(struct ixgbe_adapter *adapter)
   7408{
   7409#ifdef CONFIG_IXGBE_DCB
   7410	struct net_device *netdev = adapter->netdev;
   7411	struct dcb_app app = {
   7412			      .selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE,
   7413			      .protocol = 0,
   7414			     };
   7415	u8 up = 0;
   7416
   7417	if (adapter->dcbx_cap & DCB_CAP_DCBX_VER_IEEE)
   7418		up = dcb_ieee_getapp_mask(netdev, &app);
   7419
   7420	adapter->default_up = (up > 1) ? (ffs(up) - 1) : 0;
   7421#endif
   7422}
   7423
   7424/**
   7425 * ixgbe_watchdog_link_is_up - update netif_carrier status and
   7426 *                             print link up message
   7427 * @adapter: pointer to the device adapter structure
   7428 **/
   7429static void ixgbe_watchdog_link_is_up(struct ixgbe_adapter *adapter)
   7430{
   7431	struct net_device *netdev = adapter->netdev;
   7432	struct ixgbe_hw *hw = &adapter->hw;
   7433	u32 link_speed = adapter->link_speed;
   7434	const char *speed_str;
   7435	bool flow_rx, flow_tx;
   7436
   7437	/* only continue if link was previously down */
   7438	if (netif_carrier_ok(netdev))
   7439		return;
   7440
   7441	adapter->flags2 &= ~IXGBE_FLAG2_SEARCH_FOR_SFP;
   7442
   7443	switch (hw->mac.type) {
   7444	case ixgbe_mac_82598EB: {
   7445		u32 frctl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
   7446		u32 rmcs = IXGBE_READ_REG(hw, IXGBE_RMCS);
   7447		flow_rx = !!(frctl & IXGBE_FCTRL_RFCE);
   7448		flow_tx = !!(rmcs & IXGBE_RMCS_TFCE_802_3X);
   7449	}
   7450		break;
   7451	case ixgbe_mac_X540:
   7452	case ixgbe_mac_X550:
   7453	case ixgbe_mac_X550EM_x:
   7454	case ixgbe_mac_x550em_a:
   7455	case ixgbe_mac_82599EB: {
   7456		u32 mflcn = IXGBE_READ_REG(hw, IXGBE_MFLCN);
   7457		u32 fccfg = IXGBE_READ_REG(hw, IXGBE_FCCFG);
   7458		flow_rx = !!(mflcn & IXGBE_MFLCN_RFCE);
   7459		flow_tx = !!(fccfg & IXGBE_FCCFG_TFCE_802_3X);
   7460	}
   7461		break;
   7462	default:
   7463		flow_tx = false;
   7464		flow_rx = false;
   7465		break;
   7466	}
   7467
   7468	adapter->last_rx_ptp_check = jiffies;
   7469
   7470	if (test_bit(__IXGBE_PTP_RUNNING, &adapter->state))
   7471		ixgbe_ptp_start_cyclecounter(adapter);
   7472
   7473	switch (link_speed) {
   7474	case IXGBE_LINK_SPEED_10GB_FULL:
   7475		speed_str = "10 Gbps";
   7476		break;
   7477	case IXGBE_LINK_SPEED_5GB_FULL:
   7478		speed_str = "5 Gbps";
   7479		break;
   7480	case IXGBE_LINK_SPEED_2_5GB_FULL:
   7481		speed_str = "2.5 Gbps";
   7482		break;
   7483	case IXGBE_LINK_SPEED_1GB_FULL:
   7484		speed_str = "1 Gbps";
   7485		break;
   7486	case IXGBE_LINK_SPEED_100_FULL:
   7487		speed_str = "100 Mbps";
   7488		break;
   7489	case IXGBE_LINK_SPEED_10_FULL:
   7490		speed_str = "10 Mbps";
   7491		break;
   7492	default:
   7493		speed_str = "unknown speed";
   7494		break;
   7495	}
   7496	e_info(drv, "NIC Link is Up %s, Flow Control: %s\n", speed_str,
   7497	       ((flow_rx && flow_tx) ? "RX/TX" :
   7498	       (flow_rx ? "RX" :
   7499	       (flow_tx ? "TX" : "None"))));
   7500
   7501	netif_carrier_on(netdev);
   7502	ixgbe_check_vf_rate_limit(adapter);
   7503
   7504	/* enable transmits */
   7505	netif_tx_wake_all_queues(adapter->netdev);
   7506
   7507	/* update the default user priority for VFs */
   7508	ixgbe_update_default_up(adapter);
   7509
   7510	/* ping all the active vfs to let them know link has changed */
   7511	ixgbe_ping_all_vfs(adapter);
   7512}
   7513
   7514/**
   7515 * ixgbe_watchdog_link_is_down - update netif_carrier status and
   7516 *                               print link down message
   7517 * @adapter: pointer to the adapter structure
   7518 **/
   7519static void ixgbe_watchdog_link_is_down(struct ixgbe_adapter *adapter)
   7520{
   7521	struct net_device *netdev = adapter->netdev;
   7522	struct ixgbe_hw *hw = &adapter->hw;
   7523
   7524	adapter->link_up = false;
   7525	adapter->link_speed = 0;
   7526
   7527	/* only continue if link was up previously */
   7528	if (!netif_carrier_ok(netdev))
   7529		return;
   7530
   7531	/* poll for SFP+ cable when link is down */
   7532	if (ixgbe_is_sfp(hw) && hw->mac.type == ixgbe_mac_82598EB)
   7533		adapter->flags2 |= IXGBE_FLAG2_SEARCH_FOR_SFP;
   7534
   7535	if (test_bit(__IXGBE_PTP_RUNNING, &adapter->state))
   7536		ixgbe_ptp_start_cyclecounter(adapter);
   7537
   7538	e_info(drv, "NIC Link is Down\n");
   7539	netif_carrier_off(netdev);
   7540
   7541	/* ping all the active vfs to let them know link has changed */
   7542	ixgbe_ping_all_vfs(adapter);
   7543}
   7544
   7545static bool ixgbe_ring_tx_pending(struct ixgbe_adapter *adapter)
   7546{
   7547	int i;
   7548
   7549	for (i = 0; i < adapter->num_tx_queues; i++) {
   7550		struct ixgbe_ring *tx_ring = adapter->tx_ring[i];
   7551
   7552		if (tx_ring->next_to_use != tx_ring->next_to_clean)
   7553			return true;
   7554	}
   7555
   7556	for (i = 0; i < adapter->num_xdp_queues; i++) {
   7557		struct ixgbe_ring *ring = adapter->xdp_ring[i];
   7558
   7559		if (ring->next_to_use != ring->next_to_clean)
   7560			return true;
   7561	}
   7562
   7563	return false;
   7564}
   7565
   7566static bool ixgbe_vf_tx_pending(struct ixgbe_adapter *adapter)
   7567{
   7568	struct ixgbe_hw *hw = &adapter->hw;
   7569	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
   7570	u32 q_per_pool = __ALIGN_MASK(1, ~vmdq->mask);
   7571
   7572	int i, j;
   7573
   7574	if (!adapter->num_vfs)
   7575		return false;
   7576
   7577	/* resetting the PF is only needed for MAC before X550 */
   7578	if (hw->mac.type >= ixgbe_mac_X550)
   7579		return false;
   7580
   7581	for (i = 0; i < adapter->num_vfs; i++) {
   7582		for (j = 0; j < q_per_pool; j++) {
   7583			u32 h, t;
   7584
   7585			h = IXGBE_READ_REG(hw, IXGBE_PVFTDHN(q_per_pool, i, j));
   7586			t = IXGBE_READ_REG(hw, IXGBE_PVFTDTN(q_per_pool, i, j));
   7587
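       			/* head != tail means this VF Tx queue still has
       			 * descriptors pending in hardware
       			 */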
   7588			if (h != t)
   7589				return true;
   7590		}
   7591	}
   7592
   7593	return false;
   7594}
   7595
   7596/**
   7597 * ixgbe_watchdog_flush_tx - flush queues on link down
   7598 * @adapter: pointer to the device adapter structure
   7599 **/
   7600static void ixgbe_watchdog_flush_tx(struct ixgbe_adapter *adapter)
   7601{
   7602	if (!netif_carrier_ok(adapter->netdev)) {
   7603		if (ixgbe_ring_tx_pending(adapter) ||
   7604		    ixgbe_vf_tx_pending(adapter)) {
   7605			/* We've lost link, so the controller stops DMA,
   7606			 * but we've got queued Tx work that's never going
   7607			 * to get done, so reset controller to flush Tx.
   7608			 * (Do the reset outside of interrupt context).
   7609			 */
   7610			e_warn(drv, "initiating reset to clear Tx work after link loss\n");
   7611			set_bit(__IXGBE_RESET_REQUESTED, &adapter->state);
   7612		}
   7613	}
   7614}
   7615
   7616#ifdef CONFIG_PCI_IOV
   7617static void ixgbe_bad_vf_abort(struct ixgbe_adapter *adapter, u32 vf)
   7618{
   7619	struct ixgbe_hw *hw = &adapter->hw;
   7620
   7621	if (adapter->hw.mac.type == ixgbe_mac_82599EB &&
   7622	    adapter->flags2 & IXGBE_FLAG2_AUTO_DISABLE_VF) {
   7623		adapter->vfinfo[vf].primary_abort_count++;
   7624		if (adapter->vfinfo[vf].primary_abort_count ==
   7625		    IXGBE_PRIMARY_ABORT_LIMIT) {
   7626			ixgbe_set_vf_link_state(adapter, vf,
   7627						IFLA_VF_LINK_STATE_DISABLE);
   7628			adapter->vfinfo[vf].primary_abort_count = 0;
   7629
   7630			e_info(drv,
   7631			       "Malicious Driver Detection event detected on PF %d VF %d MAC: %pM mdd-disable-vf=on",
   7632			       hw->bus.func, vf,
   7633			       adapter->vfinfo[vf].vf_mac_addresses);
   7634		}
   7635	}
   7636}
   7637
   7638static void ixgbe_check_for_bad_vf(struct ixgbe_adapter *adapter)
   7639{
   7640	struct ixgbe_hw *hw = &adapter->hw;
   7641	struct pci_dev *pdev = adapter->pdev;
   7642	unsigned int vf;
   7643	u32 gpc;
   7644
   7645	if (!(netif_carrier_ok(adapter->netdev)))
   7646		return;
   7647
   7648	gpc = IXGBE_READ_REG(hw, IXGBE_TXDGPC);
   7649	if (gpc) /* If incrementing then no need for the check below */
   7650		return;
   7651	/* Check to see if a bad DMA write target from an errant or
   7652	 * malicious VF has caused a PCIe error.  If so then we can
   7653	 * issue a VFLR to the offending VF(s) and then resume without
   7654	 * requesting a full slot reset.
   7655	 */
   7656
   7657	if (!pdev)
   7658		return;
   7659
   7660	/* check status reg for all VFs owned by this PF */
   7661	for (vf = 0; vf < adapter->num_vfs; ++vf) {
   7662		struct pci_dev *vfdev = adapter->vfinfo[vf].vfdev;
   7663		u16 status_reg;
   7664
   7665		if (!vfdev)
   7666			continue;
   7667		pci_read_config_word(vfdev, PCI_STATUS, &status_reg);
   7668		if (status_reg != IXGBE_FAILED_READ_CFG_WORD &&
   7669		    status_reg & PCI_STATUS_REC_MASTER_ABORT) {
   7670			ixgbe_bad_vf_abort(adapter, vf);
   7671			pcie_flr(vfdev);
   7672		}
   7673	}
   7674}
   7675
   7676static void ixgbe_spoof_check(struct ixgbe_adapter *adapter)
   7677{
   7678	u32 ssvpc;
   7679
   7680	/* Do not perform spoof check for 82598 or if not in IOV mode */
   7681	if (adapter->hw.mac.type == ixgbe_mac_82598EB ||
   7682	    adapter->num_vfs == 0)
   7683		return;
   7684
   7685	ssvpc = IXGBE_READ_REG(&adapter->hw, IXGBE_SSVPC);
   7686
   7687	/*
   7688	 * The SSVPC register is cleared on read; if it reads zero, no
   7689	 * spoofed packets were seen in the last interval.
   7690	 */
   7691	if (!ssvpc)
   7692		return;
   7693
   7694	e_warn(drv, "%u Spoofed packets detected\n", ssvpc);
   7695}
   7696#else
   7697static void ixgbe_spoof_check(struct ixgbe_adapter __always_unused *adapter)
   7698{
   7699}
   7700
   7701static void
   7702ixgbe_check_for_bad_vf(struct ixgbe_adapter __always_unused *adapter)
   7703{
   7704}
   7705#endif /* CONFIG_PCI_IOV */
   7706
   7708/**
   7709 * ixgbe_watchdog_subtask - check and bring link up
   7710 * @adapter: pointer to the device adapter structure
   7711 **/
   7712static void ixgbe_watchdog_subtask(struct ixgbe_adapter *adapter)
   7713{
   7714	/* if interface is down, removing or resetting, do nothing */
   7715	if (test_bit(__IXGBE_DOWN, &adapter->state) ||
   7716	    test_bit(__IXGBE_REMOVING, &adapter->state) ||
   7717	    test_bit(__IXGBE_RESETTING, &adapter->state))
   7718		return;
   7719
   7720	ixgbe_watchdog_update_link(adapter);
   7721
   7722	if (adapter->link_up)
   7723		ixgbe_watchdog_link_is_up(adapter);
   7724	else
   7725		ixgbe_watchdog_link_is_down(adapter);
   7726
   7727	ixgbe_check_for_bad_vf(adapter);
   7728	ixgbe_spoof_check(adapter);
   7729	ixgbe_update_stats(adapter);
   7730
   7731	ixgbe_watchdog_flush_tx(adapter);
   7732}
   7733
   7734/**
   7735 * ixgbe_sfp_detection_subtask - poll for SFP+ cable
   7736 * @adapter: the ixgbe adapter structure
   7737 **/
   7738static void ixgbe_sfp_detection_subtask(struct ixgbe_adapter *adapter)
   7739{
   7740	struct ixgbe_hw *hw = &adapter->hw;
   7741	s32 err;
   7742
   7743	/* not searching for SFP so there is nothing to do here */
   7744	if (!(adapter->flags2 & IXGBE_FLAG2_SEARCH_FOR_SFP) &&
   7745	    !(adapter->flags2 & IXGBE_FLAG2_SFP_NEEDS_RESET))
   7746		return;
   7747
   7748	if (adapter->sfp_poll_time &&
   7749	    time_after(adapter->sfp_poll_time, jiffies))
   7750		return; /* If not yet time to poll for SFP */
   7751
   7752	/* someone else is in init, wait until next service event */
   7753	if (test_and_set_bit(__IXGBE_IN_SFP_INIT, &adapter->state))
   7754		return;
   7755
   7756	adapter->sfp_poll_time = jiffies + IXGBE_SFP_POLL_JIFFIES - 1;
   7757
   7758	err = hw->phy.ops.identify_sfp(hw);
   7759	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED)
   7760		goto sfp_out;
   7761
   7762	if (err == IXGBE_ERR_SFP_NOT_PRESENT) {
   7763		/* If no cable is present, then we need to reset
   7764		 * the next time we find a good cable. */
   7765		adapter->flags2 |= IXGBE_FLAG2_SFP_NEEDS_RESET;
   7766	}
   7767
   7768	/* exit on error */
   7769	if (err)
   7770		goto sfp_out;
   7771
   7772	/* exit if reset not needed */
   7773	if (!(adapter->flags2 & IXGBE_FLAG2_SFP_NEEDS_RESET))
   7774		goto sfp_out;
   7775
   7776	adapter->flags2 &= ~IXGBE_FLAG2_SFP_NEEDS_RESET;
   7777
   7778	/*
   7779	 * A module may be identified correctly, but the EEPROM may not have
   7780	 * support for that module.  setup_sfp() will fail in that case, so
   7781	 * we should not allow that module to load.
   7782	 */
   7783	if (hw->mac.type == ixgbe_mac_82598EB)
   7784		err = hw->phy.ops.reset(hw);
   7785	else
   7786		err = hw->mac.ops.setup_sfp(hw);
   7787
   7788	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED)
   7789		goto sfp_out;
   7790
   7791	adapter->flags |= IXGBE_FLAG_NEED_LINK_CONFIG;
   7792	e_info(probe, "detected SFP+: %d\n", hw->phy.sfp_type);
   7793
   7794sfp_out:
   7795	clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state);
   7796
   7797	if ((err == IXGBE_ERR_SFP_NOT_SUPPORTED) &&
   7798	    (adapter->netdev->reg_state == NETREG_REGISTERED)) {
   7799		e_dev_err("failed to initialize because an unsupported SFP+ module type was detected.\n");
   7800		e_dev_err("Reload the driver after installing a supported module.\n");
   7803		unregister_netdev(adapter->netdev);
   7804	}
   7805}
   7806
   7807/**
   7808 * ixgbe_sfp_link_config_subtask - set up link SFP after module install
   7809 * @adapter: the ixgbe adapter structure
   7810 **/
   7811static void ixgbe_sfp_link_config_subtask(struct ixgbe_adapter *adapter)
   7812{
   7813	struct ixgbe_hw *hw = &adapter->hw;
   7814	u32 cap_speed;
   7815	u32 speed;
   7816	bool autoneg = false;
   7817
   7818	if (!(adapter->flags & IXGBE_FLAG_NEED_LINK_CONFIG))
   7819		return;
   7820
   7821	/* someone else is in init, wait until next service event */
   7822	if (test_and_set_bit(__IXGBE_IN_SFP_INIT, &adapter->state))
   7823		return;
   7824
   7825	adapter->flags &= ~IXGBE_FLAG_NEED_LINK_CONFIG;
   7826
   7827	hw->mac.ops.get_link_capabilities(hw, &cap_speed, &autoneg);
   7828
   7829	/* advertise highest capable link speed */
   7830	if (!autoneg && (cap_speed & IXGBE_LINK_SPEED_10GB_FULL))
   7831		speed = IXGBE_LINK_SPEED_10GB_FULL;
   7832	else
   7833		speed = cap_speed & (IXGBE_LINK_SPEED_10GB_FULL |
   7834				     IXGBE_LINK_SPEED_1GB_FULL);
   7835
   7836	if (hw->mac.ops.setup_link)
   7837		hw->mac.ops.setup_link(hw, speed, true);
   7838
   7839	adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE;
   7840	adapter->link_check_timeout = jiffies;
   7841	clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state);
   7842}
   7843
   7844/**
   7845 * ixgbe_service_timer - Timer Call-back
   7846 * @t: pointer to timer_list structure
   7847 **/
   7848static void ixgbe_service_timer(struct timer_list *t)
   7849{
   7850	struct ixgbe_adapter *adapter = from_timer(adapter, t, service_timer);
   7851	unsigned long next_event_offset;
   7852
   7853	/* poll faster when waiting for link */
   7854	if (adapter->flags & IXGBE_FLAG_NEED_LINK_UPDATE)
   7855		next_event_offset = HZ / 10;
   7856	else
   7857		next_event_offset = HZ * 2;
   7858
   7859	/* Reset the timer */
   7860	mod_timer(&adapter->service_timer, next_event_offset + jiffies);
   7861
   7862	ixgbe_service_event_schedule(adapter);
   7863}
   7864
   7865static void ixgbe_phy_interrupt_subtask(struct ixgbe_adapter *adapter)
   7866{
   7867	struct ixgbe_hw *hw = &adapter->hw;
   7868	u32 status;
   7869
   7870	if (!(adapter->flags2 & IXGBE_FLAG2_PHY_INTERRUPT))
   7871		return;
   7872
   7873	adapter->flags2 &= ~IXGBE_FLAG2_PHY_INTERRUPT;
   7874
   7875	if (!hw->phy.ops.handle_lasi)
   7876		return;
   7877
   7878	status = hw->phy.ops.handle_lasi(&adapter->hw);
   7879	if (status != IXGBE_ERR_OVERTEMP)
   7880		return;
   7881
   7882	e_crit(drv, "%s\n", ixgbe_overheat_msg);
   7883}
   7884
   7885static void ixgbe_reset_subtask(struct ixgbe_adapter *adapter)
   7886{
   7887	if (!test_and_clear_bit(__IXGBE_RESET_REQUESTED, &adapter->state))
   7888		return;
   7889
   7890	rtnl_lock();
   7891	/* If we're already down, removing or resetting, just bail */
   7892	if (test_bit(__IXGBE_DOWN, &adapter->state) ||
   7893	    test_bit(__IXGBE_REMOVING, &adapter->state) ||
   7894	    test_bit(__IXGBE_RESETTING, &adapter->state)) {
   7895		rtnl_unlock();
   7896		return;
   7897	}
   7898
   7899	ixgbe_dump(adapter);
   7900	netdev_err(adapter->netdev, "Reset adapter\n");
   7901	adapter->tx_timeout_count++;
   7902
   7903	ixgbe_reinit_locked(adapter);
   7904	rtnl_unlock();
   7905}
   7906
   7907/**
   7908 * ixgbe_check_fw_error - Check firmware for errors
   7909 * @adapter: the adapter private structure
   7910 *
   7911 * Check firmware errors in register FWSM
   7912 */
   7913static bool ixgbe_check_fw_error(struct ixgbe_adapter *adapter)
   7914{
   7915	struct ixgbe_hw *hw = &adapter->hw;
   7916	u32 fwsm;
   7917
   7918	/* read fwsm.ext_err_ind register and log errors */
   7919	fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM(hw));
   7920
   7921	if (fwsm & IXGBE_FWSM_EXT_ERR_IND_MASK ||
   7922	    !(fwsm & IXGBE_FWSM_FW_VAL_BIT))
   7923		e_dev_warn("Warning: firmware error detected, FWSM: 0x%08X\n", fwsm);
   7925
   7926	if (hw->mac.ops.fw_recovery_mode && hw->mac.ops.fw_recovery_mode(hw)) {
   7927		e_dev_err("Firmware recovery mode detected. Limiting functionality. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
   7928		return true;
   7929	}
   7930
   7931	return false;
   7932}
   7933
   7934/**
   7935 * ixgbe_service_task - manages and runs subtasks
   7936 * @work: pointer to work_struct containing our data
   7937 **/
   7938static void ixgbe_service_task(struct work_struct *work)
   7939{
   7940	struct ixgbe_adapter *adapter = container_of(work,
   7941						     struct ixgbe_adapter,
   7942						     service_task);
   7943	if (ixgbe_removed(adapter->hw.hw_addr)) {
   7944		if (!test_bit(__IXGBE_DOWN, &adapter->state)) {
   7945			rtnl_lock();
   7946			ixgbe_down(adapter);
   7947			rtnl_unlock();
   7948		}
   7949		ixgbe_service_event_complete(adapter);
   7950		return;
   7951	}
   7952	if (ixgbe_check_fw_error(adapter)) {
   7953		if (!test_bit(__IXGBE_DOWN, &adapter->state))
   7954			unregister_netdev(adapter->netdev);
   7955		ixgbe_service_event_complete(adapter);
   7956		return;
   7957	}
   7958	ixgbe_reset_subtask(adapter);
   7959	ixgbe_phy_interrupt_subtask(adapter);
   7960	ixgbe_sfp_detection_subtask(adapter);
   7961	ixgbe_sfp_link_config_subtask(adapter);
   7962	ixgbe_check_overtemp_subtask(adapter);
   7963	ixgbe_watchdog_subtask(adapter);
   7964	ixgbe_fdir_reinit_subtask(adapter);
   7965	ixgbe_check_hang_subtask(adapter);
   7966
   7967	if (test_bit(__IXGBE_PTP_RUNNING, &adapter->state)) {
   7968		ixgbe_ptp_overflow_check(adapter);
   7969		if (adapter->flags & IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER)
   7970			ixgbe_ptp_rx_hang(adapter);
   7971		ixgbe_ptp_tx_hang(adapter);
   7972	}
   7973
   7974	ixgbe_service_event_complete(adapter);
   7975}
   7976
   7977static int ixgbe_tso(struct ixgbe_ring *tx_ring,
   7978		     struct ixgbe_tx_buffer *first,
   7979		     u8 *hdr_len,
   7980		     struct ixgbe_ipsec_tx_data *itd)
   7981{
   7982	u32 vlan_macip_lens, type_tucmd, mss_l4len_idx;
   7983	struct sk_buff *skb = first->skb;
   7984	union {
   7985		struct iphdr *v4;
   7986		struct ipv6hdr *v6;
   7987		unsigned char *hdr;
   7988	} ip;
   7989	union {
   7990		struct tcphdr *tcp;
   7991		struct udphdr *udp;
   7992		unsigned char *hdr;
   7993	} l4;
   7994	u32 paylen, l4_offset;
   7995	u32 fceof_saidx = 0;
   7996	int err;
   7997
   7998	if (skb->ip_summed != CHECKSUM_PARTIAL)
   7999		return 0;
   8000
   8001	if (!skb_is_gso(skb))
   8002		return 0;
   8003
   8004	err = skb_cow_head(skb, 0);
   8005	if (err < 0)
   8006		return err;
   8007
   8008	if (eth_p_mpls(first->protocol))
   8009		ip.hdr = skb_inner_network_header(skb);
   8010	else
   8011		ip.hdr = skb_network_header(skb);
   8012	l4.hdr = skb_checksum_start(skb);
   8013
   8014	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
   8015	type_tucmd = (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ?
   8016		      IXGBE_ADVTXD_TUCMD_L4T_UDP : IXGBE_ADVTXD_TUCMD_L4T_TCP;
   8017
   8018	/* initialize outer IP header fields */
   8019	if (ip.v4->version == 4) {
   8020		unsigned char *csum_start = skb_checksum_start(skb);
   8021		unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4);
   8022		int len = csum_start - trans_start;
   8023
   8024		/* IP header will have to cancel out any data that
   8025		 * is not a part of the outer IP header, so set to
   8026		 * a reverse csum if needed, else init check to 0.
   8027		 */
   8028		ip.v4->check = (skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) ?
   8029					   csum_fold(csum_partial(trans_start,
   8030								  len, 0)) : 0;
   8031		type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4;
   8032
   8033		ip.v4->tot_len = 0;
   8034		first->tx_flags |= IXGBE_TX_FLAGS_TSO |
   8035				   IXGBE_TX_FLAGS_CSUM |
   8036				   IXGBE_TX_FLAGS_IPV4;
   8037	} else {
   8038		ip.v6->payload_len = 0;
   8039		first->tx_flags |= IXGBE_TX_FLAGS_TSO |
   8040				   IXGBE_TX_FLAGS_CSUM;
   8041	}
   8042
   8043	/* determine offset of inner transport header */
   8044	l4_offset = l4.hdr - skb->data;
   8045
   8046	/* remove payload length from inner checksum */
   8047	paylen = skb->len - l4_offset;
   8048
   8049	if (type_tucmd & IXGBE_ADVTXD_TUCMD_L4T_TCP) {
   8050		/* compute length of segmentation header */
   8051		*hdr_len = (l4.tcp->doff * 4) + l4_offset;
   8052		csum_replace_by_diff(&l4.tcp->check,
   8053				     (__force __wsum)htonl(paylen));
   8054	} else {
   8055		/* compute length of segmentation header */
   8056		*hdr_len = sizeof(*l4.udp) + l4_offset;
   8057		csum_replace_by_diff(&l4.udp->check,
   8058				     (__force __wsum)htonl(paylen));
   8059	}
   8060
   8061	/* update gso size and bytecount with header size */
   8062	first->gso_segs = skb_shinfo(skb)->gso_segs;
   8063	first->bytecount += (first->gso_segs - 1) * *hdr_len;
   8064
   8065	/* mss_l4len_id: use 0 as index for TSO */
   8066	mss_l4len_idx = (*hdr_len - l4_offset) << IXGBE_ADVTXD_L4LEN_SHIFT;
   8067	mss_l4len_idx |= skb_shinfo(skb)->gso_size << IXGBE_ADVTXD_MSS_SHIFT;
   8068
   8069	fceof_saidx |= itd->sa_idx;
   8070	type_tucmd |= itd->flags | itd->trailer_len;
   8071
   8072	/* vlan_macip_lens: HEADLEN, MACLEN, VLAN tag */
   8073	vlan_macip_lens = l4.hdr - ip.hdr;
   8074	vlan_macip_lens |= (ip.hdr - skb->data) << IXGBE_ADVTXD_MACLEN_SHIFT;
   8075	vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
   8076
   8077	ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx, type_tucmd,
   8078			  mss_l4len_idx);
   8079
   8080	return 1;
   8081}
   8082
   8083static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring,
   8084			  struct ixgbe_tx_buffer *first,
   8085			  struct ixgbe_ipsec_tx_data *itd)
   8086{
   8087	struct sk_buff *skb = first->skb;
   8088	u32 vlan_macip_lens = 0;
   8089	u32 fceof_saidx = 0;
   8090	u32 type_tucmd = 0;
   8091
   8092	if (skb->ip_summed != CHECKSUM_PARTIAL) {
   8093csum_failed:
   8094		if (!(first->tx_flags & (IXGBE_TX_FLAGS_HW_VLAN |
   8095					 IXGBE_TX_FLAGS_CC)))
   8096			return;
   8097		goto no_csum;
   8098	}
   8099
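        	/* The switch below keys on the checksum field offset supplied by
        	 * the stack to infer the L4 protocol: check sits at offset 16 in
        	 * struct tcphdr and offset 6 in struct udphdr, and the CRC field
        	 * sits at offset 8 in struct sctphdr.
        	 */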
   8100	switch (skb->csum_offset) {
   8101	case offsetof(struct tcphdr, check):
   8102		type_tucmd = IXGBE_ADVTXD_TUCMD_L4T_TCP;
   8103		fallthrough;
   8104	case offsetof(struct udphdr, check):
   8105		break;
   8106	case offsetof(struct sctphdr, checksum):
   8107		/* validate that this is actually an SCTP request */
   8108		if (skb_csum_is_sctp(skb)) {
   8109			type_tucmd = IXGBE_ADVTXD_TUCMD_L4T_SCTP;
   8110			break;
   8111		}
   8112		fallthrough;
   8113	default:
   8114		skb_checksum_help(skb);
   8115		goto csum_failed;
   8116	}
   8117
   8118	/* update TX checksum flag */
   8119	first->tx_flags |= IXGBE_TX_FLAGS_CSUM;
   8120	vlan_macip_lens = skb_checksum_start_offset(skb) -
   8121			  skb_network_offset(skb);
   8122no_csum:
   8123	/* vlan_macip_lens: MACLEN, VLAN tag */
   8124	vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT;
   8125	vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
   8126
   8127	fceof_saidx |= itd->sa_idx;
   8128	type_tucmd |= itd->flags | itd->trailer_len;
   8129
   8130	ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx, type_tucmd, 0);
   8131}
   8132
   8133#define IXGBE_SET_FLAG(_input, _flag, _result) \
   8134	((_flag <= _result) ? \
   8135	 ((u32)(_input & _flag) * (_result / _flag)) : \
   8136	 ((u32)(_input & _flag) / (_flag / _result)))
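
        /* Illustrative note: IXGBE_SET_FLAG() moves a single-bit flag from its
         * position in _input to the bit position of _result by multiplying or
         * dividing by the ratio of the two masks; the ternary is resolved at
         * compile time for constant masks.  For example,
         * IXGBE_SET_FLAG(tx_flags, IXGBE_TX_FLAGS_TSO, IXGBE_ADVTXD_DCMD_TSE)
         * evaluates to IXGBE_ADVTXD_DCMD_TSE when the TSO flag is set and to 0
         * otherwise, without a runtime branch.
         */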
   8137
   8138static u32 ixgbe_tx_cmd_type(struct sk_buff *skb, u32 tx_flags)
   8139{
   8140	/* set type for advanced descriptor with frame checksum insertion */
   8141	u32 cmd_type = IXGBE_ADVTXD_DTYP_DATA |
   8142		       IXGBE_ADVTXD_DCMD_DEXT |
   8143		       IXGBE_ADVTXD_DCMD_IFCS;
   8144
   8145	/* set HW vlan bit if vlan is present */
   8146	cmd_type |= IXGBE_SET_FLAG(tx_flags, IXGBE_TX_FLAGS_HW_VLAN,
   8147				   IXGBE_ADVTXD_DCMD_VLE);
   8148
   8149	/* set segmentation enable bits for TSO/FSO */
   8150	cmd_type |= IXGBE_SET_FLAG(tx_flags, IXGBE_TX_FLAGS_TSO,
   8151				   IXGBE_ADVTXD_DCMD_TSE);
   8152
   8153	/* set timestamp bit if present */
   8154	cmd_type |= IXGBE_SET_FLAG(tx_flags, IXGBE_TX_FLAGS_TSTAMP,
   8155				   IXGBE_ADVTXD_MAC_TSTAMP);
   8156
    8157	/* clear the FCS insertion bit again if the skb requests no FCS */
   8158	cmd_type ^= IXGBE_SET_FLAG(skb->no_fcs, 1, IXGBE_ADVTXD_DCMD_IFCS);
   8159
   8160	return cmd_type;
   8161}
   8162
   8163static void ixgbe_tx_olinfo_status(union ixgbe_adv_tx_desc *tx_desc,
   8164				   u32 tx_flags, unsigned int paylen)
   8165{
   8166	u32 olinfo_status = paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   8167
   8168	/* enable L4 checksum for TSO and TX checksum offload */
   8169	olinfo_status |= IXGBE_SET_FLAG(tx_flags,
   8170					IXGBE_TX_FLAGS_CSUM,
   8171					IXGBE_ADVTXD_POPTS_TXSM);
   8172
   8173	/* enable IPv4 checksum for TSO */
   8174	olinfo_status |= IXGBE_SET_FLAG(tx_flags,
   8175					IXGBE_TX_FLAGS_IPV4,
   8176					IXGBE_ADVTXD_POPTS_IXSM);
   8177
   8178	/* enable IPsec */
   8179	olinfo_status |= IXGBE_SET_FLAG(tx_flags,
   8180					IXGBE_TX_FLAGS_IPSEC,
   8181					IXGBE_ADVTXD_POPTS_IPSEC);
   8182
   8183	/*
   8184	 * Check Context must be set if Tx switch is enabled, which it
    8185	 * always is when virtual functions are running
   8186	 */
   8187	olinfo_status |= IXGBE_SET_FLAG(tx_flags,
   8188					IXGBE_TX_FLAGS_CC,
   8189					IXGBE_ADVTXD_CC);
   8190
   8191	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
   8192}
   8193
   8194static int __ixgbe_maybe_stop_tx(struct ixgbe_ring *tx_ring, u16 size)
   8195{
   8196	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
   8197
   8198	/* Herbert's original patch had:
   8199	 *  smp_mb__after_netif_stop_queue();
   8200	 * but since that doesn't exist yet, just open code it.
   8201	 */
   8202	smp_mb();
   8203
    8204	/* We need to check again in case another CPU has just
   8205	 * made room available.
   8206	 */
   8207	if (likely(ixgbe_desc_unused(tx_ring) < size))
   8208		return -EBUSY;
   8209
   8210	/* A reprieve! - use start_queue because it doesn't call schedule */
   8211	netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
   8212	++tx_ring->tx_stats.restart_queue;
   8213	return 0;
   8214}
   8215
   8216static inline int ixgbe_maybe_stop_tx(struct ixgbe_ring *tx_ring, u16 size)
   8217{
   8218	if (likely(ixgbe_desc_unused(tx_ring) >= size))
   8219		return 0;
   8220
   8221	return __ixgbe_maybe_stop_tx(tx_ring, size);
   8222}
   8223
   8224static int ixgbe_tx_map(struct ixgbe_ring *tx_ring,
   8225			struct ixgbe_tx_buffer *first,
   8226			const u8 hdr_len)
   8227{
   8228	struct sk_buff *skb = first->skb;
   8229	struct ixgbe_tx_buffer *tx_buffer;
   8230	union ixgbe_adv_tx_desc *tx_desc;
   8231	skb_frag_t *frag;
   8232	dma_addr_t dma;
   8233	unsigned int data_len, size;
   8234	u32 tx_flags = first->tx_flags;
   8235	u32 cmd_type = ixgbe_tx_cmd_type(skb, tx_flags);
   8236	u16 i = tx_ring->next_to_use;
   8237
   8238	tx_desc = IXGBE_TX_DESC(tx_ring, i);
   8239
   8240	ixgbe_tx_olinfo_status(tx_desc, tx_flags, skb->len - hdr_len);
   8241
   8242	size = skb_headlen(skb);
   8243	data_len = skb->data_len;
   8244
   8245#ifdef IXGBE_FCOE
   8246	if (tx_flags & IXGBE_TX_FLAGS_FCOE) {
   8247		if (data_len < sizeof(struct fcoe_crc_eof)) {
   8248			size -= sizeof(struct fcoe_crc_eof) - data_len;
   8249			data_len = 0;
   8250		} else {
   8251			data_len -= sizeof(struct fcoe_crc_eof);
   8252		}
   8253	}
   8254
   8255#endif
   8256	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
   8257
   8258	tx_buffer = first;
   8259
   8260	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
   8261		if (dma_mapping_error(tx_ring->dev, dma))
   8262			goto dma_error;
   8263
   8264		/* record length, and DMA address */
   8265		dma_unmap_len_set(tx_buffer, len, size);
   8266		dma_unmap_addr_set(tx_buffer, dma, dma);
   8267
   8268		tx_desc->read.buffer_addr = cpu_to_le64(dma);
   8269
   8270		while (unlikely(size > IXGBE_MAX_DATA_PER_TXD)) {
   8271			tx_desc->read.cmd_type_len =
   8272				cpu_to_le32(cmd_type ^ IXGBE_MAX_DATA_PER_TXD);
   8273
   8274			i++;
   8275			tx_desc++;
   8276			if (i == tx_ring->count) {
   8277				tx_desc = IXGBE_TX_DESC(tx_ring, 0);
   8278				i = 0;
   8279			}
   8280			tx_desc->read.olinfo_status = 0;
   8281
   8282			dma += IXGBE_MAX_DATA_PER_TXD;
   8283			size -= IXGBE_MAX_DATA_PER_TXD;
   8284
   8285			tx_desc->read.buffer_addr = cpu_to_le64(dma);
   8286		}
   8287
   8288		if (likely(!data_len))
   8289			break;
   8290
   8291		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size);
   8292
   8293		i++;
   8294		tx_desc++;
   8295		if (i == tx_ring->count) {
   8296			tx_desc = IXGBE_TX_DESC(tx_ring, 0);
   8297			i = 0;
   8298		}
   8299		tx_desc->read.olinfo_status = 0;
   8300
   8301#ifdef IXGBE_FCOE
   8302		size = min_t(unsigned int, data_len, skb_frag_size(frag));
   8303#else
   8304		size = skb_frag_size(frag);
   8305#endif
   8306		data_len -= size;
   8307
   8308		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
   8309				       DMA_TO_DEVICE);
   8310
   8311		tx_buffer = &tx_ring->tx_buffer_info[i];
   8312	}
   8313
   8314	/* write last descriptor with RS and EOP bits */
   8315	cmd_type |= size | IXGBE_TXD_CMD;
   8316	tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
   8317
   8318	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
   8319
   8320	/* set the timestamp */
   8321	first->time_stamp = jiffies;
   8322
   8323	skb_tx_timestamp(skb);
   8324
   8325	/*
   8326	 * Force memory writes to complete before letting h/w know there
   8327	 * are new descriptors to fetch.  (Only applicable for weak-ordered
   8328	 * memory model archs, such as IA-64).
   8329	 *
   8330	 * We also need this memory barrier to make certain all of the
   8331	 * status bits have been updated before next_to_watch is written.
   8332	 */
   8333	wmb();
   8334
   8335	/* set next_to_watch value indicating a packet is present */
   8336	first->next_to_watch = tx_desc;
   8337
   8338	i++;
   8339	if (i == tx_ring->count)
   8340		i = 0;
   8341
   8342	tx_ring->next_to_use = i;
   8343
   8344	ixgbe_maybe_stop_tx(tx_ring, DESC_NEEDED);
   8345
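        	/* Only ring the doorbell (tail register write) when the stack has
        	 * no further frames queued for us (netdev_xmit_more() is false) or
        	 * the queue has been stopped; otherwise the MMIO write is batched
        	 * with the next frame.
        	 */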
    8346	if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more())
    8347		writel(i, tx_ring->tail);
   8349
   8350	return 0;
   8351dma_error:
   8352	dev_err(tx_ring->dev, "TX DMA map failed\n");
   8353
   8354	/* clear dma mappings for failed tx_buffer_info map */
   8355	for (;;) {
   8356		tx_buffer = &tx_ring->tx_buffer_info[i];
   8357		if (dma_unmap_len(tx_buffer, len))
   8358			dma_unmap_page(tx_ring->dev,
   8359				       dma_unmap_addr(tx_buffer, dma),
   8360				       dma_unmap_len(tx_buffer, len),
   8361				       DMA_TO_DEVICE);
   8362		dma_unmap_len_set(tx_buffer, len, 0);
   8363		if (tx_buffer == first)
   8364			break;
   8365		if (i == 0)
   8366			i += tx_ring->count;
   8367		i--;
   8368	}
   8369
   8370	dev_kfree_skb_any(first->skb);
   8371	first->skb = NULL;
   8372
   8373	tx_ring->next_to_use = i;
   8374
   8375	return -1;
   8376}
   8377
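        /* ixgbe_atr - sample Tx TCP flows for Flow Director.
         * A signature filter is added for sampled flows so that receive
         * traffic for the same connection is steered to the queue paired
         * with this Tx ring.  SYN segments are always sampled; other
         * segments are sampled once every atr_sample_rate packets.
         */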
   8378static void ixgbe_atr(struct ixgbe_ring *ring,
   8379		      struct ixgbe_tx_buffer *first)
   8380{
   8381	struct ixgbe_q_vector *q_vector = ring->q_vector;
   8382	union ixgbe_atr_hash_dword input = { .dword = 0 };
   8383	union ixgbe_atr_hash_dword common = { .dword = 0 };
   8384	union {
   8385		unsigned char *network;
   8386		struct iphdr *ipv4;
   8387		struct ipv6hdr *ipv6;
   8388	} hdr;
   8389	struct tcphdr *th;
   8390	unsigned int hlen;
   8391	struct sk_buff *skb;
   8392	__be16 vlan_id;
   8393	int l4_proto;
   8394
    8395	/* if ring doesn't have an interrupt vector, cannot perform ATR */
   8396	if (!q_vector)
   8397		return;
   8398
   8399	/* do nothing if sampling is disabled */
   8400	if (!ring->atr_sample_rate)
   8401		return;
   8402
   8403	ring->atr_count++;
   8404
   8405	/* currently only IPv4/IPv6 with TCP is supported */
   8406	if ((first->protocol != htons(ETH_P_IP)) &&
   8407	    (first->protocol != htons(ETH_P_IPV6)))
   8408		return;
   8409
   8410	/* snag network header to get L4 type and address */
   8411	skb = first->skb;
   8412	hdr.network = skb_network_header(skb);
   8413	if (unlikely(hdr.network <= skb->data))
   8414		return;
   8415	if (skb->encapsulation &&
   8416	    first->protocol == htons(ETH_P_IP) &&
   8417	    hdr.ipv4->protocol == IPPROTO_UDP) {
   8418		struct ixgbe_adapter *adapter = q_vector->adapter;
   8419
   8420		if (unlikely(skb_tail_pointer(skb) < hdr.network +
   8421			     VXLAN_HEADROOM))
   8422			return;
   8423
   8424		/* verify the port is recognized as VXLAN */
   8425		if (adapter->vxlan_port &&
   8426		    udp_hdr(skb)->dest == adapter->vxlan_port)
   8427			hdr.network = skb_inner_network_header(skb);
   8428
   8429		if (adapter->geneve_port &&
   8430		    udp_hdr(skb)->dest == adapter->geneve_port)
   8431			hdr.network = skb_inner_network_header(skb);
   8432	}
   8433
    8434	/* Make sure we have at least [minimum IPv4 header + TCP]
    8435	 * or [IPv6 header] bytes (40 bytes in either case: a 20 byte
        	 * IPv4 header plus a 20 byte TCP header, or the fixed 40 byte
        	 * IPv6 header)
    8436	 */
   8437	if (unlikely(skb_tail_pointer(skb) < hdr.network + 40))
   8438		return;
   8439
   8440	/* Currently only IPv4/IPv6 with TCP is supported */
   8441	switch (hdr.ipv4->version) {
   8442	case IPVERSION:
   8443		/* access ihl as u8 to avoid unaligned access on ia64 */
   8444		hlen = (hdr.network[0] & 0x0F) << 2;
   8445		l4_proto = hdr.ipv4->protocol;
   8446		break;
   8447	case 6:
   8448		hlen = hdr.network - skb->data;
   8449		l4_proto = ipv6_find_hdr(skb, &hlen, IPPROTO_TCP, NULL, NULL);
   8450		hlen -= hdr.network - skb->data;
   8451		break;
   8452	default:
   8453		return;
   8454	}
   8455
   8456	if (l4_proto != IPPROTO_TCP)
   8457		return;
   8458
   8459	if (unlikely(skb_tail_pointer(skb) < hdr.network +
   8460		     hlen + sizeof(struct tcphdr)))
   8461		return;
   8462
   8463	th = (struct tcphdr *)(hdr.network + hlen);
   8464
   8465	/* skip this packet since the socket is closing */
   8466	if (th->fin)
   8467		return;
   8468
   8469	/* sample on all syn packets or once every atr sample count */
   8470	if (!th->syn && (ring->atr_count < ring->atr_sample_rate))
   8471		return;
   8472
   8473	/* reset sample count */
   8474	ring->atr_count = 0;
   8475
   8476	vlan_id = htons(first->tx_flags >> IXGBE_TX_FLAGS_VLAN_SHIFT);
   8477
   8478	/*
   8479	 * src and dst are inverted, think how the receiver sees them
   8480	 *
   8481	 * The input is broken into two sections, a non-compressed section
   8482	 * containing vm_pool, vlan_id, and flow_type.  The rest of the data
   8483	 * is XORed together and stored in the compressed dword.
   8484	 */
   8485	input.formatted.vlan_id = vlan_id;
   8486
   8487	/*
   8488	 * since src port and flex bytes occupy the same word XOR them together
   8489	 * and write the value to source port portion of compressed dword
   8490	 */
   8491	if (first->tx_flags & (IXGBE_TX_FLAGS_SW_VLAN | IXGBE_TX_FLAGS_HW_VLAN))
   8492		common.port.src ^= th->dest ^ htons(ETH_P_8021Q);
   8493	else
   8494		common.port.src ^= th->dest ^ first->protocol;
   8495	common.port.dst ^= th->source;
   8496
   8497	switch (hdr.ipv4->version) {
   8498	case IPVERSION:
   8499		input.formatted.flow_type = IXGBE_ATR_FLOW_TYPE_TCPV4;
   8500		common.ip ^= hdr.ipv4->saddr ^ hdr.ipv4->daddr;
   8501		break;
   8502	case 6:
   8503		input.formatted.flow_type = IXGBE_ATR_FLOW_TYPE_TCPV6;
   8504		common.ip ^= hdr.ipv6->saddr.s6_addr32[0] ^
   8505			     hdr.ipv6->saddr.s6_addr32[1] ^
   8506			     hdr.ipv6->saddr.s6_addr32[2] ^
   8507			     hdr.ipv6->saddr.s6_addr32[3] ^
   8508			     hdr.ipv6->daddr.s6_addr32[0] ^
   8509			     hdr.ipv6->daddr.s6_addr32[1] ^
   8510			     hdr.ipv6->daddr.s6_addr32[2] ^
   8511			     hdr.ipv6->daddr.s6_addr32[3];
   8512		break;
   8513	default:
   8514		break;
   8515	}
   8516
   8517	if (hdr.network != skb_network_header(skb))
   8518		input.formatted.flow_type |= IXGBE_ATR_L4TYPE_TUNNEL_MASK;
   8519
   8520	/* This assumes the Rx queue and Tx queue are bound to the same CPU */
   8521	ixgbe_fdir_add_signature_filter_82599(&q_vector->adapter->hw,
   8522					      input, common, ring->queue_index);
   8523}
   8524
   8525#ifdef IXGBE_FCOE
   8526static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
   8527			      struct net_device *sb_dev)
   8528{
   8529	struct ixgbe_adapter *adapter;
   8530	struct ixgbe_ring_feature *f;
   8531	int txq;
   8532
   8533	if (sb_dev) {
   8534		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
   8535		struct net_device *vdev = sb_dev;
   8536
   8537		txq = vdev->tc_to_txq[tc].offset;
   8538		txq += reciprocal_scale(skb_get_hash(skb),
   8539					vdev->tc_to_txq[tc].count);
   8540
   8541		return txq;
   8542	}
   8543
   8544	/*
   8545	 * only execute the code below if protocol is FCoE
   8546	 * or FIP and we have FCoE enabled on the adapter
   8547	 */
   8548	switch (vlan_get_protocol(skb)) {
   8549	case htons(ETH_P_FCOE):
   8550	case htons(ETH_P_FIP):
   8551		adapter = netdev_priv(dev);
   8552
   8553		if (!sb_dev && (adapter->flags & IXGBE_FLAG_FCOE_ENABLED))
   8554			break;
   8555		fallthrough;
   8556	default:
   8557		return netdev_pick_tx(dev, skb, sb_dev);
   8558	}
   8559
   8560	f = &adapter->ring_feature[RING_F_FCOE];
   8561
   8562	txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) :
   8563					   smp_processor_id();
   8564
   8565	while (txq >= f->indices)
   8566		txq -= f->indices;
   8567
   8568	return txq + f->offset;
   8569}
   8570
   8571#endif
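        /* ixgbe_xmit_xdp_ring - post an XDP frame (plus any frags) to an XDP
         * Tx ring.  Returns IXGBE_XDP_TX on success, or IXGBE_XDP_CONSUMED
         * when descriptors run short or a DMA mapping fails, in which case
         * any mappings already set up are unwound.
         */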
   8572int ixgbe_xmit_xdp_ring(struct ixgbe_ring *ring,
   8573			struct xdp_frame *xdpf)
   8574{
   8575	struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
   8576	u8 nr_frags = unlikely(xdp_frame_has_frags(xdpf)) ? sinfo->nr_frags : 0;
   8577	u16 i = 0, index = ring->next_to_use;
   8578	struct ixgbe_tx_buffer *tx_head = &ring->tx_buffer_info[index];
   8579	struct ixgbe_tx_buffer *tx_buff = tx_head;
   8580	union ixgbe_adv_tx_desc *tx_desc = IXGBE_TX_DESC(ring, index);
   8581	u32 cmd_type, len = xdpf->len;
   8582	void *data = xdpf->data;
   8583
   8584	if (unlikely(ixgbe_desc_unused(ring) < 1 + nr_frags))
   8585		return IXGBE_XDP_CONSUMED;
   8586
   8587	tx_head->bytecount = xdp_get_frame_len(xdpf);
   8588	tx_head->gso_segs = 1;
   8589	tx_head->xdpf = xdpf;
   8590
   8591	tx_desc->read.olinfo_status =
   8592		cpu_to_le32(tx_head->bytecount << IXGBE_ADVTXD_PAYLEN_SHIFT);
   8593
   8594	for (;;) {
   8595		dma_addr_t dma;
   8596
   8597		dma = dma_map_single(ring->dev, data, len, DMA_TO_DEVICE);
   8598		if (dma_mapping_error(ring->dev, dma))
   8599			goto unmap;
   8600
   8601		dma_unmap_len_set(tx_buff, len, len);
   8602		dma_unmap_addr_set(tx_buff, dma, dma);
   8603
   8604		cmd_type = IXGBE_ADVTXD_DTYP_DATA | IXGBE_ADVTXD_DCMD_DEXT |
   8605			   IXGBE_ADVTXD_DCMD_IFCS | len;
   8606		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
   8607		tx_desc->read.buffer_addr = cpu_to_le64(dma);
   8608		tx_buff->protocol = 0;
   8609
   8610		if (++index == ring->count)
   8611			index = 0;
   8612
   8613		if (i == nr_frags)
   8614			break;
   8615
   8616		tx_buff = &ring->tx_buffer_info[index];
   8617		tx_desc = IXGBE_TX_DESC(ring, index);
   8618		tx_desc->read.olinfo_status = 0;
   8619
   8620		data = skb_frag_address(&sinfo->frags[i]);
   8621		len = skb_frag_size(&sinfo->frags[i]);
   8622		i++;
   8623	}
   8624	/* put descriptor type bits */
   8625	tx_desc->read.cmd_type_len |= cpu_to_le32(IXGBE_TXD_CMD);
   8626
   8627	/* Avoid any potential race with xdp_xmit and cleanup */
   8628	smp_wmb();
   8629
   8630	tx_head->next_to_watch = tx_desc;
   8631	ring->next_to_use = index;
   8632
   8633	return IXGBE_XDP_TX;
   8634
   8635unmap:
   8636	for (;;) {
   8637		tx_buff = &ring->tx_buffer_info[index];
   8638		if (dma_unmap_len(tx_buff, len))
   8639			dma_unmap_page(ring->dev, dma_unmap_addr(tx_buff, dma),
   8640				       dma_unmap_len(tx_buff, len),
   8641				       DMA_TO_DEVICE);
   8642		dma_unmap_len_set(tx_buff, len, 0);
   8643		if (tx_buff == tx_head)
   8644			break;
   8645
   8646		if (!index)
   8647			index += ring->count;
   8648		index--;
   8649	}
   8650
   8651	return IXGBE_XDP_CONSUMED;
   8652}
   8653
   8654netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
   8655			  struct ixgbe_adapter *adapter,
   8656			  struct ixgbe_ring *tx_ring)
   8657{
   8658	struct ixgbe_tx_buffer *first;
   8659	int tso;
   8660	u32 tx_flags = 0;
   8661	unsigned short f;
   8662	u16 count = TXD_USE_COUNT(skb_headlen(skb));
   8663	struct ixgbe_ipsec_tx_data ipsec_tx = { 0 };
   8664	__be16 protocol = skb->protocol;
   8665	u8 hdr_len = 0;
   8666
   8667	/*
   8668	 * need: 1 descriptor per page * PAGE_SIZE/IXGBE_MAX_DATA_PER_TXD,
   8669	 *       + 1 desc for skb_headlen/IXGBE_MAX_DATA_PER_TXD,
   8670	 *       + 2 desc gap to keep tail from touching head,
   8671	 *       + 1 desc for context descriptor,
   8672	 * otherwise try next time
   8673	 */
   8674	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
   8675		count += TXD_USE_COUNT(skb_frag_size(
   8676						&skb_shinfo(skb)->frags[f]));
   8677
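        	/* The extra 3 descriptors reserved below are the context
        	 * descriptor plus the two-descriptor gap called out above;
        	 * TXD_USE_COUNT() rounds each buffer up to the number of
        	 * IXGBE_MAX_DATA_PER_TXD sized chunks it needs.
        	 */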
   8678	if (ixgbe_maybe_stop_tx(tx_ring, count + 3)) {
   8679		tx_ring->tx_stats.tx_busy++;
   8680		return NETDEV_TX_BUSY;
   8681	}
   8682
   8683	/* record the location of the first descriptor for this packet */
   8684	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
   8685	first->skb = skb;
   8686	first->bytecount = skb->len;
   8687	first->gso_segs = 1;
   8688
   8689	/* if we have a HW VLAN tag being added default to the HW one */
   8690	if (skb_vlan_tag_present(skb)) {
   8691		tx_flags |= skb_vlan_tag_get(skb) << IXGBE_TX_FLAGS_VLAN_SHIFT;
   8692		tx_flags |= IXGBE_TX_FLAGS_HW_VLAN;
   8693	/* else if it is a SW VLAN check the next protocol and store the tag */
   8694	} else if (protocol == htons(ETH_P_8021Q)) {
   8695		struct vlan_hdr *vhdr, _vhdr;
   8696		vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
   8697		if (!vhdr)
   8698			goto out_drop;
   8699
   8700		tx_flags |= ntohs(vhdr->h_vlan_TCI) <<
   8701				  IXGBE_TX_FLAGS_VLAN_SHIFT;
   8702		tx_flags |= IXGBE_TX_FLAGS_SW_VLAN;
   8703	}
   8704	protocol = vlan_get_protocol(skb);
   8705
   8706	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
   8707	    adapter->ptp_clock) {
   8708		if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON &&
   8709		    !test_and_set_bit_lock(__IXGBE_PTP_TX_IN_PROGRESS,
   8710					   &adapter->state)) {
   8711			skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
   8712			tx_flags |= IXGBE_TX_FLAGS_TSTAMP;
   8713
   8714			/* schedule check for Tx timestamp */
   8715			adapter->ptp_tx_skb = skb_get(skb);
   8716			adapter->ptp_tx_start = jiffies;
   8717			schedule_work(&adapter->ptp_tx_work);
   8718		} else {
   8719			adapter->tx_hwtstamp_skipped++;
   8720		}
   8721	}
   8722
   8723#ifdef CONFIG_PCI_IOV
   8724	/*
   8725	 * Use the l2switch_enable flag - would be false if the DMA
   8726	 * Tx switch had been disabled.
   8727	 */
   8728	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
   8729		tx_flags |= IXGBE_TX_FLAGS_CC;
   8730
   8731#endif
    8732	/* DCB maps skb priorities 0-7 onto the 3 bit PCP field of the VLAN tag. */
   8733	if ((adapter->flags & IXGBE_FLAG_DCB_ENABLED) &&
   8734	    ((tx_flags & (IXGBE_TX_FLAGS_HW_VLAN | IXGBE_TX_FLAGS_SW_VLAN)) ||
   8735	     (skb->priority != TC_PRIO_CONTROL))) {
   8736		tx_flags &= ~IXGBE_TX_FLAGS_VLAN_PRIO_MASK;
   8737		tx_flags |= (skb->priority & 0x7) <<
   8738					IXGBE_TX_FLAGS_VLAN_PRIO_SHIFT;
   8739		if (tx_flags & IXGBE_TX_FLAGS_SW_VLAN) {
   8740			struct vlan_ethhdr *vhdr;
   8741
   8742			if (skb_cow_head(skb, 0))
   8743				goto out_drop;
   8744			vhdr = (struct vlan_ethhdr *)skb->data;
   8745			vhdr->h_vlan_TCI = htons(tx_flags >>
   8746						 IXGBE_TX_FLAGS_VLAN_SHIFT);
   8747		} else {
   8748			tx_flags |= IXGBE_TX_FLAGS_HW_VLAN;
   8749		}
   8750	}
   8751
   8752	/* record initial flags and protocol */
   8753	first->tx_flags = tx_flags;
   8754	first->protocol = protocol;
   8755
   8756#ifdef IXGBE_FCOE
   8757	/* setup tx offload for FCoE */
   8758	if ((protocol == htons(ETH_P_FCOE)) &&
   8759	    (tx_ring->netdev->features & (NETIF_F_FSO | NETIF_F_FCOE_CRC))) {
   8760		tso = ixgbe_fso(tx_ring, first, &hdr_len);
   8761		if (tso < 0)
   8762			goto out_drop;
   8763
   8764		goto xmit_fcoe;
   8765	}
   8766
   8767#endif /* IXGBE_FCOE */
   8768
   8769#ifdef CONFIG_IXGBE_IPSEC
   8770	if (xfrm_offload(skb) &&
   8771	    !ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx))
   8772		goto out_drop;
   8773#endif
   8774	tso = ixgbe_tso(tx_ring, first, &hdr_len, &ipsec_tx);
   8775	if (tso < 0)
   8776		goto out_drop;
   8777	else if (!tso)
   8778		ixgbe_tx_csum(tx_ring, first, &ipsec_tx);
   8779
   8780	/* add the ATR filter if ATR is on */
   8781	if (test_bit(__IXGBE_TX_FDIR_INIT_DONE, &tx_ring->state))
   8782		ixgbe_atr(tx_ring, first);
   8783
   8784#ifdef IXGBE_FCOE
   8785xmit_fcoe:
   8786#endif /* IXGBE_FCOE */
   8787	if (ixgbe_tx_map(tx_ring, first, hdr_len))
   8788		goto cleanup_tx_timestamp;
   8789
   8790	return NETDEV_TX_OK;
   8791
   8792out_drop:
   8793	dev_kfree_skb_any(first->skb);
   8794	first->skb = NULL;
   8795cleanup_tx_timestamp:
   8796	if (unlikely(tx_flags & IXGBE_TX_FLAGS_TSTAMP)) {
   8797		dev_kfree_skb_any(adapter->ptp_tx_skb);
   8798		adapter->ptp_tx_skb = NULL;
   8799		cancel_work_sync(&adapter->ptp_tx_work);
   8800		clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state);
   8801	}
   8802
   8803	return NETDEV_TX_OK;
   8804}
   8805
   8806static netdev_tx_t __ixgbe_xmit_frame(struct sk_buff *skb,
   8807				      struct net_device *netdev,
   8808				      struct ixgbe_ring *ring)
   8809{
   8810	struct ixgbe_adapter *adapter = netdev_priv(netdev);
   8811	struct ixgbe_ring *tx_ring;
   8812
   8813	/*
    8814	 * The minimum packet size for olinfo paylen is 17, so pad the skb
    8815	 * to meet this minimum size requirement.
   8816	 */
   8817	if (skb_put_padto(skb, 17))
   8818		return NETDEV_TX_OK;
   8819
   8820	tx_ring = ring ? ring : adapter->tx_ring[skb_get_queue_mapping(skb)];
   8821	if (unlikely(test_bit(__IXGBE_TX_DISABLED, &tx_ring->state)))
   8822		return NETDEV_TX_BUSY;
   8823
   8824	return ixgbe_xmit_frame_ring(skb, adapter, tx_ring);
   8825}
   8826
   8827static netdev_tx_t ixgbe_xmit_frame(struct sk_buff *skb,
   8828				    struct net_device *netdev)
   8829{
   8830	return __ixgbe_xmit_frame(skb, netdev, NULL);
   8831}
   8832
   8833/**
   8834 * ixgbe_set_mac - Change the Ethernet Address of the NIC
   8835 * @netdev: network interface device structure
   8836 * @p: pointer to an address structure
   8837 *
   8838 * Returns 0 on success, negative on failure
   8839 **/
   8840static int ixgbe_set_mac(struct net_device *netdev, void *p)
   8841{
   8842	struct ixgbe_adapter *adapter = netdev_priv(netdev);
   8843	struct ixgbe_hw *hw = &adapter->hw;
   8844	struct sockaddr *addr = p;
   8845
   8846	if (!is_valid_ether_addr(addr->sa_data))
   8847		return -EADDRNOTAVAIL;
   8848
   8849	eth_hw_addr_set(netdev, addr->sa_data);
   8850	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
   8851
   8852	ixgbe_mac_set_default_filter(adapter);
   8853
   8854	return 0;
   8855}
   8856
   8857static int
   8858ixgbe_mdio_read(struct net_device *netdev, int prtad, int devad, u16 addr)
   8859{
   8860	struct ixgbe_adapter *adapter = netdev_priv(netdev);
   8861	struct ixgbe_hw *hw = &adapter->hw;
   8862	u16 value;
   8863	int rc;
   8864
   8865	if (adapter->mii_bus) {
   8866		int regnum = addr;
   8867
   8868		if (devad != MDIO_DEVAD_NONE)
   8869			regnum |= (devad << 16) | MII_ADDR_C45;
   8870
   8871		return mdiobus_read(adapter->mii_bus, prtad, regnum);
   8872	}
   8873
   8874	if (prtad != hw->phy.mdio.prtad)
   8875		return -EINVAL;
   8876	rc = hw->phy.ops.read_reg(hw, addr, devad, &value);
   8877	if (!rc)
   8878		rc = value;
   8879	return rc;
   8880}
   8881
   8882static int ixgbe_mdio_write(struct net_device *netdev, int prtad, int devad,
   8883			    u16 addr, u16 value)
   8884{
   8885	struct ixgbe_adapter *adapter = netdev_priv(netdev);
   8886	struct ixgbe_hw *hw = &adapter->hw;
   8887
   8888	if (adapter->mii_bus) {
   8889		int regnum = addr;
   8890
   8891		if (devad != MDIO_DEVAD_NONE)
   8892			regnum |= (devad << 16) | MII_ADDR_C45;
   8893
   8894		return mdiobus_write(adapter->mii_bus, prtad, regnum, value);
   8895	}
   8896
   8897	if (prtad != hw->phy.mdio.prtad)
   8898		return -EINVAL;
   8899	return hw->phy.ops.write_reg(hw, addr, devad, value);
   8900}
   8901
   8902static int ixgbe_ioctl(struct net_device *netdev, struct ifreq *req, int cmd)
   8903{
   8904	struct ixgbe_adapter *adapter = netdev_priv(netdev);
   8905
   8906	switch (cmd) {
   8907	case SIOCSHWTSTAMP:
   8908		return ixgbe_ptp_set_ts_config(adapter, req);
   8909	case SIOCGHWTSTAMP:
   8910		return ixgbe_ptp_get_ts_config(adapter, req);
   8911	case SIOCGMIIPHY:
   8912		if (!adapter->hw.phy.ops.read_reg)
   8913			return -EOPNOTSUPP;
   8914		fallthrough;
   8915	default:
   8916		return mdio_mii_ioctl(&adapter->hw.phy.mdio, if_mii(req), cmd);
   8917	}
   8918}
   8919
   8920/**
   8921 * ixgbe_add_sanmac_netdev - Add the SAN MAC address to the corresponding
   8922 * netdev->dev_addrs
   8923 * @dev: network interface device structure
   8924 *
   8925 * Returns non-zero on failure
   8926 **/
   8927static int ixgbe_add_sanmac_netdev(struct net_device *dev)
   8928{
   8929	int err = 0;
   8930	struct ixgbe_adapter *adapter = netdev_priv(dev);
   8931	struct ixgbe_hw *hw = &adapter->hw;
   8932
   8933	if (is_valid_ether_addr(hw->mac.san_addr)) {
   8934		rtnl_lock();
   8935		err = dev_addr_add(dev, hw->mac.san_addr, NETDEV_HW_ADDR_T_SAN);
   8936		rtnl_unlock();
   8937
   8938		/* update SAN MAC vmdq pool selection */
   8939		hw->mac.ops.set_vmdq_san_mac(hw, VMDQ_P(0));
   8940	}
   8941	return err;
   8942}
   8943
   8944/**
    8945 * ixgbe_del_sanmac_netdev - Removes the SAN MAC address from the corresponding
   8946 * netdev->dev_addrs
   8947 * @dev: network interface device structure
   8948 *
   8949 * Returns non-zero on failure
   8950 **/
   8951static int ixgbe_del_sanmac_netdev(struct net_device *dev)
   8952{
   8953	int err = 0;
   8954	struct ixgbe_adapter *adapter = netdev_priv(dev);
   8955	struct ixgbe_mac_info *mac = &adapter->hw.mac;
   8956
   8957	if (is_valid_ether_addr(mac->san_addr)) {
   8958		rtnl_lock();
   8959		err = dev_addr_del(dev, mac->san_addr, NETDEV_HW_ADDR_T_SAN);
   8960		rtnl_unlock();
   8961	}
   8962	return err;
   8963}
   8964
   8965static void ixgbe_get_ring_stats64(struct rtnl_link_stats64 *stats,
   8966				   struct ixgbe_ring *ring)
   8967{
   8968	u64 bytes, packets;
   8969	unsigned int start;
   8970
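        	/* The seqcount loop below re-reads the counters if a writer
        	 * updated them mid-read, keeping the 64-bit values consistent
        	 * even on 32-bit hosts where the loads are not atomic.
        	 */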
   8971	if (ring) {
   8972		do {
   8973			start = u64_stats_fetch_begin_irq(&ring->syncp);
   8974			packets = ring->stats.packets;
   8975			bytes   = ring->stats.bytes;
   8976		} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
   8977		stats->tx_packets += packets;
   8978		stats->tx_bytes   += bytes;
   8979	}
   8980}
   8981
   8982static void ixgbe_get_stats64(struct net_device *netdev,
   8983			      struct rtnl_link_stats64 *stats)
   8984{
   8985	struct ixgbe_adapter *adapter = netdev_priv(netdev);
   8986	int i;
   8987
   8988	rcu_read_lock();
   8989	for (i = 0; i < adapter->num_rx_queues; i++) {
   8990		struct ixgbe_ring *ring = READ_ONCE(adapter->rx_ring[i]);
   8991		u64 bytes, packets;
   8992		unsigned int start;
   8993
   8994		if (ring) {
   8995			do {
   8996				start = u64_stats_fetch_begin_irq(&ring->syncp);
   8997				packets = ring->stats.packets;
   8998				bytes   = ring->stats.bytes;
   8999			} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
   9000			stats->rx_packets += packets;
   9001			stats->rx_bytes   += bytes;
   9002		}
   9003	}
   9004
   9005	for (i = 0; i < adapter->num_tx_queues; i++) {
   9006		struct ixgbe_ring *ring = READ_ONCE(adapter->tx_ring[i]);
   9007
   9008		ixgbe_get_ring_stats64(stats, ring);
   9009	}
   9010	for (i = 0; i < adapter->num_xdp_queues; i++) {
   9011		struct ixgbe_ring *ring = READ_ONCE(adapter->xdp_ring[i]);
   9012
   9013		ixgbe_get_ring_stats64(stats, ring);
   9014	}
   9015	rcu_read_unlock();
   9016
   9017	/* following stats updated by ixgbe_watchdog_task() */
   9018	stats->multicast	= netdev->stats.multicast;
   9019	stats->rx_errors	= netdev->stats.rx_errors;
   9020	stats->rx_length_errors	= netdev->stats.rx_length_errors;
   9021	stats->rx_crc_errors	= netdev->stats.rx_crc_errors;
   9022	stats->rx_missed_errors	= netdev->stats.rx_missed_errors;
   9023}
   9024
   9025#ifdef CONFIG_IXGBE_DCB
   9026/**
   9027 * ixgbe_validate_rtr - verify 802.1Qp to Rx packet buffer mapping is valid.
   9028 * @adapter: pointer to ixgbe_adapter
   9029 * @tc: number of traffic classes currently enabled
   9030 *
    9031	 * Configure a valid 802.1Qp to Rx packet buffer mapping, i.e. confirm
    9032	 * each 802.1Q priority maps to a packet buffer that exists.
   9033 */
   9034static void ixgbe_validate_rtr(struct ixgbe_adapter *adapter, u8 tc)
   9035{
   9036	struct ixgbe_hw *hw = &adapter->hw;
   9037	u32 reg, rsave;
   9038	int i;
   9039
    9040	/* The 82598 has a static priority to TC mapping that cannot
    9041	 * be changed, so no validation is needed.
   9042	 */
   9043	if (hw->mac.type == ixgbe_mac_82598EB)
   9044		return;
   9045
   9046	reg = IXGBE_READ_REG(hw, IXGBE_RTRUP2TC);
   9047	rsave = reg;
   9048
   9049	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
   9050		u8 up2tc = reg >> (i * IXGBE_RTRUP2TC_UP_SHIFT);
   9051
   9052		/* If up2tc is out of bounds default to zero */
   9053		if (up2tc > tc)
   9054			reg &= ~(0x7 << IXGBE_RTRUP2TC_UP_SHIFT);
   9055	}
   9056
   9057	if (reg != rsave)
   9058		IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, reg);
   9059
   9060	return;
   9061}
   9062
   9063/**
   9064 * ixgbe_set_prio_tc_map - Configure netdev prio tc map
   9065 * @adapter: Pointer to adapter struct
   9066 *
   9067 * Populate the netdev user priority to tc map
   9068 */
   9069static void ixgbe_set_prio_tc_map(struct ixgbe_adapter *adapter)
   9070{
   9071	struct net_device *dev = adapter->netdev;
   9072	struct ixgbe_dcb_config *dcb_cfg = &adapter->dcb_cfg;
   9073	struct ieee_ets *ets = adapter->ixgbe_ieee_ets;
   9074	u8 prio;
   9075
   9076	for (prio = 0; prio < MAX_USER_PRIORITY; prio++) {
   9077		u8 tc = 0;
   9078
   9079		if (adapter->dcbx_cap & DCB_CAP_DCBX_VER_CEE)
   9080			tc = ixgbe_dcb_get_tc_from_up(dcb_cfg, 0, prio);
   9081		else if (ets)
   9082			tc = ets->prio_tc[prio];
   9083
   9084		netdev_set_prio_tc_map(dev, prio, tc);
   9085	}
   9086}
   9087
   9088#endif /* CONFIG_IXGBE_DCB */
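        /* ixgbe_reassign_macvlan_pool - callback for
         * netdev_walk_all_upper_dev_rcu(), used by ixgbe_defrag_macvlan_pools():
         * each offloaded macvlan is moved to the lowest free pool bit, and if
         * no pool is left its L2FW offload is torn down.
         */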
   9089static int ixgbe_reassign_macvlan_pool(struct net_device *vdev,
   9090				       struct netdev_nested_priv *priv)
   9091{
   9092	struct ixgbe_adapter *adapter = (struct ixgbe_adapter *)priv->data;
   9093	struct ixgbe_fwd_adapter *accel;
   9094	int pool;
   9095
   9096	/* we only care about macvlans... */
   9097	if (!netif_is_macvlan(vdev))
   9098		return 0;
   9099
   9100	/* that have hardware offload enabled... */
   9101	accel = macvlan_accel_priv(vdev);
   9102	if (!accel)
   9103		return 0;
   9104
    9105	/* If we can relocate to a different bit, do so */
   9106	pool = find_first_zero_bit(adapter->fwd_bitmask, adapter->num_rx_pools);
   9107	if (pool < adapter->num_rx_pools) {
   9108		set_bit(pool, adapter->fwd_bitmask);
   9109		accel->pool = pool;
   9110		return 0;
   9111	}
   9112
   9113	/* if we cannot find a free pool then disable the offload */
   9114	netdev_err(vdev, "L2FW offload disabled due to lack of queue resources\n");
   9115	macvlan_release_l2fw_offload(vdev);
   9116
   9117	/* unbind the queues and drop the subordinate channel config */
   9118	netdev_unbind_sb_channel(adapter->netdev, vdev);
   9119	netdev_set_sb_channel(vdev, 0);
   9120
   9121	kfree(accel);
   9122
   9123	return 0;
   9124}
   9125
   9126static void ixgbe_defrag_macvlan_pools(struct net_device *dev)
   9127{
   9128	struct ixgbe_adapter *adapter = netdev_priv(dev);
   9129	struct netdev_nested_priv priv = {
   9130		.data = (void *)adapter,
   9131	};
   9132
   9133	/* flush any stale bits out of the fwd bitmask */
   9134	bitmap_clear(adapter->fwd_bitmask, 1, 63);
   9135
   9136	/* walk through upper devices reassigning pools */
   9137	netdev_walk_all_upper_dev_rcu(dev, ixgbe_reassign_macvlan_pool,
   9138				      &priv);
   9139}
   9140
   9141/**
   9142 * ixgbe_setup_tc - configure net_device for multiple traffic classes
   9143 *
   9144 * @dev: net device to configure
   9145 * @tc: number of traffic classes to enable
   9146 */
   9147int ixgbe_setup_tc(struct net_device *dev, u8 tc)
   9148{
   9149	struct ixgbe_adapter *adapter = netdev_priv(dev);
   9150	struct ixgbe_hw *hw = &adapter->hw;
   9151
   9152	/* Hardware supports up to 8 traffic classes */
   9153	if (tc > adapter->dcb_cfg.num_tcs.pg_tcs)
   9154		return -EINVAL;
   9155
   9156	if (hw->mac.type == ixgbe_mac_82598EB && tc && tc < MAX_TRAFFIC_CLASS)
   9157		return -EINVAL;
   9158
   9159	/* Hardware has to reinitialize queues and interrupts to
   9160	 * match packet buffer alignment. Unfortunately, the
   9161	 * hardware is not flexible enough to do this dynamically.
   9162	 */
   9163	if (netif_running(dev))
   9164		ixgbe_close(dev);
   9165	else
   9166		ixgbe_reset(adapter);
   9167
   9168	ixgbe_clear_interrupt_scheme(adapter);
   9169
   9170#ifdef CONFIG_IXGBE_DCB
   9171	if (tc) {
   9172		if (adapter->xdp_prog) {
   9173			e_warn(probe, "DCB is not supported with XDP\n");
   9174
   9175			ixgbe_init_interrupt_scheme(adapter);
   9176			if (netif_running(dev))
   9177				ixgbe_open(dev);
   9178			return -EINVAL;
   9179		}
   9180
   9181		netdev_set_num_tc(dev, tc);
   9182		ixgbe_set_prio_tc_map(adapter);
   9183
   9184		adapter->hw_tcs = tc;
   9185		adapter->flags |= IXGBE_FLAG_DCB_ENABLED;
   9186
   9187		if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
   9188			adapter->last_lfc_mode = adapter->hw.fc.requested_mode;
   9189			adapter->hw.fc.requested_mode = ixgbe_fc_none;
   9190		}
   9191	} else {
   9192		netdev_reset_tc(dev);
   9193
   9194		if (adapter->hw.mac.type == ixgbe_mac_82598EB)
   9195			adapter->hw.fc.requested_mode = adapter->last_lfc_mode;
   9196
   9197		adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
   9198		adapter->hw_tcs = tc;
   9199
   9200		adapter->temp_dcb_cfg.pfc_mode_enable = false;
   9201		adapter->dcb_cfg.pfc_mode_enable = false;
   9202	}
   9203
   9204	ixgbe_validate_rtr(adapter, tc);
   9205
   9206#endif /* CONFIG_IXGBE_DCB */
   9207	ixgbe_init_interrupt_scheme(adapter);
   9208
   9209	ixgbe_defrag_macvlan_pools(dev);
   9210
   9211	if (netif_running(dev))
   9212		return ixgbe_open(dev);
   9213
   9214	return 0;
   9215}
   9216
   9217static int ixgbe_delete_clsu32(struct ixgbe_adapter *adapter,
   9218			       struct tc_cls_u32_offload *cls)
   9219{
   9220	u32 hdl = cls->knode.handle;
   9221	u32 uhtid = TC_U32_USERHTID(cls->knode.handle);
   9222	u32 loc = cls->knode.handle & 0xfffff;
   9223	int err = 0, i, j;
   9224	struct ixgbe_jump_table *jump = NULL;
   9225
   9226	if (loc > IXGBE_MAX_HW_ENTRIES)
   9227		return -EINVAL;
   9228
   9229	if ((uhtid != 0x800) && (uhtid >= IXGBE_MAX_LINK_HANDLE))
   9230		return -EINVAL;
   9231
   9232	/* Clear this filter in the link data it is associated with */
   9233	if (uhtid != 0x800) {
   9234		jump = adapter->jump_tables[uhtid];
   9235		if (!jump)
   9236			return -EINVAL;
   9237		if (!test_bit(loc - 1, jump->child_loc_map))
   9238			return -EINVAL;
   9239		clear_bit(loc - 1, jump->child_loc_map);
   9240	}
   9241
   9242	/* Check if the filter being deleted is a link */
   9243	for (i = 1; i < IXGBE_MAX_LINK_HANDLE; i++) {
   9244		jump = adapter->jump_tables[i];
   9245		if (jump && jump->link_hdl == hdl) {
   9246			/* Delete filters in the hardware in the child hash
   9247			 * table associated with this link
   9248			 */
   9249			for (j = 0; j < IXGBE_MAX_HW_ENTRIES; j++) {
   9250				if (!test_bit(j, jump->child_loc_map))
   9251					continue;
   9252				spin_lock(&adapter->fdir_perfect_lock);
   9253				err = ixgbe_update_ethtool_fdir_entry(adapter,
   9254								      NULL,
   9255								      j + 1);
   9256				spin_unlock(&adapter->fdir_perfect_lock);
   9257				clear_bit(j, jump->child_loc_map);
   9258			}
   9259			/* Remove resources for this link */
   9260			kfree(jump->input);
   9261			kfree(jump->mask);
   9262			kfree(jump);
   9263			adapter->jump_tables[i] = NULL;
   9264			return err;
   9265		}
   9266	}
   9267
   9268	spin_lock(&adapter->fdir_perfect_lock);
   9269	err = ixgbe_update_ethtool_fdir_entry(adapter, NULL, loc);
   9270	spin_unlock(&adapter->fdir_perfect_lock);
   9271	return err;
   9272}
   9273
   9274static int ixgbe_configure_clsu32_add_hnode(struct ixgbe_adapter *adapter,
   9275					    struct tc_cls_u32_offload *cls)
   9276{
   9277	u32 uhtid = TC_U32_USERHTID(cls->hnode.handle);
   9278
   9279	if (uhtid >= IXGBE_MAX_LINK_HANDLE)
   9280		return -EINVAL;
   9281
    9282	/* These ixgbe devices do not support hash tables at the moment,
    9283	 * so abort when given hash tables.
   9284	 */
   9285	if (cls->hnode.divisor > 0)
   9286		return -EINVAL;
   9287
   9288	set_bit(uhtid - 1, &adapter->tables);
   9289	return 0;
   9290}
   9291
   9292static int ixgbe_configure_clsu32_del_hnode(struct ixgbe_adapter *adapter,
   9293					    struct tc_cls_u32_offload *cls)
   9294{
   9295	u32 uhtid = TC_U32_USERHTID(cls->hnode.handle);
   9296
   9297	if (uhtid >= IXGBE_MAX_LINK_HANDLE)
   9298		return -EINVAL;
   9299
   9300	clear_bit(uhtid - 1, &adapter->tables);
   9301	return 0;
   9302}
   9303
   9304#ifdef CONFIG_NET_CLS_ACT
   9305struct upper_walk_data {
   9306	struct ixgbe_adapter *adapter;
   9307	u64 action;
   9308	int ifindex;
   9309	u8 queue;
   9310};
   9311
   9312static int get_macvlan_queue(struct net_device *upper,
   9313			     struct netdev_nested_priv *priv)
   9314{
   9315	if (netif_is_macvlan(upper)) {
   9316		struct ixgbe_fwd_adapter *vadapter = macvlan_accel_priv(upper);
   9317		struct ixgbe_adapter *adapter;
   9318		struct upper_walk_data *data;
   9319		int ifindex;
   9320
   9321		data = (struct upper_walk_data *)priv->data;
   9322		ifindex = data->ifindex;
   9323		adapter = data->adapter;
   9324		if (vadapter && upper->ifindex == ifindex) {
   9325			data->queue = adapter->rx_ring[vadapter->rx_base_queue]->reg_idx;
   9326			data->action = data->queue;
   9327			return 1;
   9328		}
   9329	}
   9330
   9331	return 0;
   9332}
   9333
   9334static int handle_redirect_action(struct ixgbe_adapter *adapter, int ifindex,
   9335				  u8 *queue, u64 *action)
   9336{
   9337	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
   9338	unsigned int num_vfs = adapter->num_vfs, vf;
   9339	struct netdev_nested_priv priv;
   9340	struct upper_walk_data data;
   9341	struct net_device *upper;
   9342
   9343	/* redirect to a SRIOV VF */
   9344	for (vf = 0; vf < num_vfs; ++vf) {
   9345		upper = pci_get_drvdata(adapter->vfinfo[vf].vfdev);
   9346		if (upper->ifindex == ifindex) {
   9347			*queue = vf * __ALIGN_MASK(1, ~vmdq->mask);
   9348			*action = vf + 1;
   9349			*action <<= ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF;
   9350			return 0;
   9351		}
   9352	}
   9353
    9354	/* redirect to an offloaded macvlan netdev */
   9355	data.adapter = adapter;
   9356	data.ifindex = ifindex;
   9357	data.action = 0;
   9358	data.queue = 0;
   9359	priv.data = (void *)&data;
   9360	if (netdev_walk_all_upper_dev_rcu(adapter->netdev,
   9361					  get_macvlan_queue, &priv)) {
   9362		*action = data.action;
   9363		*queue = data.queue;
   9364
   9365		return 0;
   9366	}
   9367
   9368	return -EINVAL;
   9369}
   9370
   9371static int parse_tc_actions(struct ixgbe_adapter *adapter,
   9372			    struct tcf_exts *exts, u64 *action, u8 *queue)
   9373{
   9374	const struct tc_action *a;
   9375	int i;
   9376
   9377	if (!tcf_exts_has_actions(exts))
   9378		return -EINVAL;
   9379
   9380	tcf_exts_for_each_action(i, a, exts) {
   9381		/* Drop action */
   9382		if (is_tcf_gact_shot(a)) {
   9383			*action = IXGBE_FDIR_DROP_QUEUE;
   9384			*queue = IXGBE_FDIR_DROP_QUEUE;
   9385			return 0;
   9386		}
   9387
    9388		/* Redirect to a VF or an offloaded macvlan */
   9389		if (is_tcf_mirred_egress_redirect(a)) {
   9390			struct net_device *dev = tcf_mirred_dev(a);
   9391
   9392			if (!dev)
   9393				return -EINVAL;
   9394			return handle_redirect_action(adapter, dev->ifindex,
   9395						      queue, action);
   9396		}
   9397
   9398		return -EINVAL;
   9399	}
   9400
   9401	return -EINVAL;
   9402}
   9403#else
   9404static int parse_tc_actions(struct ixgbe_adapter *adapter,
   9405			    struct tcf_exts *exts, u64 *action, u8 *queue)
   9406{
   9407	return -EINVAL;
   9408}
   9409#endif /* CONFIG_NET_CLS_ACT */
   9410
   9411static int ixgbe_clsu32_build_input(struct ixgbe_fdir_filter *input,
   9412				    union ixgbe_atr_input *mask,
   9413				    struct tc_cls_u32_offload *cls,
   9414				    struct ixgbe_mat_field *field_ptr,
   9415				    struct ixgbe_nexthdr *nexthdr)
   9416{
   9417	int i, j, off;
   9418	__be32 val, m;
   9419	bool found_entry = false, found_jump_field = false;
   9420
   9421	for (i = 0; i < cls->knode.sel->nkeys; i++) {
   9422		off = cls->knode.sel->keys[i].off;
   9423		val = cls->knode.sel->keys[i].val;
   9424		m = cls->knode.sel->keys[i].mask;
   9425
   9426		for (j = 0; field_ptr[j].val; j++) {
   9427			if (field_ptr[j].off == off) {
   9428				field_ptr[j].val(input, mask, (__force u32)val,
   9429						 (__force u32)m);
   9430				input->filter.formatted.flow_type |=
   9431					field_ptr[j].type;
   9432				found_entry = true;
   9433				break;
   9434			}
   9435		}
   9436		if (nexthdr) {
   9437			if (nexthdr->off == cls->knode.sel->keys[i].off &&
   9438			    nexthdr->val ==
   9439			    (__force u32)cls->knode.sel->keys[i].val &&
   9440			    nexthdr->mask ==
   9441			    (__force u32)cls->knode.sel->keys[i].mask)
   9442				found_jump_field = true;
   9443			else
   9444				continue;
   9445		}
   9446	}
   9447
   9448	if (nexthdr && !found_jump_field)
   9449		return -EINVAL;
   9450
   9451	if (!found_entry)
   9452		return 0;
   9453
   9454	mask->formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK |
   9455				    IXGBE_ATR_L4TYPE_MASK;
   9456
   9457	if (input->filter.formatted.flow_type == IXGBE_ATR_FLOW_TYPE_IPV4)
   9458		mask->formatted.flow_type &= IXGBE_ATR_L4TYPE_IPV6_MASK;
   9459
   9460	return 0;
   9461}
   9462
   9463static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter,
   9464				  struct tc_cls_u32_offload *cls)
   9465{
   9466	__be16 protocol = cls->common.protocol;
   9467	u32 loc = cls->knode.handle & 0xfffff;
   9468	struct ixgbe_hw *hw = &adapter->hw;
   9469	struct ixgbe_mat_field *field_ptr;
   9470	struct ixgbe_fdir_filter *input = NULL;
   9471	union ixgbe_atr_input *mask = NULL;
   9472	struct ixgbe_jump_table *jump = NULL;
   9473	int i, err = -EINVAL;
   9474	u8 queue;
   9475	u32 uhtid, link_uhtid;
   9476
   9477	uhtid = TC_U32_USERHTID(cls->knode.handle);
   9478	link_uhtid = TC_U32_USERHTID(cls->knode.link_handle);
   9479
    9480	/* At the moment cls_u32 jumps to the network layer and skips past
    9481	 * L2 headers. The canonical method to match L2 frames is to use
    9482	 * negative values. However, this is error prone at best and really
    9483	 * just broken, because there is no way to "know" what sort of header
   9484	 * is in front of the network layer. Fix cls_u32 to support L2
   9485	 * headers when needed.
   9486	 */
   9487	if (protocol != htons(ETH_P_IP))
   9488		return err;
   9489
   9490	if (loc >= ((1024 << adapter->fdir_pballoc) - 2)) {
   9491		e_err(drv, "Location out of range\n");
   9492		return err;
   9493	}
   9494
   9495	/* cls u32 is a graph starting at root node 0x800. The driver tracks
   9496	 * links and also the fields used to advance the parser across each
   9497	 * link (e.g. nexthdr/eat parameters from 'tc'). This way we can map
    9498	 * the u32 graph onto the hardware parse graph denoted in ixgbe_model.h.
    9499	 * To add support for new nodes, update the ixgbe_model.h parse structures;
    9500	 * this function _should_ be generic, so try not to hardcode values here.
   9501	 */
   9502	if (uhtid == 0x800) {
   9503		field_ptr = (adapter->jump_tables[0])->mat;
   9504	} else {
   9505		if (uhtid >= IXGBE_MAX_LINK_HANDLE)
   9506			return err;
   9507		if (!adapter->jump_tables[uhtid])
   9508			return err;
   9509		field_ptr = (adapter->jump_tables[uhtid])->mat;
   9510	}
   9511
   9512	if (!field_ptr)
   9513		return err;
   9514
   9515	/* At this point we know the field_ptr is valid and need to either
    9516	 * build a cls_u32 link or attach a filter, because adding a link to
    9517	 * a handle that does not exist is invalid, and the same goes for
    9518	 * adding rules to handles that don't exist.
   9519	 */
   9520
   9521	if (link_uhtid) {
   9522		struct ixgbe_nexthdr *nexthdr = ixgbe_ipv4_jumps;
   9523
   9524		if (link_uhtid >= IXGBE_MAX_LINK_HANDLE)
   9525			return err;
   9526
   9527		if (!test_bit(link_uhtid - 1, &adapter->tables))
   9528			return err;
   9529
   9530		/* Multiple filters as links to the same hash table are not
   9531		 * supported. To add a new filter with the same next header
   9532		 * but different match/jump conditions, create a new hash table
   9533		 * and link to it.
   9534		 */
   9535		if (adapter->jump_tables[link_uhtid] &&
   9536		    (adapter->jump_tables[link_uhtid])->link_hdl) {
   9537			e_err(drv, "Link filter exists for link: %x\n",
   9538			      link_uhtid);
   9539			return err;
   9540		}
   9541
   9542		for (i = 0; nexthdr[i].jump; i++) {
   9543			if (nexthdr[i].o != cls->knode.sel->offoff ||
   9544			    nexthdr[i].s != cls->knode.sel->offshift ||
   9545			    nexthdr[i].m !=
   9546			    (__force u32)cls->knode.sel->offmask)
   9547				return err;
   9548
   9549			jump = kzalloc(sizeof(*jump), GFP_KERNEL);
   9550			if (!jump)
   9551				return -ENOMEM;
   9552			input = kzalloc(sizeof(*input), GFP_KERNEL);
   9553			if (!input) {
   9554				err = -ENOMEM;
   9555				goto free_jump;
   9556			}
   9557			mask = kzalloc(sizeof(*mask), GFP_KERNEL);
   9558			if (!mask) {
   9559				err = -ENOMEM;
   9560				goto free_input;
   9561			}
   9562			jump->input = input;
   9563			jump->mask = mask;
   9564			jump->link_hdl = cls->knode.handle;
   9565
   9566			err = ixgbe_clsu32_build_input(input, mask, cls,
   9567						       field_ptr, &nexthdr[i]);
   9568			if (!err) {
   9569				jump->mat = nexthdr[i].jump;
   9570				adapter->jump_tables[link_uhtid] = jump;
   9571				break;
   9572			} else {
   9573				kfree(mask);
   9574				kfree(input);
   9575				kfree(jump);
   9576			}
   9577		}
   9578		return 0;
   9579	}
   9580
   9581	input = kzalloc(sizeof(*input), GFP_KERNEL);
   9582	if (!input)
   9583		return -ENOMEM;
   9584	mask = kzalloc(sizeof(*mask), GFP_KERNEL);
   9585	if (!mask) {
   9586		err = -ENOMEM;
   9587		goto free_input;
   9588	}
   9589
   9590	if ((uhtid != 0x800) && (adapter->jump_tables[uhtid])) {
   9591		if ((adapter->jump_tables[uhtid])->input)
   9592			memcpy(input, (adapter->jump_tables[uhtid])->input,
   9593			       sizeof(*input));
   9594		if ((adapter->jump_tables[uhtid])->mask)
   9595			memcpy(mask, (adapter->jump_tables[uhtid])->mask,
   9596			       sizeof(*mask));
   9597
   9598		/* Lookup in all child hash tables if this location is already
   9599		 * filled with a filter
   9600		 */
   9601		for (i = 1; i < IXGBE_MAX_LINK_HANDLE; i++) {
   9602			struct ixgbe_jump_table *link = adapter->jump_tables[i];
   9603
   9604			if (link && (test_bit(loc - 1, link->child_loc_map))) {
   9605				e_err(drv, "Filter exists in location: %x\n",
   9606				      loc);
   9607				err = -EINVAL;
   9608				goto err_out;
   9609			}
   9610		}
   9611	}
   9612	err = ixgbe_clsu32_build_input(input, mask, cls, field_ptr, NULL);
   9613	if (err)
   9614		goto err_out;
   9615
   9616	err = parse_tc_actions(adapter, cls->knode.exts, &input->action,
   9617			       &queue);
   9618	if (err < 0)
   9619		goto err_out;
   9620
   9621	input->sw_idx = loc;
   9622
   9623	spin_lock(&adapter->fdir_perfect_lock);
   9624
   9625	if (hlist_empty(&adapter->fdir_filter_list)) {
   9626		memcpy(&adapter->fdir_mask, mask, sizeof(*mask));
   9627		err = ixgbe_fdir_set_input_mask_82599(hw, mask);
   9628		if (err)
   9629			goto err_out_w_lock;
   9630	} else if (memcmp(&adapter->fdir_mask, mask, sizeof(*mask))) {
   9631		err = -EINVAL;
   9632		goto err_out_w_lock;
   9633	}
   9634
   9635	ixgbe_atr_compute_perfect_hash_82599(&input->filter, mask);
   9636	err = ixgbe_fdir_write_perfect_filter_82599(hw, &input->filter,
   9637						    input->sw_idx, queue);
   9638	if (err)
   9639		goto err_out_w_lock;
   9640
   9641	ixgbe_update_ethtool_fdir_entry(adapter, input, input->sw_idx);
   9642	spin_unlock(&adapter->fdir_perfect_lock);
   9643
   9644	if ((uhtid != 0x800) && (adapter->jump_tables[uhtid]))
   9645		set_bit(loc - 1, (adapter->jump_tables[uhtid])->child_loc_map);
   9646
   9647	kfree(mask);
   9648	return err;
   9649err_out_w_lock:
   9650	spin_unlock(&adapter->fdir_perfect_lock);
   9651err_out:
   9652	kfree(mask);
   9653free_input:
   9654	kfree(input);
   9655free_jump:
   9656	kfree(jump);
   9657	return err;
   9658}
   9659
   9660static int ixgbe_setup_tc_cls_u32(struct ixgbe_adapter *adapter,
   9661				  struct tc_cls_u32_offload *cls_u32)
   9662{
   9663	switch (cls_u32->command) {
   9664	case TC_CLSU32_NEW_KNODE:
   9665	case TC_CLSU32_REPLACE_KNODE:
   9666		return ixgbe_configure_clsu32(adapter, cls_u32);
   9667	case TC_CLSU32_DELETE_KNODE:
   9668		return ixgbe_delete_clsu32(adapter, cls_u32);
   9669	case TC_CLSU32_NEW_HNODE:
   9670	case TC_CLSU32_REPLACE_HNODE:
   9671		return ixgbe_configure_clsu32_add_hnode(adapter, cls_u32);
   9672	case TC_CLSU32_DELETE_HNODE:
   9673		return ixgbe_configure_clsu32_del_hnode(adapter, cls_u32);
   9674	default:
   9675		return -EOPNOTSUPP;
   9676	}
   9677}
   9678
   9679static int ixgbe_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
   9680				   void *cb_priv)
   9681{
   9682	struct ixgbe_adapter *adapter = cb_priv;
   9683
   9684	if (!tc_cls_can_offload_and_chain0(adapter->netdev, type_data))
   9685		return -EOPNOTSUPP;
   9686
   9687	switch (type) {
   9688	case TC_SETUP_CLSU32:
   9689		return ixgbe_setup_tc_cls_u32(adapter, type_data);
   9690	default:
   9691		return -EOPNOTSUPP;
   9692	}
   9693}
   9694
   9695static int ixgbe_setup_tc_mqprio(struct net_device *dev,
   9696				 struct tc_mqprio_qopt *mqprio)
   9697{
   9698	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
   9699	return ixgbe_setup_tc(dev, mqprio->num_tc);
   9700}
   9701
   9702static LIST_HEAD(ixgbe_block_cb_list);
   9703
   9704static int __ixgbe_setup_tc(struct net_device *dev, enum tc_setup_type type,
   9705			    void *type_data)
   9706{
   9707	struct ixgbe_adapter *adapter = netdev_priv(dev);
   9708
   9709	switch (type) {
   9710	case TC_SETUP_BLOCK:
   9711		return flow_block_cb_setup_simple(type_data,
   9712						  &ixgbe_block_cb_list,
   9713						  ixgbe_setup_tc_block_cb,
   9714						  adapter, adapter, true);
   9715	case TC_SETUP_QDISC_MQPRIO:
   9716		return ixgbe_setup_tc_mqprio(dev, type_data);
   9717	default:
   9718		return -EOPNOTSUPP;
   9719	}
   9720}
   9721
   9722#ifdef CONFIG_PCI_IOV
   9723void ixgbe_sriov_reinit(struct ixgbe_adapter *adapter)
   9724{
   9725	struct net_device *netdev = adapter->netdev;
   9726
   9727	rtnl_lock();
   9728	ixgbe_setup_tc(netdev, adapter->hw_tcs);
   9729	rtnl_unlock();
   9730}
   9731
   9732#endif
   9733void ixgbe_do_reset(struct net_device *netdev)
   9734{
   9735	struct ixgbe_adapter *adapter = netdev_priv(netdev);
   9736
   9737	if (netif_running(netdev))
   9738		ixgbe_reinit_locked(adapter);
   9739	else
   9740		ixgbe_reset(adapter);
   9741}
   9742
   9743static netdev_features_t ixgbe_fix_features(struct net_device *netdev,
   9744					    netdev_features_t features)
   9745{
   9746	struct ixgbe_adapter *adapter = netdev_priv(netdev);
   9747
   9748	/* If Rx checksum is disabled, then RSC/LRO should also be disabled */
   9749	if (!(features & NETIF_F_RXCSUM))
   9750		features &= ~NETIF_F_LRO;
   9751
   9752	/* Turn off LRO if not RSC capable */
   9753	if (!(adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE))
   9754		features &= ~NETIF_F_LRO;
   9755
   9756	if (adapter->xdp_prog && (features & NETIF_F_LRO)) {
   9757		e_dev_err("LRO is not supported with XDP\n");
   9758		features &= ~NETIF_F_LRO;
   9759	}
   9760
   9761	return features;
   9762}
   9763
   9764static void ixgbe_reset_l2fw_offload(struct ixgbe_adapter *adapter)
   9765{
   9766	int rss = min_t(int, ixgbe_max_rss_indices(adapter),
   9767			num_online_cpus());
   9768
   9769	/* go back to full RSS if we're not running SR-IOV */
   9770	if (!adapter->ring_feature[RING_F_VMDQ].offset)
   9771		adapter->flags &= ~(IXGBE_FLAG_VMDQ_ENABLED |
   9772				    IXGBE_FLAG_SRIOV_ENABLED);
   9773
   9774	adapter->ring_feature[RING_F_RSS].limit = rss;
   9775	adapter->ring_feature[RING_F_VMDQ].limit = 1;
   9776
   9777	ixgbe_setup_tc(adapter->netdev, adapter->hw_tcs);
   9778}
   9779
   9780static int ixgbe_set_features(struct net_device *netdev,
   9781			      netdev_features_t features)
   9782{
   9783	struct ixgbe_adapter *adapter = netdev_priv(netdev);
   9784	netdev_features_t changed = netdev->features ^ features;
   9785	bool need_reset = false;
   9786
    9787	/* Make sure RSC matches LRO, reset if it changed */
   9788	if (!(features & NETIF_F_LRO)) {
   9789		if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)
   9790			need_reset = true;
   9791		adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED;
   9792	} else if ((adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) &&
   9793		   !(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)) {
   9794		if (adapter->rx_itr_setting == 1 ||
   9795		    adapter->rx_itr_setting > IXGBE_MIN_RSC_ITR) {
   9796			adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED;
   9797			need_reset = true;
   9798		} else if ((changed ^ features) & NETIF_F_LRO) {
   9799			e_info(probe, "rx-usecs set too low, "
   9800			       "disabling RSC\n");
   9801		}
   9802	}
   9803
   9804	/*
   9805	 * Check if Flow Director n-tuple support or hw_tc support was
   9806	 * enabled or disabled.  If the state changed, we need to reset.
   9807	 */
   9808	if ((features & NETIF_F_NTUPLE) || (features & NETIF_F_HW_TC)) {
   9809		/* turn off ATR, enable perfect filters and reset */
   9810		if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
   9811			need_reset = true;
   9812
   9813		adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
   9814		adapter->flags |= IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
   9815	} else {
   9816		/* turn off perfect filters, enable ATR and reset */
   9817		if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)
   9818			need_reset = true;
   9819
   9820		adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
   9821
   9822		/* We cannot enable ATR if SR-IOV is enabled */
   9823		if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED ||
   9824		    /* We cannot enable ATR if we have 2 or more tcs */
   9825		    (adapter->hw_tcs > 1) ||
   9826		    /* We cannot enable ATR if RSS is disabled */
   9827		    (adapter->ring_feature[RING_F_RSS].limit <= 1) ||
   9828		    /* A sample rate of 0 indicates ATR disabled */
   9829		    (!adapter->atr_sample_rate))
    9830			; /* do nothing, not supported */
   9831		else /* otherwise supported and set the flag */
   9832			adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
   9833	}
   9834
   9835	if (changed & NETIF_F_RXALL)
   9836		need_reset = true;
   9837
   9838	netdev->features = features;
   9839
   9840	if ((changed & NETIF_F_HW_L2FW_DOFFLOAD) && adapter->num_rx_pools > 1)
   9841		ixgbe_reset_l2fw_offload(adapter);
   9842	else if (need_reset)
   9843		ixgbe_do_reset(netdev);
   9844	else if (changed & (NETIF_F_HW_VLAN_CTAG_RX |
   9845			    NETIF_F_HW_VLAN_CTAG_FILTER))
   9846		ixgbe_set_rx_mode(netdev);
   9847
   9848	return 1;
   9849}
   9850
   9851static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
   9852			     struct net_device *dev,
   9853			     const unsigned char *addr, u16 vid,
   9854			     u16 flags,
   9855			     struct netlink_ext_ack *extack)
   9856{
   9857	/* guarantee we can provide a unique filter for the unicast address */
   9858	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) {
   9859		struct ixgbe_adapter *adapter = netdev_priv(dev);
   9860		u16 pool = VMDQ_P(0);
   9861
   9862		if (netdev_uc_count(dev) >= ixgbe_available_rars(adapter, pool))
   9863			return -ENOMEM;
   9864	}
   9865
   9866	return ndo_dflt_fdb_add(ndm, tb, dev, addr, vid, flags);
   9867}
   9868
   9869/**
   9870 * ixgbe_configure_bridge_mode - set various bridge modes
   9871 * @adapter: the private structure
   9872 * @mode: requested bridge mode
   9873 *
    9874 * Configure some settings required for various bridge modes.
   9875 **/
   9876static int ixgbe_configure_bridge_mode(struct ixgbe_adapter *adapter,
   9877				       __u16 mode)
   9878{
   9879	struct ixgbe_hw *hw = &adapter->hw;
   9880	unsigned int p, num_pools;
   9881	u32 vmdctl;
   9882
   9883	switch (mode) {
   9884	case BRIDGE_MODE_VEPA:
   9885		/* disable Tx loopback, rely on switch hairpin mode */
   9886		IXGBE_WRITE_REG(&adapter->hw, IXGBE_PFDTXGSWC, 0);
   9887
   9888		/* must enable Rx switching replication to allow multicast
   9889		 * packet reception on all VFs, and to enable source address
   9890		 * pruning.
   9891		 */
   9892		vmdctl = IXGBE_READ_REG(hw, IXGBE_VMD_CTL);
   9893		vmdctl |= IXGBE_VT_CTL_REPLEN;
   9894		IXGBE_WRITE_REG(hw, IXGBE_VMD_CTL, vmdctl);
   9895
   9896		/* enable Rx source address pruning. Note, this requires
   9897		 * replication to be enabled or else it does nothing.
   9898		 */
   9899		num_pools = adapter->num_vfs + adapter->num_rx_pools;
   9900		for (p = 0; p < num_pools; p++) {
   9901			if (hw->mac.ops.set_source_address_pruning)
   9902				hw->mac.ops.set_source_address_pruning(hw,
   9903								       true,
   9904								       p);
   9905		}
   9906		break;
   9907	case BRIDGE_MODE_VEB:
   9908		/* enable Tx loopback for internal VF/PF communication */
   9909		IXGBE_WRITE_REG(&adapter->hw, IXGBE_PFDTXGSWC,
   9910				IXGBE_PFDTXGSWC_VT_LBEN);
   9911
   9912		/* disable Rx switching replication unless we have SR-IOV
   9913		 * virtual functions
   9914		 */
   9915		vmdctl = IXGBE_READ_REG(hw, IXGBE_VMD_CTL);
   9916		if (!adapter->num_vfs)
   9917			vmdctl &= ~IXGBE_VT_CTL_REPLEN;
   9918		IXGBE_WRITE_REG(hw, IXGBE_VMD_CTL, vmdctl);
   9919
   9920		/* disable Rx source address pruning, since we don't expect to
   9921		 * be receiving external loopback of our transmitted frames.
   9922		 */
   9923		num_pools = adapter->num_vfs + adapter->num_rx_pools;
   9924		for (p = 0; p < num_pools; p++) {
   9925			if (hw->mac.ops.set_source_address_pruning)
   9926				hw->mac.ops.set_source_address_pruning(hw,
   9927								       false,
   9928								       p);
   9929		}
   9930		break;
   9931	default:
   9932		return -EINVAL;
   9933	}
   9934
   9935	adapter->bridge_mode = mode;
   9936
   9937	e_info(drv, "enabling bridge mode: %s\n",
   9938	       mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
   9939
   9940	return 0;
   9941}
   9942
   9943static int ixgbe_ndo_bridge_setlink(struct net_device *dev,
   9944				    struct nlmsghdr *nlh, u16 flags,
   9945				    struct netlink_ext_ack *extack)
   9946{
   9947	struct ixgbe_adapter *adapter = netdev_priv(dev);
   9948	struct nlattr *attr, *br_spec;
   9949	int rem;
   9950
   9951	if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
   9952		return -EOPNOTSUPP;
   9953
   9954	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
   9955	if (!br_spec)
   9956		return -EINVAL;
   9957
   9958	nla_for_each_nested(attr, br_spec, rem) {
   9959		int status;
   9960		__u16 mode;
   9961
   9962		if (nla_type(attr) != IFLA_BRIDGE_MODE)
   9963			continue;
   9964
   9965		if (nla_len(attr) < sizeof(mode))
   9966			return -EINVAL;
   9967
   9968		mode = nla_get_u16(attr);
   9969		status = ixgbe_configure_bridge_mode(adapter, mode);
   9970		if (status)
   9971			return status;
   9972
   9973		break;
   9974	}
   9975
   9976	return 0;
   9977}
   9978
   9979static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
   9980				    struct net_device *dev,
   9981				    u32 filter_mask, int nlflags)
   9982{
   9983	struct ixgbe_adapter *adapter = netdev_priv(dev);
   9984
   9985	if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
   9986		return 0;
   9987
   9988	return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
   9989				       adapter->bridge_mode, 0, 0, nlflags,
   9990				       filter_mask, NULL);
   9991}
   9992
   9993static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev)
   9994{
   9995	struct ixgbe_adapter *adapter = netdev_priv(pdev);
   9996	struct ixgbe_fwd_adapter *accel;
   9997	int tcs = adapter->hw_tcs ? : 1;
   9998	int pool, err;
   9999
  10000	if (adapter->xdp_prog) {
  10001		e_warn(probe, "L2FW offload is not supported with XDP\n");
  10002		return ERR_PTR(-EINVAL);
  10003	}
  10004
  10005	/* The hardware supported by ixgbe only filters on the destination MAC
  10006	 * address. In order to avoid issues we only support offloading modes
  10007	 * where the hardware can actually provide the functionality.
  10008	 */
  10009	if (!macvlan_supports_dest_filter(vdev))
  10010		return ERR_PTR(-EMEDIUMTYPE);
  10011
  10012	/* We need to lock down the macvlan to be a single queue device so that
  10013	 * we can reuse the tc_to_txq field in the macvlan netdev to represent
  10014	 * the queue mapping to our netdev.
  10015	 */
  10016	if (netif_is_multiqueue(vdev))
  10017		return ERR_PTR(-ERANGE);
  10018
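        	/* Look for a free VMDq pool; if none is available, try to grow
        	 * the number of pools below.
        	 */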
  10019	pool = find_first_zero_bit(adapter->fwd_bitmask, adapter->num_rx_pools);
  10020	if (pool == adapter->num_rx_pools) {
  10021		u16 used_pools = adapter->num_vfs + adapter->num_rx_pools;
  10022		u16 reserved_pools;
  10023
  10024		if (((adapter->flags & IXGBE_FLAG_DCB_ENABLED) &&
  10025		     adapter->num_rx_pools >= (MAX_TX_QUEUES / tcs)) ||
  10026		    adapter->num_rx_pools > IXGBE_MAX_MACVLANS)
  10027			return ERR_PTR(-EBUSY);
  10028
   10029		/* Hardware has a limited number of available pools. Each VF
   10030		 * and the PF require a pool. Check to ensure we don't
   10031		 * attempt to use more than the available number of pools.
  10032		 */
  10033		if (used_pools >= IXGBE_MAX_VF_FUNCTIONS)
  10034			return ERR_PTR(-EBUSY);
  10035
  10036		/* Enable VMDq flag so device will be set in VM mode */
  10037		adapter->flags |= IXGBE_FLAG_VMDQ_ENABLED |
  10038				  IXGBE_FLAG_SRIOV_ENABLED;
  10039
   10040		/* Try to reserve as many queues per pool as possible;
   10041		 * we start with the configurations that support 4 queues
   10042		 * per pool, followed by 2, and then by just 1 per pool.
  10043		 */
  10044		if (used_pools < 32 && adapter->num_rx_pools < 16)
  10045			reserved_pools = min_t(u16,
  10046					       32 - used_pools,
  10047					       16 - adapter->num_rx_pools);
  10048		else if (adapter->num_rx_pools < 32)
  10049			reserved_pools = min_t(u16,
  10050					       64 - used_pools,
  10051					       32 - adapter->num_rx_pools);
  10052		else
  10053			reserved_pools = 64 - used_pools;
  10054
  10055
  10056		if (!reserved_pools)
  10057			return ERR_PTR(-EBUSY);
  10058
  10059		adapter->ring_feature[RING_F_VMDQ].limit += reserved_pools;
  10060
  10061		/* Force reinit of ring allocation with VMDQ enabled */
  10062		err = ixgbe_setup_tc(pdev, adapter->hw_tcs);
  10063		if (err)
  10064			return ERR_PTR(err);
  10065
  10066		if (pool >= adapter->num_rx_pools)
  10067			return ERR_PTR(-ENOMEM);
  10068	}
  10069
  10070	accel = kzalloc(sizeof(*accel), GFP_KERNEL);
  10071	if (!accel)
  10072		return ERR_PTR(-ENOMEM);
  10073
  10074	set_bit(pool, adapter->fwd_bitmask);
  10075	netdev_set_sb_channel(vdev, pool);
  10076	accel->pool = pool;
  10077	accel->netdev = vdev;
  10078
  10079	if (!netif_running(pdev))
  10080		return accel;
  10081
  10082	err = ixgbe_fwd_ring_up(adapter, accel);
  10083	if (err)
  10084		return ERR_PTR(err);
  10085
  10086	return accel;
  10087}
  10088
  10089static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
  10090{
  10091	struct ixgbe_fwd_adapter *accel = priv;
  10092	struct ixgbe_adapter *adapter = netdev_priv(pdev);
  10093	unsigned int rxbase = accel->rx_base_queue;
  10094	unsigned int i;
  10095
  10096	/* delete unicast filter associated with offloaded interface */
  10097	ixgbe_del_mac_filter(adapter, accel->netdev->dev_addr,
  10098			     VMDQ_P(accel->pool));
  10099
  10100	/* Allow remaining Rx packets to get flushed out of the
  10101	 * Rx FIFO before we drop the netdev for the ring.
  10102	 */
  10103	usleep_range(10000, 20000);
  10104
  10105	for (i = 0; i < adapter->num_rx_queues_per_pool; i++) {
  10106		struct ixgbe_ring *ring = adapter->rx_ring[rxbase + i];
  10107		struct ixgbe_q_vector *qv = ring->q_vector;
  10108
  10109		/* Make sure we aren't processing any packets and clear
  10110		 * netdev to shut down the ring.
  10111		 */
  10112		if (netif_running(adapter->netdev))
  10113			napi_synchronize(&qv->napi);
  10114		ring->netdev = NULL;
  10115	}
  10116
  10117	/* unbind the queues and drop the subordinate channel config */
  10118	netdev_unbind_sb_channel(pdev, accel->netdev);
  10119	netdev_set_sb_channel(accel->netdev, 0);
  10120
  10121	clear_bit(accel->pool, adapter->fwd_bitmask);
  10122	kfree(accel);
  10123}
  10124
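        /* Maximum header lengths that a single Tx context descriptor can
         * describe (assumption: limited by the 7-bit MACLEN and 9-bit IPLEN
         * descriptor fields).
         */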
  10125#define IXGBE_MAX_MAC_HDR_LEN		127
  10126#define IXGBE_MAX_NETWORK_HDR_LEN	511
  10127
  10128static netdev_features_t
  10129ixgbe_features_check(struct sk_buff *skb, struct net_device *dev,
  10130		     netdev_features_t features)
  10131{
  10132	unsigned int network_hdr_len, mac_hdr_len;
  10133
  10134	/* Make certain the headers can be described by a context descriptor */
  10135	mac_hdr_len = skb_network_header(skb) - skb->data;
  10136	if (unlikely(mac_hdr_len > IXGBE_MAX_MAC_HDR_LEN))
  10137		return features & ~(NETIF_F_HW_CSUM |
  10138				    NETIF_F_SCTP_CRC |
  10139				    NETIF_F_GSO_UDP_L4 |
  10140				    NETIF_F_HW_VLAN_CTAG_TX |
  10141				    NETIF_F_TSO |
  10142				    NETIF_F_TSO6);
  10143
  10144	network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb);
  10145	if (unlikely(network_hdr_len >  IXGBE_MAX_NETWORK_HDR_LEN))
  10146		return features & ~(NETIF_F_HW_CSUM |
  10147				    NETIF_F_SCTP_CRC |
  10148				    NETIF_F_GSO_UDP_L4 |
  10149				    NETIF_F_TSO |
  10150				    NETIF_F_TSO6);
  10151
  10152	/* We can only support IPV4 TSO in tunnels if we can mangle the
  10153	 * inner IP ID field, so strip TSO if MANGLEID is not supported.
   10154	 * IPsec offload sets skb->encapsulation but still can handle
  10155	 * the TSO, so it's the exception.
  10156	 */
  10157	if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) {
  10158#ifdef CONFIG_IXGBE_IPSEC
  10159		if (!secpath_exists(skb))
  10160#endif
  10161			features &= ~NETIF_F_TSO;
  10162	}
  10163
  10164	return features;
  10165}
  10166
  10167static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
  10168{
  10169	int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
  10170	struct ixgbe_adapter *adapter = netdev_priv(dev);
  10171	struct bpf_prog *old_prog;
  10172	bool need_reset;
  10173	int num_queues;
  10174
  10175	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
  10176		return -EINVAL;
  10177
  10178	if (adapter->flags & IXGBE_FLAG_DCB_ENABLED)
  10179		return -EINVAL;
  10180
  10181	/* verify ixgbe ring attributes are sufficient for XDP */
  10182	for (i = 0; i < adapter->num_rx_queues; i++) {
  10183		struct ixgbe_ring *ring = adapter->rx_ring[i];
  10184
  10185		if (ring_is_rsc_enabled(ring))
  10186			return -EINVAL;
  10187
  10188		if (frame_size > ixgbe_rx_bufsz(ring))
  10189			return -EINVAL;
  10190	}
  10191
   10192	/* If the number of CPUs is much larger than the maximum number of
   10193	 * XDP queues, refuse the program and return -ENOMEM.
  10194	 */
  10195	if (nr_cpu_ids > IXGBE_MAX_XDP_QS * 2)
  10196		return -ENOMEM;
  10197	else if (nr_cpu_ids > IXGBE_MAX_XDP_QS)
  10198		static_branch_inc(&ixgbe_xdp_locking_key);
  10199
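        	/* Atomically install the new program and keep the old one so it
        	 * can be released once the rings have been updated.
        	 */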
  10200	old_prog = xchg(&adapter->xdp_prog, prog);
  10201	need_reset = (!!prog != !!old_prog);
  10202
  10203	/* If transitioning XDP modes reconfigure rings */
  10204	if (need_reset) {
  10205		int err;
  10206
  10207		if (!prog)
  10208			/* Wait until ndo_xsk_wakeup completes. */
  10209			synchronize_rcu();
  10210		err = ixgbe_setup_tc(dev, adapter->hw_tcs);
  10211
  10212		if (err) {
  10213			rcu_assign_pointer(adapter->xdp_prog, old_prog);
  10214			return -EINVAL;
  10215		}
  10216	} else {
  10217		for (i = 0; i < adapter->num_rx_queues; i++)
  10218			(void)xchg(&adapter->rx_ring[i]->xdp_prog,
  10219			    adapter->xdp_prog);
  10220	}
  10221
  10222	if (old_prog)
  10223		bpf_prog_put(old_prog);
  10224
  10225	/* Kick start the NAPI context if there is an AF_XDP socket open
   10226	 * on that queue id. This is so that receiving will start.
  10227	 */
  10228	if (need_reset && prog) {
  10229		num_queues = min_t(int, adapter->num_rx_queues,
  10230				   adapter->num_xdp_queues);
  10231		for (i = 0; i < num_queues; i++)
  10232			if (adapter->xdp_ring[i]->xsk_pool)
  10233				(void)ixgbe_xsk_wakeup(adapter->netdev, i,
  10234						       XDP_WAKEUP_RX);
  10235	}
  10236
  10237	return 0;
  10238}
  10239
  10240static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
  10241{
  10242	struct ixgbe_adapter *adapter = netdev_priv(dev);
  10243
  10244	switch (xdp->command) {
  10245	case XDP_SETUP_PROG:
  10246		return ixgbe_xdp_setup(dev, xdp->prog);
  10247	case XDP_SETUP_XSK_POOL:
  10248		return ixgbe_xsk_pool_setup(adapter, xdp->xsk.pool,
  10249					    xdp->xsk.queue_id);
  10250
  10251	default:
  10252		return -EINVAL;
  10253	}
  10254}
  10255
  10256void ixgbe_xdp_ring_update_tail(struct ixgbe_ring *ring)
  10257{
  10258	/* Force memory writes to complete before letting h/w know there
  10259	 * are new descriptors to fetch.
  10260	 */
  10261	wmb();
  10262	writel(ring->next_to_use, ring->tail);
  10263}
  10264
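        /* Same as ixgbe_xdp_ring_update_tail() but takes the ring's tx_lock
         * when XDP Tx rings are shared between CPUs (ixgbe_xdp_locking_key).
         */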
  10265void ixgbe_xdp_ring_update_tail_locked(struct ixgbe_ring *ring)
  10266{
  10267	if (static_branch_unlikely(&ixgbe_xdp_locking_key))
  10268		spin_lock(&ring->tx_lock);
  10269	ixgbe_xdp_ring_update_tail(ring);
  10270	if (static_branch_unlikely(&ixgbe_xdp_locking_key))
  10271		spin_unlock(&ring->tx_lock);
  10272}
  10273
  10274static int ixgbe_xdp_xmit(struct net_device *dev, int n,
  10275			  struct xdp_frame **frames, u32 flags)
  10276{
  10277	struct ixgbe_adapter *adapter = netdev_priv(dev);
  10278	struct ixgbe_ring *ring;
  10279	int nxmit = 0;
  10280	int i;
  10281
  10282	if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
  10283		return -ENETDOWN;
  10284
  10285	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
  10286		return -EINVAL;
  10287
   10288	/* During program transitions it's possible adapter->xdp_prog is assigned
   10289	 * but the ring has not been configured yet. In this case simply abort xmit.
  10290	 */
  10291	ring = adapter->xdp_prog ? ixgbe_determine_xdp_ring(adapter) : NULL;
  10292	if (unlikely(!ring))
  10293		return -ENXIO;
  10294
  10295	if (unlikely(test_bit(__IXGBE_TX_DISABLED, &ring->state)))
  10296		return -ENXIO;
  10297
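        	/* serialize XDP transmit when the Tx ring is shared between CPUs */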
  10298	if (static_branch_unlikely(&ixgbe_xdp_locking_key))
  10299		spin_lock(&ring->tx_lock);
  10300
  10301	for (i = 0; i < n; i++) {
  10302		struct xdp_frame *xdpf = frames[i];
  10303		int err;
  10304
  10305		err = ixgbe_xmit_xdp_ring(ring, xdpf);
  10306		if (err != IXGBE_XDP_TX)
  10307			break;
  10308		nxmit++;
  10309	}
  10310
  10311	if (unlikely(flags & XDP_XMIT_FLUSH))
  10312		ixgbe_xdp_ring_update_tail(ring);
  10313
  10314	if (static_branch_unlikely(&ixgbe_xdp_locking_key))
  10315		spin_unlock(&ring->tx_lock);
  10316
  10317	return nxmit;
  10318}
  10319
  10320static const struct net_device_ops ixgbe_netdev_ops = {
  10321	.ndo_open		= ixgbe_open,
  10322	.ndo_stop		= ixgbe_close,
  10323	.ndo_start_xmit		= ixgbe_xmit_frame,
  10324	.ndo_set_rx_mode	= ixgbe_set_rx_mode,
  10325	.ndo_validate_addr	= eth_validate_addr,
  10326	.ndo_set_mac_address	= ixgbe_set_mac,
  10327	.ndo_change_mtu		= ixgbe_change_mtu,
  10328	.ndo_tx_timeout		= ixgbe_tx_timeout,
  10329	.ndo_set_tx_maxrate	= ixgbe_tx_maxrate,
  10330	.ndo_vlan_rx_add_vid	= ixgbe_vlan_rx_add_vid,
  10331	.ndo_vlan_rx_kill_vid	= ixgbe_vlan_rx_kill_vid,
  10332	.ndo_eth_ioctl		= ixgbe_ioctl,
  10333	.ndo_set_vf_mac		= ixgbe_ndo_set_vf_mac,
  10334	.ndo_set_vf_vlan	= ixgbe_ndo_set_vf_vlan,
  10335	.ndo_set_vf_rate	= ixgbe_ndo_set_vf_bw,
  10336	.ndo_set_vf_spoofchk	= ixgbe_ndo_set_vf_spoofchk,
  10337	.ndo_set_vf_link_state	= ixgbe_ndo_set_vf_link_state,
  10338	.ndo_set_vf_rss_query_en = ixgbe_ndo_set_vf_rss_query_en,
  10339	.ndo_set_vf_trust	= ixgbe_ndo_set_vf_trust,
  10340	.ndo_get_vf_config	= ixgbe_ndo_get_vf_config,
  10341	.ndo_get_stats64	= ixgbe_get_stats64,
  10342	.ndo_setup_tc		= __ixgbe_setup_tc,
  10343#ifdef IXGBE_FCOE
  10344	.ndo_select_queue	= ixgbe_select_queue,
  10345	.ndo_fcoe_ddp_setup = ixgbe_fcoe_ddp_get,
  10346	.ndo_fcoe_ddp_target = ixgbe_fcoe_ddp_target,
  10347	.ndo_fcoe_ddp_done = ixgbe_fcoe_ddp_put,
  10348	.ndo_fcoe_enable = ixgbe_fcoe_enable,
  10349	.ndo_fcoe_disable = ixgbe_fcoe_disable,
  10350	.ndo_fcoe_get_wwn = ixgbe_fcoe_get_wwn,
  10351	.ndo_fcoe_get_hbainfo = ixgbe_fcoe_get_hbainfo,
  10352#endif /* IXGBE_FCOE */
  10353	.ndo_set_features = ixgbe_set_features,
  10354	.ndo_fix_features = ixgbe_fix_features,
  10355	.ndo_fdb_add		= ixgbe_ndo_fdb_add,
  10356	.ndo_bridge_setlink	= ixgbe_ndo_bridge_setlink,
  10357	.ndo_bridge_getlink	= ixgbe_ndo_bridge_getlink,
  10358	.ndo_dfwd_add_station	= ixgbe_fwd_add,
  10359	.ndo_dfwd_del_station	= ixgbe_fwd_del,
  10360	.ndo_features_check	= ixgbe_features_check,
  10361	.ndo_bpf		= ixgbe_xdp,
  10362	.ndo_xdp_xmit		= ixgbe_xdp_xmit,
  10363	.ndo_xsk_wakeup         = ixgbe_xsk_wakeup,
  10364};
  10365
  10366static void ixgbe_disable_txr_hw(struct ixgbe_adapter *adapter,
  10367				 struct ixgbe_ring *tx_ring)
  10368{
  10369	unsigned long wait_delay, delay_interval;
  10370	struct ixgbe_hw *hw = &adapter->hw;
  10371	u8 reg_idx = tx_ring->reg_idx;
  10372	int wait_loop;
  10373	u32 txdctl;
  10374
  10375	IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), IXGBE_TXDCTL_SWFLSH);
  10376
  10377	/* delay mechanism from ixgbe_disable_tx */
  10378	delay_interval = ixgbe_get_completion_timeout(adapter) / 100;
  10379
  10380	wait_loop = IXGBE_MAX_RX_DESC_POLL;
  10381	wait_delay = delay_interval;
  10382
  10383	while (wait_loop--) {
  10384		usleep_range(wait_delay, wait_delay + 10);
  10385		wait_delay += delay_interval * 2;
  10386		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(reg_idx));
  10387
  10388		if (!(txdctl & IXGBE_TXDCTL_ENABLE))
  10389			return;
  10390	}
  10391
  10392	e_err(drv, "TXDCTL.ENABLE not cleared within the polling period\n");
  10393}
  10394
  10395static void ixgbe_disable_txr(struct ixgbe_adapter *adapter,
  10396			      struct ixgbe_ring *tx_ring)
  10397{
  10398	set_bit(__IXGBE_TX_DISABLED, &tx_ring->state);
  10399	ixgbe_disable_txr_hw(adapter, tx_ring);
  10400}
  10401
  10402static void ixgbe_disable_rxr_hw(struct ixgbe_adapter *adapter,
  10403				 struct ixgbe_ring *rx_ring)
  10404{
  10405	unsigned long wait_delay, delay_interval;
  10406	struct ixgbe_hw *hw = &adapter->hw;
  10407	u8 reg_idx = rx_ring->reg_idx;
  10408	int wait_loop;
  10409	u32 rxdctl;
  10410
  10411	rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
  10412	rxdctl &= ~IXGBE_RXDCTL_ENABLE;
  10413	rxdctl |= IXGBE_RXDCTL_SWFLSH;
  10414
  10415	/* write value back with RXDCTL.ENABLE bit cleared */
  10416	IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
  10417
  10418	/* RXDCTL.EN may not change on 82598 if link is down, so skip it */
  10419	if (hw->mac.type == ixgbe_mac_82598EB &&
  10420	    !(IXGBE_READ_REG(hw, IXGBE_LINKS) & IXGBE_LINKS_UP))
  10421		return;
  10422
  10423	/* delay mechanism from ixgbe_disable_rx */
  10424	delay_interval = ixgbe_get_completion_timeout(adapter) / 100;
  10425
  10426	wait_loop = IXGBE_MAX_RX_DESC_POLL;
  10427	wait_delay = delay_interval;
  10428
  10429	while (wait_loop--) {
  10430		usleep_range(wait_delay, wait_delay + 10);
  10431		wait_delay += delay_interval * 2;
  10432		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
  10433
  10434		if (!(rxdctl & IXGBE_RXDCTL_ENABLE))
  10435			return;
  10436	}
  10437
  10438	e_err(drv, "RXDCTL.ENABLE not cleared within the polling period\n");
  10439}
  10440
  10441static void ixgbe_reset_txr_stats(struct ixgbe_ring *tx_ring)
  10442{
  10443	memset(&tx_ring->stats, 0, sizeof(tx_ring->stats));
  10444	memset(&tx_ring->tx_stats, 0, sizeof(tx_ring->tx_stats));
  10445}
  10446
  10447static void ixgbe_reset_rxr_stats(struct ixgbe_ring *rx_ring)
  10448{
  10449	memset(&rx_ring->stats, 0, sizeof(rx_ring->stats));
  10450	memset(&rx_ring->rx_stats, 0, sizeof(rx_ring->rx_stats));
  10451}
  10452
  10453/**
  10454 * ixgbe_txrx_ring_disable - Disable Rx/Tx/XDP Tx rings
  10455 * @adapter: adapter structure
  10456 * @ring: ring index
  10457 *
  10458 * This function disables a certain Rx/Tx/XDP Tx ring. The function
  10459 * assumes that the netdev is running.
  10460 **/
  10461void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring)
  10462{
  10463	struct ixgbe_ring *rx_ring, *tx_ring, *xdp_ring;
  10464
  10465	rx_ring = adapter->rx_ring[ring];
  10466	tx_ring = adapter->tx_ring[ring];
  10467	xdp_ring = adapter->xdp_ring[ring];
  10468
  10469	ixgbe_disable_txr(adapter, tx_ring);
  10470	if (xdp_ring)
  10471		ixgbe_disable_txr(adapter, xdp_ring);
  10472	ixgbe_disable_rxr_hw(adapter, rx_ring);
  10473
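        	/* let any in-flight RCU readers of the XDP/AF_XDP state finish
        	 * before the rings are cleaned
        	 */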
  10474	if (xdp_ring)
  10475		synchronize_rcu();
  10476
  10477	/* Rx/Tx/XDP Tx share the same napi context. */
  10478	napi_disable(&rx_ring->q_vector->napi);
  10479
  10480	ixgbe_clean_tx_ring(tx_ring);
  10481	if (xdp_ring)
  10482		ixgbe_clean_tx_ring(xdp_ring);
  10483	ixgbe_clean_rx_ring(rx_ring);
  10484
  10485	ixgbe_reset_txr_stats(tx_ring);
  10486	if (xdp_ring)
  10487		ixgbe_reset_txr_stats(xdp_ring);
  10488	ixgbe_reset_rxr_stats(rx_ring);
  10489}
  10490
  10491/**
  10492 * ixgbe_txrx_ring_enable - Enable Rx/Tx/XDP Tx rings
  10493 * @adapter: adapter structure
  10494 * @ring: ring index
  10495 *
  10496 * This function enables a certain Rx/Tx/XDP Tx ring. The function
  10497 * assumes that the netdev is running.
  10498 **/
  10499void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring)
  10500{
  10501	struct ixgbe_ring *rx_ring, *tx_ring, *xdp_ring;
  10502
  10503	rx_ring = adapter->rx_ring[ring];
  10504	tx_ring = adapter->tx_ring[ring];
  10505	xdp_ring = adapter->xdp_ring[ring];
  10506
  10507	/* Rx/Tx/XDP Tx share the same napi context. */
  10508	napi_enable(&rx_ring->q_vector->napi);
  10509
  10510	ixgbe_configure_tx_ring(adapter, tx_ring);
  10511	if (xdp_ring)
  10512		ixgbe_configure_tx_ring(adapter, xdp_ring);
  10513	ixgbe_configure_rx_ring(adapter, rx_ring);
  10514
  10515	clear_bit(__IXGBE_TX_DISABLED, &tx_ring->state);
  10516	if (xdp_ring)
  10517		clear_bit(__IXGBE_TX_DISABLED, &xdp_ring->state);
  10518}
  10519
  10520/**
  10521 * ixgbe_enumerate_functions - Get the number of ports this device has
  10522 * @adapter: adapter structure
  10523 *
   10524 * This function enumerates the physical functions co-located on a single slot,
  10525 * in order to determine how many ports a device has. This is most useful in
  10526 * determining the required GT/s of PCIe bandwidth necessary for optimal
  10527 * performance.
  10528 **/
  10529static inline int ixgbe_enumerate_functions(struct ixgbe_adapter *adapter)
  10530{
  10531	struct pci_dev *entry, *pdev = adapter->pdev;
  10532	int physfns = 0;
  10533
   10534	/* Some cards cannot use the generic method of counting PCIe functions,
  10535	 * because they are behind a parent switch, so we hardcode these with
  10536	 * the correct number of functions.
  10537	 */
  10538	if (ixgbe_pcie_from_parent(&adapter->hw))
  10539		physfns = 4;
  10540
  10541	list_for_each_entry(entry, &adapter->pdev->bus->devices, bus_list) {
  10542		/* don't count virtual functions */
  10543		if (entry->is_virtfn)
  10544			continue;
  10545
  10546		/* When the devices on the bus don't all match our device ID,
  10547		 * we can't reliably determine the correct number of
  10548		 * functions. This can occur if a function has been direct
  10549		 * attached to a virtual machine using VT-d, for example. In
  10550		 * this case, simply return -1 to indicate this.
  10551		 */
  10552		if ((entry->vendor != pdev->vendor) ||
  10553		    (entry->device != pdev->device))
  10554			return -1;
  10555
  10556		physfns++;
  10557	}
  10558
  10559	return physfns;
  10560}
  10561
  10562/**
  10563 * ixgbe_wol_supported - Check whether device supports WoL
  10564 * @adapter: the adapter private structure
  10565 * @device_id: the device ID
  10566 * @subdevice_id: the subsystem device ID
  10567 *
  10568 * This function is used by probe and ethtool to determine
  10569 * which devices have WoL support
  10570 *
  10571 **/
  10572bool ixgbe_wol_supported(struct ixgbe_adapter *adapter, u16 device_id,
  10573			 u16 subdevice_id)
  10574{
  10575	struct ixgbe_hw *hw = &adapter->hw;
  10576	u16 wol_cap = adapter->eeprom_cap & IXGBE_DEVICE_CAPS_WOL_MASK;
  10577
  10578	/* WOL not supported on 82598 */
  10579	if (hw->mac.type == ixgbe_mac_82598EB)
  10580		return false;
  10581
  10582	/* check eeprom to see if WOL is enabled for X540 and newer */
  10583	if (hw->mac.type >= ixgbe_mac_X540) {
  10584		if ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0_1) ||
  10585		    ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0) &&
  10586		     (hw->bus.func == 0)))
  10587			return true;
  10588	}
  10589
  10590	/* WOL is determined based on device IDs for 82599 MACs */
  10591	switch (device_id) {
  10592	case IXGBE_DEV_ID_82599_SFP:
   10593		/* Only these subdevices could support WOL */
  10594		switch (subdevice_id) {
  10595		case IXGBE_SUBDEV_ID_82599_560FLR:
  10596		case IXGBE_SUBDEV_ID_82599_LOM_SNAP6:
  10597		case IXGBE_SUBDEV_ID_82599_SFP_WOL0:
  10598		case IXGBE_SUBDEV_ID_82599_SFP_2OCP:
  10599			/* only support first port */
  10600			if (hw->bus.func != 0)
  10601				break;
  10602			fallthrough;
  10603		case IXGBE_SUBDEV_ID_82599_SP_560FLR:
  10604		case IXGBE_SUBDEV_ID_82599_SFP:
  10605		case IXGBE_SUBDEV_ID_82599_RNDC:
  10606		case IXGBE_SUBDEV_ID_82599_ECNA_DP:
  10607		case IXGBE_SUBDEV_ID_82599_SFP_1OCP:
  10608		case IXGBE_SUBDEV_ID_82599_SFP_LOM_OEM1:
  10609		case IXGBE_SUBDEV_ID_82599_SFP_LOM_OEM2:
  10610			return true;
  10611		}
  10612		break;
  10613	case IXGBE_DEV_ID_82599EN_SFP:
  10614		/* Only these subdevices support WOL */
  10615		switch (subdevice_id) {
  10616		case IXGBE_SUBDEV_ID_82599EN_SFP_OCP1:
  10617			return true;
  10618		}
  10619		break;
  10620	case IXGBE_DEV_ID_82599_COMBO_BACKPLANE:
  10621		/* All except this subdevice support WOL */
  10622		if (subdevice_id != IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ)
  10623			return true;
  10624		break;
  10625	case IXGBE_DEV_ID_82599_KX4:
   10626		return true;
  10627	default:
  10628		break;
  10629	}
  10630
  10631	return false;
  10632}
  10633
  10634/**
  10635 * ixgbe_set_fw_version - Set FW version
  10636 * @adapter: the adapter private structure
  10637 *
   10638 * This function is used by probe and ethtool to determine and format the
   10639 * FW version string to display. The FW version is taken from the EEPROM/NVM.
  10640 */
  10641static void ixgbe_set_fw_version(struct ixgbe_adapter *adapter)
  10642{
  10643	struct ixgbe_hw *hw = &adapter->hw;
  10644	struct ixgbe_nvm_version nvm_ver;
  10645
  10646	ixgbe_get_oem_prod_version(hw, &nvm_ver);
  10647	if (nvm_ver.oem_valid) {
  10648		snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id),
  10649			 "%x.%x.%x", nvm_ver.oem_major, nvm_ver.oem_minor,
  10650			 nvm_ver.oem_release);
  10651		return;
  10652	}
  10653
  10654	ixgbe_get_etk_id(hw, &nvm_ver);
  10655	ixgbe_get_orom_version(hw, &nvm_ver);
  10656
  10657	if (nvm_ver.or_valid) {
  10658		snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id),
  10659			 "0x%08x, %d.%d.%d", nvm_ver.etk_id, nvm_ver.or_major,
  10660			 nvm_ver.or_build, nvm_ver.or_patch);
  10661		return;
  10662	}
  10663
  10664	/* Set ETrack ID format */
  10665	snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id),
  10666		 "0x%08x", nvm_ver.etk_id);
  10667}
  10668
  10669/**
  10670 * ixgbe_probe - Device Initialization Routine
  10671 * @pdev: PCI device information struct
  10672 * @ent: entry in ixgbe_pci_tbl
  10673 *
  10674 * Returns 0 on success, negative on failure
  10675 *
  10676 * ixgbe_probe initializes an adapter identified by a pci_dev structure.
  10677 * The OS initialization, configuring of the adapter private structure,
  10678 * and a hardware reset occur.
  10679 **/
  10680static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
  10681{
  10682	struct net_device *netdev;
  10683	struct ixgbe_adapter *adapter = NULL;
  10684	struct ixgbe_hw *hw;
  10685	const struct ixgbe_info *ii = ixgbe_info_tbl[ent->driver_data];
  10686	unsigned int indices = MAX_TX_QUEUES;
  10687	u8 part_str[IXGBE_PBANUM_LENGTH];
  10688	int i, err, expected_gts;
  10689	bool disable_dev = false;
  10690#ifdef IXGBE_FCOE
  10691	u16 device_caps;
  10692#endif
  10693	u32 eec;
  10694
  10695	/* Catch broken hardware that put the wrong VF device ID in
  10696	 * the PCIe SR-IOV capability.
  10697	 */
  10698	if (pdev->is_virtfn) {
  10699		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
  10700		     pci_name(pdev), pdev->vendor, pdev->device);
  10701		return -EINVAL;
  10702	}
  10703
  10704	err = pci_enable_device_mem(pdev);
  10705	if (err)
  10706		return err;
  10707
  10708	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
  10709	if (err) {
  10710		dev_err(&pdev->dev,
  10711			"No usable DMA configuration, aborting\n");
  10712		goto err_dma;
  10713	}
  10714
  10715	err = pci_request_mem_regions(pdev, ixgbe_driver_name);
  10716	if (err) {
  10717		dev_err(&pdev->dev,
  10718			"pci_request_selected_regions failed 0x%x\n", err);
  10719		goto err_pci_reg;
  10720	}
  10721
  10722	pci_enable_pcie_error_reporting(pdev);
  10723
  10724	pci_set_master(pdev);
  10725	pci_save_state(pdev);
  10726
  10727	if (ii->mac == ixgbe_mac_82598EB) {
  10728#ifdef CONFIG_IXGBE_DCB
  10729		/* 8 TC w/ 4 queues per TC */
  10730		indices = 4 * MAX_TRAFFIC_CLASS;
  10731#else
  10732		indices = IXGBE_MAX_RSS_INDICES;
  10733#endif
  10734	}
  10735
  10736	netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), indices);
  10737	if (!netdev) {
  10738		err = -ENOMEM;
  10739		goto err_alloc_etherdev;
  10740	}
  10741
  10742	SET_NETDEV_DEV(netdev, &pdev->dev);
  10743
  10744	adapter = netdev_priv(netdev);
  10745
  10746	adapter->netdev = netdev;
  10747	adapter->pdev = pdev;
  10748	hw = &adapter->hw;
  10749	hw->back = adapter;
  10750	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
  10751
  10752	hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
  10753			      pci_resource_len(pdev, 0));
  10754	adapter->io_addr = hw->hw_addr;
  10755	if (!hw->hw_addr) {
  10756		err = -EIO;
  10757		goto err_ioremap;
  10758	}
  10759
  10760	netdev->netdev_ops = &ixgbe_netdev_ops;
  10761	ixgbe_set_ethtool_ops(netdev);
  10762	netdev->watchdog_timeo = 5 * HZ;
  10763	strlcpy(netdev->name, pci_name(pdev), sizeof(netdev->name));
  10764
  10765	/* Setup hw api */
  10766	hw->mac.ops   = *ii->mac_ops;
  10767	hw->mac.type  = ii->mac;
  10768	hw->mvals     = ii->mvals;
  10769	if (ii->link_ops)
  10770		hw->link.ops  = *ii->link_ops;
  10771
  10772	/* EEPROM */
  10773	hw->eeprom.ops = *ii->eeprom_ops;
  10774	eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw));
  10775	if (ixgbe_removed(hw->hw_addr)) {
  10776		err = -EIO;
  10777		goto err_ioremap;
  10778	}
   10779	/* If EEPROM is valid (bit 8 = 1), use default, otherwise use bit bang */
  10780	if (!(eec & BIT(8)))
  10781		hw->eeprom.ops.read = &ixgbe_read_eeprom_bit_bang_generic;
  10782
  10783	/* PHY */
  10784	hw->phy.ops = *ii->phy_ops;
  10785	hw->phy.sfp_type = ixgbe_sfp_type_unknown;
  10786	/* ixgbe_identify_phy_generic will set prtad and mmds properly */
  10787	hw->phy.mdio.prtad = MDIO_PRTAD_NONE;
  10788	hw->phy.mdio.mmds = 0;
  10789	hw->phy.mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_EMULATE_C22;
  10790	hw->phy.mdio.dev = netdev;
  10791	hw->phy.mdio.mdio_read = ixgbe_mdio_read;
  10792	hw->phy.mdio.mdio_write = ixgbe_mdio_write;
  10793
  10794	/* setup the private structure */
  10795	err = ixgbe_sw_init(adapter, ii);
  10796	if (err)
  10797		goto err_sw_init;
  10798
  10799	if (adapter->hw.mac.type == ixgbe_mac_82599EB)
  10800		adapter->flags2 |= IXGBE_FLAG2_AUTO_DISABLE_VF;
  10801
  10802	switch (adapter->hw.mac.type) {
  10803	case ixgbe_mac_X550:
  10804	case ixgbe_mac_X550EM_x:
  10805		netdev->udp_tunnel_nic_info = &ixgbe_udp_tunnels_x550;
  10806		break;
  10807	case ixgbe_mac_x550em_a:
  10808		netdev->udp_tunnel_nic_info = &ixgbe_udp_tunnels_x550em_a;
  10809		break;
  10810	default:
  10811		break;
  10812	}
  10813
  10814	/* Make sure the SWFW semaphore is in a valid state */
  10815	if (hw->mac.ops.init_swfw_sync)
  10816		hw->mac.ops.init_swfw_sync(hw);
  10817
   10818	/* Make it possible for the adapter to be woken up via WOL */
  10819	switch (adapter->hw.mac.type) {
  10820	case ixgbe_mac_82599EB:
  10821	case ixgbe_mac_X540:
  10822	case ixgbe_mac_X550:
  10823	case ixgbe_mac_X550EM_x:
  10824	case ixgbe_mac_x550em_a:
  10825		IXGBE_WRITE_REG(&adapter->hw, IXGBE_WUS, ~0);
  10826		break;
  10827	default:
  10828		break;
  10829	}
  10830
  10831	/*
   10832	 * If there is a fan on this device and it has failed, log the
  10833	 * failure.
  10834	 */
  10835	if (adapter->flags & IXGBE_FLAG_FAN_FAIL_CAPABLE) {
  10836		u32 esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
  10837		if (esdp & IXGBE_ESDP_SDP1)
  10838			e_crit(probe, "Fan has stopped, replace the adapter\n");
  10839	}
  10840
  10841	if (allow_unsupported_sfp)
  10842		hw->allow_unsupported_sfp = allow_unsupported_sfp;
  10843
  10844	/* reset_hw fills in the perm_addr as well */
  10845	hw->phy.reset_if_overtemp = true;
  10846	err = hw->mac.ops.reset_hw(hw);
  10847	hw->phy.reset_if_overtemp = false;
  10848	ixgbe_set_eee_capable(adapter);
  10849	if (err == IXGBE_ERR_SFP_NOT_PRESENT) {
  10850		err = 0;
  10851	} else if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
  10852		e_dev_err("failed to load because an unsupported SFP+ or QSFP module type was detected.\n");
  10853		e_dev_err("Reload the driver after installing a supported module.\n");
  10854		goto err_sw_init;
  10855	} else if (err) {
  10856		e_dev_err("HW Init failed: %d\n", err);
  10857		goto err_sw_init;
  10858	}
  10859
  10860#ifdef CONFIG_PCI_IOV
  10861	/* SR-IOV not supported on the 82598 */
  10862	if (adapter->hw.mac.type == ixgbe_mac_82598EB)
  10863		goto skip_sriov;
  10864	/* Mailbox */
  10865	ixgbe_init_mbx_params_pf(hw);
  10866	hw->mbx.ops = ii->mbx_ops;
  10867	pci_sriov_set_totalvfs(pdev, IXGBE_MAX_VFS_DRV_LIMIT);
  10868	ixgbe_enable_sriov(adapter, max_vfs);
  10869skip_sriov:
  10870
  10871#endif
  10872	netdev->features = NETIF_F_SG |
  10873			   NETIF_F_TSO |
  10874			   NETIF_F_TSO6 |
  10875			   NETIF_F_RXHASH |
  10876			   NETIF_F_RXCSUM |
  10877			   NETIF_F_HW_CSUM;
  10878
  10879#define IXGBE_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
  10880				    NETIF_F_GSO_GRE_CSUM | \
  10881				    NETIF_F_GSO_IPXIP4 | \
  10882				    NETIF_F_GSO_IPXIP6 | \
  10883				    NETIF_F_GSO_UDP_TUNNEL | \
  10884				    NETIF_F_GSO_UDP_TUNNEL_CSUM)
  10885
  10886	netdev->gso_partial_features = IXGBE_GSO_PARTIAL_FEATURES;
  10887	netdev->features |= NETIF_F_GSO_PARTIAL |
  10888			    IXGBE_GSO_PARTIAL_FEATURES;
  10889
  10890	if (hw->mac.type >= ixgbe_mac_82599EB)
  10891		netdev->features |= NETIF_F_SCTP_CRC | NETIF_F_GSO_UDP_L4;
  10892
  10893#ifdef CONFIG_IXGBE_IPSEC
  10894#define IXGBE_ESP_FEATURES	(NETIF_F_HW_ESP | \
  10895				 NETIF_F_HW_ESP_TX_CSUM | \
  10896				 NETIF_F_GSO_ESP)
  10897
  10898	if (adapter->ipsec)
  10899		netdev->features |= IXGBE_ESP_FEATURES;
  10900#endif
  10901	/* copy netdev features into list of user selectable features */
  10902	netdev->hw_features |= netdev->features |
  10903			       NETIF_F_HW_VLAN_CTAG_FILTER |
  10904			       NETIF_F_HW_VLAN_CTAG_RX |
  10905			       NETIF_F_HW_VLAN_CTAG_TX |
  10906			       NETIF_F_RXALL |
  10907			       NETIF_F_HW_L2FW_DOFFLOAD;
  10908
  10909	if (hw->mac.type >= ixgbe_mac_82599EB)
  10910		netdev->hw_features |= NETIF_F_NTUPLE |
  10911				       NETIF_F_HW_TC;
  10912
  10913	netdev->features |= NETIF_F_HIGHDMA;
  10914
  10915	netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
  10916	netdev->hw_enc_features |= netdev->vlan_features;
  10917	netdev->mpls_features |= NETIF_F_SG |
  10918				 NETIF_F_TSO |
  10919				 NETIF_F_TSO6 |
  10920				 NETIF_F_HW_CSUM;
  10921	netdev->mpls_features |= IXGBE_GSO_PARTIAL_FEATURES;
  10922
  10923	/* set this bit last since it cannot be part of vlan_features */
  10924	netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER |
  10925			    NETIF_F_HW_VLAN_CTAG_RX |
  10926			    NETIF_F_HW_VLAN_CTAG_TX;
  10927
  10928	netdev->priv_flags |= IFF_UNICAST_FLT;
  10929	netdev->priv_flags |= IFF_SUPP_NOFCS;
  10930
  10931	/* MTU range: 68 - 9710 */
  10932	netdev->min_mtu = ETH_MIN_MTU;
  10933	netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN);
  10934
  10935#ifdef CONFIG_IXGBE_DCB
  10936	if (adapter->flags & IXGBE_FLAG_DCB_CAPABLE)
  10937		netdev->dcbnl_ops = &ixgbe_dcbnl_ops;
  10938#endif
  10939
  10940#ifdef IXGBE_FCOE
  10941	if (adapter->flags & IXGBE_FLAG_FCOE_CAPABLE) {
  10942		unsigned int fcoe_l;
  10943
  10944		if (hw->mac.ops.get_device_caps) {
  10945			hw->mac.ops.get_device_caps(hw, &device_caps);
  10946			if (device_caps & IXGBE_DEVICE_CAPS_FCOE_OFFLOADS)
  10947				adapter->flags &= ~IXGBE_FLAG_FCOE_CAPABLE;
  10948		}
  10949
  10950
  10951		fcoe_l = min_t(int, IXGBE_FCRETA_SIZE, num_online_cpus());
  10952		adapter->ring_feature[RING_F_FCOE].limit = fcoe_l;
  10953
  10954		netdev->features |= NETIF_F_FSO |
  10955				    NETIF_F_FCOE_CRC;
  10956
  10957		netdev->vlan_features |= NETIF_F_FSO |
  10958					 NETIF_F_FCOE_CRC |
  10959					 NETIF_F_FCOE_MTU;
  10960	}
  10961#endif /* IXGBE_FCOE */
  10962	if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE)
  10963		netdev->hw_features |= NETIF_F_LRO;
  10964	if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)
  10965		netdev->features |= NETIF_F_LRO;
  10966
  10967	if (ixgbe_check_fw_error(adapter)) {
  10968		err = -EIO;
  10969		goto err_sw_init;
  10970	}
  10971
  10972	/* make sure the EEPROM is good */
  10973	if (hw->eeprom.ops.validate_checksum(hw, NULL) < 0) {
  10974		e_dev_err("The EEPROM Checksum Is Not Valid\n");
  10975		err = -EIO;
  10976		goto err_sw_init;
  10977	}
  10978
  10979	eth_platform_get_mac_address(&adapter->pdev->dev,
  10980				     adapter->hw.mac.perm_addr);
  10981
  10982	eth_hw_addr_set(netdev, hw->mac.perm_addr);
  10983
  10984	if (!is_valid_ether_addr(netdev->dev_addr)) {
  10985		e_dev_err("invalid MAC address\n");
  10986		err = -EIO;
  10987		goto err_sw_init;
  10988	}
  10989
  10990	/* Set hw->mac.addr to permanent MAC address */
  10991	ether_addr_copy(hw->mac.addr, hw->mac.perm_addr);
  10992	ixgbe_mac_set_default_filter(adapter);
  10993
  10994	timer_setup(&adapter->service_timer, ixgbe_service_timer, 0);
  10995
  10996	if (ixgbe_removed(hw->hw_addr)) {
  10997		err = -EIO;
  10998		goto err_sw_init;
  10999	}
  11000	INIT_WORK(&adapter->service_task, ixgbe_service_task);
  11001	set_bit(__IXGBE_SERVICE_INITED, &adapter->state);
  11002	clear_bit(__IXGBE_SERVICE_SCHED, &adapter->state);
  11003
  11004	err = ixgbe_init_interrupt_scheme(adapter);
  11005	if (err)
  11006		goto err_sw_init;
  11007
  11008	for (i = 0; i < adapter->num_rx_queues; i++)
  11009		u64_stats_init(&adapter->rx_ring[i]->syncp);
  11010	for (i = 0; i < adapter->num_tx_queues; i++)
  11011		u64_stats_init(&adapter->tx_ring[i]->syncp);
  11012	for (i = 0; i < adapter->num_xdp_queues; i++)
  11013		u64_stats_init(&adapter->xdp_ring[i]->syncp);
  11014
  11015	/* WOL not supported for all devices */
  11016	adapter->wol = 0;
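        	/* word 0x2c of the EEPROM holds the device capabilities used for
        	 * the WoL check below (assumed device caps offset)
        	 */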
  11017	hw->eeprom.ops.read(hw, 0x2c, &adapter->eeprom_cap);
  11018	hw->wol_enabled = ixgbe_wol_supported(adapter, pdev->device,
  11019						pdev->subsystem_device);
  11020	if (hw->wol_enabled)
  11021		adapter->wol = IXGBE_WUFC_MAG;
  11022
  11023	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
  11024
  11025	/* save off EEPROM version number */
  11026	ixgbe_set_fw_version(adapter);
  11027
  11028	/* pick up the PCI bus settings for reporting later */
  11029	if (ixgbe_pcie_from_parent(hw))
  11030		ixgbe_get_parent_bus_info(adapter);
  11031	else
   11032		hw->mac.ops.get_bus_info(hw);
  11033
  11034	/* calculate the expected PCIe bandwidth required for optimal
  11035	 * performance. Note that some older parts will never have enough
  11036	 * bandwidth due to being older generation PCIe parts. We clamp these
  11037	 * parts to ensure no warning is displayed if it can't be fixed.
  11038	 */
  11039	switch (hw->mac.type) {
  11040	case ixgbe_mac_82598EB:
  11041		expected_gts = min(ixgbe_enumerate_functions(adapter) * 10, 16);
  11042		break;
  11043	default:
  11044		expected_gts = ixgbe_enumerate_functions(adapter) * 10;
  11045		break;
  11046	}
  11047
  11048	/* don't check link if we failed to enumerate functions */
  11049	if (expected_gts > 0)
  11050		ixgbe_check_minimum_link(adapter, expected_gts);
  11051
  11052	err = ixgbe_read_pba_string_generic(hw, part_str, sizeof(part_str));
  11053	if (err)
  11054		strlcpy(part_str, "Unknown", sizeof(part_str));
  11055	if (ixgbe_is_sfp(hw) && hw->phy.sfp_type != ixgbe_sfp_type_not_present)
  11056		e_dev_info("MAC: %d, PHY: %d, SFP+: %d, PBA No: %s\n",
  11057			   hw->mac.type, hw->phy.type, hw->phy.sfp_type,
  11058			   part_str);
  11059	else
  11060		e_dev_info("MAC: %d, PHY: %d, PBA No: %s\n",
  11061			   hw->mac.type, hw->phy.type, part_str);
  11062
  11063	e_dev_info("%pM\n", netdev->dev_addr);
  11064
  11065	/* reset the hardware with the new settings */
  11066	err = hw->mac.ops.start_hw(hw);
  11067	if (err == IXGBE_ERR_EEPROM_VERSION) {
  11068		/* We are running on a pre-production device, log a warning */
  11069		e_dev_warn("This device is a pre-production adapter/LOM. "
  11070			   "Please be aware there may be issues associated "
  11071			   "with your hardware.  If you are experiencing "
  11072			   "problems please contact your Intel or hardware "
  11073			   "representative who provided you with this "
  11074			   "hardware.\n");
  11075	}
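        	/* register under the standard ethN template name */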
  11076	strcpy(netdev->name, "eth%d");
  11077	pci_set_drvdata(pdev, adapter);
  11078	err = register_netdev(netdev);
  11079	if (err)
  11080		goto err_register;
  11081
  11082
  11083	/* power down the optics for 82599 SFP+ fiber */
  11084	if (hw->mac.ops.disable_tx_laser)
  11085		hw->mac.ops.disable_tx_laser(hw);
  11086
  11087	/* carrier off reporting is important to ethtool even BEFORE open */
  11088	netif_carrier_off(netdev);
  11089
  11090#ifdef CONFIG_IXGBE_DCA
  11091	if (dca_add_requester(&pdev->dev) == 0) {
  11092		adapter->flags |= IXGBE_FLAG_DCA_ENABLED;
  11093		ixgbe_setup_dca(adapter);
  11094	}
  11095#endif
  11096	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
  11097		e_info(probe, "IOV is enabled with %d VFs\n", adapter->num_vfs);
  11098		for (i = 0; i < adapter->num_vfs; i++)
  11099			ixgbe_vf_configuration(pdev, (i | 0x10000000));
  11100	}
  11101
  11102	/* firmware requires driver version to be 0xFFFFFFFF
   11103	 * since the OS does not support the feature
  11104	 */
  11105	if (hw->mac.ops.set_fw_drv_ver)
  11106		hw->mac.ops.set_fw_drv_ver(hw, 0xFF, 0xFF, 0xFF, 0xFF,
  11107					   sizeof(UTS_RELEASE) - 1,
  11108					   UTS_RELEASE);
  11109
  11110	/* add san mac addr to netdev */
  11111	ixgbe_add_sanmac_netdev(netdev);
  11112
  11113	e_dev_info("%s\n", ixgbe_default_device_descr);
  11114
  11115#ifdef CONFIG_IXGBE_HWMON
  11116	if (ixgbe_sysfs_init(adapter))
  11117		e_err(probe, "failed to allocate sysfs resources\n");
  11118#endif /* CONFIG_IXGBE_HWMON */
  11119
  11120	ixgbe_dbg_adapter_init(adapter);
  11121
  11122	/* setup link for SFP devices with MNG FW, else wait for IXGBE_UP */
  11123	if (ixgbe_mng_enabled(hw) && ixgbe_is_sfp(hw) && hw->mac.ops.setup_link)
  11124		hw->mac.ops.setup_link(hw,
  11125			IXGBE_LINK_SPEED_10GB_FULL | IXGBE_LINK_SPEED_1GB_FULL,
  11126			true);
  11127
  11128	err = ixgbe_mii_bus_init(hw);
  11129	if (err)
  11130		goto err_netdev;
  11131
  11132	return 0;
  11133
  11134err_netdev:
  11135	unregister_netdev(netdev);
  11136err_register:
  11137	ixgbe_release_hw_control(adapter);
  11138	ixgbe_clear_interrupt_scheme(adapter);
  11139err_sw_init:
  11140	ixgbe_disable_sriov(adapter);
  11141	adapter->flags2 &= ~IXGBE_FLAG2_SEARCH_FOR_SFP;
  11142	iounmap(adapter->io_addr);
  11143	kfree(adapter->jump_tables[0]);
  11144	kfree(adapter->mac_table);
  11145	kfree(adapter->rss_key);
  11146	bitmap_free(adapter->af_xdp_zc_qps);
  11147err_ioremap:
  11148	disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state);
  11149	free_netdev(netdev);
  11150err_alloc_etherdev:
  11151	pci_disable_pcie_error_reporting(pdev);
  11152	pci_release_mem_regions(pdev);
  11153err_pci_reg:
  11154err_dma:
  11155	if (!adapter || disable_dev)
  11156		pci_disable_device(pdev);
  11157	return err;
  11158}
  11159
  11160/**
  11161 * ixgbe_remove - Device Removal Routine
  11162 * @pdev: PCI device information struct
  11163 *
  11164 * ixgbe_remove is called by the PCI subsystem to alert the driver
  11165 * that it should release a PCI device.  The could be caused by a
   11166 * that it should release a PCI device.  This could be caused by a
  11167 * memory.
  11168 **/
  11169static void ixgbe_remove(struct pci_dev *pdev)
  11170{
  11171	struct ixgbe_adapter *adapter = pci_get_drvdata(pdev);
  11172	struct net_device *netdev;
  11173	bool disable_dev;
  11174	int i;
  11175
  11176	/* if !adapter then we already cleaned up in probe */
  11177	if (!adapter)
  11178		return;
  11179
  11180	netdev  = adapter->netdev;
  11181	ixgbe_dbg_adapter_exit(adapter);
  11182
  11183	set_bit(__IXGBE_REMOVING, &adapter->state);
  11184	cancel_work_sync(&adapter->service_task);
  11185
  11186	if (adapter->mii_bus)
  11187		mdiobus_unregister(adapter->mii_bus);
  11188
  11189#ifdef CONFIG_IXGBE_DCA
  11190	if (adapter->flags & IXGBE_FLAG_DCA_ENABLED) {
  11191		adapter->flags &= ~IXGBE_FLAG_DCA_ENABLED;
  11192		dca_remove_requester(&pdev->dev);
  11193		IXGBE_WRITE_REG(&adapter->hw, IXGBE_DCA_CTRL,
  11194				IXGBE_DCA_CTRL_DCA_DISABLE);
  11195	}
  11196
  11197#endif
  11198#ifdef CONFIG_IXGBE_HWMON
  11199	ixgbe_sysfs_exit(adapter);
  11200#endif /* CONFIG_IXGBE_HWMON */
  11201
  11202	/* remove the added san mac */
  11203	ixgbe_del_sanmac_netdev(netdev);
  11204
  11205#ifdef CONFIG_PCI_IOV
  11206	ixgbe_disable_sriov(adapter);
  11207#endif
  11208	if (netdev->reg_state == NETREG_REGISTERED)
  11209		unregister_netdev(netdev);
  11210
  11211	ixgbe_stop_ipsec_offload(adapter);
  11212	ixgbe_clear_interrupt_scheme(adapter);
  11213
  11214	ixgbe_release_hw_control(adapter);
  11215
  11216#ifdef CONFIG_DCB
  11217	kfree(adapter->ixgbe_ieee_pfc);
  11218	kfree(adapter->ixgbe_ieee_ets);
  11219
  11220#endif
  11221	iounmap(adapter->io_addr);
  11222	pci_release_mem_regions(pdev);
  11223
  11224	e_dev_info("complete\n");
  11225
  11226	for (i = 0; i < IXGBE_MAX_LINK_HANDLE; i++) {
  11227		if (adapter->jump_tables[i]) {
  11228			kfree(adapter->jump_tables[i]->input);
  11229			kfree(adapter->jump_tables[i]->mask);
  11230		}
  11231		kfree(adapter->jump_tables[i]);
  11232	}
  11233
  11234	kfree(adapter->mac_table);
  11235	kfree(adapter->rss_key);
  11236	bitmap_free(adapter->af_xdp_zc_qps);
  11237	disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state);
  11238	free_netdev(netdev);
  11239
  11240	pci_disable_pcie_error_reporting(pdev);
  11241
  11242	if (disable_dev)
  11243		pci_disable_device(pdev);
  11244}
  11245
  11246/**
  11247 * ixgbe_io_error_detected - called when PCI error is detected
  11248 * @pdev: Pointer to PCI device
  11249 * @state: The current pci connection state
  11250 *
  11251 * This function is called after a PCI bus error affecting
  11252 * this device has been detected.
  11253 */
  11254static pci_ers_result_t ixgbe_io_error_detected(struct pci_dev *pdev,
  11255						pci_channel_state_t state)
  11256{
  11257	struct ixgbe_adapter *adapter = pci_get_drvdata(pdev);
  11258	struct net_device *netdev = adapter->netdev;
  11259
  11260#ifdef CONFIG_PCI_IOV
  11261	struct ixgbe_hw *hw = &adapter->hw;
  11262	struct pci_dev *bdev, *vfdev;
  11263	u32 dw0, dw1, dw2, dw3;
  11264	int vf, pos;
  11265	u16 req_id, pf_func;
  11266
  11267	if (adapter->hw.mac.type == ixgbe_mac_82598EB ||
  11268	    adapter->num_vfs == 0)
  11269		goto skip_bad_vf_detection;
  11270
  11271	bdev = pdev->bus->self;
  11272	while (bdev && (pci_pcie_type(bdev) != PCI_EXP_TYPE_ROOT_PORT))
  11273		bdev = bdev->bus->self;
  11274
  11275	if (!bdev)
  11276		goto skip_bad_vf_detection;
  11277
  11278	pos = pci_find_ext_capability(bdev, PCI_EXT_CAP_ID_ERR);
  11279	if (!pos)
  11280		goto skip_bad_vf_detection;
  11281
  11282	dw0 = ixgbe_read_pci_cfg_dword(hw, pos + PCI_ERR_HEADER_LOG);
  11283	dw1 = ixgbe_read_pci_cfg_dword(hw, pos + PCI_ERR_HEADER_LOG + 4);
  11284	dw2 = ixgbe_read_pci_cfg_dword(hw, pos + PCI_ERR_HEADER_LOG + 8);
  11285	dw3 = ixgbe_read_pci_cfg_dword(hw, pos + PCI_ERR_HEADER_LOG + 12);
  11286	if (ixgbe_removed(hw->hw_addr))
  11287		goto skip_bad_vf_detection;
  11288
  11289	req_id = dw1 >> 16;
  11290	/* On the 82599 if bit 7 of the requestor ID is set then it's a VF */
  11291	if (!(req_id & 0x0080))
  11292		goto skip_bad_vf_detection;
  11293
  11294	pf_func = req_id & 0x01;
  11295	if ((pf_func & 1) == (pdev->devfn & 1)) {
  11296		unsigned int device_id;
  11297
  11298		vf = (req_id & 0x7F) >> 1;
  11299		e_dev_err("VF %d has caused a PCIe error\n", vf);
  11300		e_dev_err("TLP: dw0: %8.8x\tdw1: %8.8x\tdw2: "
  11301				"%8.8x\tdw3: %8.8x\n",
  11302		dw0, dw1, dw2, dw3);
  11303		switch (adapter->hw.mac.type) {
  11304		case ixgbe_mac_82599EB:
  11305			device_id = IXGBE_82599_VF_DEVICE_ID;
  11306			break;
  11307		case ixgbe_mac_X540:
  11308			device_id = IXGBE_X540_VF_DEVICE_ID;
  11309			break;
  11310		case ixgbe_mac_X550:
  11311			device_id = IXGBE_DEV_ID_X550_VF;
  11312			break;
  11313		case ixgbe_mac_X550EM_x:
  11314			device_id = IXGBE_DEV_ID_X550EM_X_VF;
  11315			break;
  11316		case ixgbe_mac_x550em_a:
  11317			device_id = IXGBE_DEV_ID_X550EM_A_VF;
  11318			break;
  11319		default:
  11320			device_id = 0;
  11321			break;
  11322		}
  11323
  11324		/* Find the pci device of the offending VF */
  11325		vfdev = pci_get_device(PCI_VENDOR_ID_INTEL, device_id, NULL);
  11326		while (vfdev) {
  11327			if (vfdev->devfn == (req_id & 0xFF))
  11328				break;
  11329			vfdev = pci_get_device(PCI_VENDOR_ID_INTEL,
  11330					       device_id, vfdev);
  11331		}
  11332		/*
  11333		 * There's a slim chance the VF could have been hot plugged,
  11334		 * so if it is no longer present we don't need to issue the
  11335		 * VFLR.  Just clean up the AER in that case.
  11336		 */
  11337		if (vfdev) {
  11338			pcie_flr(vfdev);
  11339			/* Free device reference count */
  11340			pci_dev_put(vfdev);
  11341		}
  11342	}
  11343
  11344	/*
  11345	 * Even though the error may have occurred on the other port
  11346	 * we still need to increment the vf error reference count for
  11347	 * both ports because the I/O resume function will be called
  11348	 * for both of them.
  11349	 */
  11350	adapter->vferr_refcount++;
  11351
  11352	return PCI_ERS_RESULT_RECOVERED;
  11353
  11354skip_bad_vf_detection:
  11355#endif /* CONFIG_PCI_IOV */
  11356	if (!test_bit(__IXGBE_SERVICE_INITED, &adapter->state))
  11357		return PCI_ERS_RESULT_DISCONNECT;
  11358
  11359	if (!netif_device_present(netdev))
  11360		return PCI_ERS_RESULT_DISCONNECT;
  11361
  11362	rtnl_lock();
  11363	netif_device_detach(netdev);
  11364
  11365	if (netif_running(netdev))
  11366		ixgbe_close_suspend(adapter);
  11367
  11368	if (state == pci_channel_io_perm_failure) {
  11369		rtnl_unlock();
  11370		return PCI_ERS_RESULT_DISCONNECT;
  11371	}
  11372
  11373	if (!test_and_set_bit(__IXGBE_DISABLED, &adapter->state))
  11374		pci_disable_device(pdev);
  11375	rtnl_unlock();
  11376
  11377	/* Request a slot reset. */
  11378	return PCI_ERS_RESULT_NEED_RESET;
  11379}
  11380
  11381/**
  11382 * ixgbe_io_slot_reset - called after the pci bus has been reset.
  11383 * @pdev: Pointer to PCI device
  11384 *
  11385 * Restart the card from scratch, as if from a cold-boot.
  11386 */
  11387static pci_ers_result_t ixgbe_io_slot_reset(struct pci_dev *pdev)
  11388{
  11389	struct ixgbe_adapter *adapter = pci_get_drvdata(pdev);
  11390	pci_ers_result_t result;
  11391
  11392	if (pci_enable_device_mem(pdev)) {
  11393		e_err(probe, "Cannot re-enable PCI device after reset.\n");
  11394		result = PCI_ERS_RESULT_DISCONNECT;
  11395	} else {
  11396		smp_mb__before_atomic();
  11397		clear_bit(__IXGBE_DISABLED, &adapter->state);
  11398		adapter->hw.hw_addr = adapter->io_addr;
  11399		pci_set_master(pdev);
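        		/*
        		 * pci_restore_state() consumes the previously saved config
        		 * space, so save it again to keep a valid copy for any later
        		 * suspend or recovery pass.
        		 */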
  11400		pci_restore_state(pdev);
  11401		pci_save_state(pdev);
  11402
  11403		pci_wake_from_d3(pdev, false);
  11404
  11405		ixgbe_reset(adapter);
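        		/* clear any pending Wake Up Status (WUS) bits */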
  11406		IXGBE_WRITE_REG(&adapter->hw, IXGBE_WUS, ~0);
  11407		result = PCI_ERS_RESULT_RECOVERED;
  11408	}
  11409
  11410	return result;
  11411}
  11412
  11413/**
  11414 * ixgbe_io_resume - called when traffic can start flowing again.
  11415 * @pdev: Pointer to PCI device
  11416 *
  11417 * This callback is called when the error recovery driver tells us that
  11418 * its OK to resume normal operation.
  11419 * it's OK to resume normal operation.
  11420static void ixgbe_io_resume(struct pci_dev *pdev)
  11421{
  11422	struct ixgbe_adapter *adapter = pci_get_drvdata(pdev);
  11423	struct net_device *netdev = adapter->netdev;
  11424
  11425#ifdef CONFIG_PCI_IOV
  11426	if (adapter->vferr_refcount) {
  11427		e_info(drv, "Resuming after VF err\n");
  11428		adapter->vferr_refcount--;
  11429		return;
  11430	}
  11431
  11432#endif
  11433	rtnl_lock();
  11434	if (netif_running(netdev))
  11435		ixgbe_open(netdev);
  11436
  11437	netif_device_attach(netdev);
  11438	rtnl_unlock();
  11439}
  11440
  11441static const struct pci_error_handlers ixgbe_err_handler = {
  11442	.error_detected = ixgbe_io_error_detected,
  11443	.slot_reset = ixgbe_io_slot_reset,
  11444	.resume = ixgbe_io_resume,
  11445};
  11446
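        /* system sleep (suspend/resume) callbacks, hooked up via .driver.pm below */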
  11447static SIMPLE_DEV_PM_OPS(ixgbe_pm_ops, ixgbe_suspend, ixgbe_resume);
  11448
  11449static struct pci_driver ixgbe_driver = {
  11450	.name      = ixgbe_driver_name,
  11451	.id_table  = ixgbe_pci_tbl,
  11452	.probe     = ixgbe_probe,
  11453	.remove    = ixgbe_remove,
  11454	.driver.pm = &ixgbe_pm_ops,
  11455	.shutdown  = ixgbe_shutdown,
  11456	.sriov_configure = ixgbe_pci_sriov_configure,
  11457	.err_handler = &ixgbe_err_handler
  11458};
  11459
  11460/**
  11461 * ixgbe_init_module - Driver Registration Routine
  11462 *
  11463 * ixgbe_init_module is the first routine called when the driver is
  11464 * loaded. It creates the driver workqueue, sets up debugfs and registers
  11465 * the driver with the PCI subsystem.
  11465 **/
  11466static int __init ixgbe_init_module(void)
  11467{
  11468	int ret;
  11469	pr_info("%s\n", ixgbe_driver_string);
  11470	pr_info("%s\n", ixgbe_copyright);
  11471
  11472	ixgbe_wq = create_singlethread_workqueue(ixgbe_driver_name);
  11473	if (!ixgbe_wq) {
  11474		pr_err("%s: Failed to create workqueue\n", ixgbe_driver_name);
  11475		return -ENOMEM;
  11476	}
  11477
  11478	ixgbe_dbg_init();
  11479
  11480	ret = pci_register_driver(&ixgbe_driver);
  11481	if (ret) {
  11482		destroy_workqueue(ixgbe_wq);
  11483		ixgbe_dbg_exit();
  11484		return ret;
  11485	}
  11486
  11487#ifdef CONFIG_IXGBE_DCA
  11488	dca_register_notify(&dca_notifier);
  11489#endif
  11490
  11491	return 0;
  11492}
  11493
  11494module_init(ixgbe_init_module);
  11495
  11496/**
  11497 * ixgbe_exit_module - Driver Exit Cleanup Routine
  11498 *
  11499 * ixgbe_exit_module is called just before the driver is removed
  11500 * from memory.
  11501 **/
  11502static void __exit ixgbe_exit_module(void)
  11503{
  11504#ifdef CONFIG_IXGBE_DCA
  11505	dca_unregister_notify(&dca_notifier);
  11506#endif
  11507	pci_unregister_driver(&ixgbe_driver);
  11508
  11509	ixgbe_dbg_exit();
  11510	if (ixgbe_wq) {
  11511		destroy_workqueue(ixgbe_wq);
  11512		ixgbe_wq = NULL;
  11513	}
  11514}
  11515
  11516#ifdef CONFIG_IXGBE_DCA
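        /*
         * Relay DCA (Direct Cache Access) notifier events to every ixgbe device
         * bound to this driver via __ixgbe_notify_dca().
         */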
  11517static int ixgbe_notify_dca(struct notifier_block *nb, unsigned long event,
  11518			    void *p)
  11519{
  11520	int ret_val;
  11521
  11522	ret_val = driver_for_each_device(&ixgbe_driver.driver, NULL, &event,
  11523					 __ixgbe_notify_dca);
  11524
  11525	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
  11526}
  11527
  11528#endif /* CONFIG_IXGBE_DCA */
  11529
  11530module_exit(ixgbe_exit_module);
  11531
  11532/* ixgbe_main.c */