cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

be_main.c (164230B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Copyright (C) 2005 - 2016 Broadcom
      4 * All rights reserved.
      5 *
      6 * Contact Information:
      7 * linux-drivers@emulex.com
      8 *
      9 * Emulex
     10 * 3333 Susan Street
     11 * Costa Mesa, CA 92626
     12 */
     13
     14#include <linux/prefetch.h>
     15#include <linux/module.h>
     16#include "be.h"
     17#include "be_cmds.h"
     18#include <asm/div64.h>
     19#include <linux/aer.h>
     20#include <linux/if_bridge.h>
     21#include <net/busy_poll.h>
     22#include <net/vxlan.h>
     23
     24MODULE_DESCRIPTION(DRV_DESC);
     25MODULE_AUTHOR("Emulex Corporation");
     26MODULE_LICENSE("GPL");
     27
     28/* num_vfs module param is obsolete.
     29 * Use sysfs method to enable/disable VFs.
     30 */
     31static unsigned int num_vfs;
     32module_param(num_vfs, uint, 0444);
     33MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
     34
     35static ushort rx_frag_size = 2048;
     36module_param(rx_frag_size, ushort, 0444);
     37MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
     38
     39/* Per-module error detection/recovery workq shared across all functions.
     40 * Each function schedules its own work request on this shared workq.
     41 */
     42static struct workqueue_struct *be_err_recovery_workq;
     43
     44static const struct pci_device_id be_dev_ids[] = {
     45#ifdef CONFIG_BE2NET_BE2
     46	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
     47	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
     48#endif /* CONFIG_BE2NET_BE2 */
     49#ifdef CONFIG_BE2NET_BE3
     50	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
     51	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
     52#endif /* CONFIG_BE2NET_BE3 */
     53#ifdef CONFIG_BE2NET_LANCER
     54	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
     55	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
     56#endif /* CONFIG_BE2NET_LANCER */
     57#ifdef CONFIG_BE2NET_SKYHAWK
     58	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
     59	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
     60#endif /* CONFIG_BE2NET_SKYHAWK */
     61	{ 0 }
     62};
     63MODULE_DEVICE_TABLE(pci, be_dev_ids);
     64
     65/* Workqueue used by all functions for deferring cmd calls to the adapter */
     66static struct workqueue_struct *be_wq;
     67
     68/* UE Status Low CSR */
     69static const char * const ue_status_low_desc[] = {
     70	"CEV",
     71	"CTX",
     72	"DBUF",
     73	"ERX",
     74	"Host",
     75	"MPU",
     76	"NDMA",
     77	"PTC ",
     78	"RDMA ",
     79	"RXF ",
     80	"RXIPS ",
     81	"RXULP0 ",
     82	"RXULP1 ",
     83	"RXULP2 ",
     84	"TIM ",
     85	"TPOST ",
     86	"TPRE ",
     87	"TXIPS ",
     88	"TXULP0 ",
     89	"TXULP1 ",
     90	"UC ",
     91	"WDMA ",
     92	"TXULP2 ",
     93	"HOST1 ",
     94	"P0_OB_LINK ",
     95	"P1_OB_LINK ",
     96	"HOST_GPIO ",
     97	"MBOX ",
     98	"ERX2 ",
     99	"SPARE ",
    100	"JTAG ",
    101	"MPU_INTPEND "
    102};
    103
    104/* UE Status High CSR */
    105static const char * const ue_status_hi_desc[] = {
    106	"LPCMEMHOST",
    107	"MGMT_MAC",
    108	"PCS0ONLINE",
    109	"MPU_IRAM",
    110	"PCS1ONLINE",
    111	"PCTL0",
    112	"PCTL1",
    113	"PMEM",
    114	"RR",
    115	"TXPB",
    116	"RXPP",
    117	"XAUI",
    118	"TXP",
    119	"ARM",
    120	"IPC",
    121	"HOST2",
    122	"HOST3",
    123	"HOST4",
    124	"HOST5",
    125	"HOST6",
    126	"HOST7",
    127	"ECRC",
    128	"Poison TLP",
    129	"NETC",
    130	"PERIPH",
    131	"LLTXULP",
    132	"D2P",
    133	"RCON",
    134	"LDMA",
    135	"LLTXP",
    136	"LLTXPB",
    137	"Unknown"
    138};
    139
    140#define BE_VF_IF_EN_FLAGS	(BE_IF_FLAGS_UNTAGGED | \
    141				 BE_IF_FLAGS_BROADCAST | \
    142				 BE_IF_FLAGS_MULTICAST | \
    143				 BE_IF_FLAGS_PASS_L3L4_ERRORS)
    144
    145static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
    146{
    147	struct be_dma_mem *mem = &q->dma_mem;
    148
    149	if (mem->va) {
    150		dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
    151				  mem->dma);
    152		mem->va = NULL;
    153	}
    154}
    155
    156static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
    157			  u16 len, u16 entry_size)
    158{
    159	struct be_dma_mem *mem = &q->dma_mem;
    160
    161	memset(q, 0, sizeof(*q));
    162	q->len = len;
    163	q->entry_size = entry_size;
    164	mem->size = len * entry_size;
    165	mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size,
    166				     &mem->dma, GFP_KERNEL);
    167	if (!mem->va)
    168		return -ENOMEM;
    169	return 0;
    170}
    171
    172static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
    173{
    174	u32 reg, enabled;
    175
    176	pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
    177			      &reg);
    178	enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
    179
    180	if (!enabled && enable)
    181		reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
    182	else if (enabled && !enable)
    183		reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
    184	else
    185		return;
    186
    187	pci_write_config_dword(adapter->pdev,
    188			       PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
    189}
    190
    191static void be_intr_set(struct be_adapter *adapter, bool enable)
    192{
    193	int status = 0;
    194
    195	/* On lancer interrupts can't be controlled via this register */
    196	if (lancer_chip(adapter))
    197		return;
    198
    199	if (be_check_error(adapter, BE_ERROR_EEH))
    200		return;
    201
    202	status = be_cmd_intr_set(adapter, enable);
    203	if (status)
    204		be_reg_intr_set(adapter, enable);
    205}
    206
    207static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
    208{
    209	u32 val = 0;
    210
    211	if (be_check_error(adapter, BE_ERROR_HW))
    212		return;
    213
    214	val |= qid & DB_RQ_RING_ID_MASK;
    215	val |= posted << DB_RQ_NUM_POSTED_SHIFT;
    216
    217	wmb();
    218	iowrite32(val, adapter->db + DB_RQ_OFFSET);
    219}
    220
    221static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
    222			  u16 posted)
    223{
    224	u32 val = 0;
    225
    226	if (be_check_error(adapter, BE_ERROR_HW))
    227		return;
    228
    229	val |= txo->q.id & DB_TXULP_RING_ID_MASK;
    230	val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
    231
    232	wmb();
    233	iowrite32(val, adapter->db + txo->db_offset);
    234}
    235
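/* Ring the event-queue doorbell: acknowledge num_popped events, optionally
 * re-arm the EQ and clear the interrupt, and program the EQ delay multiplier.
 */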
    236static void be_eq_notify(struct be_adapter *adapter, u16 qid,
    237			 bool arm, bool clear_int, u16 num_popped,
    238			 u32 eq_delay_mult_enc)
    239{
    240	u32 val = 0;
    241
    242	val |= qid & DB_EQ_RING_ID_MASK;
    243	val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
    244
    245	if (be_check_error(adapter, BE_ERROR_HW))
    246		return;
    247
    248	if (arm)
    249		val |= 1 << DB_EQ_REARM_SHIFT;
    250	if (clear_int)
    251		val |= 1 << DB_EQ_CLR_SHIFT;
    252	val |= 1 << DB_EQ_EVNT_SHIFT;
    253	val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
    254	val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
    255	iowrite32(val, adapter->db + DB_EQ_OFFSET);
    256}
    257
    258void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
    259{
    260	u32 val = 0;
    261
    262	val |= qid & DB_CQ_RING_ID_MASK;
    263	val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
    264			DB_CQ_RING_ID_EXT_MASK_SHIFT);
    265
    266	if (be_check_error(adapter, BE_ERROR_HW))
    267		return;
    268
    269	if (arm)
    270		val |= 1 << DB_CQ_REARM_SHIFT;
    271	val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
    272	iowrite32(val, adapter->db + DB_CQ_OFFSET);
    273}
    274
    275static int be_dev_mac_add(struct be_adapter *adapter, const u8 *mac)
    276{
    277	int i;
    278
    279	/* Check if mac has already been added as part of uc-list */
    280	for (i = 0; i < adapter->uc_macs; i++) {
    281		if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
    282			/* mac already added, skip addition */
    283			adapter->pmac_id[0] = adapter->pmac_id[i + 1];
    284			return 0;
    285		}
    286	}
    287
    288	return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
    289			       &adapter->pmac_id[0], 0);
    290}
    291
    292static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
    293{
    294	int i;
    295
    296	/* Skip deletion if the programmed mac is
    297	 * being used in uc-list
    298	 */
    299	for (i = 0; i < adapter->uc_macs; i++) {
    300		if (adapter->pmac_id[i + 1] == pmac_id)
    301			return;
    302	}
    303	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
    304}
    305
    306static int be_mac_addr_set(struct net_device *netdev, void *p)
    307{
    308	struct be_adapter *adapter = netdev_priv(netdev);
    309	struct device *dev = &adapter->pdev->dev;
    310	struct sockaddr *addr = p;
    311	int status;
    312	u8 mac[ETH_ALEN];
    313	u32 old_pmac_id = adapter->pmac_id[0];
    314
    315	if (!is_valid_ether_addr(addr->sa_data))
    316		return -EADDRNOTAVAIL;
    317
     318	/* Proceed further only if the user-provided MAC is different
     319	 * from the active MAC
     320	 */
    321	if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
    322		return 0;
    323
     324	/* BE3 VFs without FILTMGMT privilege are not allowed to set their MAC
     325	 * address
     326	 */
    327	if (BEx_chip(adapter) && be_virtfn(adapter) &&
    328	    !check_privilege(adapter, BE_PRIV_FILTMGMT))
    329		return -EPERM;
    330
    331	/* if device is not running, copy MAC to netdev->dev_addr */
    332	if (!netif_running(netdev))
    333		goto done;
    334
    335	/* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
    336	 * privilege or if PF did not provision the new MAC address.
    337	 * On BE3, this cmd will always fail if the VF doesn't have the
    338	 * FILTMGMT privilege. This failure is OK, only if the PF programmed
    339	 * the MAC for the VF.
    340	 */
    341	mutex_lock(&adapter->rx_filter_lock);
    342	status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
    343	if (!status) {
    344
    345		/* Delete the old programmed MAC. This call may fail if the
    346		 * old MAC was already deleted by the PF driver.
    347		 */
    348		if (adapter->pmac_id[0] != old_pmac_id)
    349			be_dev_mac_del(adapter, old_pmac_id);
    350	}
    351
    352	mutex_unlock(&adapter->rx_filter_lock);
    353	/* Decide if the new MAC is successfully activated only after
    354	 * querying the FW
    355	 */
    356	status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
    357				       adapter->if_handle, true, 0);
    358	if (status)
    359		goto err;
    360
    361	/* The MAC change did not happen, either due to lack of privilege
    362	 * or PF didn't pre-provision.
    363	 */
    364	if (!ether_addr_equal(addr->sa_data, mac)) {
    365		status = -EPERM;
    366		goto err;
    367	}
    368
    369	/* Remember currently programmed MAC */
    370	ether_addr_copy(adapter->dev_mac, addr->sa_data);
    371done:
    372	eth_hw_addr_set(netdev, addr->sa_data);
    373	dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
    374	return 0;
    375err:
    376	dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
    377	return status;
    378}
    379
    380/* BE2 supports only v0 cmd */
    381static void *hw_stats_from_cmd(struct be_adapter *adapter)
    382{
    383	if (BE2_chip(adapter)) {
    384		struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
    385
    386		return &cmd->hw_stats;
    387	} else if (BE3_chip(adapter)) {
    388		struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
    389
    390		return &cmd->hw_stats;
    391	} else {
    392		struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
    393
    394		return &cmd->hw_stats;
    395	}
    396}
    397
    398/* BE2 supports only v0 cmd */
    399static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
    400{
    401	if (BE2_chip(adapter)) {
    402		struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
    403
    404		return &hw_stats->erx;
    405	} else if (BE3_chip(adapter)) {
    406		struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
    407
    408		return &hw_stats->erx;
    409	} else {
    410		struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
    411
    412		return &hw_stats->erx;
    413	}
    414}
    415
    416static void populate_be_v0_stats(struct be_adapter *adapter)
    417{
    418	struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
    419	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
    420	struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
    421	struct be_port_rxf_stats_v0 *port_stats =
    422					&rxf_stats->port[adapter->port_num];
    423	struct be_drv_stats *drvs = &adapter->drv_stats;
    424
    425	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
    426	drvs->rx_pause_frames = port_stats->rx_pause_frames;
    427	drvs->rx_crc_errors = port_stats->rx_crc_errors;
    428	drvs->rx_control_frames = port_stats->rx_control_frames;
    429	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
    430	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
    431	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
    432	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
    433	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
    434	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
    435	drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
    436	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
    437	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
    438	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
    439	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
    440	drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
    441	drvs->rx_dropped_header_too_small =
    442		port_stats->rx_dropped_header_too_small;
    443	drvs->rx_address_filtered =
    444					port_stats->rx_address_filtered +
    445					port_stats->rx_vlan_filtered;
    446	drvs->rx_alignment_symbol_errors =
    447		port_stats->rx_alignment_symbol_errors;
    448
    449	drvs->tx_pauseframes = port_stats->tx_pauseframes;
    450	drvs->tx_controlframes = port_stats->tx_controlframes;
    451
    452	if (adapter->port_num)
    453		drvs->jabber_events = rxf_stats->port1_jabber_events;
    454	else
    455		drvs->jabber_events = rxf_stats->port0_jabber_events;
    456	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
    457	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
    458	drvs->forwarded_packets = rxf_stats->forwarded_packets;
    459	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
    460	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
    461	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
    462	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
    463}
    464
    465static void populate_be_v1_stats(struct be_adapter *adapter)
    466{
    467	struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
    468	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
    469	struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
    470	struct be_port_rxf_stats_v1 *port_stats =
    471					&rxf_stats->port[adapter->port_num];
    472	struct be_drv_stats *drvs = &adapter->drv_stats;
    473
    474	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
    475	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
    476	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
    477	drvs->rx_pause_frames = port_stats->rx_pause_frames;
    478	drvs->rx_crc_errors = port_stats->rx_crc_errors;
    479	drvs->rx_control_frames = port_stats->rx_control_frames;
    480	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
    481	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
    482	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
    483	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
    484	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
    485	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
    486	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
    487	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
    488	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
    489	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
    490	drvs->rx_dropped_header_too_small =
    491		port_stats->rx_dropped_header_too_small;
    492	drvs->rx_input_fifo_overflow_drop =
    493		port_stats->rx_input_fifo_overflow_drop;
    494	drvs->rx_address_filtered = port_stats->rx_address_filtered;
    495	drvs->rx_alignment_symbol_errors =
    496		port_stats->rx_alignment_symbol_errors;
    497	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
    498	drvs->tx_pauseframes = port_stats->tx_pauseframes;
    499	drvs->tx_controlframes = port_stats->tx_controlframes;
    500	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
    501	drvs->jabber_events = port_stats->jabber_events;
    502	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
    503	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
    504	drvs->forwarded_packets = rxf_stats->forwarded_packets;
    505	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
    506	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
    507	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
    508	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
    509}
    510
    511static void populate_be_v2_stats(struct be_adapter *adapter)
    512{
    513	struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
    514	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
    515	struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
    516	struct be_port_rxf_stats_v2 *port_stats =
    517					&rxf_stats->port[adapter->port_num];
    518	struct be_drv_stats *drvs = &adapter->drv_stats;
    519
    520	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
    521	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
    522	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
    523	drvs->rx_pause_frames = port_stats->rx_pause_frames;
    524	drvs->rx_crc_errors = port_stats->rx_crc_errors;
    525	drvs->rx_control_frames = port_stats->rx_control_frames;
    526	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
    527	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
    528	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
    529	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
    530	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
    531	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
    532	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
    533	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
    534	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
    535	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
    536	drvs->rx_dropped_header_too_small =
    537		port_stats->rx_dropped_header_too_small;
    538	drvs->rx_input_fifo_overflow_drop =
    539		port_stats->rx_input_fifo_overflow_drop;
    540	drvs->rx_address_filtered = port_stats->rx_address_filtered;
    541	drvs->rx_alignment_symbol_errors =
    542		port_stats->rx_alignment_symbol_errors;
    543	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
    544	drvs->tx_pauseframes = port_stats->tx_pauseframes;
    545	drvs->tx_controlframes = port_stats->tx_controlframes;
    546	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
    547	drvs->jabber_events = port_stats->jabber_events;
    548	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
    549	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
    550	drvs->forwarded_packets = rxf_stats->forwarded_packets;
    551	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
    552	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
    553	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
    554	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
    555	if (be_roce_supported(adapter)) {
    556		drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
    557		drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
    558		drvs->rx_roce_frames = port_stats->roce_frames_received;
    559		drvs->roce_drops_crc = port_stats->roce_drops_crc;
    560		drvs->roce_drops_payload_len =
    561			port_stats->roce_drops_payload_len;
    562	}
    563}
    564
    565static void populate_lancer_stats(struct be_adapter *adapter)
    566{
    567	struct be_drv_stats *drvs = &adapter->drv_stats;
    568	struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
    569
    570	be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
    571	drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
    572	drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
    573	drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
    574	drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
    575	drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
    576	drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
    577	drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
    578	drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
    579	drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
    580	drvs->rx_dropped_tcp_length =
    581				pport_stats->rx_dropped_invalid_tcp_length;
    582	drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
    583	drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
    584	drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
    585	drvs->rx_dropped_header_too_small =
    586				pport_stats->rx_dropped_header_too_small;
    587	drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
    588	drvs->rx_address_filtered =
    589					pport_stats->rx_address_filtered +
    590					pport_stats->rx_vlan_filtered;
    591	drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
    592	drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
    593	drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
    594	drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
    595	drvs->jabber_events = pport_stats->rx_jabbers;
    596	drvs->forwarded_packets = pport_stats->num_forwards_lo;
    597	drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
    598	drvs->rx_drops_too_many_frags =
    599				pport_stats->rx_drops_too_many_frags_lo;
    600}
    601
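/* Accumulate a 16-bit HW counter into a 32-bit software counter. The HW
 * counter wraps at 65535; a reading smaller than the low 16 bits of the
 * accumulator indicates a wrap, so 65536 is added once.
 * Example: acc = 0x0001FFF0, val = 5 -> 0x00010005 + 65536 = 0x00020005.
 */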
    602static void accumulate_16bit_val(u32 *acc, u16 val)
    603{
    604#define lo(x)			(x & 0xFFFF)
    605#define hi(x)			(x & 0xFFFF0000)
    606	bool wrapped = val < lo(*acc);
    607	u32 newacc = hi(*acc) + val;
    608
    609	if (wrapped)
    610		newacc += 65536;
    611	WRITE_ONCE(*acc, newacc);
    612}
    613
    614static void populate_erx_stats(struct be_adapter *adapter,
    615			       struct be_rx_obj *rxo, u32 erx_stat)
    616{
    617	if (!BEx_chip(adapter))
    618		rx_stats(rxo)->rx_drops_no_frags = erx_stat;
    619	else
     620		/* the erx HW counter below can wrap around after
     621		 * 65535; the driver accumulates it into a 32-bit value
     622		 */
    623		accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
    624				     (u16)erx_stat);
    625}
    626
    627void be_parse_stats(struct be_adapter *adapter)
    628{
    629	struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
    630	struct be_rx_obj *rxo;
    631	int i;
    632	u32 erx_stat;
    633
    634	if (lancer_chip(adapter)) {
    635		populate_lancer_stats(adapter);
    636	} else {
    637		if (BE2_chip(adapter))
    638			populate_be_v0_stats(adapter);
    639		else if (BE3_chip(adapter))
    640			/* for BE3 */
    641			populate_be_v1_stats(adapter);
    642		else
    643			populate_be_v2_stats(adapter);
    644
    645		/* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
    646		for_all_rx_queues(adapter, rxo, i) {
    647			erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
    648			populate_erx_stats(adapter, rxo, erx_stat);
    649		}
    650	}
    651}
    652
    653static void be_get_stats64(struct net_device *netdev,
    654			   struct rtnl_link_stats64 *stats)
    655{
    656	struct be_adapter *adapter = netdev_priv(netdev);
    657	struct be_drv_stats *drvs = &adapter->drv_stats;
    658	struct be_rx_obj *rxo;
    659	struct be_tx_obj *txo;
    660	u64 pkts, bytes;
    661	unsigned int start;
    662	int i;
    663
    664	for_all_rx_queues(adapter, rxo, i) {
    665		const struct be_rx_stats *rx_stats = rx_stats(rxo);
    666
    667		do {
    668			start = u64_stats_fetch_begin_irq(&rx_stats->sync);
    669			pkts = rx_stats(rxo)->rx_pkts;
    670			bytes = rx_stats(rxo)->rx_bytes;
    671		} while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
    672		stats->rx_packets += pkts;
    673		stats->rx_bytes += bytes;
    674		stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
    675		stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
    676					rx_stats(rxo)->rx_drops_no_frags;
    677	}
    678
    679	for_all_tx_queues(adapter, txo, i) {
    680		const struct be_tx_stats *tx_stats = tx_stats(txo);
    681
    682		do {
    683			start = u64_stats_fetch_begin_irq(&tx_stats->sync);
    684			pkts = tx_stats(txo)->tx_pkts;
    685			bytes = tx_stats(txo)->tx_bytes;
    686		} while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
    687		stats->tx_packets += pkts;
    688		stats->tx_bytes += bytes;
    689	}
    690
    691	/* bad pkts received */
    692	stats->rx_errors = drvs->rx_crc_errors +
    693		drvs->rx_alignment_symbol_errors +
    694		drvs->rx_in_range_errors +
    695		drvs->rx_out_range_errors +
    696		drvs->rx_frame_too_long +
    697		drvs->rx_dropped_too_small +
    698		drvs->rx_dropped_too_short +
    699		drvs->rx_dropped_header_too_small +
    700		drvs->rx_dropped_tcp_length +
    701		drvs->rx_dropped_runt;
    702
    703	/* detailed rx errors */
    704	stats->rx_length_errors = drvs->rx_in_range_errors +
    705		drvs->rx_out_range_errors +
    706		drvs->rx_frame_too_long;
    707
    708	stats->rx_crc_errors = drvs->rx_crc_errors;
    709
    710	/* frame alignment errors */
    711	stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
    712
    713	/* receiver fifo overrun */
     714	/* drops_no_pbuf is not per i/f, it's per BE card */
    715	stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
    716				drvs->rx_input_fifo_overflow_drop +
    717				drvs->rx_drops_no_pbuf;
    718}
    719
    720void be_link_status_update(struct be_adapter *adapter, u8 link_status)
    721{
    722	struct net_device *netdev = adapter->netdev;
    723
    724	if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
    725		netif_carrier_off(netdev);
    726		adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
    727	}
    728
    729	if (link_status)
    730		netif_carrier_on(netdev);
    731	else
    732		netif_carrier_off(netdev);
    733
    734	netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
    735}
    736
    737static int be_gso_hdr_len(struct sk_buff *skb)
    738{
    739	if (skb->encapsulation)
    740		return skb_inner_transport_offset(skb) +
    741		       inner_tcp_hdrlen(skb);
    742	return skb_transport_offset(skb) + tcp_hdrlen(skb);
    743}
    744
    745static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
    746{
    747	struct be_tx_stats *stats = tx_stats(txo);
    748	u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
    749	/* Account for headers which get duplicated in TSO pkt */
    750	u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
    751
    752	u64_stats_update_begin(&stats->sync);
    753	stats->tx_reqs++;
    754	stats->tx_bytes += skb->len + dup_hdr_len;
    755	stats->tx_pkts += tx_pkts;
    756	if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
    757		stats->tx_vxlan_offload_pkts += tx_pkts;
    758	u64_stats_update_end(&stats->sync);
    759}
    760
    761/* Returns number of WRBs needed for the skb */
    762static u32 skb_wrb_cnt(struct sk_buff *skb)
    763{
    764	/* +1 for the header wrb */
    765	return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
    766}
    767
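/* Fill a Tx WRB fragment descriptor: the 64-bit DMA address is split into
 * hi/lo dwords and the fragment length is masked to the WRB field width.
 */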
    768static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
    769{
    770	wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
    771	wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
    772	wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
    773	wrb->rsvd0 = 0;
    774}
    775
    776/* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
    777 * to avoid the swap and shift/mask operations in wrb_fill().
    778 */
    779static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
    780{
    781	wrb->frag_pa_hi = 0;
    782	wrb->frag_pa_lo = 0;
    783	wrb->frag_len = 0;
    784	wrb->rsvd0 = 0;
    785}
    786
    787static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
    788				     struct sk_buff *skb)
    789{
    790	u8 vlan_prio;
    791	u16 vlan_tag;
    792
    793	vlan_tag = skb_vlan_tag_get(skb);
    794	vlan_prio = skb_vlan_tag_get_prio(skb);
    795	/* If vlan priority provided by OS is NOT in available bmap */
    796	if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
    797		vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
    798				adapter->recommended_prio_bits;
    799
    800	return vlan_tag;
    801}
    802
    803/* Used only for IP tunnel packets */
    804static u16 skb_inner_ip_proto(struct sk_buff *skb)
    805{
    806	return (inner_ip_hdr(skb)->version == 4) ?
    807		inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
    808}
    809
    810static u16 skb_ip_proto(struct sk_buff *skb)
    811{
    812	return (ip_hdr(skb)->version == 4) ?
    813		ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
    814}
    815
    816static inline bool be_is_txq_full(struct be_tx_obj *txo)
    817{
    818	return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
    819}
    820
    821static inline bool be_can_txq_wake(struct be_tx_obj *txo)
    822{
    823	return atomic_read(&txo->q.used) < txo->q.len / 2;
    824}
    825
    826static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
    827{
    828	return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
    829}
    830
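/* Derive the Tx WRB header feature bits (LSO/LSO6, IP/TCP/UDP checksum
 * offload, VLAN) from the skb's GSO, checksum and VLAN offload state.
 */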
    831static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
    832				       struct sk_buff *skb,
    833				       struct be_wrb_params *wrb_params)
    834{
    835	u16 proto;
    836
    837	if (skb_is_gso(skb)) {
    838		BE_WRB_F_SET(wrb_params->features, LSO, 1);
    839		wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
    840		if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
    841			BE_WRB_F_SET(wrb_params->features, LSO6, 1);
    842	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
    843		if (skb->encapsulation) {
    844			BE_WRB_F_SET(wrb_params->features, IPCS, 1);
    845			proto = skb_inner_ip_proto(skb);
    846		} else {
    847			proto = skb_ip_proto(skb);
    848		}
    849		if (proto == IPPROTO_TCP)
    850			BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
    851		else if (proto == IPPROTO_UDP)
    852			BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
    853	}
    854
    855	if (skb_vlan_tag_present(skb)) {
    856		BE_WRB_F_SET(wrb_params->features, VLAN, 1);
    857		wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
    858	}
    859
    860	BE_WRB_F_SET(wrb_params->features, CRC, 1);
    861}
    862
    863static void wrb_fill_hdr(struct be_adapter *adapter,
    864			 struct be_eth_hdr_wrb *hdr,
    865			 struct be_wrb_params *wrb_params,
    866			 struct sk_buff *skb)
    867{
    868	memset(hdr, 0, sizeof(*hdr));
    869
    870	SET_TX_WRB_HDR_BITS(crc, hdr,
    871			    BE_WRB_F_GET(wrb_params->features, CRC));
    872	SET_TX_WRB_HDR_BITS(ipcs, hdr,
    873			    BE_WRB_F_GET(wrb_params->features, IPCS));
    874	SET_TX_WRB_HDR_BITS(tcpcs, hdr,
    875			    BE_WRB_F_GET(wrb_params->features, TCPCS));
    876	SET_TX_WRB_HDR_BITS(udpcs, hdr,
    877			    BE_WRB_F_GET(wrb_params->features, UDPCS));
    878
    879	SET_TX_WRB_HDR_BITS(lso, hdr,
    880			    BE_WRB_F_GET(wrb_params->features, LSO));
    881	SET_TX_WRB_HDR_BITS(lso6, hdr,
    882			    BE_WRB_F_GET(wrb_params->features, LSO6));
    883	SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
    884
    885	/* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
    886	 * hack is not needed, the evt bit is set while ringing DB.
    887	 */
    888	SET_TX_WRB_HDR_BITS(event, hdr,
    889			    BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
    890	SET_TX_WRB_HDR_BITS(vlan, hdr,
    891			    BE_WRB_F_GET(wrb_params->features, VLAN));
    892	SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
    893
    894	SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
    895	SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
    896	SET_TX_WRB_HDR_BITS(mgmt, hdr,
    897			    BE_WRB_F_GET(wrb_params->features, OS2BMC));
    898}
    899
    900static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
    901			  bool unmap_single)
    902{
    903	dma_addr_t dma;
    904	u32 frag_len = le32_to_cpu(wrb->frag_len);
    905
    906
    907	dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
    908		(u64)le32_to_cpu(wrb->frag_pa_lo);
    909	if (frag_len) {
    910		if (unmap_single)
    911			dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
    912		else
    913			dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
    914	}
    915}
    916
    917/* Grab a WRB header for xmit */
    918static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
    919{
    920	u32 head = txo->q.head;
    921
    922	queue_head_inc(&txo->q);
    923	return head;
    924}
    925
    926/* Set up the WRB header for xmit */
    927static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
    928				struct be_tx_obj *txo,
    929				struct be_wrb_params *wrb_params,
    930				struct sk_buff *skb, u16 head)
    931{
    932	u32 num_frags = skb_wrb_cnt(skb);
    933	struct be_queue_info *txq = &txo->q;
    934	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
    935
    936	wrb_fill_hdr(adapter, hdr, wrb_params, skb);
    937	be_dws_cpu_to_le(hdr, sizeof(*hdr));
    938
    939	BUG_ON(txo->sent_skb_list[head]);
    940	txo->sent_skb_list[head] = skb;
    941	txo->last_req_hdr = head;
    942	atomic_add(num_frags, &txq->used);
    943	txo->last_req_wrb_cnt = num_frags;
    944	txo->pend_wrb_cnt += num_frags;
    945}
    946
    947/* Setup a WRB fragment (buffer descriptor) for xmit */
    948static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
    949				 int len)
    950{
    951	struct be_eth_wrb *wrb;
    952	struct be_queue_info *txq = &txo->q;
    953
    954	wrb = queue_head_node(txq);
    955	wrb_fill(wrb, busaddr, len);
    956	queue_head_inc(txq);
    957}
    958
    959/* Bring the queue back to the state it was in before be_xmit_enqueue() routine
    960 * was invoked. The producer index is restored to the previous packet and the
    961 * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
    962 */
    963static void be_xmit_restore(struct be_adapter *adapter,
    964			    struct be_tx_obj *txo, u32 head, bool map_single,
    965			    u32 copied)
    966{
    967	struct device *dev;
    968	struct be_eth_wrb *wrb;
    969	struct be_queue_info *txq = &txo->q;
    970
    971	dev = &adapter->pdev->dev;
    972	txq->head = head;
    973
    974	/* skip the first wrb (hdr); it's not mapped */
    975	queue_head_inc(txq);
    976	while (copied) {
    977		wrb = queue_head_node(txq);
    978		unmap_tx_frag(dev, wrb, map_single);
    979		map_single = false;
    980		copied -= le32_to_cpu(wrb->frag_len);
    981		queue_head_inc(txq);
    982	}
    983
    984	txq->head = head;
    985}
    986
    987/* Enqueue the given packet for transmit. This routine allocates WRBs for the
    988 * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
    989 * of WRBs used up by the packet.
    990 */
    991static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
    992			   struct sk_buff *skb,
    993			   struct be_wrb_params *wrb_params)
    994{
    995	u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
    996	struct device *dev = &adapter->pdev->dev;
    997	bool map_single = false;
    998	u32 head;
    999	dma_addr_t busaddr;
   1000	int len;
   1001
   1002	head = be_tx_get_wrb_hdr(txo);
   1003
   1004	if (skb->len > skb->data_len) {
   1005		len = skb_headlen(skb);
   1006
   1007		busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
   1008		if (dma_mapping_error(dev, busaddr))
   1009			goto dma_err;
   1010		map_single = true;
   1011		be_tx_setup_wrb_frag(txo, busaddr, len);
   1012		copied += len;
   1013	}
   1014
   1015	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
   1016		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
   1017		len = skb_frag_size(frag);
   1018
   1019		busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
   1020		if (dma_mapping_error(dev, busaddr))
   1021			goto dma_err;
   1022		be_tx_setup_wrb_frag(txo, busaddr, len);
   1023		copied += len;
   1024	}
   1025
   1026	be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
   1027
   1028	be_tx_stats_update(txo, skb);
   1029	return wrb_cnt;
   1030
   1031dma_err:
   1032	adapter->drv_stats.dma_map_errors++;
   1033	be_xmit_restore(adapter, txo, head, map_single, copied);
   1034	return 0;
   1035}
   1036
   1037static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
   1038{
   1039	return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
   1040}
   1041
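/* Insert the VLAN tag(s) directly into the packet data: the skb's tag (or
 * the PVID after a QnQ async event) and, if configured, the outer QnQ VLAN.
 * The hwaccel tag is cleared so that HW does not insert it again.
 */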
   1042static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
   1043					     struct sk_buff *skb,
   1044					     struct be_wrb_params
   1045					     *wrb_params)
   1046{
   1047	bool insert_vlan = false;
   1048	u16 vlan_tag = 0;
   1049
   1050	skb = skb_share_check(skb, GFP_ATOMIC);
   1051	if (unlikely(!skb))
   1052		return skb;
   1053
   1054	if (skb_vlan_tag_present(skb)) {
   1055		vlan_tag = be_get_tx_vlan_tag(adapter, skb);
   1056		insert_vlan = true;
   1057	}
   1058
   1059	if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
   1060		if (!insert_vlan) {
   1061			vlan_tag = adapter->pvid;
   1062			insert_vlan = true;
   1063		}
    1064		/* f/w workaround: setting skip_hw_vlan = 1 informs the F/W to
    1065		 * skip VLAN insertion
    1066		 */
   1067		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
   1068	}
   1069
   1070	if (insert_vlan) {
   1071		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
   1072						vlan_tag);
   1073		if (unlikely(!skb))
   1074			return skb;
   1075		__vlan_hwaccel_clear_tag(skb);
   1076	}
   1077
   1078	/* Insert the outer VLAN, if any */
   1079	if (adapter->qnq_vid) {
   1080		vlan_tag = adapter->qnq_vid;
   1081		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
   1082						vlan_tag);
   1083		if (unlikely(!skb))
   1084			return skb;
   1085		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
   1086	}
   1087
   1088	return skb;
   1089}
   1090
   1091static bool be_ipv6_exthdr_check(struct sk_buff *skb)
   1092{
   1093	struct ethhdr *eh = (struct ethhdr *)skb->data;
   1094	u16 offset = ETH_HLEN;
   1095
   1096	if (eh->h_proto == htons(ETH_P_IPV6)) {
   1097		struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
   1098
   1099		offset += sizeof(struct ipv6hdr);
   1100		if (ip6h->nexthdr != NEXTHDR_TCP &&
   1101		    ip6h->nexthdr != NEXTHDR_UDP) {
   1102			struct ipv6_opt_hdr *ehdr =
   1103				(struct ipv6_opt_hdr *)(skb->data + offset);
   1104
   1105			/* offending pkt: 2nd byte following IPv6 hdr is 0xff */
   1106			if (ehdr->hdrlen == 0xff)
   1107				return true;
   1108		}
   1109	}
   1110	return false;
   1111}
   1112
   1113static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
   1114{
   1115	return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
   1116}
   1117
   1118static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
   1119{
   1120	return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
   1121}
   1122
   1123static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
   1124						  struct sk_buff *skb,
   1125						  struct be_wrb_params
   1126						  *wrb_params)
   1127{
   1128	struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
   1129	unsigned int eth_hdr_len;
   1130	struct iphdr *ip;
   1131
   1132	/* For padded packets, BE HW modifies tot_len field in IP header
    1133	 * incorrectly when VLAN tag is inserted by HW.
   1134	 * For padded packets, Lancer computes incorrect checksum.
   1135	 */
   1136	eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
   1137						VLAN_ETH_HLEN : ETH_HLEN;
   1138	if (skb->len <= 60 &&
   1139	    (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
   1140	    is_ipv4_pkt(skb)) {
   1141		ip = (struct iphdr *)ip_hdr(skb);
   1142		pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
   1143	}
   1144
   1145	/* If vlan tag is already inlined in the packet, skip HW VLAN
   1146	 * tagging in pvid-tagging mode
   1147	 */
   1148	if (be_pvid_tagging_enabled(adapter) &&
   1149	    veh->h_vlan_proto == htons(ETH_P_8021Q))
   1150		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
   1151
    1152	/* HW has a bug wherein it will calculate CSUM for VLAN
    1153	 * pkts even though checksum offload is not requested.
    1154	 * Manually insert VLAN in pkt.
    1155	 */
   1156	if (skb->ip_summed != CHECKSUM_PARTIAL &&
   1157	    skb_vlan_tag_present(skb)) {
   1158		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
   1159		if (unlikely(!skb))
   1160			goto err;
   1161	}
   1162
    1163	/* HW may lock up when VLAN HW tagging is requested on
    1164	 * certain ipv6 packets. Drop such pkts if the HW workaround to
    1165	 * skip HW tagging is not enabled by FW.
    1166	 */
   1167	if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
   1168		     (adapter->pvid || adapter->qnq_vid) &&
   1169		     !qnq_async_evt_rcvd(adapter)))
   1170		goto tx_drop;
   1171
   1172	/* Manual VLAN tag insertion to prevent:
   1173	 * ASIC lockup when the ASIC inserts VLAN tag into
   1174	 * certain ipv6 packets. Insert VLAN tags in driver,
   1175	 * and set event, completion, vlan bits accordingly
   1176	 * in the Tx WRB.
   1177	 */
   1178	if (be_ipv6_tx_stall_chk(adapter, skb) &&
   1179	    be_vlan_tag_tx_chk(adapter, skb)) {
   1180		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
   1181		if (unlikely(!skb))
   1182			goto err;
   1183	}
   1184
   1185	return skb;
   1186tx_drop:
   1187	dev_kfree_skb_any(skb);
   1188err:
   1189	return NULL;
   1190}
   1191
   1192static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
   1193					   struct sk_buff *skb,
   1194					   struct be_wrb_params *wrb_params)
   1195{
   1196	int err;
   1197
    1198	/* Lancer, SH and BE3 in SRIOV mode have a bug wherein
    1199	 * packets that are 32 bytes or less may cause a transmit stall
    1200	 * on that port. The workaround is to pad such packets
    1201	 * (len <= 32 bytes) to a minimum length of 36 bytes.
    1202	 */
   1203	if (skb->len <= 32) {
   1204		if (skb_put_padto(skb, 36))
   1205			return NULL;
   1206	}
   1207
   1208	if (BEx_chip(adapter) || lancer_chip(adapter)) {
   1209		skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
   1210		if (!skb)
   1211			return NULL;
   1212	}
   1213
   1214	/* The stack can send us skbs with length greater than
   1215	 * what the HW can handle. Trim the extra bytes.
   1216	 */
   1217	WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
   1218	err = pskb_trim(skb, BE_MAX_GSO_SIZE);
   1219	WARN_ON(err);
   1220
   1221	return skb;
   1222}
   1223
   1224static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
   1225{
   1226	struct be_queue_info *txq = &txo->q;
   1227	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
   1228
   1229	/* Mark the last request eventable if it hasn't been marked already */
   1230	if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
   1231		hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
   1232
    1233	/* compose a dummy wrb if there is an odd number of wrbs to notify */
   1234	if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
   1235		wrb_fill_dummy(queue_head_node(txq));
   1236		queue_head_inc(txq);
   1237		atomic_inc(&txq->used);
   1238		txo->pend_wrb_cnt++;
   1239		hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
   1240					   TX_HDR_WRB_NUM_SHIFT);
   1241		hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
   1242					  TX_HDR_WRB_NUM_SHIFT);
   1243	}
   1244	be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
   1245	txo->pend_wrb_cnt = 0;
   1246}
   1247
   1248/* OS2BMC related */
   1249
   1250#define DHCP_CLIENT_PORT	68
   1251#define DHCP_SERVER_PORT	67
   1252#define NET_BIOS_PORT1		137
   1253#define NET_BIOS_PORT2		138
   1254#define DHCPV6_RAS_PORT		547
   1255
   1256#define is_mc_allowed_on_bmc(adapter, eh)	\
   1257	(!is_multicast_filt_enabled(adapter) &&	\
   1258	 is_multicast_ether_addr(eh->h_dest) &&	\
   1259	 !is_broadcast_ether_addr(eh->h_dest))
   1260
   1261#define is_bc_allowed_on_bmc(adapter, eh)	\
   1262	(!is_broadcast_filt_enabled(adapter) &&	\
   1263	 is_broadcast_ether_addr(eh->h_dest))
   1264
   1265#define is_arp_allowed_on_bmc(adapter, skb)	\
   1266	(is_arp(skb) && is_arp_filt_enabled(adapter))
   1267
   1268#define is_arp(skb)	(skb->protocol == htons(ETH_P_ARP))
   1269
   1270#define is_arp_filt_enabled(adapter)	\
   1271		(adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
   1272
   1273#define is_dhcp_client_filt_enabled(adapter)	\
   1274		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
   1275
   1276#define is_dhcp_srvr_filt_enabled(adapter)	\
   1277		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
   1278
   1279#define is_nbios_filt_enabled(adapter)	\
   1280		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
   1281
   1282#define is_ipv6_na_filt_enabled(adapter)	\
   1283		(adapter->bmc_filt_mask &	\
   1284			BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
   1285
   1286#define is_ipv6_ra_filt_enabled(adapter)	\
   1287		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
   1288
   1289#define is_ipv6_ras_filt_enabled(adapter)	\
   1290		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
   1291
   1292#define is_broadcast_filt_enabled(adapter)	\
   1293		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
   1294
   1295#define is_multicast_filt_enabled(adapter)	\
   1296		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
   1297
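/* Decide whether a copy of this Tx packet must also be delivered to the
 * BMC (OS2BMC). Only multicast/broadcast frames are considered; the
 * adapter's bmc_filt_mask determines which classes (ARP, DHCP, NetBIOS,
 * IPv6 ND/RA) are forwarded. The VLAN tag is inlined before forwarding.
 */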
   1298static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
   1299			       struct sk_buff **skb)
   1300{
   1301	struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
   1302	bool os2bmc = false;
   1303
   1304	if (!be_is_os2bmc_enabled(adapter))
   1305		goto done;
   1306
   1307	if (!is_multicast_ether_addr(eh->h_dest))
   1308		goto done;
   1309
   1310	if (is_mc_allowed_on_bmc(adapter, eh) ||
   1311	    is_bc_allowed_on_bmc(adapter, eh) ||
   1312	    is_arp_allowed_on_bmc(adapter, (*skb))) {
   1313		os2bmc = true;
   1314		goto done;
   1315	}
   1316
   1317	if ((*skb)->protocol == htons(ETH_P_IPV6)) {
   1318		struct ipv6hdr *hdr = ipv6_hdr((*skb));
   1319		u8 nexthdr = hdr->nexthdr;
   1320
   1321		if (nexthdr == IPPROTO_ICMPV6) {
   1322			struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
   1323
   1324			switch (icmp6->icmp6_type) {
   1325			case NDISC_ROUTER_ADVERTISEMENT:
   1326				os2bmc = is_ipv6_ra_filt_enabled(adapter);
   1327				goto done;
   1328			case NDISC_NEIGHBOUR_ADVERTISEMENT:
   1329				os2bmc = is_ipv6_na_filt_enabled(adapter);
   1330				goto done;
   1331			default:
   1332				break;
   1333			}
   1334		}
   1335	}
   1336
   1337	if (is_udp_pkt((*skb))) {
   1338		struct udphdr *udp = udp_hdr((*skb));
   1339
   1340		switch (ntohs(udp->dest)) {
   1341		case DHCP_CLIENT_PORT:
   1342			os2bmc = is_dhcp_client_filt_enabled(adapter);
   1343			goto done;
   1344		case DHCP_SERVER_PORT:
   1345			os2bmc = is_dhcp_srvr_filt_enabled(adapter);
   1346			goto done;
   1347		case NET_BIOS_PORT1:
   1348		case NET_BIOS_PORT2:
   1349			os2bmc = is_nbios_filt_enabled(adapter);
   1350			goto done;
   1351		case DHCPV6_RAS_PORT:
   1352			os2bmc = is_ipv6_ras_filt_enabled(adapter);
   1353			goto done;
   1354		default:
   1355			break;
   1356		}
   1357	}
   1358done:
    1359	/* For VLAN packets destined to the BMC, the ASIC expects
    1360	 * the VLAN tag to be inlined in the packet.
    1361	 */
   1362	if (os2bmc)
   1363		*skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
   1364
   1365	return os2bmc;
   1366}
   1367
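/* Main transmit entry point (ndo_start_xmit): apply HW workarounds, build
 * WRB params from the skb, enqueue the WRBs, optionally enqueue a second
 * copy for the BMC, and ring the Tx doorbell unless more packets are
 * pending (xmit_more).
 */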
   1368static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
   1369{
   1370	struct be_adapter *adapter = netdev_priv(netdev);
   1371	u16 q_idx = skb_get_queue_mapping(skb);
   1372	struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
   1373	struct be_wrb_params wrb_params = { 0 };
   1374	bool flush = !netdev_xmit_more();
   1375	u16 wrb_cnt;
   1376
   1377	skb = be_xmit_workarounds(adapter, skb, &wrb_params);
   1378	if (unlikely(!skb))
   1379		goto drop;
   1380
   1381	be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
   1382
   1383	wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
   1384	if (unlikely(!wrb_cnt)) {
   1385		dev_kfree_skb_any(skb);
   1386		goto drop;
   1387	}
   1388
   1389	/* if os2bmc is enabled and if the pkt is destined to bmc,
   1390	 * enqueue the pkt a 2nd time with mgmt bit set.
   1391	 */
   1392	if (be_send_pkt_to_bmc(adapter, &skb)) {
   1393		BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
   1394		wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
   1395		if (unlikely(!wrb_cnt))
   1396			goto drop;
   1397		else
   1398			skb_get(skb);
   1399	}
   1400
   1401	if (be_is_txq_full(txo)) {
   1402		netif_stop_subqueue(netdev, q_idx);
   1403		tx_stats(txo)->tx_stops++;
   1404	}
   1405
   1406	if (flush || __netif_subqueue_stopped(netdev, q_idx))
   1407		be_xmit_flush(adapter, txo);
   1408
   1409	return NETDEV_TX_OK;
   1410drop:
   1411	tx_stats(txo)->tx_drv_drops++;
   1412	/* Flush the already enqueued tx requests */
   1413	if (flush && txo->pend_wrb_cnt)
   1414		be_xmit_flush(adapter, txo);
   1415
   1416	return NETDEV_TX_OK;
   1417}
   1418
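/* ndo_tx_timeout handler: dump the Tx WRB and completion queues and the
 * headers of any pending skbs for debugging; on Lancer, trigger a FW reset.
 */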
   1419static void be_tx_timeout(struct net_device *netdev, unsigned int txqueue)
   1420{
   1421	struct be_adapter *adapter = netdev_priv(netdev);
   1422	struct device *dev = &adapter->pdev->dev;
   1423	struct be_tx_obj *txo;
   1424	struct sk_buff *skb;
   1425	struct tcphdr *tcphdr;
   1426	struct udphdr *udphdr;
   1427	u32 *entry;
   1428	int status;
   1429	int i, j;
   1430
   1431	for_all_tx_queues(adapter, txo, i) {
   1432		dev_info(dev, "TXQ Dump: %d H: %d T: %d used: %d, qid: 0x%x\n",
   1433			 i, txo->q.head, txo->q.tail,
   1434			 atomic_read(&txo->q.used), txo->q.id);
   1435
   1436		entry = txo->q.dma_mem.va;
   1437		for (j = 0; j < TX_Q_LEN * 4; j += 4) {
   1438			if (entry[j] != 0 || entry[j + 1] != 0 ||
   1439			    entry[j + 2] != 0 || entry[j + 3] != 0) {
   1440				dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
   1441					 j, entry[j], entry[j + 1],
   1442					 entry[j + 2], entry[j + 3]);
   1443			}
   1444		}
   1445
   1446		entry = txo->cq.dma_mem.va;
   1447		dev_info(dev, "TXCQ Dump: %d  H: %d T: %d used: %d\n",
   1448			 i, txo->cq.head, txo->cq.tail,
   1449			 atomic_read(&txo->cq.used));
   1450		for (j = 0; j < TX_CQ_LEN * 4; j += 4) {
   1451			if (entry[j] != 0 || entry[j + 1] != 0 ||
   1452			    entry[j + 2] != 0 || entry[j + 3] != 0) {
   1453				dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
   1454					 j, entry[j], entry[j + 1],
   1455					 entry[j + 2], entry[j + 3]);
   1456			}
   1457		}
   1458
   1459		for (j = 0; j < TX_Q_LEN; j++) {
   1460			if (txo->sent_skb_list[j]) {
   1461				skb = txo->sent_skb_list[j];
   1462				if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
   1463					tcphdr = tcp_hdr(skb);
   1464					dev_info(dev, "TCP source port %d\n",
   1465						 ntohs(tcphdr->source));
   1466					dev_info(dev, "TCP dest port %d\n",
   1467						 ntohs(tcphdr->dest));
    1468					dev_info(dev, "TCP sequence num %u\n",
    1469						 ntohl(tcphdr->seq));
    1470					dev_info(dev, "TCP ack_seq %u\n",
    1471						 ntohl(tcphdr->ack_seq));
   1472				} else if (ip_hdr(skb)->protocol ==
   1473					   IPPROTO_UDP) {
   1474					udphdr = udp_hdr(skb);
   1475					dev_info(dev, "UDP source port %d\n",
   1476						 ntohs(udphdr->source));
   1477					dev_info(dev, "UDP dest port %d\n",
   1478						 ntohs(udphdr->dest));
   1479				}
   1480				dev_info(dev, "skb[%d] %p len %d proto 0x%x\n",
   1481					 j, skb, skb->len, skb->protocol);
   1482			}
   1483		}
   1484	}
   1485
   1486	if (lancer_chip(adapter)) {
   1487		dev_info(dev, "Initiating reset due to tx timeout\n");
   1488		dev_info(dev, "Resetting adapter\n");
   1489		status = lancer_physdev_ctrl(adapter,
   1490					     PHYSDEV_CONTROL_FW_RESET_MASK);
   1491		if (status)
   1492			dev_err(dev, "Reset failed .. Reboot server\n");
   1493	}
   1494}
   1495
   1496static inline bool be_in_all_promisc(struct be_adapter *adapter)
   1497{
   1498	return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
   1499			BE_IF_FLAGS_ALL_PROMISCUOUS;
   1500}
   1501
   1502static int be_set_vlan_promisc(struct be_adapter *adapter)
   1503{
   1504	struct device *dev = &adapter->pdev->dev;
   1505	int status;
   1506
   1507	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
   1508		return 0;
   1509
   1510	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
   1511	if (!status) {
   1512		dev_info(dev, "Enabled VLAN promiscuous mode\n");
   1513		adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
   1514	} else {
   1515		dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
   1516	}
   1517	return status;
   1518}
   1519
   1520static int be_clear_vlan_promisc(struct be_adapter *adapter)
   1521{
   1522	struct device *dev = &adapter->pdev->dev;
   1523	int status;
   1524
   1525	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
   1526	if (!status) {
   1527		dev_info(dev, "Disabling VLAN promiscuous mode\n");
   1528		adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
   1529	}
   1530	return status;
   1531}
   1532
   1533/*
   1534 * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
   1535 * If the user configures more, place BE in vlan promiscuous mode.
   1536 */
   1537static int be_vid_config(struct be_adapter *adapter)
   1538{
   1539	struct device *dev = &adapter->pdev->dev;
   1540	u16 vids[BE_NUM_VLANS_SUPPORTED];
   1541	u16 num = 0, i = 0;
   1542	int status = 0;
   1543
   1544	/* No need to change the VLAN state if the I/F is in promiscuous */
   1545	if (adapter->netdev->flags & IFF_PROMISC)
   1546		return 0;
   1547
   1548	if (adapter->vlans_added > be_max_vlans(adapter))
   1549		return be_set_vlan_promisc(adapter);
   1550
   1551	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
   1552		status = be_clear_vlan_promisc(adapter);
   1553		if (status)
   1554			return status;
   1555	}
   1556	/* Construct VLAN Table to give to HW */
   1557	for_each_set_bit(i, adapter->vids, VLAN_N_VID)
   1558		vids[num++] = cpu_to_le16(i);
   1559
   1560	status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
   1561	if (status) {
   1562		dev_err(dev, "Setting HW VLAN filtering failed\n");
   1563		/* Set to VLAN promisc mode as setting VLAN filter failed */
   1564		if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
   1565		    addl_status(status) ==
   1566				MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
   1567			return be_set_vlan_promisc(adapter);
   1568	}
   1569	return status;
   1570}
   1571
   1572static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
   1573{
   1574	struct be_adapter *adapter = netdev_priv(netdev);
   1575	int status = 0;
   1576
   1577	mutex_lock(&adapter->rx_filter_lock);
   1578
   1579	/* Packets with VID 0 are always received by Lancer by default */
   1580	if (lancer_chip(adapter) && vid == 0)
   1581		goto done;
   1582
   1583	if (test_bit(vid, adapter->vids))
   1584		goto done;
   1585
   1586	set_bit(vid, adapter->vids);
   1587	adapter->vlans_added++;
   1588
   1589	status = be_vid_config(adapter);
   1590done:
   1591	mutex_unlock(&adapter->rx_filter_lock);
   1592	return status;
   1593}
   1594
   1595static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
   1596{
   1597	struct be_adapter *adapter = netdev_priv(netdev);
   1598	int status = 0;
   1599
   1600	mutex_lock(&adapter->rx_filter_lock);
   1601
   1602	/* Packets with VID 0 are always received by Lancer by default */
   1603	if (lancer_chip(adapter) && vid == 0)
   1604		goto done;
   1605
   1606	if (!test_bit(vid, adapter->vids))
   1607		goto done;
   1608
   1609	clear_bit(vid, adapter->vids);
   1610	adapter->vlans_added--;
   1611
   1612	status = be_vid_config(adapter);
   1613done:
   1614	mutex_unlock(&adapter->rx_filter_lock);
   1615	return status;
   1616}
   1617
   1618static void be_set_all_promisc(struct be_adapter *adapter)
   1619{
   1620	be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
   1621	adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
   1622}
   1623
   1624static void be_set_mc_promisc(struct be_adapter *adapter)
   1625{
   1626	int status;
   1627
   1628	if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
   1629		return;
   1630
   1631	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
   1632	if (!status)
   1633		adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
   1634}
   1635
   1636static void be_set_uc_promisc(struct be_adapter *adapter)
   1637{
   1638	int status;
   1639
   1640	if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
   1641		return;
   1642
   1643	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
   1644	if (!status)
   1645		adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
   1646}
   1647
   1648static void be_clear_uc_promisc(struct be_adapter *adapter)
   1649{
   1650	int status;
   1651
   1652	if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
   1653		return;
   1654
   1655	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
   1656	if (!status)
   1657		adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
   1658}
   1659
    1660/* The two functions below are the sync/unsync callbacks passed to
    1661 * __dev_uc_sync()/__dev_mc_sync(). A single callback serves both cases; it
    1662 * doesn't actually add/remove addresses but only flags that the uc/mc list
    1663 * has changed. The entire uc/mc list is programmed in be_set_rx_mode().
    1664 */
   1665static int be_uc_list_update(struct net_device *netdev,
   1666			     const unsigned char *addr)
   1667{
   1668	struct be_adapter *adapter = netdev_priv(netdev);
   1669
   1670	adapter->update_uc_list = true;
   1671	return 0;
   1672}
   1673
   1674static int be_mc_list_update(struct net_device *netdev,
   1675			     const unsigned char *addr)
   1676{
   1677	struct be_adapter *adapter = netdev_priv(netdev);
   1678
   1679	adapter->update_mc_list = true;
   1680	return 0;
   1681}
   1682
   1683static void be_set_mc_list(struct be_adapter *adapter)
   1684{
   1685	struct net_device *netdev = adapter->netdev;
   1686	struct netdev_hw_addr *ha;
   1687	bool mc_promisc = false;
   1688	int status;
   1689
   1690	netif_addr_lock_bh(netdev);
   1691	__dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
   1692
   1693	if (netdev->flags & IFF_PROMISC) {
   1694		adapter->update_mc_list = false;
   1695	} else if (netdev->flags & IFF_ALLMULTI ||
   1696		   netdev_mc_count(netdev) > be_max_mc(adapter)) {
   1697		/* Enable multicast promisc if num configured exceeds
   1698		 * what we support
   1699		 */
   1700		mc_promisc = true;
   1701		adapter->update_mc_list = false;
   1702	} else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
   1703		/* Update mc-list unconditionally if the iface was previously
   1704		 * in mc-promisc mode and now is out of that mode.
   1705		 */
   1706		adapter->update_mc_list = true;
   1707	}
   1708
   1709	if (adapter->update_mc_list) {
   1710		int i = 0;
   1711
   1712		/* cache the mc-list in adapter */
   1713		netdev_for_each_mc_addr(ha, netdev) {
   1714			ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
   1715			i++;
   1716		}
   1717		adapter->mc_count = netdev_mc_count(netdev);
   1718	}
   1719	netif_addr_unlock_bh(netdev);
   1720
   1721	if (mc_promisc) {
   1722		be_set_mc_promisc(adapter);
   1723	} else if (adapter->update_mc_list) {
   1724		status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
   1725		if (!status)
   1726			adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
   1727		else
   1728			be_set_mc_promisc(adapter);
   1729
   1730		adapter->update_mc_list = false;
   1731	}
   1732}
   1733
   1734static void be_clear_mc_list(struct be_adapter *adapter)
   1735{
   1736	struct net_device *netdev = adapter->netdev;
   1737
   1738	__dev_mc_unsync(netdev, NULL);
   1739	be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
   1740	adapter->mc_count = 0;
   1741}
   1742
   1743static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
   1744{
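	/* If this uc-list entry is the interface's own MAC (dev_mac), it is
	 * already programmed at pmac_id[0]; reuse that pmac_id instead of
	 * consuming another MAC slot.
	 */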
   1745	if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
   1746		adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
   1747		return 0;
   1748	}
   1749
   1750	return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
   1751			       adapter->if_handle,
   1752			       &adapter->pmac_id[uc_idx + 1], 0);
   1753}
   1754
   1755static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
   1756{
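	/* pmac_id[0] holds the primary (dev_mac) entry; never delete it here */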
   1757	if (pmac_id == adapter->pmac_id[0])
   1758		return;
   1759
   1760	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
   1761}
   1762
   1763static void be_set_uc_list(struct be_adapter *adapter)
   1764{
   1765	struct net_device *netdev = adapter->netdev;
   1766	struct netdev_hw_addr *ha;
   1767	bool uc_promisc = false;
   1768	int curr_uc_macs = 0, i;
   1769
   1770	netif_addr_lock_bh(netdev);
   1771	__dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
   1772
   1773	if (netdev->flags & IFF_PROMISC) {
   1774		adapter->update_uc_list = false;
   1775	} else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
   1776		uc_promisc = true;
   1777		adapter->update_uc_list = false;
    1778	} else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
   1779		/* Update uc-list unconditionally if the iface was previously
   1780		 * in uc-promisc mode and now is out of that mode.
   1781		 */
   1782		adapter->update_uc_list = true;
   1783	}
   1784
   1785	if (adapter->update_uc_list) {
   1786		/* cache the uc-list in adapter array */
   1787		i = 0;
   1788		netdev_for_each_uc_addr(ha, netdev) {
   1789			ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
   1790			i++;
   1791		}
   1792		curr_uc_macs = netdev_uc_count(netdev);
   1793	}
   1794	netif_addr_unlock_bh(netdev);
   1795
   1796	if (uc_promisc) {
   1797		be_set_uc_promisc(adapter);
   1798	} else if (adapter->update_uc_list) {
   1799		be_clear_uc_promisc(adapter);
   1800
   1801		for (i = 0; i < adapter->uc_macs; i++)
   1802			be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
   1803
   1804		for (i = 0; i < curr_uc_macs; i++)
   1805			be_uc_mac_add(adapter, i);
   1806		adapter->uc_macs = curr_uc_macs;
   1807		adapter->update_uc_list = false;
   1808	}
   1809}
   1810
   1811static void be_clear_uc_list(struct be_adapter *adapter)
   1812{
   1813	struct net_device *netdev = adapter->netdev;
   1814	int i;
   1815
   1816	__dev_uc_unsync(netdev, NULL);
   1817	for (i = 0; i < adapter->uc_macs; i++)
   1818		be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
   1819
   1820	adapter->uc_macs = 0;
   1821}
   1822
   1823static void __be_set_rx_mode(struct be_adapter *adapter)
   1824{
   1825	struct net_device *netdev = adapter->netdev;
   1826
   1827	mutex_lock(&adapter->rx_filter_lock);
   1828
   1829	if (netdev->flags & IFF_PROMISC) {
   1830		if (!be_in_all_promisc(adapter))
   1831			be_set_all_promisc(adapter);
   1832	} else if (be_in_all_promisc(adapter)) {
   1833		/* We need to re-program the vlan-list or clear
   1834		 * vlan-promisc mode (if needed) when the interface
   1835		 * comes out of promisc mode.
   1836		 */
   1837		be_vid_config(adapter);
   1838	}
   1839
   1840	be_set_uc_list(adapter);
   1841	be_set_mc_list(adapter);
   1842
   1843	mutex_unlock(&adapter->rx_filter_lock);
   1844}
   1845
   1846static void be_work_set_rx_mode(struct work_struct *work)
   1847{
   1848	struct be_cmd_work *cmd_work =
   1849				container_of(work, struct be_cmd_work, work);
   1850
   1851	__be_set_rx_mode(cmd_work->adapter);
   1852	kfree(cmd_work);
   1853}
   1854
   1855static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
   1856{
   1857	struct be_adapter *adapter = netdev_priv(netdev);
   1858	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
   1859	int status;
   1860
   1861	if (!sriov_enabled(adapter))
   1862		return -EPERM;
   1863
   1864	if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
   1865		return -EINVAL;
   1866
   1867	/* Proceed further only if user provided MAC is different
   1868	 * from active MAC
   1869	 */
   1870	if (ether_addr_equal(mac, vf_cfg->mac_addr))
   1871		return 0;
   1872
   1873	if (BEx_chip(adapter)) {
   1874		be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
   1875				vf + 1);
   1876
   1877		status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
   1878					 &vf_cfg->pmac_id, vf + 1);
   1879	} else {
   1880		status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
   1881					vf + 1);
   1882	}
   1883
   1884	if (status) {
    1885		dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d failed: %#x\n",
   1886			mac, vf, status);
   1887		return be_cmd_status(status);
   1888	}
   1889
   1890	ether_addr_copy(vf_cfg->mac_addr, mac);
   1891
   1892	return 0;
   1893}
   1894
   1895static int be_get_vf_config(struct net_device *netdev, int vf,
   1896			    struct ifla_vf_info *vi)
   1897{
   1898	struct be_adapter *adapter = netdev_priv(netdev);
   1899	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
   1900
   1901	if (!sriov_enabled(adapter))
   1902		return -EPERM;
   1903
   1904	if (vf >= adapter->num_vfs)
   1905		return -EINVAL;
   1906
   1907	vi->vf = vf;
   1908	vi->max_tx_rate = vf_cfg->tx_rate;
   1909	vi->min_tx_rate = 0;
   1910	vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
   1911	vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
   1912	memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
   1913	vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
   1914	vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
   1915
   1916	return 0;
   1917}
   1918
   1919static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
   1920{
   1921	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
   1922	u16 vids[BE_NUM_VLANS_SUPPORTED];
   1923	int vf_if_id = vf_cfg->if_handle;
   1924	int status;
   1925
   1926	/* Enable Transparent VLAN Tagging */
   1927	status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
   1928	if (status)
   1929		return status;
   1930
   1931	/* Clear pre-programmed VLAN filters on VF if any, if TVT is enabled */
   1932	vids[0] = 0;
   1933	status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
   1934	if (!status)
   1935		dev_info(&adapter->pdev->dev,
    1936			 "Cleared guest VLANs on VF%d\n", vf);
   1937
   1938	/* After TVT is enabled, disallow VFs to program VLAN filters */
   1939	if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
   1940		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
   1941						  ~BE_PRIV_FILTMGMT, vf + 1);
   1942		if (!status)
   1943			vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
   1944	}
   1945	return 0;
   1946}
   1947
   1948static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
   1949{
   1950	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
   1951	struct device *dev = &adapter->pdev->dev;
   1952	int status;
   1953
   1954	/* Reset Transparent VLAN Tagging. */
   1955	status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
   1956				       vf_cfg->if_handle, 0, 0);
   1957	if (status)
   1958		return status;
   1959
   1960	/* Allow VFs to program VLAN filtering */
   1961	if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
   1962		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
   1963						  BE_PRIV_FILTMGMT, vf + 1);
   1964		if (!status) {
   1965			vf_cfg->privileges |= BE_PRIV_FILTMGMT;
    1966			dev_info(dev, "VF%d: FILTMGMT priv enabled\n", vf);
   1967		}
   1968	}
   1969
   1970	dev_info(dev,
    1971		 "Disable/re-enable i/f in VM to clear Transparent VLAN tag\n");
   1972	return 0;
   1973}
   1974
   1975static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
   1976			  __be16 vlan_proto)
   1977{
   1978	struct be_adapter *adapter = netdev_priv(netdev);
   1979	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
   1980	int status;
   1981
   1982	if (!sriov_enabled(adapter))
   1983		return -EPERM;
   1984
   1985	if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
   1986		return -EINVAL;
   1987
   1988	if (vlan_proto != htons(ETH_P_8021Q))
   1989		return -EPROTONOSUPPORT;
   1990
   1991	if (vlan || qos) {
   1992		vlan |= qos << VLAN_PRIO_SHIFT;
   1993		status = be_set_vf_tvt(adapter, vf, vlan);
   1994	} else {
   1995		status = be_clear_vf_tvt(adapter, vf);
   1996	}
   1997
   1998	if (status) {
   1999		dev_err(&adapter->pdev->dev,
   2000			"VLAN %d config on VF %d failed : %#x\n", vlan, vf,
   2001			status);
   2002		return be_cmd_status(status);
   2003	}
   2004
   2005	vf_cfg->vlan_tag = vlan;
   2006	return 0;
   2007}
   2008
   2009static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
   2010			     int min_tx_rate, int max_tx_rate)
   2011{
   2012	struct be_adapter *adapter = netdev_priv(netdev);
   2013	struct device *dev = &adapter->pdev->dev;
   2014	int percent_rate, status = 0;
   2015	u16 link_speed = 0;
   2016	u8 link_status;
   2017
   2018	if (!sriov_enabled(adapter))
   2019		return -EPERM;
   2020
   2021	if (vf >= adapter->num_vfs)
   2022		return -EINVAL;
   2023
   2024	if (min_tx_rate)
   2025		return -EINVAL;
   2026
   2027	if (!max_tx_rate)
   2028		goto config_qos;
   2029
   2030	status = be_cmd_link_status_query(adapter, &link_speed,
   2031					  &link_status, 0);
   2032	if (status)
   2033		goto err;
   2034
   2035	if (!link_status) {
   2036		dev_err(dev, "TX-rate setting not allowed when link is down\n");
   2037		status = -ENETDOWN;
   2038		goto err;
   2039	}
   2040
   2041	if (max_tx_rate < 100 || max_tx_rate > link_speed) {
   2042		dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
   2043			link_speed);
   2044		status = -EINVAL;
   2045		goto err;
   2046	}
   2047
   2048	/* On Skyhawk the QOS setting must be done only as a % value */
   2049	percent_rate = link_speed / 100;
   2050	if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
   2051		dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
   2052			percent_rate);
   2053		status = -EINVAL;
   2054		goto err;
   2055	}
   2056
   2057config_qos:
   2058	status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
   2059	if (status)
   2060		goto err;
   2061
   2062	adapter->vf_cfg[vf].tx_rate = max_tx_rate;
   2063	return 0;
   2064
   2065err:
   2066	dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
   2067		max_tx_rate, vf);
   2068	return be_cmd_status(status);
   2069}
   2070
   2071static int be_set_vf_link_state(struct net_device *netdev, int vf,
   2072				int link_state)
   2073{
   2074	struct be_adapter *adapter = netdev_priv(netdev);
   2075	int status;
   2076
   2077	if (!sriov_enabled(adapter))
   2078		return -EPERM;
   2079
   2080	if (vf >= adapter->num_vfs)
   2081		return -EINVAL;
   2082
    2083	status = be_cmd_set_logical_link_config(adapter, link_state, vf + 1);
   2084	if (status) {
   2085		dev_err(&adapter->pdev->dev,
   2086			"Link state change on VF %d failed: %#x\n", vf, status);
   2087		return be_cmd_status(status);
   2088	}
   2089
   2090	adapter->vf_cfg[vf].plink_tracking = link_state;
   2091
   2092	return 0;
   2093}
   2094
   2095static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
   2096{
   2097	struct be_adapter *adapter = netdev_priv(netdev);
   2098	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
   2099	u8 spoofchk;
   2100	int status;
   2101
   2102	if (!sriov_enabled(adapter))
   2103		return -EPERM;
   2104
   2105	if (vf >= adapter->num_vfs)
   2106		return -EINVAL;
   2107
   2108	if (BEx_chip(adapter))
   2109		return -EOPNOTSUPP;
   2110
   2111	if (enable == vf_cfg->spoofchk)
   2112		return 0;
   2113
   2114	spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
   2115
   2116	status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
   2117				       0, spoofchk);
   2118	if (status) {
   2119		dev_err(&adapter->pdev->dev,
   2120			"Spoofchk change on VF %d failed: %#x\n", vf, status);
   2121		return be_cmd_status(status);
   2122	}
   2123
   2124	vf_cfg->spoofchk = enable;
   2125	return 0;
   2126}
   2127
   2128static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
   2129			  ulong now)
   2130{
   2131	aic->rx_pkts_prev = rx_pkts;
   2132	aic->tx_reqs_prev = tx_pkts;
   2133	aic->jiffies = now;
   2134}
   2135
   2136static int be_get_new_eqd(struct be_eq_obj *eqo)
   2137{
   2138	struct be_adapter *adapter = eqo->adapter;
   2139	int eqd, start;
   2140	struct be_aic_obj *aic;
   2141	struct be_rx_obj *rxo;
   2142	struct be_tx_obj *txo;
   2143	u64 rx_pkts = 0, tx_pkts = 0;
   2144	ulong now;
   2145	u32 pps, delta;
   2146	int i;
   2147
   2148	aic = &adapter->aic_obj[eqo->idx];
   2149	if (!adapter->aic_enabled) {
   2150		if (aic->jiffies)
   2151			aic->jiffies = 0;
   2152		eqd = aic->et_eqd;
   2153		return eqd;
   2154	}
   2155
   2156	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
   2157		do {
   2158			start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
   2159			rx_pkts += rxo->stats.rx_pkts;
   2160		} while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
   2161	}
   2162
   2163	for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
   2164		do {
   2165			start = u64_stats_fetch_begin_irq(&txo->stats.sync);
   2166			tx_pkts += txo->stats.tx_reqs;
   2167		} while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
   2168	}
   2169
    2170	/* Skip if this is the first calculation or the counters wrapped around */
   2171	now = jiffies;
   2172	if (!aic->jiffies || time_before(now, aic->jiffies) ||
   2173	    rx_pkts < aic->rx_pkts_prev ||
   2174	    tx_pkts < aic->tx_reqs_prev) {
   2175		be_aic_update(aic, rx_pkts, tx_pkts, now);
   2176		return aic->prev_eqd;
   2177	}
   2178
   2179	delta = jiffies_to_msecs(now - aic->jiffies);
   2180	if (delta == 0)
   2181		return aic->prev_eqd;
   2182
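	/* Compute the aggregate RX + TX packet rate since the last sample and
	 * derive a new interrupt delay from it; the result is floored to 0
	 * for low rates and then clamped to the per-EQ min/max bounds.
	 */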
   2183	pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
   2184		(((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
   2185	eqd = (pps / 15000) << 2;
   2186
   2187	if (eqd < 8)
   2188		eqd = 0;
   2189	eqd = min_t(u32, eqd, aic->max_eqd);
   2190	eqd = max_t(u32, eqd, aic->min_eqd);
   2191
   2192	be_aic_update(aic, rx_pkts, tx_pkts, now);
   2193
   2194	return eqd;
   2195}
   2196
   2197/* For Skyhawk-R only */
   2198static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
   2199{
   2200	struct be_adapter *adapter = eqo->adapter;
   2201	struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
   2202	ulong now = jiffies;
   2203	int eqd;
   2204	u32 mult_enc;
   2205
   2206	if (!adapter->aic_enabled)
   2207		return 0;
   2208
   2209	if (jiffies_to_msecs(now - aic->jiffies) < 1)
   2210		eqd = aic->prev_eqd;
   2211	else
   2212		eqd = be_get_new_eqd(eqo);
   2213
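	/* Map the computed delay into one of the R2I (rearm-to-interrupt)
	 * delay multiplier encodings used by the Skyhawk EQ doorbell.
	 */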
   2214	if (eqd > 100)
   2215		mult_enc = R2I_DLY_ENC_1;
   2216	else if (eqd > 60)
   2217		mult_enc = R2I_DLY_ENC_2;
   2218	else if (eqd > 20)
   2219		mult_enc = R2I_DLY_ENC_3;
   2220	else
   2221		mult_enc = R2I_DLY_ENC_0;
   2222
   2223	aic->prev_eqd = eqd;
   2224
   2225	return mult_enc;
   2226}
   2227
   2228void be_eqd_update(struct be_adapter *adapter, bool force_update)
   2229{
   2230	struct be_set_eqd set_eqd[MAX_EVT_QS];
   2231	struct be_aic_obj *aic;
   2232	struct be_eq_obj *eqo;
   2233	int i, num = 0, eqd;
   2234
   2235	for_all_evt_queues(adapter, eqo, i) {
   2236		aic = &adapter->aic_obj[eqo->idx];
   2237		eqd = be_get_new_eqd(eqo);
   2238		if (force_update || eqd != aic->prev_eqd) {
    2239			set_eqd[num].delay_multiplier = (eqd * 65) / 100;
   2240			set_eqd[num].eq_id = eqo->q.id;
   2241			aic->prev_eqd = eqd;
   2242			num++;
   2243		}
   2244	}
   2245
   2246	if (num)
   2247		be_cmd_modify_eqd(adapter, set_eqd, num);
   2248}
   2249
   2250static void be_rx_stats_update(struct be_rx_obj *rxo,
   2251			       struct be_rx_compl_info *rxcp)
   2252{
   2253	struct be_rx_stats *stats = rx_stats(rxo);
   2254
   2255	u64_stats_update_begin(&stats->sync);
   2256	stats->rx_compl++;
   2257	stats->rx_bytes += rxcp->pkt_size;
   2258	stats->rx_pkts++;
   2259	if (rxcp->tunneled)
   2260		stats->rx_vxlan_offload_pkts++;
   2261	if (rxcp->pkt_type == BE_MULTICAST_PACKET)
   2262		stats->rx_mcast_pkts++;
   2263	if (rxcp->err)
   2264		stats->rx_compl_err++;
   2265	u64_stats_update_end(&stats->sync);
   2266}
   2267
   2268static inline bool csum_passed(struct be_rx_compl_info *rxcp)
   2269{
   2270	/* L4 checksum is not reliable for non TCP/UDP packets.
   2271	 * Also ignore ipcksm for ipv6 pkts
   2272	 */
   2273	return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
   2274		(rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
   2275}
   2276
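/* Pop the page_info at the RXQ tail, advancing the tail and dropping the
 * used count; the big page is fully unmapped on its last fragment and only
 * CPU-synced otherwise.
 */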
   2277static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
   2278{
   2279	struct be_adapter *adapter = rxo->adapter;
   2280	struct be_rx_page_info *rx_page_info;
   2281	struct be_queue_info *rxq = &rxo->q;
   2282	u32 frag_idx = rxq->tail;
   2283
   2284	rx_page_info = &rxo->page_info_tbl[frag_idx];
   2285	BUG_ON(!rx_page_info->page);
   2286
   2287	if (rx_page_info->last_frag) {
   2288		dma_unmap_page(&adapter->pdev->dev,
   2289			       dma_unmap_addr(rx_page_info, bus),
   2290			       adapter->big_page_size, DMA_FROM_DEVICE);
   2291		rx_page_info->last_frag = false;
   2292	} else {
   2293		dma_sync_single_for_cpu(&adapter->pdev->dev,
   2294					dma_unmap_addr(rx_page_info, bus),
   2295					rx_frag_size, DMA_FROM_DEVICE);
   2296	}
   2297
   2298	queue_tail_inc(rxq);
   2299	atomic_dec(&rxq->used);
   2300	return rx_page_info;
   2301}
   2302
    2303	/* Throw away the data in the Rx completion */
   2304static void be_rx_compl_discard(struct be_rx_obj *rxo,
   2305				struct be_rx_compl_info *rxcp)
   2306{
   2307	struct be_rx_page_info *page_info;
   2308	u16 i, num_rcvd = rxcp->num_rcvd;
   2309
   2310	for (i = 0; i < num_rcvd; i++) {
   2311		page_info = get_rx_page_info(rxo);
   2312		put_page(page_info->page);
   2313		memset(page_info, 0, sizeof(*page_info));
   2314	}
   2315}
   2316
   2317/*
   2318 * skb_fill_rx_data forms a complete skb for an ether frame
   2319 * indicated by rxcp.
   2320 */
   2321static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
   2322			     struct be_rx_compl_info *rxcp)
   2323{
   2324	struct be_rx_page_info *page_info;
   2325	u16 i, j;
   2326	u16 hdr_len, curr_frag_len, remaining;
   2327	u8 *start;
   2328
   2329	page_info = get_rx_page_info(rxo);
   2330	start = page_address(page_info->page) + page_info->page_offset;
   2331	prefetch(start);
   2332
   2333	/* Copy data in the first descriptor of this completion */
   2334	curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
   2335
   2336	skb->len = curr_frag_len;
   2337	if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
   2338		memcpy(skb->data, start, curr_frag_len);
   2339		/* Complete packet has now been moved to data */
   2340		put_page(page_info->page);
   2341		skb->data_len = 0;
   2342		skb->tail += curr_frag_len;
   2343	} else {
   2344		hdr_len = ETH_HLEN;
   2345		memcpy(skb->data, start, hdr_len);
   2346		skb_shinfo(skb)->nr_frags = 1;
   2347		skb_frag_set_page(skb, 0, page_info->page);
   2348		skb_frag_off_set(&skb_shinfo(skb)->frags[0],
   2349				 page_info->page_offset + hdr_len);
   2350		skb_frag_size_set(&skb_shinfo(skb)->frags[0],
   2351				  curr_frag_len - hdr_len);
   2352		skb->data_len = curr_frag_len - hdr_len;
   2353		skb->truesize += rx_frag_size;
   2354		skb->tail += hdr_len;
   2355	}
   2356	page_info->page = NULL;
   2357
   2358	if (rxcp->pkt_size <= rx_frag_size) {
   2359		BUG_ON(rxcp->num_rcvd != 1);
   2360		return;
   2361	}
   2362
   2363	/* More frags present for this completion */
   2364	remaining = rxcp->pkt_size - curr_frag_len;
   2365	for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
   2366		page_info = get_rx_page_info(rxo);
   2367		curr_frag_len = min(remaining, rx_frag_size);
   2368
   2369		/* Coalesce all frags from the same physical page in one slot */
   2370		if (page_info->page_offset == 0) {
   2371			/* Fresh page */
   2372			j++;
   2373			skb_frag_set_page(skb, j, page_info->page);
   2374			skb_frag_off_set(&skb_shinfo(skb)->frags[j],
   2375					 page_info->page_offset);
   2376			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
   2377			skb_shinfo(skb)->nr_frags++;
   2378		} else {
   2379			put_page(page_info->page);
   2380		}
   2381
   2382		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
   2383		skb->len += curr_frag_len;
   2384		skb->data_len += curr_frag_len;
   2385		skb->truesize += rx_frag_size;
   2386		remaining -= curr_frag_len;
   2387		page_info->page = NULL;
   2388	}
   2389	BUG_ON(j > MAX_SKB_FRAGS);
   2390}
   2391
   2392/* Process the RX completion indicated by rxcp when GRO is disabled */
   2393static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
   2394				struct be_rx_compl_info *rxcp)
   2395{
   2396	struct be_adapter *adapter = rxo->adapter;
   2397	struct net_device *netdev = adapter->netdev;
   2398	struct sk_buff *skb;
   2399
   2400	skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
   2401	if (unlikely(!skb)) {
   2402		rx_stats(rxo)->rx_drops_no_skbs++;
   2403		be_rx_compl_discard(rxo, rxcp);
   2404		return;
   2405	}
   2406
   2407	skb_fill_rx_data(rxo, skb, rxcp);
   2408
   2409	if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
   2410		skb->ip_summed = CHECKSUM_UNNECESSARY;
   2411	else
   2412		skb_checksum_none_assert(skb);
   2413
   2414	skb->protocol = eth_type_trans(skb, netdev);
   2415	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
   2416	if (netdev->features & NETIF_F_RXHASH)
   2417		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
   2418
   2419	skb->csum_level = rxcp->tunneled;
   2420	skb_mark_napi_id(skb, napi);
   2421
   2422	if (rxcp->vlanf)
   2423		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
   2424
   2425	netif_receive_skb(skb);
   2426}
   2427
   2428/* Process the RX completion indicated by rxcp when GRO is enabled */
   2429static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
   2430				    struct napi_struct *napi,
   2431				    struct be_rx_compl_info *rxcp)
   2432{
   2433	struct be_adapter *adapter = rxo->adapter;
   2434	struct be_rx_page_info *page_info;
   2435	struct sk_buff *skb = NULL;
   2436	u16 remaining, curr_frag_len;
   2437	u16 i, j;
   2438
   2439	skb = napi_get_frags(napi);
   2440	if (!skb) {
   2441		be_rx_compl_discard(rxo, rxcp);
   2442		return;
   2443	}
   2444
   2445	remaining = rxcp->pkt_size;
   2446	for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
   2447		page_info = get_rx_page_info(rxo);
   2448
   2449		curr_frag_len = min(remaining, rx_frag_size);
   2450
   2451		/* Coalesce all frags from the same physical page in one slot */
   2452		if (i == 0 || page_info->page_offset == 0) {
   2453			/* First frag or Fresh page */
   2454			j++;
   2455			skb_frag_set_page(skb, j, page_info->page);
   2456			skb_frag_off_set(&skb_shinfo(skb)->frags[j],
   2457					 page_info->page_offset);
   2458			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
   2459		} else {
   2460			put_page(page_info->page);
   2461		}
   2462		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
   2463		skb->truesize += rx_frag_size;
   2464		remaining -= curr_frag_len;
   2465		memset(page_info, 0, sizeof(*page_info));
   2466	}
   2467	BUG_ON(j > MAX_SKB_FRAGS);
   2468
   2469	skb_shinfo(skb)->nr_frags = j + 1;
   2470	skb->len = rxcp->pkt_size;
   2471	skb->data_len = rxcp->pkt_size;
   2472	skb->ip_summed = CHECKSUM_UNNECESSARY;
   2473	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
   2474	if (adapter->netdev->features & NETIF_F_RXHASH)
   2475		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
   2476
   2477	skb->csum_level = rxcp->tunneled;
   2478
   2479	if (rxcp->vlanf)
   2480		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
   2481
   2482	napi_gro_frags(napi);
   2483}
   2484
   2485static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
   2486				 struct be_rx_compl_info *rxcp)
   2487{
   2488	rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
   2489	rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
   2490	rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
   2491	rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
   2492	rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
   2493	rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
   2494	rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
   2495	rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
   2496	rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
   2497	rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
   2498	rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
   2499	if (rxcp->vlanf) {
   2500		rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
   2501		rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
   2502	}
   2503	rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
   2504	rxcp->tunneled =
   2505		GET_RX_COMPL_V1_BITS(tunneled, compl);
   2506}
   2507
   2508static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
   2509				 struct be_rx_compl_info *rxcp)
   2510{
   2511	rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
   2512	rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
   2513	rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
   2514	rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
   2515	rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
   2516	rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
   2517	rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
   2518	rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
   2519	rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
   2520	rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
   2521	rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
   2522	if (rxcp->vlanf) {
   2523		rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
   2524		rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
   2525	}
   2526	rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
   2527	rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
   2528}
   2529
   2530static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
   2531{
   2532	struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
   2533	struct be_rx_compl_info *rxcp = &rxo->rxcp;
   2534	struct be_adapter *adapter = rxo->adapter;
   2535
   2536	/* For checking the valid bit it is Ok to use either definition as the
   2537	 * valid bit is at the same position in both v0 and v1 Rx compl */
   2538	if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
   2539		return NULL;
   2540
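	/* Ensure the valid bit is read before the rest of the completion */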
   2541	rmb();
   2542	be_dws_le_to_cpu(compl, sizeof(*compl));
   2543
   2544	if (adapter->be3_native)
   2545		be_parse_rx_compl_v1(compl, rxcp);
   2546	else
   2547		be_parse_rx_compl_v0(compl, rxcp);
   2548
   2549	if (rxcp->ip_frag)
   2550		rxcp->l4_csum = 0;
   2551
   2552	if (rxcp->vlanf) {
   2553		/* In QNQ modes, if qnq bit is not set, then the packet was
   2554		 * tagged only with the transparent outer vlan-tag and must
   2555		 * not be treated as a vlan packet by host
   2556		 */
   2557		if (be_is_qnq_mode(adapter) && !rxcp->qnq)
   2558			rxcp->vlanf = 0;
   2559
   2560		if (!lancer_chip(adapter))
   2561			rxcp->vlan_tag = swab16(rxcp->vlan_tag);
   2562
   2563		if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
   2564		    !test_bit(rxcp->vlan_tag, adapter->vids))
   2565			rxcp->vlanf = 0;
   2566	}
   2567
    2568	/* As the compl has been parsed, reset it; we won't touch it again */
   2569	compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
   2570
   2571	queue_tail_inc(&rxo->cq);
   2572	return rxcp;
   2573}
   2574
   2575static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
   2576{
   2577	u32 order = get_order(size);
   2578
   2579	if (order > 0)
   2580		gfp |= __GFP_COMP;
    2581	return alloc_pages(gfp, order);
   2582}
   2583
   2584/*
   2585 * Allocate a page, split it to fragments of size rx_frag_size and post as
   2586 * receive buffers to BE
   2587 */
   2588static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
   2589{
   2590	struct be_adapter *adapter = rxo->adapter;
   2591	struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
   2592	struct be_queue_info *rxq = &rxo->q;
   2593	struct page *pagep = NULL;
   2594	struct device *dev = &adapter->pdev->dev;
   2595	struct be_eth_rx_d *rxd;
   2596	u64 page_dmaaddr = 0, frag_dmaaddr;
   2597	u32 posted, page_offset = 0, notify = 0;
   2598
   2599	page_info = &rxo->page_info_tbl[rxq->head];
   2600	for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
   2601		if (!pagep) {
   2602			pagep = be_alloc_pages(adapter->big_page_size, gfp);
   2603			if (unlikely(!pagep)) {
   2604				rx_stats(rxo)->rx_post_fail++;
   2605				break;
   2606			}
   2607			page_dmaaddr = dma_map_page(dev, pagep, 0,
   2608						    adapter->big_page_size,
   2609						    DMA_FROM_DEVICE);
   2610			if (dma_mapping_error(dev, page_dmaaddr)) {
   2611				put_page(pagep);
   2612				pagep = NULL;
   2613				adapter->drv_stats.dma_map_errors++;
   2614				break;
   2615			}
   2616			page_offset = 0;
   2617		} else {
   2618			get_page(pagep);
   2619			page_offset += rx_frag_size;
   2620		}
   2621		page_info->page_offset = page_offset;
   2622		page_info->page = pagep;
   2623
   2624		rxd = queue_head_node(rxq);
   2625		frag_dmaaddr = page_dmaaddr + page_info->page_offset;
   2626		rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
   2627		rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
   2628
   2629		/* Any space left in the current big page for another frag? */
   2630		if ((page_offset + rx_frag_size + rx_frag_size) >
   2631					adapter->big_page_size) {
   2632			pagep = NULL;
   2633			page_info->last_frag = true;
   2634			dma_unmap_addr_set(page_info, bus, page_dmaaddr);
   2635		} else {
   2636			dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
   2637		}
   2638
   2639		prev_page_info = page_info;
   2640		queue_head_inc(rxq);
   2641		page_info = &rxo->page_info_tbl[rxq->head];
   2642	}
   2643
   2644	/* Mark the last frag of a page when we break out of the above loop
   2645	 * with no more slots available in the RXQ
   2646	 */
   2647	if (pagep) {
   2648		prev_page_info->last_frag = true;
   2649		dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
   2650	}
   2651
   2652	if (posted) {
   2653		atomic_add(posted, &rxq->used);
   2654		if (rxo->rx_post_starved)
   2655			rxo->rx_post_starved = false;
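		/* Ring the RXQ doorbell in chunks of at most
		 * MAX_NUM_POST_ERX_DB newly posted buffers per write.
		 */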
   2656		do {
   2657			notify = min(MAX_NUM_POST_ERX_DB, posted);
   2658			be_rxq_notify(adapter, rxq->id, notify);
   2659			posted -= notify;
   2660		} while (posted);
   2661	} else if (atomic_read(&rxq->used) == 0) {
   2662		/* Let be_worker replenish when memory is available */
   2663		rxo->rx_post_starved = true;
   2664	}
   2665}
   2666
   2667static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
   2668{
   2669	switch (status) {
   2670	case BE_TX_COMP_HDR_PARSE_ERR:
   2671		tx_stats(txo)->tx_hdr_parse_err++;
   2672		break;
   2673	case BE_TX_COMP_NDMA_ERR:
   2674		tx_stats(txo)->tx_dma_err++;
   2675		break;
   2676	case BE_TX_COMP_ACL_ERR:
   2677		tx_stats(txo)->tx_spoof_check_err++;
   2678		break;
   2679	}
   2680}
   2681
   2682static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
   2683{
   2684	switch (status) {
   2685	case LANCER_TX_COMP_LSO_ERR:
   2686		tx_stats(txo)->tx_tso_err++;
   2687		break;
   2688	case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
   2689	case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
   2690		tx_stats(txo)->tx_spoof_check_err++;
   2691		break;
   2692	case LANCER_TX_COMP_QINQ_ERR:
   2693		tx_stats(txo)->tx_qinq_err++;
   2694		break;
   2695	case LANCER_TX_COMP_PARITY_ERR:
   2696		tx_stats(txo)->tx_internal_parity_err++;
   2697		break;
   2698	case LANCER_TX_COMP_DMA_ERR:
   2699		tx_stats(txo)->tx_dma_err++;
   2700		break;
   2701	case LANCER_TX_COMP_SGE_ERR:
   2702		tx_stats(txo)->tx_sge_err++;
   2703		break;
   2704	}
   2705}
   2706
   2707static struct be_tx_compl_info *be_tx_compl_get(struct be_adapter *adapter,
   2708						struct be_tx_obj *txo)
   2709{
   2710	struct be_queue_info *tx_cq = &txo->cq;
   2711	struct be_tx_compl_info *txcp = &txo->txcp;
   2712	struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
   2713
   2714	if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
   2715		return NULL;
   2716
   2717	/* Ensure load ordering of valid bit dword and other dwords below */
   2718	rmb();
   2719	be_dws_le_to_cpu(compl, sizeof(*compl));
   2720
   2721	txcp->status = GET_TX_COMPL_BITS(status, compl);
   2722	txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
   2723
   2724	if (txcp->status) {
   2725		if (lancer_chip(adapter)) {
   2726			lancer_update_tx_err(txo, txcp->status);
    2727			/* Reset the adapter in case of TSO,
    2728			 * SGE or parity errors
   2729			 */
   2730			if (txcp->status == LANCER_TX_COMP_LSO_ERR ||
   2731			    txcp->status == LANCER_TX_COMP_PARITY_ERR ||
   2732			    txcp->status == LANCER_TX_COMP_SGE_ERR)
   2733				be_set_error(adapter, BE_ERROR_TX);
   2734		} else {
   2735			be_update_tx_err(txo, txcp->status);
   2736		}
   2737	}
   2738
   2739	if (be_check_error(adapter, BE_ERROR_TX))
   2740		return NULL;
   2741
   2742	compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
   2743	queue_tail_inc(tx_cq);
   2744	return txcp;
   2745}
   2746
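/* Walk the TXQ from its tail up to and including last_index, unmapping each
 * wrb and freeing the completed skbs; returns the number of wrbs processed
 * so the caller can credit them back to the TXQ.
 */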
   2747static u16 be_tx_compl_process(struct be_adapter *adapter,
   2748			       struct be_tx_obj *txo, u16 last_index)
   2749{
   2750	struct sk_buff **sent_skbs = txo->sent_skb_list;
   2751	struct be_queue_info *txq = &txo->q;
   2752	struct sk_buff *skb = NULL;
   2753	bool unmap_skb_hdr = false;
   2754	struct be_eth_wrb *wrb;
   2755	u16 num_wrbs = 0;
   2756	u32 frag_index;
   2757
   2758	do {
   2759		if (sent_skbs[txq->tail]) {
   2760			/* Free skb from prev req */
   2761			if (skb)
   2762				dev_consume_skb_any(skb);
   2763			skb = sent_skbs[txq->tail];
   2764			sent_skbs[txq->tail] = NULL;
   2765			queue_tail_inc(txq);  /* skip hdr wrb */
   2766			num_wrbs++;
   2767			unmap_skb_hdr = true;
   2768		}
   2769		wrb = queue_tail_node(txq);
   2770		frag_index = txq->tail;
   2771		unmap_tx_frag(&adapter->pdev->dev, wrb,
   2772			      (unmap_skb_hdr && skb_headlen(skb)));
   2773		unmap_skb_hdr = false;
   2774		queue_tail_inc(txq);
   2775		num_wrbs++;
   2776	} while (frag_index != last_index);
   2777	dev_consume_skb_any(skb);
   2778
   2779	return num_wrbs;
   2780}
   2781
   2782/* Return the number of events in the event queue */
   2783static inline int events_get(struct be_eq_obj *eqo)
   2784{
   2785	struct be_eq_entry *eqe;
   2786	int num = 0;
   2787
   2788	do {
   2789		eqe = queue_tail_node(&eqo->q);
   2790		if (eqe->evt == 0)
   2791			break;
   2792
   2793		rmb();
   2794		eqe->evt = 0;
   2795		num++;
   2796		queue_tail_inc(&eqo->q);
   2797	} while (true);
   2798
   2799	return num;
   2800}
   2801
    2802/* Leaves the EQ in disarmed state */
   2803static void be_eq_clean(struct be_eq_obj *eqo)
   2804{
   2805	int num = events_get(eqo);
   2806
   2807	be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
   2808}
   2809
   2810/* Free posted rx buffers that were not used */
   2811static void be_rxq_clean(struct be_rx_obj *rxo)
   2812{
   2813	struct be_queue_info *rxq = &rxo->q;
   2814	struct be_rx_page_info *page_info;
   2815
   2816	while (atomic_read(&rxq->used) > 0) {
   2817		page_info = get_rx_page_info(rxo);
   2818		put_page(page_info->page);
   2819		memset(page_info, 0, sizeof(*page_info));
   2820	}
   2821	BUG_ON(atomic_read(&rxq->used));
   2822	rxq->tail = 0;
   2823	rxq->head = 0;
   2824}
   2825
   2826static void be_rx_cq_clean(struct be_rx_obj *rxo)
   2827{
   2828	struct be_queue_info *rx_cq = &rxo->cq;
   2829	struct be_rx_compl_info *rxcp;
   2830	struct be_adapter *adapter = rxo->adapter;
   2831	int flush_wait = 0;
   2832
   2833	/* Consume pending rx completions.
   2834	 * Wait for the flush completion (identified by zero num_rcvd)
   2835	 * to arrive. Notify CQ even when there are no more CQ entries
   2836	 * for HW to flush partially coalesced CQ entries.
   2837	 * In Lancer, there is no need to wait for flush compl.
   2838	 */
   2839	for (;;) {
   2840		rxcp = be_rx_compl_get(rxo);
   2841		if (!rxcp) {
   2842			if (lancer_chip(adapter))
   2843				break;
   2844
   2845			if (flush_wait++ > 50 ||
   2846			    be_check_error(adapter,
   2847					   BE_ERROR_HW)) {
   2848				dev_warn(&adapter->pdev->dev,
   2849					 "did not receive flush compl\n");
   2850				break;
   2851			}
   2852			be_cq_notify(adapter, rx_cq->id, true, 0);
   2853			mdelay(1);
   2854		} else {
   2855			be_rx_compl_discard(rxo, rxcp);
   2856			be_cq_notify(adapter, rx_cq->id, false, 1);
   2857			if (rxcp->num_rcvd == 0)
   2858				break;
   2859		}
   2860	}
   2861
   2862	/* After cleanup, leave the CQ in unarmed state */
   2863	be_cq_notify(adapter, rx_cq->id, false, 0);
   2864}
   2865
   2866static void be_tx_compl_clean(struct be_adapter *adapter)
   2867{
   2868	struct device *dev = &adapter->pdev->dev;
   2869	u16 cmpl = 0, timeo = 0, num_wrbs = 0;
   2870	struct be_tx_compl_info *txcp;
   2871	struct be_queue_info *txq;
   2872	u32 end_idx, notified_idx;
   2873	struct be_tx_obj *txo;
   2874	int i, pending_txqs;
   2875
   2876	/* Stop polling for compls when HW has been silent for 10ms */
   2877	do {
   2878		pending_txqs = adapter->num_tx_qs;
   2879
   2880		for_all_tx_queues(adapter, txo, i) {
   2881			cmpl = 0;
   2882			num_wrbs = 0;
   2883			txq = &txo->q;
   2884			while ((txcp = be_tx_compl_get(adapter, txo))) {
   2885				num_wrbs +=
   2886					be_tx_compl_process(adapter, txo,
   2887							    txcp->end_index);
   2888				cmpl++;
   2889			}
   2890			if (cmpl) {
   2891				be_cq_notify(adapter, txo->cq.id, false, cmpl);
   2892				atomic_sub(num_wrbs, &txq->used);
   2893				timeo = 0;
   2894			}
   2895			if (!be_is_tx_compl_pending(txo))
   2896				pending_txqs--;
   2897		}
   2898
   2899		if (pending_txqs == 0 || ++timeo > 10 ||
   2900		    be_check_error(adapter, BE_ERROR_HW))
   2901			break;
   2902
   2903		mdelay(1);
   2904	} while (true);
   2905
   2906	/* Free enqueued TX that was never notified to HW */
   2907	for_all_tx_queues(adapter, txo, i) {
   2908		txq = &txo->q;
   2909
   2910		if (atomic_read(&txq->used)) {
   2911			dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
   2912				 i, atomic_read(&txq->used));
   2913			notified_idx = txq->tail;
   2914			end_idx = txq->tail;
   2915			index_adv(&end_idx, atomic_read(&txq->used) - 1,
   2916				  txq->len);
   2917			/* Use the tx-compl process logic to handle requests
   2918			 * that were not sent to the HW.
   2919			 */
   2920			num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
   2921			atomic_sub(num_wrbs, &txq->used);
   2922			BUG_ON(atomic_read(&txq->used));
   2923			txo->pend_wrb_cnt = 0;
   2924			/* Since hw was never notified of these requests,
   2925			 * reset TXQ indices
   2926			 */
   2927			txq->head = notified_idx;
   2928			txq->tail = notified_idx;
   2929		}
   2930	}
   2931}
   2932
   2933static void be_evt_queues_destroy(struct be_adapter *adapter)
   2934{
   2935	struct be_eq_obj *eqo;
   2936	int i;
   2937
   2938	for_all_evt_queues(adapter, eqo, i) {
   2939		if (eqo->q.created) {
   2940			be_eq_clean(eqo);
   2941			be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
   2942			netif_napi_del(&eqo->napi);
   2943			free_cpumask_var(eqo->affinity_mask);
   2944		}
   2945		be_queue_free(adapter, &eqo->q);
   2946	}
   2947}
   2948
   2949static int be_evt_queues_create(struct be_adapter *adapter)
   2950{
   2951	struct be_queue_info *eq;
   2952	struct be_eq_obj *eqo;
   2953	struct be_aic_obj *aic;
   2954	int i, rc;
   2955
   2956	/* need enough EQs to service both RX and TX queues */
   2957	adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
   2958				    max(adapter->cfg_num_rx_irqs,
   2959					adapter->cfg_num_tx_irqs));
   2960
   2961	adapter->aic_enabled = true;
   2962
   2963	for_all_evt_queues(adapter, eqo, i) {
   2964		int numa_node = dev_to_node(&adapter->pdev->dev);
   2965
   2966		aic = &adapter->aic_obj[i];
   2967		eqo->adapter = adapter;
   2968		eqo->idx = i;
   2969		aic->max_eqd = BE_MAX_EQD;
   2970
   2971		eq = &eqo->q;
   2972		rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
   2973				    sizeof(struct be_eq_entry));
   2974		if (rc)
   2975			return rc;
   2976
   2977		rc = be_cmd_eq_create(adapter, eqo);
   2978		if (rc)
   2979			return rc;
   2980
   2981		if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
   2982			return -ENOMEM;
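		/* Build this EQ's affinity mask from a CPU spread across the
		 * device's NUMA node; be_tx_qs_create() also uses it for XPS.
		 */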
   2983		cpumask_set_cpu(cpumask_local_spread(i, numa_node),
   2984				eqo->affinity_mask);
   2985		netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
   2986			       NAPI_POLL_WEIGHT);
   2987	}
   2988	return 0;
   2989}
   2990
   2991static void be_mcc_queues_destroy(struct be_adapter *adapter)
   2992{
   2993	struct be_queue_info *q;
   2994
   2995	q = &adapter->mcc_obj.q;
   2996	if (q->created)
   2997		be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
   2998	be_queue_free(adapter, q);
   2999
   3000	q = &adapter->mcc_obj.cq;
   3001	if (q->created)
   3002		be_cmd_q_destroy(adapter, q, QTYPE_CQ);
   3003	be_queue_free(adapter, q);
   3004}
   3005
   3006/* Must be called only after TX qs are created as MCC shares TX EQ */
   3007static int be_mcc_queues_create(struct be_adapter *adapter)
   3008{
   3009	struct be_queue_info *q, *cq;
   3010
   3011	cq = &adapter->mcc_obj.cq;
   3012	if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
   3013			   sizeof(struct be_mcc_compl)))
   3014		goto err;
   3015
   3016	/* Use the default EQ for MCC completions */
   3017	if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
   3018		goto mcc_cq_free;
   3019
   3020	q = &adapter->mcc_obj.q;
   3021	if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
   3022		goto mcc_cq_destroy;
   3023
   3024	if (be_cmd_mccq_create(adapter, q, cq))
   3025		goto mcc_q_free;
   3026
   3027	return 0;
   3028
   3029mcc_q_free:
   3030	be_queue_free(adapter, q);
   3031mcc_cq_destroy:
   3032	be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
   3033mcc_cq_free:
   3034	be_queue_free(adapter, cq);
   3035err:
   3036	return -1;
   3037}
   3038
   3039static void be_tx_queues_destroy(struct be_adapter *adapter)
   3040{
   3041	struct be_queue_info *q;
   3042	struct be_tx_obj *txo;
   3043	u8 i;
   3044
   3045	for_all_tx_queues(adapter, txo, i) {
   3046		q = &txo->q;
   3047		if (q->created)
   3048			be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
   3049		be_queue_free(adapter, q);
   3050
   3051		q = &txo->cq;
   3052		if (q->created)
   3053			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
   3054		be_queue_free(adapter, q);
   3055	}
   3056}
   3057
   3058static int be_tx_qs_create(struct be_adapter *adapter)
   3059{
   3060	struct be_queue_info *cq;
   3061	struct be_tx_obj *txo;
   3062	struct be_eq_obj *eqo;
   3063	int status, i;
   3064
   3065	adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
   3066
   3067	for_all_tx_queues(adapter, txo, i) {
   3068		cq = &txo->cq;
   3069		status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
   3070					sizeof(struct be_eth_tx_compl));
   3071		if (status)
   3072			return status;
   3073
   3074		u64_stats_init(&txo->stats.sync);
   3075		u64_stats_init(&txo->stats.sync_compl);
   3076
   3077		/* If num_evt_qs is less than num_tx_qs, then more than
   3078		 * one txq share an eq
   3079		 */
   3080		eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
   3081		status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
   3082		if (status)
   3083			return status;
   3084
   3085		status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
   3086					sizeof(struct be_eth_wrb));
   3087		if (status)
   3088			return status;
   3089
   3090		status = be_cmd_txq_create(adapter, txo);
   3091		if (status)
   3092			return status;
   3093
   3094		netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
   3095				    eqo->idx);
   3096	}
   3097
   3098	dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
   3099		 adapter->num_tx_qs);
   3100	return 0;
   3101}
   3102
   3103static void be_rx_cqs_destroy(struct be_adapter *adapter)
   3104{
   3105	struct be_queue_info *q;
   3106	struct be_rx_obj *rxo;
   3107	int i;
   3108
   3109	for_all_rx_queues(adapter, rxo, i) {
   3110		q = &rxo->cq;
   3111		if (q->created)
   3112			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
   3113		be_queue_free(adapter, q);
   3114	}
   3115}
   3116
   3117static int be_rx_cqs_create(struct be_adapter *adapter)
   3118{
   3119	struct be_queue_info *eq, *cq;
   3120	struct be_rx_obj *rxo;
   3121	int rc, i;
   3122
   3123	adapter->num_rss_qs =
   3124			min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
   3125
    3126	/* We'll use RSS only if at least 2 RSS rings are supported. */
   3127	if (adapter->num_rss_qs < 2)
   3128		adapter->num_rss_qs = 0;
   3129
   3130	adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
   3131
   3132	/* When the interface is not capable of RSS rings (and there is no
   3133	 * need to create a default RXQ) we'll still need one RXQ
   3134	 */
   3135	if (adapter->num_rx_qs == 0)
   3136		adapter->num_rx_qs = 1;
   3137
   3138	adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
   3139	for_all_rx_queues(adapter, rxo, i) {
   3140		rxo->adapter = adapter;
   3141		cq = &rxo->cq;
   3142		rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
   3143				    sizeof(struct be_eth_rx_compl));
   3144		if (rc)
   3145			return rc;
   3146
   3147		u64_stats_init(&rxo->stats.sync);
   3148		eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
   3149		rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
   3150		if (rc)
   3151			return rc;
   3152	}
   3153
   3154	dev_info(&adapter->pdev->dev,
   3155		 "created %d RX queue(s)\n", adapter->num_rx_qs);
   3156	return 0;
   3157}
   3158
   3159static irqreturn_t be_intx(int irq, void *dev)
   3160{
   3161	struct be_eq_obj *eqo = dev;
   3162	struct be_adapter *adapter = eqo->adapter;
   3163	int num_evts = 0;
   3164
   3165	/* IRQ is not expected when NAPI is scheduled as the EQ
   3166	 * will not be armed.
   3167	 * But, this can happen on Lancer INTx where it takes
    3168	 * a while to de-assert INTx or in BE2 where occasionally
   3169	 * an interrupt may be raised even when EQ is unarmed.
   3170	 * If NAPI is already scheduled, then counting & notifying
   3171	 * events will orphan them.
   3172	 */
   3173	if (napi_schedule_prep(&eqo->napi)) {
   3174		num_evts = events_get(eqo);
   3175		__napi_schedule(&eqo->napi);
   3176		if (num_evts)
   3177			eqo->spurious_intr = 0;
   3178	}
   3179	be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
   3180
    3181	/* Return IRQ_HANDLED only for the first spurious intr
   3182	 * after a valid intr to stop the kernel from branding
   3183	 * this irq as a bad one!
   3184	 */
   3185	if (num_evts || eqo->spurious_intr++ == 0)
   3186		return IRQ_HANDLED;
   3187	else
   3188		return IRQ_NONE;
   3189}
   3190
   3191static irqreturn_t be_msix(int irq, void *dev)
   3192{
   3193	struct be_eq_obj *eqo = dev;
   3194
   3195	be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
   3196	napi_schedule(&eqo->napi);
   3197	return IRQ_HANDLED;
   3198}
   3199
   3200static inline bool do_gro(struct be_rx_compl_info *rxcp)
   3201{
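	/* GRO is used only for error-free TCP frames with a valid L4 checksum */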
    3202	return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
   3203}
   3204
   3205static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
   3206			 int budget)
   3207{
   3208	struct be_adapter *adapter = rxo->adapter;
   3209	struct be_queue_info *rx_cq = &rxo->cq;
   3210	struct be_rx_compl_info *rxcp;
   3211	u32 work_done;
   3212	u32 frags_consumed = 0;
   3213
   3214	for (work_done = 0; work_done < budget; work_done++) {
   3215		rxcp = be_rx_compl_get(rxo);
   3216		if (!rxcp)
   3217			break;
   3218
   3219		/* Is it a flush compl that has no data */
   3220		if (unlikely(rxcp->num_rcvd == 0))
   3221			goto loop_continue;
   3222
    3223		/* Discard compls with partial DMA (Lancer B0) */
   3224		if (unlikely(!rxcp->pkt_size)) {
   3225			be_rx_compl_discard(rxo, rxcp);
   3226			goto loop_continue;
   3227		}
   3228
   3229		/* On BE drop pkts that arrive due to imperfect filtering in
    3230		 * promiscuous mode on some SKUs
   3231		 */
   3232		if (unlikely(rxcp->port != adapter->port_num &&
   3233			     !lancer_chip(adapter))) {
   3234			be_rx_compl_discard(rxo, rxcp);
   3235			goto loop_continue;
   3236		}
   3237
   3238		if (do_gro(rxcp))
   3239			be_rx_compl_process_gro(rxo, napi, rxcp);
   3240		else
   3241			be_rx_compl_process(rxo, napi, rxcp);
   3242
   3243loop_continue:
   3244		frags_consumed += rxcp->num_rcvd;
   3245		be_rx_stats_update(rxo, rxcp);
   3246	}
   3247
   3248	if (work_done) {
   3249		be_cq_notify(adapter, rx_cq->id, true, work_done);
   3250
   3251		/* When an rx-obj gets into post_starved state, just
   3252		 * let be_worker do the posting.
   3253		 */
   3254		if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
   3255		    !rxo->rx_post_starved)
   3256			be_post_rx_frags(rxo, GFP_ATOMIC,
   3257					 max_t(u32, MAX_RX_POST,
   3258					       frags_consumed));
   3259	}
   3260
   3261	return work_done;
   3262}
    3263
   3265static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
   3266			  int idx)
   3267{
   3268	int num_wrbs = 0, work_done = 0;
   3269	struct be_tx_compl_info *txcp;
   3270
   3271	while ((txcp = be_tx_compl_get(adapter, txo))) {
   3272		num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
   3273		work_done++;
   3274	}
   3275
   3276	if (work_done) {
   3277		be_cq_notify(adapter, txo->cq.id, true, work_done);
   3278		atomic_sub(num_wrbs, &txo->q.used);
   3279
   3280		/* As Tx wrbs have been freed up, wake up netdev queue
   3281		 * if it was stopped due to lack of tx wrbs.  */
   3282		if (__netif_subqueue_stopped(adapter->netdev, idx) &&
   3283		    be_can_txq_wake(txo)) {
   3284			netif_wake_subqueue(adapter->netdev, idx);
   3285		}
   3286
   3287		u64_stats_update_begin(&tx_stats(txo)->sync_compl);
   3288		tx_stats(txo)->tx_compl += work_done;
   3289		u64_stats_update_end(&tx_stats(txo)->sync_compl);
   3290	}
   3291}
   3292
   3293int be_poll(struct napi_struct *napi, int budget)
   3294{
   3295	struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
   3296	struct be_adapter *adapter = eqo->adapter;
   3297	int max_work = 0, work, i, num_evts;
   3298	struct be_rx_obj *rxo;
   3299	struct be_tx_obj *txo;
   3300	u32 mult_enc = 0;
   3301
   3302	num_evts = events_get(eqo);
   3303
   3304	for_all_tx_queues_on_eq(adapter, eqo, txo, i)
   3305		be_process_tx(adapter, txo, i);
   3306
   3307	/* This loop will iterate twice for EQ0 in which
   3308	 * completions of the last RXQ (default one) are also processed
    3309	 * completions of the last RXQ (default one) are also processed.
   3310	 */
   3311	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
   3312		work = be_process_rx(rxo, napi, budget);
   3313		max_work = max(work, max_work);
   3314	}
   3315
   3316	if (is_mcc_eqo(eqo))
   3317		be_process_mcc(adapter);
   3318
   3319	if (max_work < budget) {
   3320		napi_complete_done(napi, max_work);
   3321
   3322		/* Skyhawk EQ_DB has a provision to set the rearm to interrupt
   3323		 * delay via a delay multiplier encoding value
   3324		 */
   3325		if (skyhawk_chip(adapter))
   3326			mult_enc = be_get_eq_delay_mult_enc(eqo);
   3327
   3328		be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
   3329			     mult_enc);
   3330	} else {
   3331		/* As we'll continue in polling mode, count and clear events */
   3332		be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
   3333	}
   3334	return max_work;
   3335}
   3336
   3337void be_detect_error(struct be_adapter *adapter)
   3338{
   3339	u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
   3340	u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
   3341	struct device *dev = &adapter->pdev->dev;
   3342	u16 val;
   3343	u32 i;
   3344
   3345	if (be_check_error(adapter, BE_ERROR_HW))
   3346		return;
   3347
   3348	if (lancer_chip(adapter)) {
   3349		sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
   3350		if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
   3351			be_set_error(adapter, BE_ERROR_UE);
   3352			sliport_err1 = ioread32(adapter->db +
   3353						SLIPORT_ERROR1_OFFSET);
   3354			sliport_err2 = ioread32(adapter->db +
   3355						SLIPORT_ERROR2_OFFSET);
    3356			/* Do not log error messages if it's a FW reset */
   3357			if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
   3358			    sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
   3359				dev_info(dev, "Reset is in progress\n");
   3360			} else {
   3361				dev_err(dev, "Error detected in the card\n");
   3362				dev_err(dev, "ERR: sliport status 0x%x\n",
   3363					sliport_status);
   3364				dev_err(dev, "ERR: sliport error1 0x%x\n",
   3365					sliport_err1);
   3366				dev_err(dev, "ERR: sliport error2 0x%x\n",
   3367					sliport_err2);
   3368			}
   3369		}
   3370	} else {
   3371		ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
   3372		ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
   3373		ue_lo_mask = ioread32(adapter->pcicfg +
   3374				      PCICFG_UE_STATUS_LOW_MASK);
   3375		ue_hi_mask = ioread32(adapter->pcicfg +
   3376				      PCICFG_UE_STATUS_HI_MASK);
   3377
   3378		ue_lo = (ue_lo & ~ue_lo_mask);
   3379		ue_hi = (ue_hi & ~ue_hi_mask);
   3380
   3381		if (ue_lo || ue_hi) {
   3382			/* On certain platforms BE3 hardware can indicate
   3383			 * spurious UEs. In case of a UE in the chip,
   3384			 * the POST register correctly reports either a
   3385			 * FAT_LOG_START state (FW is currently dumping
    3386			 * FAT log data) or an ARMFW_UE state. Check for the
   3387			 * above states to ascertain if the UE is valid or not.
   3388			 */
   3389			if (BE3_chip(adapter)) {
   3390				val = be_POST_stage_get(adapter);
   3391				if ((val & POST_STAGE_FAT_LOG_START)
   3392				     != POST_STAGE_FAT_LOG_START &&
   3393				    (val & POST_STAGE_ARMFW_UE)
   3394				     != POST_STAGE_ARMFW_UE &&
   3395				    (val & POST_STAGE_RECOVERABLE_ERR)
   3396				     != POST_STAGE_RECOVERABLE_ERR)
   3397					return;
   3398			}
   3399
    3400			dev_err(dev, "Error detected in the adapter\n");
   3401			be_set_error(adapter, BE_ERROR_UE);
   3402
   3403			for (i = 0; ue_lo; ue_lo >>= 1, i++) {
   3404				if (ue_lo & 1)
   3405					dev_err(dev, "UE: %s bit set\n",
   3406						ue_status_low_desc[i]);
   3407			}
   3408			for (i = 0; ue_hi; ue_hi >>= 1, i++) {
   3409				if (ue_hi & 1)
   3410					dev_err(dev, "UE: %s bit set\n",
   3411						ue_status_hi_desc[i]);
   3412			}
   3413		}
   3414	}
   3415}
   3416
   3417static void be_msix_disable(struct be_adapter *adapter)
   3418{
   3419	if (msix_enabled(adapter)) {
   3420		pci_disable_msix(adapter->pdev);
   3421		adapter->num_msix_vec = 0;
   3422		adapter->num_msix_roce_vec = 0;
   3423	}
   3424}
   3425
   3426static int be_msix_enable(struct be_adapter *adapter)
   3427{
   3428	unsigned int i, max_roce_eqs;
   3429	struct device *dev = &adapter->pdev->dev;
   3430	int num_vec;
   3431
   3432	/* If RoCE is supported, program the max number of vectors that
    3433	 * could be used for NIC and RoCE; otherwise, just program the number
   3434	 * we'll use initially.
   3435	 */
   3436	if (be_roce_supported(adapter)) {
   3437		max_roce_eqs =
   3438			be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
   3439		max_roce_eqs = min(max_roce_eqs, num_online_cpus());
   3440		num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
   3441	} else {
   3442		num_vec = max(adapter->cfg_num_rx_irqs,
   3443			      adapter->cfg_num_tx_irqs);
   3444	}
   3445
   3446	for (i = 0; i < num_vec; i++)
   3447		adapter->msix_entries[i].entry = i;
   3448
   3449	num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
   3450					MIN_MSIX_VECTORS, num_vec);
   3451	if (num_vec < 0)
   3452		goto fail;
   3453
   3454	if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
   3455		adapter->num_msix_roce_vec = num_vec / 2;
   3456		dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
   3457			 adapter->num_msix_roce_vec);
   3458	}
   3459
   3460	adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
   3461
   3462	dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
   3463		 adapter->num_msix_vec);
   3464	return 0;
   3465
   3466fail:
   3467	dev_warn(dev, "MSIx enable failed\n");
   3468
   3469	/* INTx is not supported in VFs, so fail probe if enable_msix fails */
   3470	if (be_virtfn(adapter))
   3471		return num_vec;
   3472	return 0;
   3473}
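
/* Worked example (illustrative numbers only): if RoCE is supported and
 * pci_enable_msix_range() grants 10 vectors, the code above assigns
 * num_msix_roce_vec = 10 / 2 = 5 and num_msix_vec = 10 - 5 = 5, i.e. the
 * granted vectors are split evenly between the NIC and RoCE.
 */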
   3474
   3475static inline int be_msix_vec_get(struct be_adapter *adapter,
   3476				  struct be_eq_obj *eqo)
   3477{
   3478	return adapter->msix_entries[eqo->msix_idx].vector;
   3479}
   3480
   3481static int be_msix_register(struct be_adapter *adapter)
   3482{
   3483	struct net_device *netdev = adapter->netdev;
   3484	struct be_eq_obj *eqo;
   3485	int status, i, vec;
   3486
   3487	for_all_evt_queues(adapter, eqo, i) {
   3488		sprintf(eqo->desc, "%s-q%d", netdev->name, i);
   3489		vec = be_msix_vec_get(adapter, eqo);
   3490		status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
   3491		if (status)
   3492			goto err_msix;
   3493
   3494		irq_update_affinity_hint(vec, eqo->affinity_mask);
   3495	}
   3496
   3497	return 0;
   3498err_msix:
   3499	for (i--; i >= 0; i--) {
   3500		eqo = &adapter->eq_obj[i];
   3501		free_irq(be_msix_vec_get(adapter, eqo), eqo);
   3502	}
   3503	dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
   3504		 status);
   3505	be_msix_disable(adapter);
   3506	return status;
   3507}
   3508
   3509static int be_irq_register(struct be_adapter *adapter)
   3510{
   3511	struct net_device *netdev = adapter->netdev;
   3512	int status;
   3513
   3514	if (msix_enabled(adapter)) {
   3515		status = be_msix_register(adapter);
   3516		if (status == 0)
   3517			goto done;
   3518		/* INTx is not supported for VF */
   3519		if (be_virtfn(adapter))
   3520			return status;
   3521	}
   3522
   3523	/* INTx: only the first EQ is used */
   3524	netdev->irq = adapter->pdev->irq;
   3525	status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
   3526			     &adapter->eq_obj[0]);
   3527	if (status) {
   3528		dev_err(&adapter->pdev->dev,
   3529			"INTx request IRQ failed - err %d\n", status);
   3530		return status;
   3531	}
   3532done:
   3533	adapter->isr_registered = true;
   3534	return 0;
   3535}
   3536
   3537static void be_irq_unregister(struct be_adapter *adapter)
   3538{
   3539	struct net_device *netdev = adapter->netdev;
   3540	struct be_eq_obj *eqo;
   3541	int i, vec;
   3542
   3543	if (!adapter->isr_registered)
   3544		return;
   3545
   3546	/* INTx */
   3547	if (!msix_enabled(adapter)) {
   3548		free_irq(netdev->irq, &adapter->eq_obj[0]);
   3549		goto done;
   3550	}
   3551
   3552	/* MSIx */
   3553	for_all_evt_queues(adapter, eqo, i) {
   3554		vec = be_msix_vec_get(adapter, eqo);
   3555		irq_update_affinity_hint(vec, NULL);
   3556		free_irq(vec, eqo);
   3557	}
   3558
   3559done:
   3560	adapter->isr_registered = false;
   3561}
   3562
   3563static void be_rx_qs_destroy(struct be_adapter *adapter)
   3564{
   3565	struct rss_info *rss = &adapter->rss_info;
   3566	struct be_queue_info *q;
   3567	struct be_rx_obj *rxo;
   3568	int i;
   3569
   3570	for_all_rx_queues(adapter, rxo, i) {
   3571		q = &rxo->q;
   3572		if (q->created) {
   3573			/* If RXQs are destroyed while in an "out of buffer"
   3574			 * state, there is a possibility of an HW stall on
   3575			 * Lancer. So, post 64 buffers to each queue to relieve
   3576			 * the "out of buffer" condition.
   3577			 * Make sure there's space in the RXQ before posting.
   3578			 */
   3579			if (lancer_chip(adapter)) {
   3580				be_rx_cq_clean(rxo);
   3581				if (atomic_read(&q->used) == 0)
   3582					be_post_rx_frags(rxo, GFP_KERNEL,
   3583							 MAX_RX_POST);
   3584			}
   3585
   3586			be_cmd_rxq_destroy(adapter, q);
   3587			be_rx_cq_clean(rxo);
   3588			be_rxq_clean(rxo);
   3589		}
   3590		be_queue_free(adapter, q);
   3591	}
   3592
   3593	if (rss->rss_flags) {
   3594		rss->rss_flags = RSS_ENABLE_NONE;
   3595		be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
   3596				  128, rss->rss_hkey);
   3597	}
   3598}
   3599
   3600static void be_disable_if_filters(struct be_adapter *adapter)
   3601{
   3602	/* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
   3603	if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
   3604	    check_privilege(adapter, BE_PRIV_FILTMGMT)) {
   3605		be_dev_mac_del(adapter, adapter->pmac_id[0]);
   3606		eth_zero_addr(adapter->dev_mac);
   3607	}
   3608
   3609	be_clear_uc_list(adapter);
   3610	be_clear_mc_list(adapter);
   3611
   3612	/* The IFACE flags are enabled in the open path and cleared
   3613	 * in the close path. When a VF gets detached from the host and
   3614	 * assigned to a VM the following happens:
   3615	 *	- VF's IFACE flags get cleared in the detach path
   3616	 *	- IFACE create is issued by the VF in the attach path
   3617	 * Due to a bug in the BE3/Skyhawk-R FW
   3618	 * (Lancer FW doesn't have the bug), the IFACE capability flags
   3619	 * specified along with the IFACE create cmd issued by a VF are not
   3620	 * honoured by FW.  As a consequence, if a *new* driver
   3621	 * (that enables/disables IFACE flags in open/close)
    3622	 * is loaded in the host and an *old* driver is used by a VM/VF,
   3623	 * the IFACE gets created *without* the needed flags.
   3624	 * To avoid this, disable RX-filter flags only for Lancer.
   3625	 */
   3626	if (lancer_chip(adapter)) {
   3627		be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
   3628		adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
   3629	}
   3630}
   3631
   3632static int be_close(struct net_device *netdev)
   3633{
   3634	struct be_adapter *adapter = netdev_priv(netdev);
   3635	struct be_eq_obj *eqo;
   3636	int i;
   3637
   3638	/* This protection is needed as be_close() may be called even when the
   3639	 * adapter is in cleared state (after eeh perm failure)
   3640	 */
   3641	if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
   3642		return 0;
   3643
   3644	/* Before attempting cleanup ensure all the pending cmds in the
   3645	 * config_wq have finished execution
   3646	 */
   3647	flush_workqueue(be_wq);
   3648
   3649	be_disable_if_filters(adapter);
   3650
   3651	if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
   3652		for_all_evt_queues(adapter, eqo, i) {
   3653			napi_disable(&eqo->napi);
   3654		}
   3655		adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
   3656	}
   3657
   3658	be_async_mcc_disable(adapter);
   3659
   3660	/* Wait for all pending tx completions to arrive so that
   3661	 * all tx skbs are freed.
   3662	 */
   3663	netif_tx_disable(netdev);
   3664	be_tx_compl_clean(adapter);
   3665
   3666	be_rx_qs_destroy(adapter);
   3667
   3668	for_all_evt_queues(adapter, eqo, i) {
   3669		if (msix_enabled(adapter))
   3670			synchronize_irq(be_msix_vec_get(adapter, eqo));
   3671		else
   3672			synchronize_irq(netdev->irq);
   3673		be_eq_clean(eqo);
   3674	}
   3675
   3676	be_irq_unregister(adapter);
   3677
   3678	return 0;
   3679}
   3680
   3681static int be_rx_qs_create(struct be_adapter *adapter)
   3682{
   3683	struct rss_info *rss = &adapter->rss_info;
   3684	u8 rss_key[RSS_HASH_KEY_LEN];
   3685	struct be_rx_obj *rxo;
   3686	int rc, i, j;
   3687
   3688	for_all_rx_queues(adapter, rxo, i) {
   3689		rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
   3690				    sizeof(struct be_eth_rx_d));
   3691		if (rc)
   3692			return rc;
   3693	}
   3694
   3695	if (adapter->need_def_rxq || !adapter->num_rss_qs) {
   3696		rxo = default_rxo(adapter);
   3697		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
   3698				       rx_frag_size, adapter->if_handle,
   3699				       false, &rxo->rss_id);
   3700		if (rc)
   3701			return rc;
   3702	}
   3703
   3704	for_all_rss_queues(adapter, rxo, i) {
   3705		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
   3706				       rx_frag_size, adapter->if_handle,
   3707				       true, &rxo->rss_id);
   3708		if (rc)
   3709			return rc;
   3710	}
   3711
   3712	if (be_multi_rxq(adapter)) {
   3713		for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
   3714			for_all_rss_queues(adapter, rxo, i) {
   3715				if ((j + i) >= RSS_INDIR_TABLE_LEN)
   3716					break;
   3717				rss->rsstable[j + i] = rxo->rss_id;
   3718				rss->rss_queue[j + i] = i;
   3719			}
   3720		}
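		/* Example (hypothetical rss_id values, assuming 4 RSS queues
		 * with rss_id 7, 8, 9 and 10): the nested loops above fill
		 * the indirection table round-robin, i.e.
		 *	rsstable[] = { 7, 8, 9, 10, 7, 8, 9, 10, ... }
		 * so RX flows are spread evenly across the RSS queues.
		 */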
   3721		rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
   3722			RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
   3723
   3724		if (!BEx_chip(adapter))
   3725			rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
   3726				RSS_ENABLE_UDP_IPV6;
   3727
   3728		netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
   3729		rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
   3730				       RSS_INDIR_TABLE_LEN, rss_key);
   3731		if (rc) {
   3732			rss->rss_flags = RSS_ENABLE_NONE;
   3733			return rc;
   3734		}
   3735
   3736		memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
   3737	} else {
   3738		/* Disable RSS, if only default RX Q is created */
   3739		rss->rss_flags = RSS_ENABLE_NONE;
   3740	}
   3741
   3743	/* Post 1 less than RXQ-len to avoid head being equal to tail,
   3744	 * which is a queue empty condition
   3745	 */
   3746	for_all_rx_queues(adapter, rxo, i)
   3747		be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
   3748
   3749	return 0;
   3750}
   3751
   3752static int be_enable_if_filters(struct be_adapter *adapter)
   3753{
   3754	int status;
   3755
   3756	status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
   3757	if (status)
   3758		return status;
   3759
    3760	/* Normally this condition is true, as ->dev_mac is zeroed.
    3761	 * But on BE3 VFs the initial MAC is pre-programmed by the PF and
    3762	 * a subsequent be_dev_mac_add() can fail (after a fresh boot)
   3763	 */
   3764	if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
   3765		int old_pmac_id = -1;
   3766
   3767		/* Remember old programmed MAC if any - can happen on BE3 VF */
   3768		if (!is_zero_ether_addr(adapter->dev_mac))
   3769			old_pmac_id = adapter->pmac_id[0];
   3770
   3771		status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
   3772		if (status)
   3773			return status;
   3774
   3775		/* Delete the old programmed MAC as we successfully programmed
   3776		 * a new MAC
   3777		 */
   3778		if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
   3779			be_dev_mac_del(adapter, old_pmac_id);
   3780
   3781		ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
   3782	}
   3783
   3784	if (adapter->vlans_added)
   3785		be_vid_config(adapter);
   3786
   3787	__be_set_rx_mode(adapter);
   3788
   3789	return 0;
   3790}
   3791
   3792static int be_open(struct net_device *netdev)
   3793{
   3794	struct be_adapter *adapter = netdev_priv(netdev);
   3795	struct be_eq_obj *eqo;
   3796	struct be_rx_obj *rxo;
   3797	struct be_tx_obj *txo;
   3798	u8 link_status;
   3799	int status, i;
   3800
   3801	status = be_rx_qs_create(adapter);
   3802	if (status)
   3803		goto err;
   3804
   3805	status = be_enable_if_filters(adapter);
   3806	if (status)
   3807		goto err;
   3808
   3809	status = be_irq_register(adapter);
   3810	if (status)
   3811		goto err;
   3812
   3813	for_all_rx_queues(adapter, rxo, i)
   3814		be_cq_notify(adapter, rxo->cq.id, true, 0);
   3815
   3816	for_all_tx_queues(adapter, txo, i)
   3817		be_cq_notify(adapter, txo->cq.id, true, 0);
   3818
   3819	be_async_mcc_enable(adapter);
   3820
   3821	for_all_evt_queues(adapter, eqo, i) {
   3822		napi_enable(&eqo->napi);
   3823		be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
   3824	}
   3825	adapter->flags |= BE_FLAGS_NAPI_ENABLED;
   3826
   3827	status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
   3828	if (!status)
   3829		be_link_status_update(adapter, link_status);
   3830
   3831	netif_tx_start_all_queues(netdev);
   3832
   3833	udp_tunnel_nic_reset_ntf(netdev);
   3834
   3835	return 0;
   3836err:
   3837	be_close(adapter->netdev);
   3838	return -EIO;
   3839}
   3840
   3841static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
   3842{
   3843	u32 addr;
   3844
   3845	addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
   3846
   3847	mac[5] = (u8)(addr & 0xFF);
   3848	mac[4] = (u8)((addr >> 8) & 0xFF);
   3849	mac[3] = (u8)((addr >> 16) & 0xFF);
   3850	/* Use the OUI from the current MAC address */
   3851	memcpy(mac, adapter->netdev->dev_addr, 3);
   3852}
   3853
   3854/*
   3855 * Generate a seed MAC address from the PF MAC Address using jhash.
    3856 * MAC addresses for VFs are assigned incrementally starting from the seed.
   3857 * These addresses are programmed in the ASIC by the PF and the VF driver
   3858 * queries for the MAC address during its probe.
   3859 */
   3860static int be_vf_eth_addr_config(struct be_adapter *adapter)
   3861{
   3862	u32 vf;
   3863	int status = 0;
   3864	u8 mac[ETH_ALEN];
   3865	struct be_vf_cfg *vf_cfg;
   3866
   3867	be_vf_eth_addr_generate(adapter, mac);
   3868
   3869	for_all_vfs(adapter, vf_cfg, vf) {
   3870		if (BEx_chip(adapter))
   3871			status = be_cmd_pmac_add(adapter, mac,
   3872						 vf_cfg->if_handle,
   3873						 &vf_cfg->pmac_id, vf + 1);
   3874		else
   3875			status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
   3876						vf + 1);
   3877
   3878		if (status)
   3879			dev_err(&adapter->pdev->dev,
   3880				"Mac address assignment failed for VF %d\n",
   3881				vf);
   3882		else
   3883			memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
   3884
   3885		mac[5] += 1;
   3886	}
   3887	return status;
   3888}
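
/* Illustrative example (addresses and hash value are hypothetical): with a
 * PF MAC of 00:90:fa:aa:bb:cc and jhash() returning 0x00123456,
 * be_vf_eth_addr_generate() yields the seed 00:90:fa:12:34:56 (PF OUI plus
 * the low 24 bits of the hash).  be_vf_eth_addr_config() then programs
 * 00:90:fa:12:34:56 for VF0, 00:90:fa:12:34:57 for VF1 and so on,
 * incrementing only the last byte.
 */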
   3889
   3890static int be_vfs_mac_query(struct be_adapter *adapter)
   3891{
   3892	int status, vf;
   3893	u8 mac[ETH_ALEN];
   3894	struct be_vf_cfg *vf_cfg;
   3895
   3896	for_all_vfs(adapter, vf_cfg, vf) {
   3897		status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
   3898					       mac, vf_cfg->if_handle,
   3899					       false, vf+1);
   3900		if (status)
   3901			return status;
   3902		memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
   3903	}
   3904	return 0;
   3905}
   3906
   3907static void be_vf_clear(struct be_adapter *adapter)
   3908{
   3909	struct be_vf_cfg *vf_cfg;
   3910	u32 vf;
   3911
   3912	if (pci_vfs_assigned(adapter->pdev)) {
   3913		dev_warn(&adapter->pdev->dev,
   3914			 "VFs are assigned to VMs: not disabling VFs\n");
   3915		goto done;
   3916	}
   3917
   3918	pci_disable_sriov(adapter->pdev);
   3919
   3920	for_all_vfs(adapter, vf_cfg, vf) {
   3921		if (BEx_chip(adapter))
   3922			be_cmd_pmac_del(adapter, vf_cfg->if_handle,
   3923					vf_cfg->pmac_id, vf + 1);
   3924		else
   3925			be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
   3926				       vf + 1);
   3927
   3928		be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
   3929	}
   3930
   3931	if (BE3_chip(adapter))
   3932		be_cmd_set_hsw_config(adapter, 0, 0,
   3933				      adapter->if_handle,
   3934				      PORT_FWD_TYPE_PASSTHRU, 0);
   3935done:
   3936	kfree(adapter->vf_cfg);
   3937	adapter->num_vfs = 0;
   3938	adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
   3939}
   3940
   3941static void be_clear_queues(struct be_adapter *adapter)
   3942{
   3943	be_mcc_queues_destroy(adapter);
   3944	be_rx_cqs_destroy(adapter);
   3945	be_tx_queues_destroy(adapter);
   3946	be_evt_queues_destroy(adapter);
   3947}
   3948
   3949static void be_cancel_worker(struct be_adapter *adapter)
   3950{
   3951	if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
   3952		cancel_delayed_work_sync(&adapter->work);
   3953		adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
   3954	}
   3955}
   3956
   3957static void be_cancel_err_detection(struct be_adapter *adapter)
   3958{
   3959	struct be_error_recovery *err_rec = &adapter->error_recovery;
   3960
   3961	if (!be_err_recovery_workq)
   3962		return;
   3963
   3964	if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
   3965		cancel_delayed_work_sync(&err_rec->err_detection_work);
   3966		adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
   3967	}
   3968}
   3969
   3970/* VxLAN offload Notes:
   3971 *
   3972 * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
   3973 * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
   3974 * is expected to work across all types of IP tunnels once exported. Skyhawk
   3975 * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
   3976 * offloads in hw_enc_features only when a VxLAN port is added. If other (non
   3977 * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
   3978 * those other tunnels are unexported on the fly through ndo_features_check().
   3979 */
   3980static int be_vxlan_set_port(struct net_device *netdev, unsigned int table,
   3981			     unsigned int entry, struct udp_tunnel_info *ti)
   3982{
   3983	struct be_adapter *adapter = netdev_priv(netdev);
   3984	struct device *dev = &adapter->pdev->dev;
   3985	int status;
   3986
   3987	status = be_cmd_manage_iface(adapter, adapter->if_handle,
   3988				     OP_CONVERT_NORMAL_TO_TUNNEL);
   3989	if (status) {
   3990		dev_warn(dev, "Failed to convert normal interface to tunnel\n");
   3991		return status;
   3992	}
   3993	adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
   3994
   3995	status = be_cmd_set_vxlan_port(adapter, ti->port);
   3996	if (status) {
   3997		dev_warn(dev, "Failed to add VxLAN port\n");
   3998		return status;
   3999	}
   4000	adapter->vxlan_port = ti->port;
   4001
   4002	netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
   4003				   NETIF_F_TSO | NETIF_F_TSO6 |
   4004				   NETIF_F_GSO_UDP_TUNNEL;
   4005
   4006	dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
   4007		 be16_to_cpu(ti->port));
   4008	return 0;
   4009}
   4010
   4011static int be_vxlan_unset_port(struct net_device *netdev, unsigned int table,
   4012			       unsigned int entry, struct udp_tunnel_info *ti)
   4013{
   4014	struct be_adapter *adapter = netdev_priv(netdev);
   4015
   4016	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
   4017		be_cmd_manage_iface(adapter, adapter->if_handle,
   4018				    OP_CONVERT_TUNNEL_TO_NORMAL);
   4019
   4020	if (adapter->vxlan_port)
   4021		be_cmd_set_vxlan_port(adapter, 0);
   4022
   4023	adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
   4024	adapter->vxlan_port = 0;
   4025
   4026	netdev->hw_enc_features = 0;
   4027	return 0;
   4028}
   4029
   4030static const struct udp_tunnel_nic_info be_udp_tunnels = {
   4031	.set_port	= be_vxlan_set_port,
   4032	.unset_port	= be_vxlan_unset_port,
   4033	.flags		= UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
   4034			  UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
   4035	.tables		= {
   4036		{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
   4037	},
   4038};
   4039
   4040static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
   4041				struct be_resources *vft_res)
   4042{
   4043	struct be_resources res = adapter->pool_res;
   4044	u32 vf_if_cap_flags = res.vf_if_cap_flags;
   4045	struct be_resources res_mod = {0};
   4046	u16 num_vf_qs = 1;
   4047
    4048	/* Distribute the queue resources among the PF and its VFs */
   4049	if (num_vfs) {
   4050		/* Divide the rx queues evenly among the VFs and the PF, capped
   4051		 * at VF-EQ-count. Any remainder queues belong to the PF.
   4052		 */
   4053		num_vf_qs = min(SH_VF_MAX_NIC_EQS,
   4054				res.max_rss_qs / (num_vfs + 1));
   4055
   4056		/* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
    4057		 * RSS Tables per port. Provide RSS on VFs only if the number of
    4058		 * VFs requested is less than its PF Pool's RSS Tables limit.
   4059		 */
   4060		if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
   4061			num_vf_qs = 1;
   4062	}
   4063
   4064	/* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
   4065	 * which are modifiable using SET_PROFILE_CONFIG cmd.
   4066	 */
   4067	be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
   4068				  RESOURCE_MODIFIABLE, 0);
   4069
   4070	/* If RSS IFACE capability flags are modifiable for a VF, set the
   4071	 * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
   4072	 * more than 1 RSSQ is available for a VF.
   4073	 * Otherwise, provision only 1 queue pair for VF.
   4074	 */
   4075	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
   4076		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
   4077		if (num_vf_qs > 1) {
   4078			vf_if_cap_flags |= BE_IF_FLAGS_RSS;
   4079			if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
   4080				vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
   4081		} else {
   4082			vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
   4083					     BE_IF_FLAGS_DEFQ_RSS);
   4084		}
   4085	} else {
   4086		num_vf_qs = 1;
   4087	}
   4088
   4089	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
   4090		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
   4091		vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
   4092	}
   4093
   4094	vft_res->vf_if_cap_flags = vf_if_cap_flags;
   4095	vft_res->max_rx_qs = num_vf_qs;
   4096	vft_res->max_rss_qs = num_vf_qs;
   4097	vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
   4098	vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
   4099
   4100	/* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
    4101	 * among the PF and its VFs, if the fields are changeable
   4102	 */
   4103	if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
   4104		vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
   4105
   4106	if (res_mod.max_vlans == FIELD_MODIFIABLE)
   4107		vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
   4108
   4109	if (res_mod.max_iface_count == FIELD_MODIFIABLE)
   4110		vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
   4111
   4112	if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
   4113		vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
   4114}
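
/* Worked example (illustrative; SH_VF_MAX_NIC_EQS and the pool limits come
 * from the adapter profile): with num_vfs = 7 and a PF pool of
 * max_rss_qs = 64 and max_tx_qs = 64, each VF is offered
 * 64 / (7 + 1) = 8 RX/RSS queues and 8 TX queues, subject to the per-VF EQ
 * cap.  Once num_vfs reaches be_max_pf_pool_rss_tables(adapter), num_vf_qs
 * is forced back to 1 so that the port's RSS-table limit is not exceeded.
 */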
   4115
   4116static void be_if_destroy(struct be_adapter *adapter)
   4117{
   4118	be_cmd_if_destroy(adapter, adapter->if_handle,  0);
   4119
   4120	kfree(adapter->pmac_id);
   4121	adapter->pmac_id = NULL;
   4122
   4123	kfree(adapter->mc_list);
   4124	adapter->mc_list = NULL;
   4125
   4126	kfree(adapter->uc_list);
   4127	adapter->uc_list = NULL;
   4128}
   4129
   4130static int be_clear(struct be_adapter *adapter)
   4131{
   4132	struct pci_dev *pdev = adapter->pdev;
   4133	struct  be_resources vft_res = {0};
   4134
   4135	be_cancel_worker(adapter);
   4136
   4137	flush_workqueue(be_wq);
   4138
   4139	if (sriov_enabled(adapter))
   4140		be_vf_clear(adapter);
   4141
   4142	/* Re-configure FW to distribute resources evenly across max-supported
   4143	 * number of VFs, only when VFs are not already enabled.
   4144	 */
   4145	if (skyhawk_chip(adapter) && be_physfn(adapter) &&
   4146	    !pci_vfs_assigned(pdev)) {
   4147		be_calculate_vf_res(adapter,
   4148				    pci_sriov_get_totalvfs(pdev),
   4149				    &vft_res);
   4150		be_cmd_set_sriov_config(adapter, adapter->pool_res,
   4151					pci_sriov_get_totalvfs(pdev),
   4152					&vft_res);
   4153	}
   4154
   4155	be_vxlan_unset_port(adapter->netdev, 0, 0, NULL);
   4156
   4157	be_if_destroy(adapter);
   4158
   4159	be_clear_queues(adapter);
   4160
   4161	be_msix_disable(adapter);
   4162	adapter->flags &= ~BE_FLAGS_SETUP_DONE;
   4163	return 0;
   4164}
   4165
   4166static int be_vfs_if_create(struct be_adapter *adapter)
   4167{
   4168	struct be_resources res = {0};
   4169	u32 cap_flags, en_flags, vf;
   4170	struct be_vf_cfg *vf_cfg;
   4171	int status;
   4172
   4173	/* If a FW profile exists, then cap_flags are updated */
   4174	cap_flags = BE_VF_IF_EN_FLAGS;
   4175
   4176	for_all_vfs(adapter, vf_cfg, vf) {
   4177		if (!BE3_chip(adapter)) {
   4178			status = be_cmd_get_profile_config(adapter, &res, NULL,
   4179							   ACTIVE_PROFILE_TYPE,
   4180							   RESOURCE_LIMITS,
   4181							   vf + 1);
   4182			if (!status) {
   4183				cap_flags = res.if_cap_flags;
   4184				/* Prevent VFs from enabling VLAN promiscuous
   4185				 * mode
   4186				 */
   4187				cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
   4188			}
   4189		}
   4190
   4191		/* PF should enable IF flags during proxy if_create call */
   4192		en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
   4193		status = be_cmd_if_create(adapter, cap_flags, en_flags,
   4194					  &vf_cfg->if_handle, vf + 1);
   4195		if (status)
   4196			return status;
   4197	}
   4198
   4199	return 0;
   4200}
   4201
   4202static int be_vf_setup_init(struct be_adapter *adapter)
   4203{
   4204	struct be_vf_cfg *vf_cfg;
   4205	int vf;
   4206
   4207	adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
   4208				  GFP_KERNEL);
   4209	if (!adapter->vf_cfg)
   4210		return -ENOMEM;
   4211
   4212	for_all_vfs(adapter, vf_cfg, vf) {
   4213		vf_cfg->if_handle = -1;
   4214		vf_cfg->pmac_id = -1;
   4215	}
   4216	return 0;
   4217}
   4218
   4219static int be_vf_setup(struct be_adapter *adapter)
   4220{
   4221	struct device *dev = &adapter->pdev->dev;
   4222	struct be_vf_cfg *vf_cfg;
   4223	int status, old_vfs, vf;
   4224	bool spoofchk;
   4225
   4226	old_vfs = pci_num_vf(adapter->pdev);
   4227
   4228	status = be_vf_setup_init(adapter);
   4229	if (status)
   4230		goto err;
   4231
   4232	if (old_vfs) {
   4233		for_all_vfs(adapter, vf_cfg, vf) {
   4234			status = be_cmd_get_if_id(adapter, vf_cfg, vf);
   4235			if (status)
   4236				goto err;
   4237		}
   4238
   4239		status = be_vfs_mac_query(adapter);
   4240		if (status)
   4241			goto err;
   4242	} else {
   4243		status = be_vfs_if_create(adapter);
   4244		if (status)
   4245			goto err;
   4246
   4247		status = be_vf_eth_addr_config(adapter);
   4248		if (status)
   4249			goto err;
   4250	}
   4251
   4252	for_all_vfs(adapter, vf_cfg, vf) {
    4253		/* Allow VFs to program MAC/VLAN filters */
   4254		status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
   4255						  vf + 1);
   4256		if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
   4257			status = be_cmd_set_fn_privileges(adapter,
   4258							  vf_cfg->privileges |
   4259							  BE_PRIV_FILTMGMT,
   4260							  vf + 1);
   4261			if (!status) {
   4262				vf_cfg->privileges |= BE_PRIV_FILTMGMT;
   4263				dev_info(dev, "VF%d has FILTMGMT privilege\n",
   4264					 vf);
   4265			}
   4266		}
   4267
   4268		/* Allow full available bandwidth */
   4269		if (!old_vfs)
   4270			be_cmd_config_qos(adapter, 0, 0, vf + 1);
   4271
   4272		status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
   4273					       vf_cfg->if_handle, NULL,
   4274					       &spoofchk);
   4275		if (!status)
   4276			vf_cfg->spoofchk = spoofchk;
   4277
   4278		if (!old_vfs) {
   4279			be_cmd_enable_vf(adapter, vf + 1);
   4280			be_cmd_set_logical_link_config(adapter,
   4281						       IFLA_VF_LINK_STATE_AUTO,
   4282						       vf+1);
   4283		}
   4284	}
   4285
   4286	if (!old_vfs) {
   4287		status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
   4288		if (status) {
   4289			dev_err(dev, "SRIOV enable failed\n");
   4290			adapter->num_vfs = 0;
   4291			goto err;
   4292		}
   4293	}
   4294
   4295	if (BE3_chip(adapter)) {
   4296		/* On BE3, enable VEB only when SRIOV is enabled */
   4297		status = be_cmd_set_hsw_config(adapter, 0, 0,
   4298					       adapter->if_handle,
   4299					       PORT_FWD_TYPE_VEB, 0);
   4300		if (status)
   4301			goto err;
   4302	}
   4303
   4304	adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
   4305	return 0;
   4306err:
   4307	dev_err(dev, "VF setup failed\n");
   4308	be_vf_clear(adapter);
   4309	return status;
   4310}
   4311
   4312/* Converting function_mode bits on BE3 to SH mc_type enums */
   4313
   4314static u8 be_convert_mc_type(u32 function_mode)
   4315{
   4316	if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
   4317		return vNIC1;
   4318	else if (function_mode & QNQ_MODE)
   4319		return FLEX10;
   4320	else if (function_mode & VNIC_MODE)
   4321		return vNIC2;
   4322	else if (function_mode & UMC_ENABLED)
   4323		return UMC;
   4324	else
   4325		return MC_NONE;
   4326}
   4327
   4328/* On BE2/BE3 FW does not suggest the supported limits */
   4329static void BEx_get_resources(struct be_adapter *adapter,
   4330			      struct be_resources *res)
   4331{
   4332	bool use_sriov = adapter->num_vfs ? 1 : 0;
   4333
   4334	if (be_physfn(adapter))
   4335		res->max_uc_mac = BE_UC_PMAC_COUNT;
   4336	else
   4337		res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
   4338
   4339	adapter->mc_type = be_convert_mc_type(adapter->function_mode);
   4340
   4341	if (be_is_mc(adapter)) {
   4342		/* Assuming that there are 4 channels per port,
   4343		 * when multi-channel is enabled
   4344		 */
   4345		if (be_is_qnq_mode(adapter))
   4346			res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
   4347		else
   4348			/* In a non-qnq multichannel mode, the pvid
   4349			 * takes up one vlan entry
   4350			 */
   4351			res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
   4352	} else {
   4353		res->max_vlans = BE_NUM_VLANS_SUPPORTED;
   4354	}
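
	/* Example (assuming BE_NUM_VLANS_SUPPORTED is 64): a QnQ
	 * multi-channel function gets 64 / 8 = 8 VLAN filters, a non-QnQ
	 * multi-channel function gets 64 / 4 - 1 = 15 (the pvid takes one
	 * entry), and a single-channel function gets all 64.
	 */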
   4355
   4356	res->max_mcast_mac = BE_MAX_MC;
   4357
   4358	/* 1) For BE3 1Gb ports, FW does not support multiple TXQs
   4359	 * 2) Create multiple TX rings on a BE3-R multi-channel interface
   4360	 *    *only* if it is RSS-capable.
   4361	 */
   4362	if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
   4363	    be_virtfn(adapter) ||
   4364	    (be_is_mc(adapter) &&
   4365	     !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
   4366		res->max_tx_qs = 1;
   4367	} else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
   4368		struct be_resources super_nic_res = {0};
   4369
   4370		/* On a SuperNIC profile, the driver needs to use the
   4371		 * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
   4372		 */
   4373		be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
   4374					  ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
   4375					  0);
   4376		/* Some old versions of BE3 FW don't report max_tx_qs value */
   4377		res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
   4378	} else {
   4379		res->max_tx_qs = BE3_MAX_TX_QS;
   4380	}
   4381
   4382	if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
   4383	    !use_sriov && be_physfn(adapter))
   4384		res->max_rss_qs = (adapter->be3_native) ?
   4385					   BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
   4386	res->max_rx_qs = res->max_rss_qs + 1;
   4387
   4388	if (be_physfn(adapter))
   4389		res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
   4390					BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
   4391	else
   4392		res->max_evt_qs = 1;
   4393
   4394	res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
   4395	res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
   4396	if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
   4397		res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
   4398}
   4399
   4400static void be_setup_init(struct be_adapter *adapter)
   4401{
   4402	adapter->vlan_prio_bmap = 0xff;
   4403	adapter->phy.link_speed = -1;
   4404	adapter->if_handle = -1;
   4405	adapter->be3_native = false;
   4406	adapter->if_flags = 0;
   4407	adapter->phy_state = BE_UNKNOWN_PHY_STATE;
   4408	if (be_physfn(adapter))
   4409		adapter->cmd_privileges = MAX_PRIVILEGES;
   4410	else
   4411		adapter->cmd_privileges = MIN_PRIVILEGES;
   4412}
   4413
   4414/* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
   4415 * However, this HW limitation is not exposed to the host via any SLI cmd.
   4416 * As a result, in the case of SRIOV and in particular multi-partition configs
    4417 * the driver needs to calculate a proportional share of RSS Tables per PF-pool
    4418 * for distribution among the VFs. This self-imposed limit determines the
    4419 * number of VFs for which RSS can be enabled.
   4420 */
   4421static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
   4422{
   4423	struct be_port_resources port_res = {0};
   4424	u8 rss_tables_on_port;
   4425	u16 max_vfs = be_max_vfs(adapter);
   4426
   4427	be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
   4428				  RESOURCE_LIMITS, 0);
   4429
   4430	rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
   4431
   4432	/* Each PF Pool's RSS Tables limit =
   4433	 * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
   4434	 */
   4435	adapter->pool_res.max_rss_tables =
   4436		max_vfs * rss_tables_on_port / port_res.max_vfs;
   4437}
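
/* Worked example (illustrative values; MAX_PORT_RSS_TABLES and the port
 * numbers come from FW): with MAX_PORT_RSS_TABLES = 15, nic_pfs = 2 on the
 * port, max_vfs = 32 for this PF and max_vfs = 64 for the whole port, the
 * PF pool gets 32 * (15 - 2) / 64 = 6 RSS Tables (integer division), so
 * RSS can be enabled for at most 6 VFs of this PF.
 */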
   4438
   4439static int be_get_sriov_config(struct be_adapter *adapter)
   4440{
   4441	struct be_resources res = {0};
   4442	int max_vfs, old_vfs;
   4443
   4444	be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
   4445				  RESOURCE_LIMITS, 0);
   4446
   4447	/* Some old versions of BE3 FW don't report max_vfs value */
   4448	if (BE3_chip(adapter) && !res.max_vfs) {
   4449		max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
   4450		res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
   4451	}
   4452
   4453	adapter->pool_res = res;
   4454
   4455	/* If during previous unload of the driver, the VFs were not disabled,
   4456	 * then we cannot rely on the PF POOL limits for the TotalVFs value.
   4457	 * Instead use the TotalVFs value stored in the pci-dev struct.
   4458	 */
   4459	old_vfs = pci_num_vf(adapter->pdev);
   4460	if (old_vfs) {
   4461		dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
   4462			 old_vfs);
   4463
   4464		adapter->pool_res.max_vfs =
   4465			pci_sriov_get_totalvfs(adapter->pdev);
   4466		adapter->num_vfs = old_vfs;
   4467	}
   4468
   4469	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
   4470		be_calculate_pf_pool_rss_tables(adapter);
   4471		dev_info(&adapter->pdev->dev,
   4472			 "RSS can be enabled for all VFs if num_vfs <= %d\n",
   4473			 be_max_pf_pool_rss_tables(adapter));
   4474	}
   4475	return 0;
   4476}
   4477
   4478static void be_alloc_sriov_res(struct be_adapter *adapter)
   4479{
   4480	int old_vfs = pci_num_vf(adapter->pdev);
   4481	struct  be_resources vft_res = {0};
   4482	int status;
   4483
   4484	be_get_sriov_config(adapter);
   4485
   4486	if (!old_vfs)
   4487		pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
   4488
   4489	/* When the HW is in SRIOV capable configuration, the PF-pool
   4490	 * resources are given to PF during driver load, if there are no
   4491	 * old VFs. This facility is not available in BE3 FW.
   4492	 * Also, this is done by FW in Lancer chip.
   4493	 */
   4494	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
   4495		be_calculate_vf_res(adapter, 0, &vft_res);
   4496		status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
   4497						 &vft_res);
   4498		if (status)
   4499			dev_err(&adapter->pdev->dev,
   4500				"Failed to optimize SRIOV resources\n");
   4501	}
   4502}
   4503
   4504static int be_get_resources(struct be_adapter *adapter)
   4505{
   4506	struct device *dev = &adapter->pdev->dev;
   4507	struct be_resources res = {0};
   4508	int status;
   4509
    4510	/* For Lancer, SH, etc. read per-function resource limits from FW.
    4511	 * GET_FUNC_CONFIG returns per-function guaranteed limits.
    4512	 * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
   4513	 */
   4514	if (BEx_chip(adapter)) {
   4515		BEx_get_resources(adapter, &res);
   4516	} else {
   4517		status = be_cmd_get_func_config(adapter, &res);
   4518		if (status)
   4519			return status;
   4520
    4521		/* If a default RXQ must be created, we'll use up one RSSQ */
   4522		if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
   4523		    !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
   4524			res.max_rss_qs -= 1;
   4525	}
   4526
   4527	/* If RoCE is supported stash away half the EQs for RoCE */
   4528	res.max_nic_evt_qs = be_roce_supported(adapter) ?
   4529				res.max_evt_qs / 2 : res.max_evt_qs;
   4530	adapter->res = res;
   4531
   4532	/* If FW supports RSS default queue, then skip creating non-RSS
   4533	 * queue for non-IP traffic.
   4534	 */
   4535	adapter->need_def_rxq = (be_if_cap_flags(adapter) &
   4536				 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
   4537
   4538	dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
   4539		 be_max_txqs(adapter), be_max_rxqs(adapter),
   4540		 be_max_rss(adapter), be_max_nic_eqs(adapter),
   4541		 be_max_vfs(adapter));
   4542	dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
   4543		 be_max_uc(adapter), be_max_mc(adapter),
   4544		 be_max_vlans(adapter));
   4545
   4546	/* Ensure RX and TX queues are created in pairs at init time */
   4547	adapter->cfg_num_rx_irqs =
   4548				min_t(u16, netif_get_num_default_rss_queues(),
   4549				      be_max_qp_irqs(adapter));
   4550	adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
   4551	return 0;
   4552}
   4553
   4554static int be_get_config(struct be_adapter *adapter)
   4555{
   4556	int status, level;
   4557	u16 profile_id;
   4558
   4559	status = be_cmd_get_cntl_attributes(adapter);
   4560	if (status)
   4561		return status;
   4562
   4563	status = be_cmd_query_fw_cfg(adapter);
   4564	if (status)
   4565		return status;
   4566
   4567	if (!lancer_chip(adapter) && be_physfn(adapter))
   4568		be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
   4569
   4570	if (BEx_chip(adapter)) {
   4571		level = be_cmd_get_fw_log_level(adapter);
   4572		adapter->msg_enable =
   4573			level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
   4574	}
   4575
   4576	be_cmd_get_acpi_wol_cap(adapter);
   4577	pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
   4578	pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
   4579
   4580	be_cmd_query_port_name(adapter);
   4581
   4582	if (be_physfn(adapter)) {
   4583		status = be_cmd_get_active_profile(adapter, &profile_id);
   4584		if (!status)
   4585			dev_info(&adapter->pdev->dev,
   4586				 "Using profile 0x%x\n", profile_id);
   4587	}
   4588
   4589	return 0;
   4590}
   4591
   4592static int be_mac_setup(struct be_adapter *adapter)
   4593{
   4594	u8 mac[ETH_ALEN];
   4595	int status;
   4596
   4597	if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
   4598		status = be_cmd_get_perm_mac(adapter, mac);
   4599		if (status)
   4600			return status;
   4601
   4602		eth_hw_addr_set(adapter->netdev, mac);
   4603		memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
   4604
   4605		/* Initial MAC for BE3 VFs is already programmed by PF */
   4606		if (BEx_chip(adapter) && be_virtfn(adapter))
   4607			memcpy(adapter->dev_mac, mac, ETH_ALEN);
   4608	}
   4609
   4610	return 0;
   4611}
   4612
   4613static void be_schedule_worker(struct be_adapter *adapter)
   4614{
   4615	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
   4616	adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
   4617}
   4618
   4619static void be_destroy_err_recovery_workq(void)
   4620{
   4621	if (!be_err_recovery_workq)
   4622		return;
   4623
   4624	destroy_workqueue(be_err_recovery_workq);
   4625	be_err_recovery_workq = NULL;
   4626}
   4627
   4628static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
   4629{
   4630	struct be_error_recovery *err_rec = &adapter->error_recovery;
   4631
   4632	if (!be_err_recovery_workq)
   4633		return;
   4634
   4635	queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
   4636			   msecs_to_jiffies(delay));
   4637	adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
   4638}
   4639
   4640static int be_setup_queues(struct be_adapter *adapter)
   4641{
   4642	struct net_device *netdev = adapter->netdev;
   4643	int status;
   4644
   4645	status = be_evt_queues_create(adapter);
   4646	if (status)
   4647		goto err;
   4648
   4649	status = be_tx_qs_create(adapter);
   4650	if (status)
   4651		goto err;
   4652
   4653	status = be_rx_cqs_create(adapter);
   4654	if (status)
   4655		goto err;
   4656
   4657	status = be_mcc_queues_create(adapter);
   4658	if (status)
   4659		goto err;
   4660
   4661	status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
   4662	if (status)
   4663		goto err;
   4664
   4665	status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
   4666	if (status)
   4667		goto err;
   4668
   4669	return 0;
   4670err:
   4671	dev_err(&adapter->pdev->dev, "queue_setup failed\n");
   4672	return status;
   4673}
   4674
   4675static int be_if_create(struct be_adapter *adapter)
   4676{
   4677	u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
   4678	u32 cap_flags = be_if_cap_flags(adapter);
   4679
   4680	/* alloc required memory for other filtering fields */
   4681	adapter->pmac_id = kcalloc(be_max_uc(adapter),
   4682				   sizeof(*adapter->pmac_id), GFP_KERNEL);
   4683	if (!adapter->pmac_id)
   4684		return -ENOMEM;
   4685
   4686	adapter->mc_list = kcalloc(be_max_mc(adapter),
   4687				   sizeof(*adapter->mc_list), GFP_KERNEL);
   4688	if (!adapter->mc_list)
   4689		return -ENOMEM;
   4690
   4691	adapter->uc_list = kcalloc(be_max_uc(adapter),
   4692				   sizeof(*adapter->uc_list), GFP_KERNEL);
   4693	if (!adapter->uc_list)
   4694		return -ENOMEM;
   4695
   4696	if (adapter->cfg_num_rx_irqs == 1)
   4697		cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
   4698
   4699	en_flags &= cap_flags;
   4700	/* will enable all the needed filter flags in be_open() */
   4701	return be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
   4702				  &adapter->if_handle, 0);
   4703}
   4704
   4705int be_update_queues(struct be_adapter *adapter)
   4706{
   4707	struct net_device *netdev = adapter->netdev;
   4708	int status;
   4709
   4710	if (netif_running(netdev)) {
   4711		/* be_tx_timeout() must not run concurrently with this
   4712		 * function, synchronize with an already-running dev_watchdog
   4713		 */
   4714		netif_tx_lock_bh(netdev);
   4715		/* device cannot transmit now, avoid dev_watchdog timeouts */
   4716		netif_carrier_off(netdev);
   4717		netif_tx_unlock_bh(netdev);
   4718
   4719		be_close(netdev);
   4720	}
   4721
   4722	be_cancel_worker(adapter);
   4723
   4724	/* If any vectors have been shared with RoCE we cannot re-program
   4725	 * the MSIx table.
   4726	 */
   4727	if (!adapter->num_msix_roce_vec)
   4728		be_msix_disable(adapter);
   4729
   4730	be_clear_queues(adapter);
   4731	status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
   4732	if (status)
   4733		return status;
   4734
   4735	if (!msix_enabled(adapter)) {
   4736		status = be_msix_enable(adapter);
   4737		if (status)
   4738			return status;
   4739	}
   4740
   4741	status = be_if_create(adapter);
   4742	if (status)
   4743		return status;
   4744
   4745	status = be_setup_queues(adapter);
   4746	if (status)
   4747		return status;
   4748
   4749	be_schedule_worker(adapter);
   4750
   4751	/* The IF was destroyed and re-created. We need to clear
   4752	 * all promiscuous flags valid for the destroyed IF.
    4753	 * Without this, promisc mode is not restored during
   4754	 * be_open() because the driver thinks that it is
   4755	 * already enabled in HW.
   4756	 */
   4757	adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
   4758
   4759	if (netif_running(netdev))
   4760		status = be_open(netdev);
   4761
   4762	return status;
   4763}
   4764
   4765static inline int fw_major_num(const char *fw_ver)
   4766{
   4767	int fw_major = 0, i;
   4768
   4769	i = sscanf(fw_ver, "%d.", &fw_major);
   4770	if (i != 1)
   4771		return 0;
   4772
   4773	return fw_major;
   4774}
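
/* For example, an fw_ver string of "11.4.153.6" yields a major number of
 * 11, while a string that does not begin with a number yields 0.
 */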
   4775
   4776/* If it is error recovery, FLR the PF
    4777 * Else, if any VFs are already enabled, don't FLR the PF
   4778 */
   4779static bool be_reset_required(struct be_adapter *adapter)
   4780{
   4781	if (be_error_recovering(adapter))
   4782		return true;
   4783	else
   4784		return pci_num_vf(adapter->pdev) == 0;
   4785}
   4786
   4787/* Wait for the FW to be ready and perform the required initialization */
   4788static int be_func_init(struct be_adapter *adapter)
   4789{
   4790	int status;
   4791
   4792	status = be_fw_wait_ready(adapter);
   4793	if (status)
   4794		return status;
   4795
   4796	/* FW is now ready; clear errors to allow cmds/doorbell */
   4797	be_clear_error(adapter, BE_CLEAR_ALL);
   4798
   4799	if (be_reset_required(adapter)) {
   4800		status = be_cmd_reset_function(adapter);
   4801		if (status)
   4802			return status;
   4803
   4804		/* Wait for interrupts to quiesce after an FLR */
   4805		msleep(100);
   4806	}
   4807
   4808	/* Tell FW we're ready to fire cmds */
   4809	status = be_cmd_fw_init(adapter);
   4810	if (status)
   4811		return status;
   4812
   4813	/* Allow interrupts for other ULPs running on NIC function */
   4814	be_intr_set(adapter, true);
   4815
   4816	return 0;
   4817}
   4818
   4819static int be_setup(struct be_adapter *adapter)
   4820{
   4821	struct device *dev = &adapter->pdev->dev;
   4822	int status;
   4823
   4824	status = be_func_init(adapter);
   4825	if (status)
   4826		return status;
   4827
   4828	be_setup_init(adapter);
   4829
   4830	if (!lancer_chip(adapter))
   4831		be_cmd_req_native_mode(adapter);
   4832
   4833	/* invoke this cmd first to get pf_num and vf_num which are needed
   4834	 * for issuing profile related cmds
   4835	 */
   4836	if (!BEx_chip(adapter)) {
   4837		status = be_cmd_get_func_config(adapter, NULL);
   4838		if (status)
   4839			return status;
   4840	}
   4841
   4842	status = be_get_config(adapter);
   4843	if (status)
   4844		goto err;
   4845
   4846	if (!BE2_chip(adapter) && be_physfn(adapter))
   4847		be_alloc_sriov_res(adapter);
   4848
   4849	status = be_get_resources(adapter);
   4850	if (status)
   4851		goto err;
   4852
   4853	status = be_msix_enable(adapter);
   4854	if (status)
   4855		goto err;
   4856
   4857	/* will enable all the needed filter flags in be_open() */
   4858	status = be_if_create(adapter);
   4859	if (status)
   4860		goto err;
   4861
   4862	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
   4863	rtnl_lock();
   4864	status = be_setup_queues(adapter);
   4865	rtnl_unlock();
   4866	if (status)
   4867		goto err;
   4868
   4869	be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
   4870
   4871	status = be_mac_setup(adapter);
   4872	if (status)
   4873		goto err;
   4874
   4875	be_cmd_get_fw_ver(adapter);
   4876	dev_info(dev, "FW version is %s\n", adapter->fw_ver);
   4877
   4878	if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
    4879		dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
   4880			adapter->fw_ver);
   4881		dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
   4882	}
   4883
   4884	status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
   4885					 adapter->rx_fc);
   4886	if (status)
   4887		be_cmd_get_flow_control(adapter, &adapter->tx_fc,
   4888					&adapter->rx_fc);
   4889
   4890	dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
   4891		 adapter->tx_fc, adapter->rx_fc);
   4892
   4893	if (be_physfn(adapter))
   4894		be_cmd_set_logical_link_config(adapter,
   4895					       IFLA_VF_LINK_STATE_AUTO, 0);
   4896
    4897	/* BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
    4898	 * confusing a Linux bridge or OVS that it might be connected to.
   4899	 * Set the EVB to PASSTHRU mode which effectively disables the EVB
   4900	 * when SRIOV is not enabled.
   4901	 */
   4902	if (BE3_chip(adapter))
   4903		be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
   4904				      PORT_FWD_TYPE_PASSTHRU, 0);
   4905
   4906	if (adapter->num_vfs)
   4907		be_vf_setup(adapter);
   4908
   4909	status = be_cmd_get_phy_info(adapter);
   4910	if (!status && be_pause_supported(adapter))
   4911		adapter->phy.fc_autoneg = 1;
   4912
   4913	if (be_physfn(adapter) && !lancer_chip(adapter))
   4914		be_cmd_set_features(adapter);
   4915
   4916	be_schedule_worker(adapter);
   4917	adapter->flags |= BE_FLAGS_SETUP_DONE;
   4918	return 0;
   4919err:
   4920	be_clear(adapter);
   4921	return status;
   4922}
   4923
   4924#ifdef CONFIG_NET_POLL_CONTROLLER
   4925static void be_netpoll(struct net_device *netdev)
   4926{
   4927	struct be_adapter *adapter = netdev_priv(netdev);
   4928	struct be_eq_obj *eqo;
   4929	int i;
   4930
   4931	for_all_evt_queues(adapter, eqo, i) {
   4932		be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
   4933		napi_schedule(&eqo->napi);
   4934	}
   4935}
   4936#endif
   4937
   4938int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
   4939{
   4940	const struct firmware *fw;
   4941	int status;
   4942
   4943	if (!netif_running(adapter->netdev)) {
   4944		dev_err(&adapter->pdev->dev,
   4945			"Firmware load not allowed (interface is down)\n");
   4946		return -ENETDOWN;
   4947	}
   4948
   4949	status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
   4950	if (status)
   4951		goto fw_exit;
   4952
   4953	dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
   4954
   4955	if (lancer_chip(adapter))
   4956		status = lancer_fw_download(adapter, fw);
   4957	else
   4958		status = be_fw_download(adapter, fw);
   4959
   4960	if (!status)
   4961		be_cmd_get_fw_ver(adapter);
   4962
   4963fw_exit:
   4964	release_firmware(fw);
   4965	return status;
   4966}
   4967
   4968static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
   4969				 u16 flags, struct netlink_ext_ack *extack)
   4970{
   4971	struct be_adapter *adapter = netdev_priv(dev);
   4972	struct nlattr *attr, *br_spec;
   4973	int rem;
   4974	int status = 0;
   4975	u16 mode = 0;
   4976
   4977	if (!sriov_enabled(adapter))
   4978		return -EOPNOTSUPP;
   4979
   4980	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
   4981	if (!br_spec)
   4982		return -EINVAL;
   4983
   4984	nla_for_each_nested(attr, br_spec, rem) {
   4985		if (nla_type(attr) != IFLA_BRIDGE_MODE)
   4986			continue;
   4987
   4988		if (nla_len(attr) < sizeof(mode))
   4989			return -EINVAL;
   4990
   4991		mode = nla_get_u16(attr);
   4992		if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
   4993			return -EOPNOTSUPP;
   4994
   4995		if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
   4996			return -EINVAL;
   4997
   4998		status = be_cmd_set_hsw_config(adapter, 0, 0,
   4999					       adapter->if_handle,
   5000					       mode == BRIDGE_MODE_VEPA ?
   5001					       PORT_FWD_TYPE_VEPA :
   5002					       PORT_FWD_TYPE_VEB, 0);
   5003		if (status)
   5004			goto err;
   5005
   5006		dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
   5007			 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
   5008
   5009		return status;
   5010	}
   5011err:
   5012	dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
   5013		mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
   5014
   5015	return status;
   5016}
   5017
   5018static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
   5019				 struct net_device *dev, u32 filter_mask,
   5020				 int nlflags)
   5021{
   5022	struct be_adapter *adapter = netdev_priv(dev);
   5023	int status = 0;
   5024	u8 hsw_mode;
   5025
   5026	/* BE and Lancer chips support VEB mode only */
   5027	if (BEx_chip(adapter) || lancer_chip(adapter)) {
   5028		/* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
   5029		if (!pci_sriov_get_totalvfs(adapter->pdev))
   5030			return 0;
   5031		hsw_mode = PORT_FWD_TYPE_VEB;
   5032	} else {
   5033		status = be_cmd_get_hsw_config(adapter, NULL, 0,
   5034					       adapter->if_handle, &hsw_mode,
   5035					       NULL);
   5036		if (status)
   5037			return 0;
   5038
   5039		if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
   5040			return 0;
   5041	}
   5042
   5043	return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
   5044				       hsw_mode == PORT_FWD_TYPE_VEPA ?
   5045				       BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
   5046				       0, 0, nlflags, filter_mask, NULL);
   5047}
   5048
   5049static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
   5050					 void (*func)(struct work_struct *))
   5051{
   5052	struct be_cmd_work *work;
   5053
   5054	work = kzalloc(sizeof(*work), GFP_ATOMIC);
   5055	if (!work) {
   5056		dev_err(&adapter->pdev->dev,
   5057			"be_work memory allocation failed\n");
   5058		return NULL;
   5059	}
   5060
   5061	INIT_WORK(&work->work, func);
   5062	work->adapter = adapter;
   5063	return work;
   5064}
   5065
   5066static netdev_features_t be_features_check(struct sk_buff *skb,
   5067					   struct net_device *dev,
   5068					   netdev_features_t features)
   5069{
   5070	struct be_adapter *adapter = netdev_priv(dev);
   5071	u8 l4_hdr = 0;
   5072
   5073	if (skb_is_gso(skb)) {
    5074		/* IPv6 TSO requests with extension headers are a problem
    5075		 * for Lancer and BE3 HW. Disable the TSO6 feature.
    5076		 */
   5077		if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
   5078			features &= ~NETIF_F_TSO6;
   5079
    5080		/* Lancer cannot handle packets with an MSS less than 256.
    5081		 * It also can't handle a TSO packet with a single segment.
    5082		 * Disable GSO support in such cases.
    5083		 */
   5084		if (lancer_chip(adapter) &&
   5085		    (skb_shinfo(skb)->gso_size < 256 ||
   5086		     skb_shinfo(skb)->gso_segs == 1))
   5087			features &= ~NETIF_F_GSO_MASK;
   5088	}
   5089
   5090	/* The code below restricts offload features for some tunneled and
   5091	 * Q-in-Q packets.
    5092	 * Offload features for normal (non-tunnel) packets are unchanged.
   5093	 */
   5094	features = vlan_features_check(skb, features);
   5095	if (!skb->encapsulation ||
   5096	    !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
   5097		return features;
   5098
    5099	/* It's an encapsulated packet and VxLAN offloads are enabled. We
    5100	 * should disable tunnel offload features if it's not a VxLAN packet,
    5101	 * as tunnel offloads have been enabled only for VxLAN. This is done to
    5102	 * allow other tunneled traffic, such as GRE, to work correctly while
    5103	 * VxLAN offloads are configured on Skyhawk-R.
    5104	 */
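        	/* Concretely, offloads are kept only for frames shaped as:
        	 *   [outer L2][outer IPv4/IPv6][UDP, dport == vxlan_port]
        	 *   [VXLAN][inner Ethernet (ETH_P_TEB)][inner payload]
        	 * i.e. the inner MAC header must begin exactly one UDP header
        	 * plus one VXLAN header past the outer transport header.
        	 */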
   5105	switch (vlan_get_protocol(skb)) {
   5106	case htons(ETH_P_IP):
   5107		l4_hdr = ip_hdr(skb)->protocol;
   5108		break;
   5109	case htons(ETH_P_IPV6):
   5110		l4_hdr = ipv6_hdr(skb)->nexthdr;
   5111		break;
   5112	default:
   5113		return features;
   5114	}
   5115
   5116	if (l4_hdr != IPPROTO_UDP ||
   5117	    skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
   5118	    skb->inner_protocol != htons(ETH_P_TEB) ||
   5119	    skb_inner_mac_header(skb) - skb_transport_header(skb) !=
   5120		sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
   5121	    !adapter->vxlan_port ||
   5122	    udp_hdr(skb)->dest != adapter->vxlan_port)
   5123		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
   5124
   5125	return features;
   5126}
   5127
   5128static int be_get_phys_port_id(struct net_device *dev,
   5129			       struct netdev_phys_item_id *ppid)
   5130{
   5131	int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
   5132	struct be_adapter *adapter = netdev_priv(dev);
   5133	u8 *id;
   5134
   5135	if (MAX_PHYS_ITEM_ID_LEN < id_len)
   5136		return -ENOSPC;
   5137
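        	/* Reported id layout: byte 0 is adapter->hba_port_num + 1,
        	 * followed by the controller serial-number words copied in
        	 * reverse word order.
        	 */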
   5138	ppid->id[0] = adapter->hba_port_num + 1;
   5139	id = &ppid->id[1];
   5140	for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
   5141	     i--, id += CNTL_SERIAL_NUM_WORD_SZ)
   5142		memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
   5143
   5144	ppid->id_len = id_len;
   5145
   5146	return 0;
   5147}
   5148
   5149static void be_set_rx_mode(struct net_device *dev)
   5150{
   5151	struct be_adapter *adapter = netdev_priv(dev);
   5152	struct be_cmd_work *work;
   5153
   5154	work = be_alloc_work(adapter, be_work_set_rx_mode);
   5155	if (work)
   5156		queue_work(be_wq, &work->work);
   5157}
   5158
   5159static const struct net_device_ops be_netdev_ops = {
   5160	.ndo_open		= be_open,
   5161	.ndo_stop		= be_close,
   5162	.ndo_start_xmit		= be_xmit,
   5163	.ndo_set_rx_mode	= be_set_rx_mode,
   5164	.ndo_set_mac_address	= be_mac_addr_set,
   5165	.ndo_get_stats64	= be_get_stats64,
   5166	.ndo_validate_addr	= eth_validate_addr,
   5167	.ndo_vlan_rx_add_vid	= be_vlan_add_vid,
   5168	.ndo_vlan_rx_kill_vid	= be_vlan_rem_vid,
   5169	.ndo_set_vf_mac		= be_set_vf_mac,
   5170	.ndo_set_vf_vlan	= be_set_vf_vlan,
   5171	.ndo_set_vf_rate	= be_set_vf_tx_rate,
   5172	.ndo_get_vf_config	= be_get_vf_config,
   5173	.ndo_set_vf_link_state  = be_set_vf_link_state,
   5174	.ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
   5175	.ndo_tx_timeout		= be_tx_timeout,
   5176#ifdef CONFIG_NET_POLL_CONTROLLER
   5177	.ndo_poll_controller	= be_netpoll,
   5178#endif
   5179	.ndo_bridge_setlink	= be_ndo_bridge_setlink,
   5180	.ndo_bridge_getlink	= be_ndo_bridge_getlink,
   5181	.ndo_features_check	= be_features_check,
   5182	.ndo_get_phys_port_id   = be_get_phys_port_id,
   5183};
   5184
   5185static void be_netdev_init(struct net_device *netdev)
   5186{
   5187	struct be_adapter *adapter = netdev_priv(netdev);
   5188
   5189	netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
   5190		NETIF_F_GSO_UDP_TUNNEL |
   5191		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
   5192		NETIF_F_HW_VLAN_CTAG_TX;
   5193	if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
   5194		netdev->hw_features |= NETIF_F_RXHASH;
   5195
   5196	netdev->features |= netdev->hw_features |
   5197		NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER |
   5198		NETIF_F_HIGHDMA;
   5199
   5200	netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
   5201		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
   5202
   5203	netdev->priv_flags |= IFF_UNICAST_FLT;
   5204
   5205	netdev->flags |= IFF_MULTICAST;
   5206
   5207	netif_set_tso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
   5208
   5209	netdev->netdev_ops = &be_netdev_ops;
   5210
   5211	netdev->ethtool_ops = &be_ethtool_ops;
   5212
   5213	if (!lancer_chip(adapter) && !BEx_chip(adapter) && !be_is_mc(adapter))
   5214		netdev->udp_tunnel_nic_info = &be_udp_tunnels;
   5215
   5216	/* MTU range: 256 - 9000 */
   5217	netdev->min_mtu = BE_MIN_MTU;
   5218	netdev->max_mtu = BE_MAX_MTU;
   5219}
   5220
   5221static void be_cleanup(struct be_adapter *adapter)
   5222{
   5223	struct net_device *netdev = adapter->netdev;
   5224
   5225	rtnl_lock();
   5226	netif_device_detach(netdev);
   5227	if (netif_running(netdev))
   5228		be_close(netdev);
   5229	rtnl_unlock();
   5230
   5231	be_clear(adapter);
   5232}
   5233
   5234static int be_resume(struct be_adapter *adapter)
   5235{
   5236	struct net_device *netdev = adapter->netdev;
   5237	int status;
   5238
   5239	status = be_setup(adapter);
   5240	if (status)
   5241		return status;
   5242
   5243	rtnl_lock();
   5244	if (netif_running(netdev))
   5245		status = be_open(netdev);
   5246	rtnl_unlock();
   5247
   5248	if (status)
   5249		return status;
   5250
   5251	netif_device_attach(netdev);
   5252
   5253	return 0;
   5254}
   5255
   5256static void be_soft_reset(struct be_adapter *adapter)
   5257{
   5258	u32 val;
   5259
   5260	dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
   5261	val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
   5262	val |= SLIPORT_SOFTRESET_SR_MASK;
   5263	iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
   5264}
   5265
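        /* A TPE error is considered recoverable only if all of the following
         * hold: the POST stage reports a recoverable error with a non-zero
         * error code, enough idle time has passed since driver load, enough
         * time has passed since the previous recovery attempt, and the error
         * code differs from the one seen in the previous recovery.
         */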
   5266static bool be_err_is_recoverable(struct be_adapter *adapter)
   5267{
   5268	struct be_error_recovery *err_rec = &adapter->error_recovery;
   5269	unsigned long initial_idle_time =
   5270		msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
   5271	unsigned long recovery_interval =
   5272		msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
   5273	u16 ue_err_code;
   5274	u32 val;
   5275
   5276	val = be_POST_stage_get(adapter);
   5277	if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
   5278		return false;
   5279	ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
   5280	if (ue_err_code == 0)
   5281		return false;
   5282
   5283	dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
   5284		ue_err_code);
   5285
   5286	if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
   5287		dev_err(&adapter->pdev->dev,
   5288			"Cannot recover within %lu sec from driver load\n",
   5289			jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
   5290		return false;
   5291	}
   5292
   5293	if (err_rec->last_recovery_time && time_before_eq(
   5294		jiffies - err_rec->last_recovery_time, recovery_interval)) {
   5295		dev_err(&adapter->pdev->dev,
   5296			"Cannot recover within %lu sec from last recovery\n",
   5297			jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
   5298		return false;
   5299	}
   5300
   5301	if (ue_err_code == err_rec->last_err_code) {
   5302		dev_err(&adapter->pdev->dev,
   5303			"Cannot recover from a consecutive TPE error\n");
   5304		return false;
   5305	}
   5306
   5307	err_rec->last_recovery_time = jiffies;
   5308	err_rec->last_err_code = ue_err_code;
   5309	return true;
   5310}
   5311
   5312static int be_tpe_recover(struct be_adapter *adapter)
   5313{
   5314	struct be_error_recovery *err_rec = &adapter->error_recovery;
   5315	int status = -EAGAIN;
   5316	u32 val;
   5317
   5318	switch (err_rec->recovery_state) {
   5319	case ERR_RECOVERY_ST_NONE:
   5320		err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
   5321		err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
   5322		break;
   5323
   5324	case ERR_RECOVERY_ST_DETECT:
   5325		val = be_POST_stage_get(adapter);
   5326		if ((val & POST_STAGE_RECOVERABLE_ERR) !=
   5327		    POST_STAGE_RECOVERABLE_ERR) {
   5328			dev_err(&adapter->pdev->dev,
   5329				"Unrecoverable HW error detected: 0x%x\n", val);
   5330			status = -EINVAL;
   5331			err_rec->resched_delay = 0;
   5332			break;
   5333		}
   5334
   5335		dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
   5336
    5337		/* Only PF0 initiates a Chip Soft Reset. But PF0 must wait UE2SR
    5338		 * milliseconds before it checks the final error status in
    5339		 * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
    5340		 * If they are, PF0 initiates a Soft Reset.
    5341		 */
   5342		if (adapter->pf_num == 0) {
   5343			err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
   5344			err_rec->resched_delay = err_rec->ue_to_reset_time -
   5345					ERR_RECOVERY_UE_DETECT_DURATION;
   5346			break;
   5347		}
   5348
   5349		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
   5350		err_rec->resched_delay = err_rec->ue_to_poll_time -
   5351					ERR_RECOVERY_UE_DETECT_DURATION;
   5352		break;
   5353
   5354	case ERR_RECOVERY_ST_RESET:
   5355		if (!be_err_is_recoverable(adapter)) {
   5356			dev_err(&adapter->pdev->dev,
   5357				"Failed to meet recovery criteria\n");
   5358			status = -EIO;
   5359			err_rec->resched_delay = 0;
   5360			break;
   5361		}
   5362		be_soft_reset(adapter);
   5363		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
   5364		err_rec->resched_delay = err_rec->ue_to_poll_time -
   5365					err_rec->ue_to_reset_time;
   5366		break;
   5367
   5368	case ERR_RECOVERY_ST_PRE_POLL:
   5369		err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
   5370		err_rec->resched_delay = 0;
   5371		status = 0;			/* done */
   5372		break;
   5373
   5374	default:
   5375		status = -EINVAL;
   5376		err_rec->resched_delay = 0;
   5377		break;
   5378	}
   5379
   5380	return status;
   5381}
   5382
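        /* Overall recovery flow for BEx/Skyhawk: step the TPE state machine
         * above (NONE -> DETECT -> RESET -> PRE_POLL -> REINIT; non-PF0
         * functions skip RESET), with be_err_detection_task() rescheduling
         * itself using the resched_delay computed at each step. Lancer skips
         * the state machine. Once the FW is ready again, the adapter is torn
         * down and rebuilt via be_cleanup()/be_resume().
         */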
   5383static int be_err_recover(struct be_adapter *adapter)
   5384{
   5385	int status;
   5386
   5387	if (!lancer_chip(adapter)) {
   5388		if (!adapter->error_recovery.recovery_supported ||
   5389		    adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
   5390			return -EIO;
   5391		status = be_tpe_recover(adapter);
   5392		if (status)
   5393			goto err;
   5394	}
   5395
    5396	/* Wait for the adapter to reach a quiescent state before
    5397	 * destroying the queues.
    5398	 */
   5399	status = be_fw_wait_ready(adapter);
   5400	if (status)
   5401		goto err;
   5402
   5403	adapter->flags |= BE_FLAGS_TRY_RECOVERY;
   5404
   5405	be_cleanup(adapter);
   5406
   5407	status = be_resume(adapter);
   5408	if (status)
   5409		goto err;
   5410
   5411	adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
   5412
   5413err:
   5414	return status;
   5415}
   5416
   5417static void be_err_detection_task(struct work_struct *work)
   5418{
   5419	struct be_error_recovery *err_rec =
   5420			container_of(work, struct be_error_recovery,
   5421				     err_detection_work.work);
   5422	struct be_adapter *adapter =
   5423			container_of(err_rec, struct be_adapter,
   5424				     error_recovery);
   5425	u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
   5426	struct device *dev = &adapter->pdev->dev;
   5427	int recovery_status;
   5428
   5429	be_detect_error(adapter);
   5430	if (!be_check_error(adapter, BE_ERROR_HW))
   5431		goto reschedule_task;
   5432
   5433	recovery_status = be_err_recover(adapter);
   5434	if (!recovery_status) {
   5435		err_rec->recovery_retries = 0;
   5436		err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
   5437		dev_info(dev, "Adapter recovery successful\n");
   5438		goto reschedule_task;
   5439	} else if (!lancer_chip(adapter) && err_rec->resched_delay) {
   5440		/* BEx/SH recovery state machine */
   5441		if (adapter->pf_num == 0 &&
   5442		    err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
   5443			dev_err(&adapter->pdev->dev,
   5444				"Adapter recovery in progress\n");
   5445		resched_delay = err_rec->resched_delay;
   5446		goto reschedule_task;
   5447	} else if (lancer_chip(adapter) && be_virtfn(adapter)) {
    5448		/* For VFs, check every second whether the PF has
    5449		 * allocated resources.
    5450		 */
   5451		dev_err(dev, "Re-trying adapter recovery\n");
   5452		goto reschedule_task;
   5453	} else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
   5454		   ERR_RECOVERY_MAX_RETRY_COUNT) {
    5455		/* In case of another error during recovery, it takes 30 sec
    5456		 * for the adapter to come out of the error state. Retry error
    5457		 * recovery after this time interval.
    5458		 */
   5459		dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
   5460		resched_delay = ERR_RECOVERY_RETRY_DELAY;
   5461		goto reschedule_task;
   5462	} else {
   5463		dev_err(dev, "Adapter recovery failed\n");
   5464		dev_err(dev, "Please reboot server to recover\n");
   5465	}
   5466
   5467	return;
   5468
   5469reschedule_task:
   5470	be_schedule_err_detection(adapter, resched_delay);
   5471}
   5472
   5473static void be_log_sfp_info(struct be_adapter *adapter)
   5474{
   5475	int status;
   5476
   5477	status = be_cmd_query_sfp_info(adapter);
   5478	if (!status) {
   5479		dev_err(&adapter->pdev->dev,
   5480			"Port %c: %s Vendor: %s part no: %s",
   5481			adapter->port_name,
   5482			be_misconfig_evt_port_state[adapter->phy_state],
   5483			adapter->phy.vendor_name,
   5484			adapter->phy.vendor_pn);
   5485	}
   5486	adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
   5487}
   5488
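        /* Periodic housekeeping task; it reschedules itself every 1000 ms, so
         * with be_get_temp_freq == 64 the die temperature is queried roughly
         * once a minute on the PF.
         */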
   5489static void be_worker(struct work_struct *work)
   5490{
   5491	struct be_adapter *adapter =
   5492		container_of(work, struct be_adapter, work.work);
   5493	struct be_rx_obj *rxo;
   5494	int i;
   5495
   5496	if (be_physfn(adapter) &&
   5497	    MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
   5498		be_cmd_get_die_temperature(adapter);
   5499
   5500	/* when interrupts are not yet enabled, just reap any pending
   5501	 * mcc completions
   5502	 */
   5503	if (!netif_running(adapter->netdev)) {
   5504		local_bh_disable();
   5505		be_process_mcc(adapter);
   5506		local_bh_enable();
   5507		goto reschedule;
   5508	}
   5509
   5510	if (!adapter->stats_cmd_sent) {
   5511		if (lancer_chip(adapter))
   5512			lancer_cmd_get_pport_stats(adapter,
   5513						   &adapter->stats_cmd);
   5514		else
   5515			be_cmd_get_stats(adapter, &adapter->stats_cmd);
   5516	}
   5517
   5518	for_all_rx_queues(adapter, rxo, i) {
   5519		/* Replenish RX-queues starved due to memory
   5520		 * allocation failures.
   5521		 */
   5522		if (rxo->rx_post_starved)
   5523			be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
   5524	}
   5525
   5526	/* EQ-delay update for Skyhawk is done while notifying EQ */
   5527	if (!skyhawk_chip(adapter))
   5528		be_eqd_update(adapter, false);
   5529
   5530	if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
   5531		be_log_sfp_info(adapter);
   5532
   5533reschedule:
   5534	adapter->work_counter++;
   5535	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
   5536}
   5537
   5538static void be_unmap_pci_bars(struct be_adapter *adapter)
   5539{
   5540	if (adapter->csr)
   5541		pci_iounmap(adapter->pdev, adapter->csr);
   5542	if (adapter->db)
   5543		pci_iounmap(adapter->pdev, adapter->db);
   5544	if (adapter->pcicfg && adapter->pcicfg_mapped)
   5545		pci_iounmap(adapter->pdev, adapter->pcicfg);
   5546}
   5547
   5548static int db_bar(struct be_adapter *adapter)
   5549{
   5550	if (lancer_chip(adapter) || be_virtfn(adapter))
   5551		return 0;
   5552	else
   5553		return 4;
   5554}
   5555
   5556static int be_roce_map_pci_bars(struct be_adapter *adapter)
   5557{
   5558	if (skyhawk_chip(adapter)) {
   5559		adapter->roce_db.size = 4096;
   5560		adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
   5561							      db_bar(adapter));
   5562		adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
   5563							       db_bar(adapter));
   5564	}
   5565	return 0;
   5566}
   5567
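        /* BAR usage as mapped below: CSR space is BAR 2 (BEx PF only); the
         * doorbell space is BAR 0 on Lancer and on VFs, BAR 4 otherwise (see
         * db_bar()); PCICFG is BAR 1 on BE2 PFs and BAR 0 on BE3/Skyhawk PFs,
         * while VFs reach it at an offset inside the doorbell BAR.
         */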
   5568static int be_map_pci_bars(struct be_adapter *adapter)
   5569{
   5570	struct pci_dev *pdev = adapter->pdev;
   5571	u8 __iomem *addr;
   5572	u32 sli_intf;
   5573
   5574	pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
   5575	adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
   5576				SLI_INTF_FAMILY_SHIFT;
   5577	adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
   5578
   5579	if (BEx_chip(adapter) && be_physfn(adapter)) {
   5580		adapter->csr = pci_iomap(pdev, 2, 0);
   5581		if (!adapter->csr)
   5582			return -ENOMEM;
   5583	}
   5584
   5585	addr = pci_iomap(pdev, db_bar(adapter), 0);
   5586	if (!addr)
   5587		goto pci_map_err;
   5588	adapter->db = addr;
   5589
   5590	if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
   5591		if (be_physfn(adapter)) {
   5592			/* PCICFG is the 2nd BAR in BE2 */
   5593			addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
   5594			if (!addr)
   5595				goto pci_map_err;
   5596			adapter->pcicfg = addr;
   5597			adapter->pcicfg_mapped = true;
   5598		} else {
   5599			adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
   5600			adapter->pcicfg_mapped = false;
   5601		}
   5602	}
   5603
   5604	be_roce_map_pci_bars(adapter);
   5605	return 0;
   5606
   5607pci_map_err:
   5608	dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
   5609	be_unmap_pci_bars(adapter);
   5610	return -ENOMEM;
   5611}
   5612
   5613static void be_drv_cleanup(struct be_adapter *adapter)
   5614{
   5615	struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
   5616	struct device *dev = &adapter->pdev->dev;
   5617
   5618	if (mem->va)
   5619		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
   5620
   5621	mem = &adapter->rx_filter;
   5622	if (mem->va)
   5623		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
   5624
   5625	mem = &adapter->stats_cmd;
   5626	if (mem->va)
   5627		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
   5628}
   5629
   5630/* Allocate and initialize various fields in be_adapter struct */
   5631static int be_drv_init(struct be_adapter *adapter)
   5632{
   5633	struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
   5634	struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
   5635	struct be_dma_mem *rx_filter = &adapter->rx_filter;
   5636	struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
   5637	struct device *dev = &adapter->pdev->dev;
   5638	int status = 0;
   5639
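        	/* The FW mailbox needs to be 16-byte aligned: over-allocate by
        	 * 16 bytes (mbox_mem_alloced) and carve the aligned mailbox
        	 * (mbox_mem) out of that buffer below.
        	 */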
   5640	mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
   5641	mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size,
   5642						&mbox_mem_alloc->dma,
   5643						GFP_KERNEL);
   5644	if (!mbox_mem_alloc->va)
   5645		return -ENOMEM;
   5646
   5647	mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
   5648	mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
   5649	mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
   5650
   5651	rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
   5652	rx_filter->va = dma_alloc_coherent(dev, rx_filter->size,
   5653					   &rx_filter->dma, GFP_KERNEL);
   5654	if (!rx_filter->va) {
   5655		status = -ENOMEM;
   5656		goto free_mbox;
   5657	}
   5658
   5659	if (lancer_chip(adapter))
   5660		stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
   5661	else if (BE2_chip(adapter))
   5662		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
   5663	else if (BE3_chip(adapter))
   5664		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
   5665	else
   5666		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
   5667	stats_cmd->va = dma_alloc_coherent(dev, stats_cmd->size,
   5668					   &stats_cmd->dma, GFP_KERNEL);
   5669	if (!stats_cmd->va) {
   5670		status = -ENOMEM;
   5671		goto free_rx_filter;
   5672	}
   5673
   5674	mutex_init(&adapter->mbox_lock);
   5675	mutex_init(&adapter->mcc_lock);
   5676	mutex_init(&adapter->rx_filter_lock);
   5677	spin_lock_init(&adapter->mcc_cq_lock);
   5678	init_completion(&adapter->et_cmd_compl);
   5679
   5680	pci_save_state(adapter->pdev);
   5681
   5682	INIT_DELAYED_WORK(&adapter->work, be_worker);
   5683
   5684	adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
   5685	adapter->error_recovery.resched_delay = 0;
   5686	INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
   5687			  be_err_detection_task);
   5688
   5689	adapter->rx_fc = true;
   5690	adapter->tx_fc = true;
   5691
   5692	/* Must be a power of 2 or else MODULO will BUG_ON */
   5693	adapter->be_get_temp_freq = 64;
   5694
   5695	return 0;
   5696
   5697free_rx_filter:
   5698	dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
   5699free_mbox:
   5700	dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
   5701			  mbox_mem_alloc->dma);
   5702	return status;
   5703}
   5704
   5705static void be_remove(struct pci_dev *pdev)
   5706{
   5707	struct be_adapter *adapter = pci_get_drvdata(pdev);
   5708
   5709	if (!adapter)
   5710		return;
   5711
   5712	be_roce_dev_remove(adapter);
   5713	be_intr_set(adapter, false);
   5714
   5715	be_cancel_err_detection(adapter);
   5716
   5717	unregister_netdev(adapter->netdev);
   5718
   5719	be_clear(adapter);
   5720
   5721	if (!pci_vfs_assigned(adapter->pdev))
   5722		be_cmd_reset_function(adapter);
   5723
   5724	/* tell fw we're done with firing cmds */
   5725	be_cmd_fw_clean(adapter);
   5726
   5727	be_unmap_pci_bars(adapter);
   5728	be_drv_cleanup(adapter);
   5729
   5730	pci_disable_pcie_error_reporting(pdev);
   5731
   5732	pci_release_regions(pdev);
   5733	pci_disable_device(pdev);
   5734
   5735	free_netdev(adapter->netdev);
   5736}
   5737
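        /* hwmon "temp1_input" show callback: reports the cached on-die
         * temperature in millidegrees Celsius. The hwmon device itself is
         * registered in be_probe() via
         * devm_hwmon_device_register_with_groups().
         */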
   5738static ssize_t be_hwmon_show_temp(struct device *dev,
   5739				  struct device_attribute *dev_attr,
   5740				  char *buf)
   5741{
   5742	struct be_adapter *adapter = dev_get_drvdata(dev);
   5743
   5744	/* Unit: millidegree Celsius */
   5745	if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
   5746		return -EIO;
   5747	else
   5748		return sprintf(buf, "%u\n",
   5749			       adapter->hwmon_info.be_on_die_temp * 1000);
   5750}
   5751
   5752static SENSOR_DEVICE_ATTR(temp1_input, 0444,
   5753			  be_hwmon_show_temp, NULL, 1);
   5754
   5755static struct attribute *be_hwmon_attrs[] = {
   5756	&sensor_dev_attr_temp1_input.dev_attr.attr,
   5757	NULL
   5758};
   5759
   5760ATTRIBUTE_GROUPS(be_hwmon);
   5761
   5762static char *mc_name(struct be_adapter *adapter)
   5763{
   5764	char *str = "";	/* default */
   5765
   5766	switch (adapter->mc_type) {
   5767	case UMC:
   5768		str = "UMC";
   5769		break;
   5770	case FLEX10:
   5771		str = "FLEX10";
   5772		break;
   5773	case vNIC1:
   5774		str = "vNIC-1";
   5775		break;
   5776	case nPAR:
   5777		str = "nPAR";
   5778		break;
   5779	case UFP:
   5780		str = "UFP";
   5781		break;
   5782	case vNIC2:
   5783		str = "vNIC-2";
   5784		break;
   5785	default:
   5786		str = "";
   5787	}
   5788
   5789	return str;
   5790}
   5791
   5792static inline char *func_name(struct be_adapter *adapter)
   5793{
   5794	return be_physfn(adapter) ? "PF" : "VF";
   5795}
   5796
   5797static inline char *nic_name(struct pci_dev *pdev)
   5798{
   5799	switch (pdev->device) {
   5800	case OC_DEVICE_ID1:
   5801		return OC_NAME;
   5802	case OC_DEVICE_ID2:
   5803		return OC_NAME_BE;
   5804	case OC_DEVICE_ID3:
   5805	case OC_DEVICE_ID4:
   5806		return OC_NAME_LANCER;
   5807	case BE_DEVICE_ID2:
   5808		return BE3_NAME;
   5809	case OC_DEVICE_ID5:
   5810	case OC_DEVICE_ID6:
   5811		return OC_NAME_SH;
   5812	default:
   5813		return BE_NAME;
   5814	}
   5815}
   5816
   5817static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
   5818{
   5819	struct be_adapter *adapter;
   5820	struct net_device *netdev;
   5821	int status = 0;
   5822
   5823	status = pci_enable_device(pdev);
   5824	if (status)
   5825		goto do_none;
   5826
   5827	status = pci_request_regions(pdev, DRV_NAME);
   5828	if (status)
   5829		goto disable_dev;
   5830	pci_set_master(pdev);
   5831
   5832	netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
   5833	if (!netdev) {
   5834		status = -ENOMEM;
   5835		goto rel_reg;
   5836	}
   5837	adapter = netdev_priv(netdev);
   5838	adapter->pdev = pdev;
   5839	pci_set_drvdata(pdev, adapter);
   5840	adapter->netdev = netdev;
   5841	SET_NETDEV_DEV(netdev, &pdev->dev);
   5842
   5843	status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
   5844	if (status) {
   5845		dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
   5846		goto free_netdev;
   5847	}
   5848
   5849	status = pci_enable_pcie_error_reporting(pdev);
   5850	if (!status)
   5851		dev_info(&pdev->dev, "PCIe error reporting enabled\n");
   5852
   5853	status = be_map_pci_bars(adapter);
   5854	if (status)
   5855		goto free_netdev;
   5856
   5857	status = be_drv_init(adapter);
   5858	if (status)
   5859		goto unmap_bars;
   5860
   5861	status = be_setup(adapter);
   5862	if (status)
   5863		goto drv_cleanup;
   5864
   5865	be_netdev_init(netdev);
   5866	status = register_netdev(netdev);
   5867	if (status != 0)
   5868		goto unsetup;
   5869
   5870	be_roce_dev_add(adapter);
   5871
   5872	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
   5873	adapter->error_recovery.probe_time = jiffies;
   5874
    5875	/* On-die temperature is not supported for VFs. */
   5876	if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
   5877		adapter->hwmon_info.hwmon_dev =
   5878			devm_hwmon_device_register_with_groups(&pdev->dev,
   5879							       DRV_NAME,
   5880							       adapter,
   5881							       be_hwmon_groups);
   5882		adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
   5883	}
   5884
   5885	dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
   5886		 func_name(adapter), mc_name(adapter), adapter->port_name);
   5887
   5888	return 0;
   5889
   5890unsetup:
   5891	be_clear(adapter);
   5892drv_cleanup:
   5893	be_drv_cleanup(adapter);
   5894unmap_bars:
   5895	be_unmap_pci_bars(adapter);
   5896free_netdev:
   5897	pci_disable_pcie_error_reporting(pdev);
   5898	free_netdev(netdev);
   5899rel_reg:
   5900	pci_release_regions(pdev);
   5901disable_dev:
   5902	pci_disable_device(pdev);
   5903do_none:
   5904	dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
   5905	return status;
   5906}
   5907
   5908static int __maybe_unused be_suspend(struct device *dev_d)
   5909{
   5910	struct be_adapter *adapter = dev_get_drvdata(dev_d);
   5911
   5912	be_intr_set(adapter, false);
   5913	be_cancel_err_detection(adapter);
   5914
   5915	be_cleanup(adapter);
   5916
   5917	return 0;
   5918}
   5919
   5920static int __maybe_unused be_pci_resume(struct device *dev_d)
   5921{
   5922	struct be_adapter *adapter = dev_get_drvdata(dev_d);
   5923	int status = 0;
   5924
   5925	status = be_resume(adapter);
   5926	if (status)
   5927		return status;
   5928
   5929	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
   5930
   5931	return 0;
   5932}
   5933
   5934/*
   5935 * An FLR will stop BE from DMAing any data.
   5936 */
   5937static void be_shutdown(struct pci_dev *pdev)
   5938{
   5939	struct be_adapter *adapter = pci_get_drvdata(pdev);
   5940
   5941	if (!adapter)
   5942		return;
   5943
   5944	be_roce_dev_shutdown(adapter);
   5945	cancel_delayed_work_sync(&adapter->work);
   5946	be_cancel_err_detection(adapter);
   5947
   5948	netif_device_detach(adapter->netdev);
   5949
   5950	be_cmd_reset_function(adapter);
   5951
   5952	pci_disable_device(pdev);
   5953}
   5954
   5955static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
   5956					    pci_channel_state_t state)
   5957{
   5958	struct be_adapter *adapter = pci_get_drvdata(pdev);
   5959
   5960	dev_err(&adapter->pdev->dev, "EEH error detected\n");
   5961
   5962	be_roce_dev_remove(adapter);
   5963
   5964	if (!be_check_error(adapter, BE_ERROR_EEH)) {
   5965		be_set_error(adapter, BE_ERROR_EEH);
   5966
   5967		be_cancel_err_detection(adapter);
   5968
   5969		be_cleanup(adapter);
   5970	}
   5971
   5972	if (state == pci_channel_io_perm_failure)
   5973		return PCI_ERS_RESULT_DISCONNECT;
   5974
   5975	pci_disable_device(pdev);
   5976
    5977	/* The error could cause the FW to trigger a flash debug dump.
    5978	 * Resetting the card while the flash dump is in progress
    5979	 * can cause it not to recover; wait for it to finish.
    5980	 * Wait only for the first function, as this is needed only once
    5981	 * per adapter.
    5982	 */
   5983	if (pdev->devfn == 0)
   5984		ssleep(30);
   5985
   5986	return PCI_ERS_RESULT_NEED_RESET;
   5987}
   5988
   5989static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
   5990{
   5991	struct be_adapter *adapter = pci_get_drvdata(pdev);
   5992	int status;
   5993
   5994	dev_info(&adapter->pdev->dev, "EEH reset\n");
   5995
   5996	status = pci_enable_device(pdev);
   5997	if (status)
   5998		return PCI_ERS_RESULT_DISCONNECT;
   5999
   6000	pci_set_master(pdev);
   6001	pci_restore_state(pdev);
   6002
   6003	/* Check if card is ok and fw is ready */
   6004	dev_info(&adapter->pdev->dev,
   6005		 "Waiting for FW to be ready after EEH reset\n");
   6006	status = be_fw_wait_ready(adapter);
   6007	if (status)
   6008		return PCI_ERS_RESULT_DISCONNECT;
   6009
   6010	be_clear_error(adapter, BE_CLEAR_ALL);
   6011	return PCI_ERS_RESULT_RECOVERED;
   6012}
   6013
   6014static void be_eeh_resume(struct pci_dev *pdev)
   6015{
   6016	int status = 0;
   6017	struct be_adapter *adapter = pci_get_drvdata(pdev);
   6018
   6019	dev_info(&adapter->pdev->dev, "EEH resume\n");
   6020
   6021	pci_save_state(pdev);
   6022
   6023	status = be_resume(adapter);
   6024	if (status)
   6025		goto err;
   6026
   6027	be_roce_dev_add(adapter);
   6028
   6029	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
   6030	return;
   6031err:
   6032	dev_err(&adapter->pdev->dev, "EEH resume failed\n");
   6033}
   6034
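        /* Called by the PCI core when the standard SR-IOV sysfs attribute is
         * written, e.g. (path shown for an illustrative device address):
         *   echo 4 > /sys/bus/pci/devices/0000:04:00.0/sriov_numvfs
         * Writing 0 disables the VFs again (refused while VFs are assigned).
         */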
   6035static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
   6036{
   6037	struct be_adapter *adapter = pci_get_drvdata(pdev);
   6038	struct be_resources vft_res = {0};
   6039	int status;
   6040
   6041	if (!num_vfs)
   6042		be_vf_clear(adapter);
   6043
   6044	adapter->num_vfs = num_vfs;
   6045
   6046	if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
   6047		dev_warn(&pdev->dev,
   6048			 "Cannot disable VFs while they are assigned\n");
   6049		return -EBUSY;
   6050	}
   6051
    6052	/* When the HW is in an SR-IOV capable configuration, the PF-pool resources
    6053	 * are distributed equally across the maximum number of VFs. The user may
    6054	 * request that only a subset of the max VFs be enabled.
    6055	 * Based on num_vfs, redistribute the resources across num_vfs so that
    6056	 * each VF has access to a larger share of the resources.
    6057	 * This facility is not available in BE3 FW.
    6058	 * On Lancer chips this is done by the FW instead.
    6059	 */
   6060	if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
   6061		be_calculate_vf_res(adapter, adapter->num_vfs,
   6062				    &vft_res);
   6063		status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
   6064						 adapter->num_vfs, &vft_res);
   6065		if (status)
   6066			dev_err(&pdev->dev,
   6067				"Failed to optimize SR-IOV resources\n");
   6068	}
   6069
   6070	status = be_get_resources(adapter);
   6071	if (status)
   6072		return be_cmd_status(status);
   6073
   6074	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
   6075	rtnl_lock();
   6076	status = be_update_queues(adapter);
   6077	rtnl_unlock();
   6078	if (status)
   6079		return be_cmd_status(status);
   6080
   6081	if (adapter->num_vfs)
   6082		status = be_vf_setup(adapter);
   6083
   6084	if (!status)
   6085		return adapter->num_vfs;
   6086
   6087	return 0;
   6088}
   6089
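        /* PCI error (EEH/AER) recovery sequence: the core calls .error_detected
         * first, then .slot_reset after the slot/link has been reset, and
         * finally .resume to bring the interface back up.
         */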
   6090static const struct pci_error_handlers be_eeh_handlers = {
   6091	.error_detected = be_eeh_err_detected,
   6092	.slot_reset = be_eeh_reset,
   6093	.resume = be_eeh_resume,
   6094};
   6095
   6096static SIMPLE_DEV_PM_OPS(be_pci_pm_ops, be_suspend, be_pci_resume);
   6097
   6098static struct pci_driver be_driver = {
   6099	.name = DRV_NAME,
   6100	.id_table = be_dev_ids,
   6101	.probe = be_probe,
   6102	.remove = be_remove,
   6103	.driver.pm = &be_pci_pm_ops,
   6104	.shutdown = be_shutdown,
   6105	.sriov_configure = be_pci_sriov_configure,
   6106	.err_handler = &be_eeh_handlers
   6107};
   6108
   6109static int __init be_init_module(void)
   6110{
   6111	int status;
   6112
   6113	if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
   6114	    rx_frag_size != 2048) {
   6115		printk(KERN_WARNING DRV_NAME
   6116			" : Module param rx_frag_size must be 2048/4096/8192."
   6117			" Using 2048\n");
   6118		rx_frag_size = 2048;
   6119	}
   6120
   6121	if (num_vfs > 0) {
    6122		pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
   6123		pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
   6124	}
   6125
   6126	be_wq = create_singlethread_workqueue("be_wq");
   6127	if (!be_wq) {
    6128		pr_warn(DRV_NAME " : workqueue creation failed\n");
    6129		return -ENOMEM;
   6130	}
   6131
   6132	be_err_recovery_workq =
   6133		create_singlethread_workqueue("be_err_recover");
   6134	if (!be_err_recovery_workq)
    6135		pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
   6136
   6137	status = pci_register_driver(&be_driver);
   6138	if (status) {
   6139		destroy_workqueue(be_wq);
   6140		be_destroy_err_recovery_workq();
   6141	}
   6142	return status;
   6143}
   6144module_init(be_init_module);
   6145
   6146static void __exit be_exit_module(void)
   6147{
   6148	pci_unregister_driver(&be_driver);
   6149
   6150	be_destroy_err_recovery_workq();
   6151
   6152	if (be_wq)
   6153		destroy_workqueue(be_wq);
   6154}
   6155module_exit(be_exit_module);