nicvf_main.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
nicvf_main.c (60174B)
      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Copyright (C) 2015 Cavium, Inc.
      4 */
      5
      6#include <linux/module.h>
      7#include <linux/interrupt.h>
      8#include <linux/pci.h>
      9#include <linux/netdevice.h>
     10#include <linux/if_vlan.h>
     11#include <linux/etherdevice.h>
     12#include <linux/ethtool.h>
     13#include <linux/log2.h>
     14#include <linux/prefetch.h>
     15#include <linux/irq.h>
     16#include <linux/iommu.h>
     17#include <linux/bpf.h>
     18#include <linux/bpf_trace.h>
     19#include <linux/filter.h>
     20#include <linux/net_tstamp.h>
     21#include <linux/workqueue.h>
     22
     23#include "nic_reg.h"
     24#include "nic.h"
     25#include "nicvf_queues.h"
     26#include "thunder_bgx.h"
     27#include "../common/cavium_ptp.h"
     28
     29#define DRV_NAME	"nicvf"
     30#define DRV_VERSION	"1.0"
     31
     32/* NOTE: Packets bigger than 1530 are split across multiple pages and XDP needs
     33 * the buffer to be contiguous. Allow XDP to be set up only if we don't exceed
     34 * this value, keeping headroom for the 14 byte Ethernet header and two
     35 * VLAN tags (for QinQ)
     36 */
     37#define MAX_XDP_MTU	(1530 - ETH_HLEN - VLAN_HLEN * 2)
     38
     39/* Supported devices */
     40static const struct pci_device_id nicvf_id_table[] = {
     41	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
     42			 PCI_DEVICE_ID_THUNDER_NIC_VF,
     43			 PCI_VENDOR_ID_CAVIUM,
     44			 PCI_SUBSYS_DEVID_88XX_NIC_VF) },
     45	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
     46			 PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF,
     47			 PCI_VENDOR_ID_CAVIUM,
     48			 PCI_SUBSYS_DEVID_88XX_PASS1_NIC_VF) },
     49	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
     50			 PCI_DEVICE_ID_THUNDER_NIC_VF,
     51			 PCI_VENDOR_ID_CAVIUM,
     52			 PCI_SUBSYS_DEVID_81XX_NIC_VF) },
     53	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
     54			 PCI_DEVICE_ID_THUNDER_NIC_VF,
     55			 PCI_VENDOR_ID_CAVIUM,
     56			 PCI_SUBSYS_DEVID_83XX_NIC_VF) },
     57	{ 0, }  /* end of table */
     58};
     59
     60MODULE_AUTHOR("Sunil Goutham");
     61MODULE_DESCRIPTION("Cavium Thunder NIC Virtual Function Driver");
     62MODULE_LICENSE("GPL v2");
     63MODULE_VERSION(DRV_VERSION);
     64MODULE_DEVICE_TABLE(pci, nicvf_id_table);
     65
     66static int debug = 0x00;
     67module_param(debug, int, 0644);
     68MODULE_PARM_DESC(debug, "Debug message level bitmap");
     69
     70static int cpi_alg = CPI_ALG_NONE;
     71module_param(cpi_alg, int, 0444);
     72MODULE_PARM_DESC(cpi_alg,
     73		 "PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)");
     74
     75static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx)
     76{
     77	if (nic->sqs_mode)
     78		return qidx + ((nic->sqs_id + 1) * MAX_CMP_QUEUES_PER_QS);
     79	else
     80		return qidx;
     81}
     82
/* The Cavium ThunderX network controller can *only* be found in SoCs
 * containing the ThunderX ARM64 CPU implementation.  All accesses to the device
 * registers on this platform are implicitly strongly ordered with respect
 * to memory accesses, so writeq_relaxed() and readq_relaxed() are safe to use
 * with no memory barriers in this driver.  The readq()/writeq() functions add
 * explicit ordering operations which in this case are redundant and only
 * add overhead.
 */
     91
     92/* Register read/write APIs */
     93void nicvf_reg_write(struct nicvf *nic, u64 offset, u64 val)
     94{
     95	writeq_relaxed(val, nic->reg_base + offset);
     96}
     97
     98u64 nicvf_reg_read(struct nicvf *nic, u64 offset)
     99{
    100	return readq_relaxed(nic->reg_base + offset);
    101}
    102
    103void nicvf_queue_reg_write(struct nicvf *nic, u64 offset,
    104			   u64 qidx, u64 val)
    105{
    106	void __iomem *addr = nic->reg_base + offset;
    107
    108	writeq_relaxed(val, addr + (qidx << NIC_Q_NUM_SHIFT));
    109}
    110
    111u64 nicvf_queue_reg_read(struct nicvf *nic, u64 offset, u64 qidx)
    112{
    113	void __iomem *addr = nic->reg_base + offset;
    114
    115	return readq_relaxed(addr + (qidx << NIC_Q_NUM_SHIFT));
    116}
    117
    118/* VF -> PF mailbox communication */
    119static void nicvf_write_to_mbx(struct nicvf *nic, union nic_mbx *mbx)
    120{
    121	u64 *msg = (u64 *)mbx;
    122
    123	nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 0, msg[0]);
    124	nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 8, msg[1]);
    125}
    126
    127int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx)
    128{
    129	unsigned long timeout;
    130	int ret = 0;
    131
    132	mutex_lock(&nic->rx_mode_mtx);
    133
    134	nic->pf_acked = false;
    135	nic->pf_nacked = false;
    136
    137	nicvf_write_to_mbx(nic, mbx);
    138
    139	timeout = jiffies + msecs_to_jiffies(NIC_MBOX_MSG_TIMEOUT);
    140	/* Wait for previous message to be acked, timeout 2sec */
    141	while (!nic->pf_acked) {
    142		if (nic->pf_nacked) {
    143			netdev_err(nic->netdev,
    144				   "PF NACK to mbox msg 0x%02x from VF%d\n",
    145				   (mbx->msg.msg & 0xFF), nic->vf_id);
    146			ret = -EINVAL;
    147			break;
    148		}
    149		usleep_range(8000, 10000);
    150		if (nic->pf_acked)
    151			break;
    152		if (time_after(jiffies, timeout)) {
    153			netdev_err(nic->netdev,
    154				   "PF didn't ACK to mbox msg 0x%02x from VF%d\n",
    155				   (mbx->msg.msg & 0xFF), nic->vf_id);
    156			ret = -EBUSY;
    157			break;
    158		}
    159	}
    160	mutex_unlock(&nic->rx_mode_mtx);
    161	return ret;
    162}
    163
    164/* Checks if VF is able to comminicate with PF
    165* and also gets the VNIC number this VF is associated to.
    166*/
    167static int nicvf_check_pf_ready(struct nicvf *nic)
    168{
    169	union nic_mbx mbx = {};
    170
    171	mbx.msg.msg = NIC_MBOX_MSG_READY;
    172	if (nicvf_send_msg_to_pf(nic, &mbx)) {
    173		netdev_err(nic->netdev,
    174			   "PF didn't respond to READY msg\n");
    175		return 0;
    176	}
    177
    178	return 1;
    179}
    180
    181static void nicvf_send_cfg_done(struct nicvf *nic)
    182{
    183	union nic_mbx mbx = {};
    184
    185	mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE;
    186	if (nicvf_send_msg_to_pf(nic, &mbx)) {
    187		netdev_err(nic->netdev,
    188			   "PF didn't respond to CFG DONE msg\n");
    189	}
    190}
    191
    192static void nicvf_read_bgx_stats(struct nicvf *nic, struct bgx_stats_msg *bgx)
    193{
    194	if (bgx->rx)
    195		nic->bgx_stats.rx_stats[bgx->idx] = bgx->stats;
    196	else
    197		nic->bgx_stats.tx_stats[bgx->idx] = bgx->stats;
    198}
    199
    200static void  nicvf_handle_mbx_intr(struct nicvf *nic)
    201{
    202	union nic_mbx mbx = {};
    203	u64 *mbx_data;
    204	u64 mbx_addr;
    205	int i;
    206
    207	mbx_addr = NIC_VF_PF_MAILBOX_0_1;
    208	mbx_data = (u64 *)&mbx;
    209
    210	for (i = 0; i < NIC_PF_VF_MAILBOX_SIZE; i++) {
    211		*mbx_data = nicvf_reg_read(nic, mbx_addr);
    212		mbx_data++;
    213		mbx_addr += sizeof(u64);
    214	}
    215
    216	netdev_dbg(nic->netdev, "Mbox message: msg: 0x%x\n", mbx.msg.msg);
    217	switch (mbx.msg.msg) {
    218	case NIC_MBOX_MSG_READY:
    219		nic->pf_acked = true;
    220		nic->vf_id = mbx.nic_cfg.vf_id & 0x7F;
    221		nic->tns_mode = mbx.nic_cfg.tns_mode & 0x7F;
    222		nic->node = mbx.nic_cfg.node_id;
    223		if (!nic->set_mac_pending)
    224			eth_hw_addr_set(nic->netdev, mbx.nic_cfg.mac_addr);
    225		nic->sqs_mode = mbx.nic_cfg.sqs_mode;
    226		nic->loopback_supported = mbx.nic_cfg.loopback_supported;
    227		nic->link_up = false;
    228		nic->duplex = 0;
    229		nic->speed = 0;
    230		break;
    231	case NIC_MBOX_MSG_ACK:
    232		nic->pf_acked = true;
    233		break;
    234	case NIC_MBOX_MSG_NACK:
    235		nic->pf_nacked = true;
    236		break;
    237	case NIC_MBOX_MSG_RSS_SIZE:
    238		nic->rss_info.rss_size = mbx.rss_size.ind_tbl_size;
    239		nic->pf_acked = true;
    240		break;
    241	case NIC_MBOX_MSG_BGX_STATS:
    242		nicvf_read_bgx_stats(nic, &mbx.bgx_stats);
    243		nic->pf_acked = true;
    244		break;
    245	case NIC_MBOX_MSG_BGX_LINK_CHANGE:
    246		nic->pf_acked = true;
    247		if (nic->link_up != mbx.link_status.link_up) {
    248			nic->link_up = mbx.link_status.link_up;
    249			nic->duplex = mbx.link_status.duplex;
    250			nic->speed = mbx.link_status.speed;
    251			nic->mac_type = mbx.link_status.mac_type;
    252			if (nic->link_up) {
    253				netdev_info(nic->netdev,
    254					    "Link is Up %d Mbps %s duplex\n",
    255					    nic->speed,
    256					    nic->duplex == DUPLEX_FULL ?
    257					    "Full" : "Half");
    258				netif_carrier_on(nic->netdev);
    259				netif_tx_start_all_queues(nic->netdev);
    260			} else {
    261				netdev_info(nic->netdev, "Link is Down\n");
    262				netif_carrier_off(nic->netdev);
    263				netif_tx_stop_all_queues(nic->netdev);
    264			}
    265		}
    266		break;
    267	case NIC_MBOX_MSG_ALLOC_SQS:
    268		nic->sqs_count = mbx.sqs_alloc.qs_count;
    269		nic->pf_acked = true;
    270		break;
    271	case NIC_MBOX_MSG_SNICVF_PTR:
    272		/* Primary VF: make note of secondary VF's pointer
    273		 * to be used while packet transmission.
    274		 */
    275		nic->snicvf[mbx.nicvf.sqs_id] =
    276			(struct nicvf *)mbx.nicvf.nicvf;
    277		nic->pf_acked = true;
    278		break;
    279	case NIC_MBOX_MSG_PNICVF_PTR:
    280		/* Secondary VF/Qset: make note of primary VF's pointer
    281		 * to be used while packet reception, to handover packet
    282		 * to primary VF's netdev.
    283		 */
    284		nic->pnicvf = (struct nicvf *)mbx.nicvf.nicvf;
    285		nic->pf_acked = true;
    286		break;
    287	case NIC_MBOX_MSG_PFC:
    288		nic->pfc.autoneg = mbx.pfc.autoneg;
    289		nic->pfc.fc_rx = mbx.pfc.fc_rx;
    290		nic->pfc.fc_tx = mbx.pfc.fc_tx;
    291		nic->pf_acked = true;
    292		break;
    293	default:
    294		netdev_err(nic->netdev,
    295			   "Invalid message from PF, msg 0x%x\n", mbx.msg.msg);
    296		break;
    297	}
    298	nicvf_clear_intr(nic, NICVF_INTR_MBOX, 0);
    299}
    300
    301static int nicvf_hw_set_mac_addr(struct nicvf *nic, struct net_device *netdev)
    302{
    303	union nic_mbx mbx = {};
    304
    305	mbx.mac.msg = NIC_MBOX_MSG_SET_MAC;
    306	mbx.mac.vf_id = nic->vf_id;
    307	ether_addr_copy(mbx.mac.mac_addr, netdev->dev_addr);
    308
    309	return nicvf_send_msg_to_pf(nic, &mbx);
    310}
    311
    312static void nicvf_config_cpi(struct nicvf *nic)
    313{
    314	union nic_mbx mbx = {};
    315
    316	mbx.cpi_cfg.msg = NIC_MBOX_MSG_CPI_CFG;
    317	mbx.cpi_cfg.vf_id = nic->vf_id;
    318	mbx.cpi_cfg.cpi_alg = nic->cpi_alg;
    319	mbx.cpi_cfg.rq_cnt = nic->qs->rq_cnt;
    320
    321	nicvf_send_msg_to_pf(nic, &mbx);
    322}
    323
    324static void nicvf_get_rss_size(struct nicvf *nic)
    325{
    326	union nic_mbx mbx = {};
    327
    328	mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE;
    329	mbx.rss_size.vf_id = nic->vf_id;
    330	nicvf_send_msg_to_pf(nic, &mbx);
    331}
    332
    333void nicvf_config_rss(struct nicvf *nic)
    334{
    335	union nic_mbx mbx = {};
    336	struct nicvf_rss_info *rss = &nic->rss_info;
    337	int ind_tbl_len = rss->rss_size;
    338	int i, nextq = 0;
    339
    340	mbx.rss_cfg.vf_id = nic->vf_id;
    341	mbx.rss_cfg.hash_bits = rss->hash_bits;
    342	while (ind_tbl_len) {
    343		mbx.rss_cfg.tbl_offset = nextq;
    344		mbx.rss_cfg.tbl_len = min(ind_tbl_len,
    345					       RSS_IND_TBL_LEN_PER_MBX_MSG);
    346		mbx.rss_cfg.msg = mbx.rss_cfg.tbl_offset ?
    347			  NIC_MBOX_MSG_RSS_CFG_CONT : NIC_MBOX_MSG_RSS_CFG;
    348
    349		for (i = 0; i < mbx.rss_cfg.tbl_len; i++)
    350			mbx.rss_cfg.ind_tbl[i] = rss->ind_tbl[nextq++];
    351
    352		nicvf_send_msg_to_pf(nic, &mbx);
    353
    354		ind_tbl_len -= mbx.rss_cfg.tbl_len;
    355	}
    356}
    357
    358void nicvf_set_rss_key(struct nicvf *nic)
    359{
    360	struct nicvf_rss_info *rss = &nic->rss_info;
    361	u64 key_addr = NIC_VNIC_RSS_KEY_0_4;
    362	int idx;
    363
    364	for (idx = 0; idx < RSS_HASH_KEY_SIZE; idx++) {
    365		nicvf_reg_write(nic, key_addr, rss->key[idx]);
    366		key_addr += sizeof(u64);
    367	}
    368}
    369
    370static int nicvf_rss_init(struct nicvf *nic)
    371{
    372	struct nicvf_rss_info *rss = &nic->rss_info;
    373	int idx;
    374
    375	nicvf_get_rss_size(nic);
    376
    377	if (cpi_alg != CPI_ALG_NONE) {
    378		rss->enable = false;
    379		rss->hash_bits = 0;
    380		return 0;
    381	}
    382
    383	rss->enable = true;
    384
    385	netdev_rss_key_fill(rss->key, RSS_HASH_KEY_SIZE * sizeof(u64));
    386	nicvf_set_rss_key(nic);
    387
    388	rss->cfg = RSS_IP_HASH_ENA | RSS_TCP_HASH_ENA | RSS_UDP_HASH_ENA;
    389	nicvf_reg_write(nic, NIC_VNIC_RSS_CFG, rss->cfg);
    390
    391	rss->hash_bits =  ilog2(rounddown_pow_of_two(rss->rss_size));
    392
    393	for (idx = 0; idx < rss->rss_size; idx++)
    394		rss->ind_tbl[idx] = ethtool_rxfh_indir_default(idx,
    395							       nic->rx_queues);
    396	nicvf_config_rss(nic);
    397	return 1;
    398}
    399
    400/* Request PF to allocate additional Qsets */
    401static void nicvf_request_sqs(struct nicvf *nic)
    402{
    403	union nic_mbx mbx = {};
    404	int sqs;
    405	int sqs_count = nic->sqs_count;
    406	int rx_queues = 0, tx_queues = 0;
    407
    408	/* Only primary VF should request */
    409	if (nic->sqs_mode ||  !nic->sqs_count)
    410		return;
    411
    412	mbx.sqs_alloc.msg = NIC_MBOX_MSG_ALLOC_SQS;
    413	mbx.sqs_alloc.vf_id = nic->vf_id;
    414	mbx.sqs_alloc.qs_count = nic->sqs_count;
    415	if (nicvf_send_msg_to_pf(nic, &mbx)) {
    416		/* No response from PF */
    417		nic->sqs_count = 0;
    418		return;
    419	}
    420
    421	/* Return if no Secondary Qsets available */
    422	if (!nic->sqs_count)
    423		return;
    424
    425	if (nic->rx_queues > MAX_RCV_QUEUES_PER_QS)
    426		rx_queues = nic->rx_queues - MAX_RCV_QUEUES_PER_QS;
    427
    428	tx_queues = nic->tx_queues + nic->xdp_tx_queues;
    429	if (tx_queues > MAX_SND_QUEUES_PER_QS)
    430		tx_queues = tx_queues - MAX_SND_QUEUES_PER_QS;
    431
    432	/* Set no of Rx/Tx queues in each of the SQsets */
    433	for (sqs = 0; sqs < nic->sqs_count; sqs++) {
    434		mbx.nicvf.msg = NIC_MBOX_MSG_SNICVF_PTR;
    435		mbx.nicvf.vf_id = nic->vf_id;
    436		mbx.nicvf.sqs_id = sqs;
    437		nicvf_send_msg_to_pf(nic, &mbx);
    438
    439		nic->snicvf[sqs]->sqs_id = sqs;
    440		if (rx_queues > MAX_RCV_QUEUES_PER_QS) {
    441			nic->snicvf[sqs]->qs->rq_cnt = MAX_RCV_QUEUES_PER_QS;
    442			rx_queues -= MAX_RCV_QUEUES_PER_QS;
    443		} else {
    444			nic->snicvf[sqs]->qs->rq_cnt = rx_queues;
    445			rx_queues = 0;
    446		}
    447
    448		if (tx_queues > MAX_SND_QUEUES_PER_QS) {
    449			nic->snicvf[sqs]->qs->sq_cnt = MAX_SND_QUEUES_PER_QS;
    450			tx_queues -= MAX_SND_QUEUES_PER_QS;
    451		} else {
    452			nic->snicvf[sqs]->qs->sq_cnt = tx_queues;
    453			tx_queues = 0;
    454		}
    455
    456		nic->snicvf[sqs]->qs->cq_cnt =
    457		max(nic->snicvf[sqs]->qs->rq_cnt, nic->snicvf[sqs]->qs->sq_cnt);
    458
    459		/* Initialize secondary Qset's queues and its interrupts */
    460		nicvf_open(nic->snicvf[sqs]->netdev);
    461	}
    462
    463	/* Update stack with actual Rx/Tx queue count allocated */
    464	if (sqs_count != nic->sqs_count)
    465		nicvf_set_real_num_queues(nic->netdev,
    466					  nic->tx_queues, nic->rx_queues);
    467}
    468
    469/* Send this Qset's nicvf pointer to PF.
    470 * PF inturn sends primary VF's nicvf struct to secondary Qsets/VFs
    471 * so that packets received by these Qsets can use primary VF's netdev
    472 */
    473static void nicvf_send_vf_struct(struct nicvf *nic)
    474{
    475	union nic_mbx mbx = {};
    476
    477	mbx.nicvf.msg = NIC_MBOX_MSG_NICVF_PTR;
    478	mbx.nicvf.sqs_mode = nic->sqs_mode;
    479	mbx.nicvf.nicvf = (u64)nic;
    480	nicvf_send_msg_to_pf(nic, &mbx);
    481}
    482
    483static void nicvf_get_primary_vf_struct(struct nicvf *nic)
    484{
    485	union nic_mbx mbx = {};
    486
    487	mbx.nicvf.msg = NIC_MBOX_MSG_PNICVF_PTR;
    488	nicvf_send_msg_to_pf(nic, &mbx);
    489}
    490
    491int nicvf_set_real_num_queues(struct net_device *netdev,
    492			      int tx_queues, int rx_queues)
    493{
    494	int err = 0;
    495
    496	err = netif_set_real_num_tx_queues(netdev, tx_queues);
    497	if (err) {
    498		netdev_err(netdev,
    499			   "Failed to set no of Tx queues: %d\n", tx_queues);
    500		return err;
    501	}
    502
    503	err = netif_set_real_num_rx_queues(netdev, rx_queues);
    504	if (err)
    505		netdev_err(netdev,
    506			   "Failed to set no of Rx queues: %d\n", rx_queues);
    507	return err;
    508}
    509
    510static int nicvf_init_resources(struct nicvf *nic)
    511{
    512	int err;
    513
    514	/* Enable Qset */
    515	nicvf_qset_config(nic, true);
    516
    517	/* Initialize queues and HW for data transfer */
    518	err = nicvf_config_data_transfer(nic, true);
    519	if (err) {
    520		netdev_err(nic->netdev,
    521			   "Failed to alloc/config VF's QSet resources\n");
    522		return err;
    523	}
    524
    525	return 0;
    526}
    527
    528static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
    529				struct cqe_rx_t *cqe_rx, struct snd_queue *sq,
    530				struct rcv_queue *rq, struct sk_buff **skb)
    531{
    532	unsigned char *hard_start, *data;
    533	struct xdp_buff xdp;
    534	struct page *page;
    535	u32 action;
    536	u16 len, offset = 0;
    537	u64 dma_addr, cpu_addr;
    538	void *orig_data;
    539
    540	/* Retrieve packet buffer's DMA address and length */
    541	len = *((u16 *)((void *)cqe_rx + (3 * sizeof(u64))));
    542	dma_addr = *((u64 *)((void *)cqe_rx + (7 * sizeof(u64))));
    543
    544	cpu_addr = nicvf_iova_to_phys(nic, dma_addr);
    545	if (!cpu_addr)
    546		return false;
    547	cpu_addr = (u64)phys_to_virt(cpu_addr);
    548	page = virt_to_page((void *)cpu_addr);
    549
    550	xdp_init_buff(&xdp, RCV_FRAG_LEN + XDP_PACKET_HEADROOM,
    551		      &rq->xdp_rxq);
    552	hard_start = page_address(page);
    553	data = (unsigned char *)cpu_addr;
    554	xdp_prepare_buff(&xdp, hard_start, data - hard_start, len, false);
    555	orig_data = xdp.data;
    556
    557	action = bpf_prog_run_xdp(prog, &xdp);
    558
    559	len = xdp.data_end - xdp.data;
    560	/* Check if XDP program has changed headers */
    561	if (orig_data != xdp.data) {
    562		offset = orig_data - xdp.data;
    563		dma_addr -= offset;
    564	}
    565
    566	switch (action) {
    567	case XDP_PASS:
    568		/* Check if it's a recycled page, if not
    569		 * unmap the DMA mapping.
    570		 *
    571		 * Recycled page holds an extra reference.
    572		 */
    573		if (page_ref_count(page) == 1) {
    574			dma_addr &= PAGE_MASK;
    575			dma_unmap_page_attrs(&nic->pdev->dev, dma_addr,
    576					     RCV_FRAG_LEN + XDP_PACKET_HEADROOM,
    577					     DMA_FROM_DEVICE,
    578					     DMA_ATTR_SKIP_CPU_SYNC);
    579		}
    580
    581		/* Build SKB and pass on packet to network stack */
    582		*skb = build_skb(xdp.data,
    583				 RCV_FRAG_LEN - cqe_rx->align_pad + offset);
    584		if (!*skb)
    585			put_page(page);
    586		else
    587			skb_put(*skb, len);
    588		return false;
    589	case XDP_TX:
    590		nicvf_xdp_sq_append_pkt(nic, sq, (u64)xdp.data, dma_addr, len);
    591		return true;
    592	default:
    593		bpf_warn_invalid_xdp_action(nic->netdev, prog, action);
    594		fallthrough;
    595	case XDP_ABORTED:
    596		trace_xdp_exception(nic->netdev, prog, action);
    597		fallthrough;
    598	case XDP_DROP:
    599		/* Check if it's a recycled page, if not
    600		 * unmap the DMA mapping.
    601		 *
    602		 * Recycled page holds an extra reference.
    603		 */
    604		if (page_ref_count(page) == 1) {
    605			dma_addr &= PAGE_MASK;
    606			dma_unmap_page_attrs(&nic->pdev->dev, dma_addr,
    607					     RCV_FRAG_LEN + XDP_PACKET_HEADROOM,
    608					     DMA_FROM_DEVICE,
    609					     DMA_ATTR_SKIP_CPU_SYNC);
    610		}
    611		put_page(page);
    612		return true;
    613	}
    614	return false;
    615}
    616
    617static void nicvf_snd_ptp_handler(struct net_device *netdev,
    618				  struct cqe_send_t *cqe_tx)
    619{
    620	struct nicvf *nic = netdev_priv(netdev);
    621	struct skb_shared_hwtstamps ts;
    622	u64 ns;
    623
    624	nic = nic->pnicvf;
    625
    626	/* Sync for 'ptp_skb' */
    627	smp_rmb();
    628
    629	/* New timestamp request can be queued now */
    630	atomic_set(&nic->tx_ptp_skbs, 0);
    631
    632	/* Check for timestamp requested skb */
    633	if (!nic->ptp_skb)
    634		return;
    635
    636	/* Check if timestamping is timedout, which is set to 10us */
    637	if (cqe_tx->send_status == CQ_TX_ERROP_TSTMP_TIMEOUT ||
    638	    cqe_tx->send_status == CQ_TX_ERROP_TSTMP_CONFLICT)
    639		goto no_tstamp;
    640
    641	/* Get the timestamp */
    642	memset(&ts, 0, sizeof(ts));
    643	ns = cavium_ptp_tstamp2time(nic->ptp_clock, cqe_tx->ptp_timestamp);
    644	ts.hwtstamp = ns_to_ktime(ns);
    645	skb_tstamp_tx(nic->ptp_skb, &ts);
    646
    647no_tstamp:
    648	/* Free the original skb */
    649	dev_kfree_skb_any(nic->ptp_skb);
    650	nic->ptp_skb = NULL;
    651	/* Sync 'ptp_skb' */
    652	smp_wmb();
    653}
    654
    655static void nicvf_snd_pkt_handler(struct net_device *netdev,
    656				  struct cqe_send_t *cqe_tx,
    657				  int budget, int *subdesc_cnt,
    658				  unsigned int *tx_pkts, unsigned int *tx_bytes)
    659{
    660	struct sk_buff *skb = NULL;
    661	struct page *page;
    662	struct nicvf *nic = netdev_priv(netdev);
    663	struct snd_queue *sq;
    664	struct sq_hdr_subdesc *hdr;
    665	struct sq_hdr_subdesc *tso_sqe;
    666
    667	sq = &nic->qs->sq[cqe_tx->sq_idx];
    668
    669	hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, cqe_tx->sqe_ptr);
    670	if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER)
    671		return;
    672
    673	/* Check for errors */
    674	if (cqe_tx->send_status)
    675		nicvf_check_cqe_tx_errs(nic->pnicvf, cqe_tx);
    676
    677	/* Is this a XDP designated Tx queue */
    678	if (sq->is_xdp) {
    679		page = (struct page *)sq->xdp_page[cqe_tx->sqe_ptr];
    680		/* Check if it's recycled page or else unmap DMA mapping */
    681		if (page && (page_ref_count(page) == 1))
    682			nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr,
    683						 hdr->subdesc_cnt);
    684
    685		/* Release page reference for recycling */
    686		if (page)
    687			put_page(page);
    688		sq->xdp_page[cqe_tx->sqe_ptr] = (u64)NULL;
    689		*subdesc_cnt += hdr->subdesc_cnt + 1;
    690		return;
    691	}
    692
    693	skb = (struct sk_buff *)sq->skbuff[cqe_tx->sqe_ptr];
    694	if (skb) {
    695		/* Check for dummy descriptor used for HW TSO offload on 88xx */
    696		if (hdr->dont_send) {
    697			/* Get actual TSO descriptors and free them */
    698			tso_sqe =
    699			 (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2);
    700			nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2,
    701						 tso_sqe->subdesc_cnt);
    702			*subdesc_cnt += tso_sqe->subdesc_cnt + 1;
    703		} else {
    704			nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr,
    705						 hdr->subdesc_cnt);
    706		}
    707		*subdesc_cnt += hdr->subdesc_cnt + 1;
    708		prefetch(skb);
    709		(*tx_pkts)++;
    710		*tx_bytes += skb->len;
    711		/* If timestamp is requested for this skb, don't free it */
    712		if (skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS &&
    713		    !nic->pnicvf->ptp_skb)
    714			nic->pnicvf->ptp_skb = skb;
    715		else
    716			napi_consume_skb(skb, budget);
    717		sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL;
    718	} else {
    719		/* In case of SW TSO on 88xx, only last segment will have
    720		 * a SKB attached, so just free SQEs here.
    721		 */
    722		if (!nic->hw_tso)
    723			*subdesc_cnt += hdr->subdesc_cnt + 1;
    724	}
    725}
    726
    727static inline void nicvf_set_rxhash(struct net_device *netdev,
    728				    struct cqe_rx_t *cqe_rx,
    729				    struct sk_buff *skb)
    730{
    731	u8 hash_type;
    732	u32 hash;
    733
    734	if (!(netdev->features & NETIF_F_RXHASH))
    735		return;
    736
    737	switch (cqe_rx->rss_alg) {
    738	case RSS_ALG_TCP_IP:
    739	case RSS_ALG_UDP_IP:
    740		hash_type = PKT_HASH_TYPE_L4;
    741		hash = cqe_rx->rss_tag;
    742		break;
    743	case RSS_ALG_IP:
    744		hash_type = PKT_HASH_TYPE_L3;
    745		hash = cqe_rx->rss_tag;
    746		break;
    747	default:
    748		hash_type = PKT_HASH_TYPE_NONE;
    749		hash = 0;
    750	}
    751
    752	skb_set_hash(skb, hash, hash_type);
    753}
    754
    755static inline void nicvf_set_rxtstamp(struct nicvf *nic, struct sk_buff *skb)
    756{
    757	u64 ns;
    758
    759	if (!nic->ptp_clock || !nic->hw_rx_tstamp)
    760		return;
    761
    762	/* The first 8 bytes is the timestamp */
    763	ns = cavium_ptp_tstamp2time(nic->ptp_clock,
    764				    be64_to_cpu(*(__be64 *)skb->data));
    765	skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(ns);
    766
    767	__skb_pull(skb, 8);
    768}
    769
    770static void nicvf_rcv_pkt_handler(struct net_device *netdev,
    771				  struct napi_struct *napi,
    772				  struct cqe_rx_t *cqe_rx,
    773				  struct snd_queue *sq, struct rcv_queue *rq)
    774{
    775	struct sk_buff *skb = NULL;
    776	struct nicvf *nic = netdev_priv(netdev);
    777	struct nicvf *snic = nic;
    778	int err = 0;
    779	int rq_idx;
    780
    781	rq_idx = nicvf_netdev_qidx(nic, cqe_rx->rq_idx);
    782
    783	if (nic->sqs_mode) {
    784		/* Use primary VF's 'nicvf' struct */
    785		nic = nic->pnicvf;
    786		netdev = nic->netdev;
    787	}
    788
    789	/* Check for errors */
    790	if (cqe_rx->err_level || cqe_rx->err_opcode) {
    791		err = nicvf_check_cqe_rx_errs(nic, cqe_rx);
    792		if (err && !cqe_rx->rb_cnt)
    793			return;
    794	}
    795
    796	/* For XDP, ignore pkts spanning multiple pages */
    797	if (nic->xdp_prog && (cqe_rx->rb_cnt == 1)) {
    798		/* Packet consumed by XDP */
    799		if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq, rq, &skb))
    800			return;
    801	} else {
    802		skb = nicvf_get_rcv_skb(snic, cqe_rx,
    803					nic->xdp_prog ? true : false);
    804	}
    805
    806	if (!skb)
    807		return;
    808
    809	if (netif_msg_pktdata(nic)) {
    810		netdev_info(nic->netdev, "skb 0x%p, len=%d\n", skb, skb->len);
    811		print_hex_dump(KERN_INFO, "", DUMP_PREFIX_OFFSET, 16, 1,
    812			       skb->data, skb->len, true);
    813	}
    814
    815	/* If error packet, drop it here */
    816	if (err) {
    817		dev_kfree_skb_any(skb);
    818		return;
    819	}
    820
    821	nicvf_set_rxtstamp(nic, skb);
    822	nicvf_set_rxhash(netdev, cqe_rx, skb);
    823
    824	skb_record_rx_queue(skb, rq_idx);
    825	if (netdev->hw_features & NETIF_F_RXCSUM) {
    826		/* HW by default verifies TCP/UDP/SCTP checksums */
    827		skb->ip_summed = CHECKSUM_UNNECESSARY;
    828	} else {
    829		skb_checksum_none_assert(skb);
    830	}
    831
    832	skb->protocol = eth_type_trans(skb, netdev);
    833
    834	/* Check for stripped VLAN */
    835	if (cqe_rx->vlan_found && cqe_rx->vlan_stripped)
    836		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
    837				       ntohs((__force __be16)cqe_rx->vlan_tci));
    838
    839	if (napi && (netdev->features & NETIF_F_GRO))
    840		napi_gro_receive(napi, skb);
    841	else
    842		netif_receive_skb(skb);
    843}
    844
    845static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx,
    846				 struct napi_struct *napi, int budget)
    847{
    848	int processed_cqe, work_done = 0, tx_done = 0;
    849	int cqe_count, cqe_head;
    850	int subdesc_cnt = 0;
    851	struct nicvf *nic = netdev_priv(netdev);
    852	struct queue_set *qs = nic->qs;
    853	struct cmp_queue *cq = &qs->cq[cq_idx];
    854	struct cqe_rx_t *cq_desc;
    855	struct netdev_queue *txq;
    856	struct snd_queue *sq = &qs->sq[cq_idx];
    857	struct rcv_queue *rq = &qs->rq[cq_idx];
    858	unsigned int tx_pkts = 0, tx_bytes = 0, txq_idx;
    859
    860	spin_lock_bh(&cq->lock);
    861loop:
    862	processed_cqe = 0;
    863	/* Get no of valid CQ entries to process */
    864	cqe_count = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, cq_idx);
    865	cqe_count &= CQ_CQE_COUNT;
    866	if (!cqe_count)
    867		goto done;
    868
    869	/* Get head of the valid CQ entries */
    870	cqe_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, cq_idx) >> 9;
    871	cqe_head &= 0xFFFF;
    872
    873	while (processed_cqe < cqe_count) {
    874		/* Get the CQ descriptor */
    875		cq_desc = (struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head);
    876		cqe_head++;
    877		cqe_head &= (cq->dmem.q_len - 1);
    878		/* Initiate prefetch for next descriptor */
    879		prefetch((struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head));
    880
    881		if ((work_done >= budget) && napi &&
    882		    (cq_desc->cqe_type != CQE_TYPE_SEND)) {
    883			break;
    884		}
    885
    886		switch (cq_desc->cqe_type) {
    887		case CQE_TYPE_RX:
    888			nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq, rq);
    889			work_done++;
    890		break;
    891		case CQE_TYPE_SEND:
    892			nicvf_snd_pkt_handler(netdev, (void *)cq_desc,
    893					      budget, &subdesc_cnt,
    894					      &tx_pkts, &tx_bytes);
    895			tx_done++;
    896		break;
    897		case CQE_TYPE_SEND_PTP:
    898			nicvf_snd_ptp_handler(netdev, (void *)cq_desc);
    899		break;
    900		case CQE_TYPE_INVALID:
    901		case CQE_TYPE_RX_SPLIT:
    902		case CQE_TYPE_RX_TCP:
    903			/* Ignore for now */
    904		break;
    905		}
    906		processed_cqe++;
    907	}
    908
    909	/* Ring doorbell to inform H/W to reuse processed CQEs */
    910	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_DOOR,
    911			      cq_idx, processed_cqe);
    912
    913	if ((work_done < budget) && napi)
    914		goto loop;
    915
    916done:
    917	/* Update SQ's descriptor free count */
    918	if (subdesc_cnt)
    919		nicvf_put_sq_desc(sq, subdesc_cnt);
    920
    921	txq_idx = nicvf_netdev_qidx(nic, cq_idx);
    922	/* Handle XDP TX queues */
    923	if (nic->pnicvf->xdp_prog) {
    924		if (txq_idx < nic->pnicvf->xdp_tx_queues) {
    925			nicvf_xdp_sq_doorbell(nic, sq, cq_idx);
    926			goto out;
    927		}
    928		nic = nic->pnicvf;
    929		txq_idx -= nic->pnicvf->xdp_tx_queues;
    930	}
    931
    932	/* Wakeup TXQ if its stopped earlier due to SQ full */
    933	if (tx_done ||
    934	    (atomic_read(&sq->free_cnt) >= MIN_SQ_DESC_PER_PKT_XMIT)) {
    935		netdev = nic->pnicvf->netdev;
    936		txq = netdev_get_tx_queue(netdev, txq_idx);
    937		if (tx_pkts)
    938			netdev_tx_completed_queue(txq, tx_pkts, tx_bytes);
    939
    940		/* To read updated queue and carrier status */
    941		smp_mb();
    942		if (netif_tx_queue_stopped(txq) && netif_carrier_ok(netdev)) {
    943			netif_tx_wake_queue(txq);
    944			nic = nic->pnicvf;
    945			this_cpu_inc(nic->drv_stats->txq_wake);
    946			netif_warn(nic, tx_err, netdev,
    947				   "Transmit queue wakeup SQ%d\n", txq_idx);
    948		}
    949	}
    950
    951out:
    952	spin_unlock_bh(&cq->lock);
    953	return work_done;
    954}
    955
    956static int nicvf_poll(struct napi_struct *napi, int budget)
    957{
    958	u64  cq_head;
    959	int  work_done = 0;
    960	struct net_device *netdev = napi->dev;
    961	struct nicvf *nic = netdev_priv(netdev);
    962	struct nicvf_cq_poll *cq;
    963
    964	cq = container_of(napi, struct nicvf_cq_poll, napi);
    965	work_done = nicvf_cq_intr_handler(netdev, cq->cq_idx, napi, budget);
    966
    967	if (work_done < budget) {
    968		/* Slow packet rate, exit polling */
    969		napi_complete_done(napi, work_done);
    970		/* Re-enable interrupts */
    971		cq_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD,
    972					       cq->cq_idx);
    973		nicvf_clear_intr(nic, NICVF_INTR_CQ, cq->cq_idx);
    974		nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_HEAD,
    975				      cq->cq_idx, cq_head);
    976		nicvf_enable_intr(nic, NICVF_INTR_CQ, cq->cq_idx);
    977	}
    978	return work_done;
    979}
    980
    981/* Qset error interrupt handler
    982 *
    983 * As of now only CQ errors are handled
    984 */
    985static void nicvf_handle_qs_err(struct tasklet_struct *t)
    986{
    987	struct nicvf *nic = from_tasklet(nic, t, qs_err_task);
    988	struct queue_set *qs = nic->qs;
    989	int qidx;
    990	u64 status;
    991
    992	netif_tx_disable(nic->netdev);
    993
    994	/* Check if it is CQ err */
    995	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
    996		status = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS,
    997					      qidx);
    998		if (!(status & CQ_ERR_MASK))
    999			continue;
   1000		/* Process already queued CQEs and reconfig CQ */
   1001		nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
   1002		nicvf_sq_disable(nic, qidx);
   1003		nicvf_cq_intr_handler(nic->netdev, qidx, NULL, 0);
   1004		nicvf_cmp_queue_config(nic, qs, qidx, true);
   1005		nicvf_sq_free_used_descs(nic->netdev, &qs->sq[qidx], qidx);
   1006		nicvf_sq_enable(nic, &qs->sq[qidx], qidx);
   1007
   1008		nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx);
   1009	}
   1010
   1011	netif_tx_start_all_queues(nic->netdev);
   1012	/* Re-enable Qset error interrupt */
   1013	nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0);
   1014}
   1015
   1016static void nicvf_dump_intr_status(struct nicvf *nic)
   1017{
   1018	netif_info(nic, intr, nic->netdev, "interrupt status 0x%llx\n",
   1019		   nicvf_reg_read(nic, NIC_VF_INT));
   1020}
   1021
   1022static irqreturn_t nicvf_misc_intr_handler(int irq, void *nicvf_irq)
   1023{
   1024	struct nicvf *nic = (struct nicvf *)nicvf_irq;
   1025	u64 intr;
   1026
   1027	nicvf_dump_intr_status(nic);
   1028
   1029	intr = nicvf_reg_read(nic, NIC_VF_INT);
   1030	/* Check for spurious interrupt */
   1031	if (!(intr & NICVF_INTR_MBOX_MASK))
   1032		return IRQ_HANDLED;
   1033
   1034	nicvf_handle_mbx_intr(nic);
   1035
   1036	return IRQ_HANDLED;
   1037}
   1038
   1039static irqreturn_t nicvf_intr_handler(int irq, void *cq_irq)
   1040{
   1041	struct nicvf_cq_poll *cq_poll = (struct nicvf_cq_poll *)cq_irq;
   1042	struct nicvf *nic = cq_poll->nicvf;
   1043	int qidx = cq_poll->cq_idx;
   1044
   1045	nicvf_dump_intr_status(nic);
   1046
   1047	/* Disable interrupts */
   1048	nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
   1049
   1050	/* Schedule NAPI */
   1051	napi_schedule_irqoff(&cq_poll->napi);
   1052
   1053	/* Clear interrupt */
   1054	nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx);
   1055
   1056	return IRQ_HANDLED;
   1057}
   1058
   1059static irqreturn_t nicvf_rbdr_intr_handler(int irq, void *nicvf_irq)
   1060{
   1061	struct nicvf *nic = (struct nicvf *)nicvf_irq;
   1062	u8 qidx;
   1063
   1064
   1065	nicvf_dump_intr_status(nic);
   1066
   1067	/* Disable RBDR interrupt and schedule softirq */
   1068	for (qidx = 0; qidx < nic->qs->rbdr_cnt; qidx++) {
   1069		if (!nicvf_is_intr_enabled(nic, NICVF_INTR_RBDR, qidx))
   1070			continue;
   1071		nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx);
   1072		tasklet_hi_schedule(&nic->rbdr_task);
   1073		/* Clear interrupt */
   1074		nicvf_clear_intr(nic, NICVF_INTR_RBDR, qidx);
   1075	}
   1076
   1077	return IRQ_HANDLED;
   1078}
   1079
   1080static irqreturn_t nicvf_qs_err_intr_handler(int irq, void *nicvf_irq)
   1081{
   1082	struct nicvf *nic = (struct nicvf *)nicvf_irq;
   1083
   1084	nicvf_dump_intr_status(nic);
   1085
   1086	/* Disable Qset err interrupt and schedule softirq */
   1087	nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0);
   1088	tasklet_hi_schedule(&nic->qs_err_task);
   1089	nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0);
   1090
   1091	return IRQ_HANDLED;
   1092}
   1093
   1094static void nicvf_set_irq_affinity(struct nicvf *nic)
   1095{
   1096	int vec, cpu;
   1097
   1098	for (vec = 0; vec < nic->num_vec; vec++) {
   1099		if (!nic->irq_allocated[vec])
   1100			continue;
   1101
   1102		if (!zalloc_cpumask_var(&nic->affinity_mask[vec], GFP_KERNEL))
   1103			return;
   1104		 /* CQ interrupts */
   1105		if (vec < NICVF_INTR_ID_SQ)
   1106			/* Leave CPU0 for RBDR and other interrupts */
   1107			cpu = nicvf_netdev_qidx(nic, vec) + 1;
   1108		else
   1109			cpu = 0;
   1110
   1111		cpumask_set_cpu(cpumask_local_spread(cpu, nic->node),
   1112				nic->affinity_mask[vec]);
   1113		irq_set_affinity_hint(pci_irq_vector(nic->pdev, vec),
   1114				      nic->affinity_mask[vec]);
   1115	}
   1116}
   1117
   1118static int nicvf_register_interrupts(struct nicvf *nic)
   1119{
   1120	int irq, ret = 0;
   1121
   1122	for_each_cq_irq(irq)
   1123		sprintf(nic->irq_name[irq], "%s-rxtx-%d",
   1124			nic->pnicvf->netdev->name,
   1125			nicvf_netdev_qidx(nic, irq));
   1126
   1127	for_each_sq_irq(irq)
   1128		sprintf(nic->irq_name[irq], "%s-sq-%d",
   1129			nic->pnicvf->netdev->name,
   1130			nicvf_netdev_qidx(nic, irq - NICVF_INTR_ID_SQ));
   1131
   1132	for_each_rbdr_irq(irq)
   1133		sprintf(nic->irq_name[irq], "%s-rbdr-%d",
   1134			nic->pnicvf->netdev->name,
   1135			nic->sqs_mode ? (nic->sqs_id + 1) : 0);
   1136
   1137	/* Register CQ interrupts */
   1138	for (irq = 0; irq < nic->qs->cq_cnt; irq++) {
   1139		ret = request_irq(pci_irq_vector(nic->pdev, irq),
   1140				  nicvf_intr_handler,
   1141				  0, nic->irq_name[irq], nic->napi[irq]);
   1142		if (ret)
   1143			goto err;
   1144		nic->irq_allocated[irq] = true;
   1145	}
   1146
   1147	/* Register RBDR interrupt */
   1148	for (irq = NICVF_INTR_ID_RBDR;
   1149	     irq < (NICVF_INTR_ID_RBDR + nic->qs->rbdr_cnt); irq++) {
   1150		ret = request_irq(pci_irq_vector(nic->pdev, irq),
   1151				  nicvf_rbdr_intr_handler,
   1152				  0, nic->irq_name[irq], nic);
   1153		if (ret)
   1154			goto err;
   1155		nic->irq_allocated[irq] = true;
   1156	}
   1157
   1158	/* Register QS error interrupt */
   1159	sprintf(nic->irq_name[NICVF_INTR_ID_QS_ERR], "%s-qset-err-%d",
   1160		nic->pnicvf->netdev->name,
   1161		nic->sqs_mode ? (nic->sqs_id + 1) : 0);
   1162	irq = NICVF_INTR_ID_QS_ERR;
   1163	ret = request_irq(pci_irq_vector(nic->pdev, irq),
   1164			  nicvf_qs_err_intr_handler,
   1165			  0, nic->irq_name[irq], nic);
   1166	if (ret)
   1167		goto err;
   1168
   1169	nic->irq_allocated[irq] = true;
   1170
   1171	/* Set IRQ affinities */
   1172	nicvf_set_irq_affinity(nic);
   1173
   1174err:
   1175	if (ret)
   1176		netdev_err(nic->netdev, "request_irq failed, vector %d\n", irq);
   1177
   1178	return ret;
   1179}
   1180
   1181static void nicvf_unregister_interrupts(struct nicvf *nic)
   1182{
   1183	struct pci_dev *pdev = nic->pdev;
   1184	int irq;
   1185
   1186	/* Free registered interrupts */
   1187	for (irq = 0; irq < nic->num_vec; irq++) {
   1188		if (!nic->irq_allocated[irq])
   1189			continue;
   1190
   1191		irq_set_affinity_hint(pci_irq_vector(pdev, irq), NULL);
   1192		free_cpumask_var(nic->affinity_mask[irq]);
   1193
   1194		if (irq < NICVF_INTR_ID_SQ)
   1195			free_irq(pci_irq_vector(pdev, irq), nic->napi[irq]);
   1196		else
   1197			free_irq(pci_irq_vector(pdev, irq), nic);
   1198
   1199		nic->irq_allocated[irq] = false;
   1200	}
   1201
   1202	/* Disable MSI-X */
   1203	pci_free_irq_vectors(pdev);
   1204	nic->num_vec = 0;
   1205}
   1206
   1207/* Initialize MSIX vectors and register MISC interrupt.
   1208 * Send READY message to PF to check if its alive
   1209 */
   1210static int nicvf_register_misc_interrupt(struct nicvf *nic)
   1211{
   1212	int ret = 0;
   1213	int irq = NICVF_INTR_ID_MISC;
   1214
   1215	/* Return if mailbox interrupt is already registered */
   1216	if (nic->pdev->msix_enabled)
   1217		return 0;
   1218
   1219	/* Enable MSI-X */
   1220	nic->num_vec = pci_msix_vec_count(nic->pdev);
   1221	ret = pci_alloc_irq_vectors(nic->pdev, nic->num_vec, nic->num_vec,
   1222				    PCI_IRQ_MSIX);
   1223	if (ret < 0) {
   1224		netdev_err(nic->netdev,
   1225			   "Req for #%d msix vectors failed\n", nic->num_vec);
   1226		return ret;
   1227	}
   1228
   1229	sprintf(nic->irq_name[irq], "%s Mbox", "NICVF");
   1230	/* Register Misc interrupt */
   1231	ret = request_irq(pci_irq_vector(nic->pdev, irq),
   1232			  nicvf_misc_intr_handler, 0, nic->irq_name[irq], nic);
   1233
   1234	if (ret)
   1235		return ret;
   1236	nic->irq_allocated[irq] = true;
   1237
   1238	/* Enable mailbox interrupt */
   1239	nicvf_enable_intr(nic, NICVF_INTR_MBOX, 0);
   1240
   1241	/* Check if VF is able to communicate with PF */
   1242	if (!nicvf_check_pf_ready(nic)) {
   1243		nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
   1244		nicvf_unregister_interrupts(nic);
   1245		return -EIO;
   1246	}
   1247
   1248	return 0;
   1249}
   1250
/* Transmit entry point: queue @skb on the send queue that backs the
 * stack's TX queue mapping.
 *
 * Packets no longer than an Ethernet header are dropped. When an XDP
 * program is attached the queue index is shifted past the XDP TX queues.
 * Indices at or beyond MAX_SND_QUEUES_PER_QS are redirected to the
 * matching secondary Qset.
 *
 * Returns NETDEV_TX_BUSY when the descriptor could not be appended,
 * NETDEV_TX_OK otherwise (including the drop paths).
 */
static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev)
{
	struct nicvf *nic = netdev_priv(netdev);
	int qid = skb_get_queue_mapping(skb);
	struct netdev_queue *txq = netdev_get_tx_queue(netdev, qid);
	struct nicvf *snic;
	struct snd_queue *sq;
	int tmp;

	/* Check for minimum packet length */
	if (skb->len <= ETH_HLEN) {
		dev_kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	/* In XDP case, initial HW tx queues are used for XDP,
	 * but stack's queue mapping starts at '0', so skip the
	 * Tx queues attached to Rx queues for XDP.
	 */
	if (nic->xdp_prog)
		qid += nic->xdp_tx_queues;

	snic = nic;
	/* Get secondary Qset's SQ structure */
	if (qid >= MAX_SND_QUEUES_PER_QS) {
		/* tmp is the 1-based secondary Qset number */
		tmp = qid / MAX_SND_QUEUES_PER_QS;
		snic = (struct nicvf *)nic->snicvf[tmp - 1];
		if (!snic) {
			netdev_warn(nic->netdev,
				    "Secondary Qset#%d's ptr not initialized\n",
				    tmp - 1);
			dev_kfree_skb(skb);
			return NETDEV_TX_OK;
		}
		qid = qid % MAX_SND_QUEUES_PER_QS;
	}

	sq = &snic->qs->sq[qid];
	/* NOTE(review): when the queue is already stopped the first operand
	 * short-circuits, so the skb is neither appended nor freed and the
	 * function still returns NETDEV_TX_OK below - confirm that callers
	 * can never reach this with a stopped queue.
	 */
	if (!netif_tx_queue_stopped(txq) &&
	    !nicvf_sq_append_skb(snic, sq, skb, qid)) {
		netif_tx_stop_queue(txq);

		/* Barrier, so that stop_queue visible to other cpus */
		smp_mb();

		/* Check again, in case another cpu freed descriptors */
		if (atomic_read(&sq->free_cnt) > MIN_SQ_DESC_PER_PKT_XMIT) {
			netif_tx_wake_queue(txq);
		} else {
			this_cpu_inc(nic->drv_stats->txq_stop);
			netif_warn(nic, tx_err, netdev,
				   "Transmit ring full, stopping SQ%d\n", qid);
		}
		return NETDEV_TX_BUSY;
	}

	return NETDEV_TX_OK;
}
   1309
   1310static inline void nicvf_free_cq_poll(struct nicvf *nic)
   1311{
   1312	struct nicvf_cq_poll *cq_poll;
   1313	int qidx;
   1314
   1315	for (qidx = 0; qidx < nic->qs->cq_cnt; qidx++) {
   1316		cq_poll = nic->napi[qidx];
   1317		if (!cq_poll)
   1318			continue;
   1319		nic->napi[qidx] = NULL;
   1320		kfree(cq_poll);
   1321	}
   1322}
   1323
/* Bring the interface down.
 *
 * Sends a SHUTDOWN message to the PF, stops the TX queues, recursively
 * stops any secondary Qsets (primary VF only), quiesces interrupts,
 * tasklets and NAPI, releases queue resources and unregisters all
 * interrupts.
 *
 * Always returns 0.
 */
int nicvf_stop(struct net_device *netdev)
{
	int irq, qidx;
	struct nicvf *nic = netdev_priv(netdev);
	struct queue_set *qs = nic->qs;
	struct nicvf_cq_poll *cq_poll = NULL;
	union nic_mbx mbx = {};

	/* wait till all queued set_rx_mode tasks completes */
	if (nic->nicvf_rx_mode_wq) {
		/* link_change_work re-queues itself, so cancel it first */
		cancel_delayed_work_sync(&nic->link_change_work);
		drain_workqueue(nic->nicvf_rx_mode_wq);
	}

	mbx.msg.msg = NIC_MBOX_MSG_SHUTDOWN;
	nicvf_send_msg_to_pf(nic, &mbx);

	netif_carrier_off(netdev);
	netif_tx_stop_all_queues(nic->netdev);
	nic->link_up = false;

	/* Teardown secondary qsets first */
	if (!nic->sqs_mode) {
		for (qidx = 0; qidx < nic->sqs_count; qidx++) {
			if (!nic->snicvf[qidx])
				continue;
			nicvf_stop(nic->snicvf[qidx]->netdev);
			nic->snicvf[qidx] = NULL;
		}
	}

	/* Disable RBDR & QS error interrupts */
	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
		nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx);
		nicvf_clear_intr(nic, NICVF_INTR_RBDR, qidx);
	}
	nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0);
	nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0);

	/* Wait for pending IRQ handlers to finish */
	for (irq = 0; irq < nic->num_vec; irq++)
		synchronize_irq(pci_irq_vector(nic->pdev, irq));

	/* No handler can schedule these any more; wait for them to finish */
	tasklet_kill(&nic->rbdr_task);
	tasklet_kill(&nic->qs_err_task);
	if (nic->rb_work_scheduled)
		cancel_delayed_work_sync(&nic->rbdr_work);

	for (qidx = 0; qidx < nic->qs->cq_cnt; qidx++) {
		cq_poll = nic->napi[qidx];
		if (!cq_poll)
			continue;
		napi_synchronize(&cq_poll->napi);
		/* CQ intr is enabled while napi_complete,
		 * so disable it now
		 */
		nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
		nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx);
		napi_disable(&cq_poll->napi);
		netif_napi_del(&cq_poll->napi);
	}

	netif_tx_disable(netdev);

	/* Reset per-queue TX accounting state */
	for (qidx = 0; qidx < netdev->num_tx_queues; qidx++)
		netdev_tx_reset_queue(netdev_get_tx_queue(netdev, qidx));

	/* Free resources */
	nicvf_config_data_transfer(nic, false);

	/* Disable HW Qset */
	nicvf_qset_config(nic, false);

	/* disable mailbox interrupt */
	nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);

	nicvf_unregister_interrupts(nic);

	/* Poll contexts were used as IRQ dev_id, so free them only now */
	nicvf_free_cq_poll(nic);

	/* Free any pending SKB saved to receive timestamp */
	if (nic->ptp_skb) {
		dev_kfree_skb_any(nic->ptp_skb);
		nic->ptp_skb = NULL;
	}

	/* Clear multiqset info */
	nic->pnicvf = nic;

	return 0;
}
   1415
   1416static int nicvf_config_hw_rx_tstamp(struct nicvf *nic, bool enable)
   1417{
   1418	union nic_mbx mbx = {};
   1419
   1420	mbx.ptp.msg = NIC_MBOX_MSG_PTP_CFG;
   1421	mbx.ptp.enable = enable;
   1422
   1423	return nicvf_send_msg_to_pf(nic, &mbx);
   1424}
   1425
   1426static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu)
   1427{
   1428	union nic_mbx mbx = {};
   1429
   1430	mbx.frs.msg = NIC_MBOX_MSG_SET_MAX_FRS;
   1431	mbx.frs.max_frs = mtu;
   1432	mbx.frs.vf_id = nic->vf_id;
   1433
   1434	return nicvf_send_msg_to_pf(nic, &mbx);
   1435}
   1436
   1437static void nicvf_link_status_check_task(struct work_struct *work_arg)
   1438{
   1439	struct nicvf *nic = container_of(work_arg,
   1440					 struct nicvf,
   1441					 link_change_work.work);
   1442	union nic_mbx mbx = {};
   1443	mbx.msg.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE;
   1444	nicvf_send_msg_to_pf(nic, &mbx);
   1445	queue_delayed_work(nic->nicvf_rx_mode_wq,
   1446			   &nic->link_change_work, 2 * HZ);
   1447}
   1448
/* Bring the interface up.
 *
 * Registers the mailbox interrupt, allocates and enables one NAPI context
 * per completion queue, programs MAC/CPI/RSS/MTU through the PF,
 * registers the remaining interrupts, initializes the queues and finally
 * enables interrupts and starts the periodic link status work.
 *
 * Returns 0 on success or a negative errno; on failure the NAPI contexts
 * are torn down, and on the later failure paths (label "cleanup")
 * interrupts are unregistered as well.
 */
int nicvf_open(struct net_device *netdev)
{
	int cpu, err, qidx;
	struct nicvf *nic = netdev_priv(netdev);
	struct queue_set *qs = nic->qs;
	struct nicvf_cq_poll *cq_poll = NULL;

	/* wait till all queued set_rx_mode tasks completes if any */
	if (nic->nicvf_rx_mode_wq)
		drain_workqueue(nic->nicvf_rx_mode_wq);

	netif_carrier_off(netdev);

	err = nicvf_register_misc_interrupt(nic);
	if (err)
		return err;

	/* Register NAPI handler for processing CQEs */
	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
		cq_poll = kzalloc(sizeof(*cq_poll), GFP_KERNEL);
		if (!cq_poll) {
			err = -ENOMEM;
			goto napi_del;
		}
		cq_poll->cq_idx = qidx;
		cq_poll->nicvf = nic;
		netif_napi_add(netdev, &cq_poll->napi, nicvf_poll,
			       NAPI_POLL_WEIGHT);
		napi_enable(&cq_poll->napi);
		nic->napi[qidx] = cq_poll;
	}

	/* Check if we got MAC address from PF or else generate a random MAC */
	if (!nic->sqs_mode && is_zero_ether_addr(netdev->dev_addr)) {
		eth_hw_addr_random(netdev);
		nicvf_hw_set_mac_addr(nic, netdev);
	}

	/* Apply a MAC address change that was requested while MSI-X was
	 * not yet enabled (see nicvf_set_mac_address())
	 */
	if (nic->set_mac_pending) {
		nic->set_mac_pending = false;
		nicvf_hw_set_mac_addr(nic, netdev);
	}

	/* Init tasklet for handling Qset err interrupt */
	tasklet_setup(&nic->qs_err_task, nicvf_handle_qs_err);

	/* Init RBDR tasklet which will refill RBDR */
	tasklet_setup(&nic->rbdr_task, nicvf_rbdr_task);
	INIT_DELAYED_WORK(&nic->rbdr_work, nicvf_rbdr_work);

	/* Configure CPI algorithm */
	nic->cpi_alg = cpi_alg;
	if (!nic->sqs_mode)
		nicvf_config_cpi(nic);

	nicvf_request_sqs(nic);
	if (nic->sqs_mode)
		nicvf_get_primary_vf_struct(nic);

	/* Configure PTP timestamp */
	if (nic->ptp_clock)
		nicvf_config_hw_rx_tstamp(nic, nic->hw_rx_tstamp);
	atomic_set(&nic->tx_ptp_skbs, 0);
	nic->ptp_skb = NULL;

	/* Configure receive side scaling and MTU */
	if (!nic->sqs_mode) {
		nicvf_rss_init(nic);
		err = nicvf_update_hw_max_frs(nic, netdev->mtu);
		if (err)
			goto cleanup;

		/* Clear percpu stats */
		for_each_possible_cpu(cpu)
			memset(per_cpu_ptr(nic->drv_stats, cpu), 0,
			       sizeof(struct nicvf_drv_stats));
	}

	err = nicvf_register_interrupts(nic);
	if (err)
		goto cleanup;

	/* Initialize the queues */
	err = nicvf_init_resources(nic);
	if (err)
		goto cleanup;

	/* Make sure queue initialization is written */
	wmb();

	/* Write all ones to the VF interrupt register before enabling the
	 * individual interrupt sources below
	 */
	nicvf_reg_write(nic, NIC_VF_INT, -1);
	/* Enable Qset err interrupt */
	nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0);

	/* Enable completion queue interrupt */
	for (qidx = 0; qidx < qs->cq_cnt; qidx++)
		nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx);

	/* Enable RBDR threshold interrupt */
	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
		nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx);

	/* Send VF config done msg to PF */
	nicvf_send_cfg_done(nic);

	/* Start the self-rearming link status poll */
	if (nic->nicvf_rx_mode_wq) {
		INIT_DELAYED_WORK(&nic->link_change_work,
				  nicvf_link_status_check_task);
		queue_delayed_work(nic->nicvf_rx_mode_wq,
				   &nic->link_change_work, 0);
	}

	return 0;
cleanup:
	/* Failure after the tasklets were set up: drop all interrupts
	 * (mailbox included) and make sure no tasklet is left running
	 */
	nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
	nicvf_unregister_interrupts(nic);
	tasklet_kill(&nic->qs_err_task);
	tasklet_kill(&nic->rbdr_task);
napi_del:
	/* Tear down whichever NAPI contexts were created so far */
	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
		cq_poll = nic->napi[qidx];
		if (!cq_poll)
			continue;
		napi_disable(&cq_poll->napi);
		netif_napi_del(&cq_poll->napi);
	}
	nicvf_free_cq_poll(nic);
	return err;
}
   1578
   1579static int nicvf_change_mtu(struct net_device *netdev, int new_mtu)
   1580{
   1581	struct nicvf *nic = netdev_priv(netdev);
   1582	int orig_mtu = netdev->mtu;
   1583
   1584	/* For now just support only the usual MTU sized frames,
   1585	 * plus some headroom for VLAN, QinQ.
   1586	 */
   1587	if (nic->xdp_prog && new_mtu > MAX_XDP_MTU) {
   1588		netdev_warn(netdev, "Jumbo frames not yet supported with XDP, current MTU %d.\n",
   1589			    netdev->mtu);
   1590		return -EINVAL;
   1591	}
   1592
   1593	netdev->mtu = new_mtu;
   1594
   1595	if (!netif_running(netdev))
   1596		return 0;
   1597
   1598	if (nicvf_update_hw_max_frs(nic, new_mtu)) {
   1599		netdev->mtu = orig_mtu;
   1600		return -EINVAL;
   1601	}
   1602
   1603	return 0;
   1604}
   1605
   1606static int nicvf_set_mac_address(struct net_device *netdev, void *p)
   1607{
   1608	struct sockaddr *addr = p;
   1609	struct nicvf *nic = netdev_priv(netdev);
   1610
   1611	if (!is_valid_ether_addr(addr->sa_data))
   1612		return -EADDRNOTAVAIL;
   1613
   1614	eth_hw_addr_set(netdev, addr->sa_data);
   1615
   1616	if (nic->pdev->msix_enabled) {
   1617		if (nicvf_hw_set_mac_addr(nic, netdev))
   1618			return -EBUSY;
   1619	} else {
   1620		nic->set_mac_pending = true;
   1621	}
   1622
   1623	return 0;
   1624}
   1625
   1626void nicvf_update_lmac_stats(struct nicvf *nic)
   1627{
   1628	int stat = 0;
   1629	union nic_mbx mbx = {};
   1630
   1631	if (!netif_running(nic->netdev))
   1632		return;
   1633
   1634	mbx.bgx_stats.msg = NIC_MBOX_MSG_BGX_STATS;
   1635	mbx.bgx_stats.vf_id = nic->vf_id;
   1636	/* Rx stats */
   1637	mbx.bgx_stats.rx = 1;
   1638	while (stat < BGX_RX_STATS_COUNT) {
   1639		mbx.bgx_stats.idx = stat;
   1640		if (nicvf_send_msg_to_pf(nic, &mbx))
   1641			return;
   1642		stat++;
   1643	}
   1644
   1645	stat = 0;
   1646
   1647	/* Tx stats */
   1648	mbx.bgx_stats.rx = 0;
   1649	while (stat < BGX_TX_STATS_COUNT) {
   1650		mbx.bgx_stats.idx = stat;
   1651		if (nicvf_send_msg_to_pf(nic, &mbx))
   1652			return;
   1653		stat++;
   1654	}
   1655}
   1656
/* Refresh nic->hw_stats from the VNIC RX/TX statistics registers, derive
 * the aggregate frame and drop counters, and update the per-RQ and
 * per-SQ statistics of this Qset.
 */
void nicvf_update_stats(struct nicvf *nic)
{
	int qidx, cpu;
	u64 tmp_stats = 0;
	struct nicvf_hw_stats *stats = &nic->hw_stats;
	struct nicvf_drv_stats *drv_stats;
	struct queue_set *qs = nic->qs;

/* Each statistic is addressed as the base register OR'ed with (index << 3).
 * Both macros stay defined for the rest of the file.
 */
#define GET_RX_STATS(reg) \
	nicvf_reg_read(nic, NIC_VNIC_RX_STAT_0_13 | (reg << 3))
#define GET_TX_STATS(reg) \
	nicvf_reg_read(nic, NIC_VNIC_TX_STAT_0_4 | (reg << 3))

	stats->rx_bytes = GET_RX_STATS(RX_OCTS);
	stats->rx_ucast_frames = GET_RX_STATS(RX_UCAST);
	stats->rx_bcast_frames = GET_RX_STATS(RX_BCAST);
	stats->rx_mcast_frames = GET_RX_STATS(RX_MCAST);
	stats->rx_fcs_errors = GET_RX_STATS(RX_FCS);
	stats->rx_l2_errors = GET_RX_STATS(RX_L2ERR);
	stats->rx_drop_red = GET_RX_STATS(RX_RED);
	stats->rx_drop_red_bytes = GET_RX_STATS(RX_RED_OCTS);
	stats->rx_drop_overrun = GET_RX_STATS(RX_ORUN);
	stats->rx_drop_overrun_bytes = GET_RX_STATS(RX_ORUN_OCTS);
	stats->rx_drop_bcast = GET_RX_STATS(RX_DRP_BCAST);
	stats->rx_drop_mcast = GET_RX_STATS(RX_DRP_MCAST);
	stats->rx_drop_l3_bcast = GET_RX_STATS(RX_DRP_L3BCAST);
	stats->rx_drop_l3_mcast = GET_RX_STATS(RX_DRP_L3MCAST);

	stats->tx_bytes = GET_TX_STATS(TX_OCTS);
	stats->tx_ucast_frames = GET_TX_STATS(TX_UCAST);
	stats->tx_bcast_frames = GET_TX_STATS(TX_BCAST);
	stats->tx_mcast_frames = GET_TX_STATS(TX_MCAST);
	stats->tx_drops = GET_TX_STATS(TX_DROP);

	/* On T88 pass 2.0, the dummy SQE added for TSO notification
	 * via CQE has 'dont_send' set. Hence HW drops the pkt
	 * pointed by dummy SQE and results in tx_drops counter being
	 * incremented. Subtracting it from tx_tso counter will give
	 * exact tx_drops counter.
	 *
	 * NOTE(review): the code computes (sum of tx_tso) - tx_drops on
	 * unsigned 64-bit values; this wraps if the hardware drop count
	 * exceeds the TSO count - confirm the operand order is intended.
	 */
	if (nic->t88 && nic->hw_tso) {
		for_each_possible_cpu(cpu) {
			drv_stats = per_cpu_ptr(nic->drv_stats, cpu);
			tmp_stats += drv_stats->tx_tso;
		}
		stats->tx_drops = tmp_stats - stats->tx_drops;
	}
	stats->tx_frames = stats->tx_ucast_frames +
			   stats->tx_bcast_frames +
			   stats->tx_mcast_frames;
	stats->rx_frames = stats->rx_ucast_frames +
			   stats->rx_bcast_frames +
			   stats->rx_mcast_frames;
	stats->rx_drops = stats->rx_drop_red +
			  stats->rx_drop_overrun;

	/* Update RQ and SQ stats */
	for (qidx = 0; qidx < qs->rq_cnt; qidx++)
		nicvf_update_rq_stats(nic, qidx);
	for (qidx = 0; qidx < qs->sq_cnt; qidx++)
		nicvf_update_sq_stats(nic, qidx);
}
   1719
   1720static void nicvf_get_stats64(struct net_device *netdev,
   1721			      struct rtnl_link_stats64 *stats)
   1722{
   1723	struct nicvf *nic = netdev_priv(netdev);
   1724	struct nicvf_hw_stats *hw_stats = &nic->hw_stats;
   1725
   1726	nicvf_update_stats(nic);
   1727
   1728	stats->rx_bytes = hw_stats->rx_bytes;
   1729	stats->rx_packets = hw_stats->rx_frames;
   1730	stats->rx_dropped = hw_stats->rx_drops;
   1731	stats->multicast = hw_stats->rx_mcast_frames;
   1732
   1733	stats->tx_bytes = hw_stats->tx_bytes;
   1734	stats->tx_packets = hw_stats->tx_frames;
   1735	stats->tx_dropped = hw_stats->tx_drops;
   1736
   1737}
   1738
   1739static void nicvf_tx_timeout(struct net_device *dev, unsigned int txqueue)
   1740{
   1741	struct nicvf *nic = netdev_priv(dev);
   1742
   1743	netif_warn(nic, tx_err, dev, "Transmit timed out, resetting\n");
   1744
   1745	this_cpu_inc(nic->drv_stats->tx_timeout);
   1746	schedule_work(&nic->reset_task);
   1747}
   1748
   1749static void nicvf_reset_task(struct work_struct *work)
   1750{
   1751	struct nicvf *nic;
   1752
   1753	nic = container_of(work, struct nicvf, reset_task);
   1754
   1755	if (!netif_running(nic->netdev))
   1756		return;
   1757
   1758	nicvf_stop(nic->netdev);
   1759	nicvf_open(nic->netdev);
   1760	netif_trans_update(nic->netdev);
   1761}
   1762
   1763static int nicvf_config_loopback(struct nicvf *nic,
   1764				 netdev_features_t features)
   1765{
   1766	union nic_mbx mbx = {};
   1767
   1768	mbx.lbk.msg = NIC_MBOX_MSG_LOOPBACK;
   1769	mbx.lbk.vf_id = nic->vf_id;
   1770	mbx.lbk.enable = (features & NETIF_F_LOOPBACK) != 0;
   1771
   1772	return nicvf_send_msg_to_pf(nic, &mbx);
   1773}
   1774
   1775static netdev_features_t nicvf_fix_features(struct net_device *netdev,
   1776					    netdev_features_t features)
   1777{
   1778	struct nicvf *nic = netdev_priv(netdev);
   1779
   1780	if ((features & NETIF_F_LOOPBACK) &&
   1781	    netif_running(netdev) && !nic->loopback_supported)
   1782		features &= ~NETIF_F_LOOPBACK;
   1783
   1784	return features;
   1785}
   1786
   1787static int nicvf_set_features(struct net_device *netdev,
   1788			      netdev_features_t features)
   1789{
   1790	struct nicvf *nic = netdev_priv(netdev);
   1791	netdev_features_t changed = features ^ netdev->features;
   1792
   1793	if (changed & NETIF_F_HW_VLAN_CTAG_RX)
   1794		nicvf_config_vlan_stripping(nic, features);
   1795
   1796	if ((changed & NETIF_F_LOOPBACK) && netif_running(netdev))
   1797		return nicvf_config_loopback(nic, features);
   1798
   1799	return 0;
   1800}
   1801
   1802static void nicvf_set_xdp_queues(struct nicvf *nic, bool bpf_attached)
   1803{
   1804	u8 cq_count, txq_count;
   1805
   1806	/* Set XDP Tx queue count same as Rx queue count */
   1807	if (!bpf_attached)
   1808		nic->xdp_tx_queues = 0;
   1809	else
   1810		nic->xdp_tx_queues = nic->rx_queues;
   1811
   1812	/* If queue count > MAX_CMP_QUEUES_PER_QS, then additional qsets
   1813	 * needs to be allocated, check how many.
   1814	 */
   1815	txq_count = nic->xdp_tx_queues + nic->tx_queues;
   1816	cq_count = max(nic->rx_queues, txq_count);
   1817	if (cq_count > MAX_CMP_QUEUES_PER_QS) {
   1818		nic->sqs_count = roundup(cq_count, MAX_CMP_QUEUES_PER_QS);
   1819		nic->sqs_count = (nic->sqs_count / MAX_CMP_QUEUES_PER_QS) - 1;
   1820	} else {
   1821		nic->sqs_count = 0;
   1822	}
   1823
   1824	/* Set primary Qset's resources */
   1825	nic->qs->rq_cnt = min_t(u8, nic->rx_queues, MAX_RCV_QUEUES_PER_QS);
   1826	nic->qs->sq_cnt = min_t(u8, txq_count, MAX_SND_QUEUES_PER_QS);
   1827	nic->qs->cq_cnt = max_t(u8, nic->qs->rq_cnt, nic->qs->sq_cnt);
   1828
   1829	/* Update stack */
   1830	nicvf_set_real_num_queues(nic->netdev, nic->tx_queues, nic->rx_queues);
   1831}
   1832
/* Attach @prog as the XDP program, or detach the current one when @prog
 * is NULL.
 *
 * A running interface is stopped, the program pointer is swapped, queue
 * counts are recomputed and the interface is opened again.
 *
 * Returns -EOPNOTSUPP if a program is given while the MTU exceeds
 * MAX_XDP_MTU, -ENOMEM if RX + TX queues exceed the maximum, 0 otherwise.
 */
static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
{
	struct net_device *dev = nic->netdev;
	bool if_up = netif_running(nic->netdev);
	struct bpf_prog *old_prog;
	bool bpf_attached = false;
	int ret = 0;

	/* For now just support only the usual MTU sized frames,
	 * plus some headroom for VLAN, QinQ.
	 */
	if (prog && dev->mtu > MAX_XDP_MTU) {
		netdev_warn(dev, "Jumbo frames not yet supported with XDP, current MTU %d.\n",
			    dev->mtu);
		return -EOPNOTSUPP;
	}

	/* ALL SQs attached to CQs i.e same as RQs, are treated as
	 * XDP Tx queues and more Tx queues are allocated for
	 * network stack to send pkts out.
	 *
	 * No of Tx queues are either same as Rx queues or whatever
	 * is left in max no of queues possible.
	 */
	/* NOTE(review): this check is not conditional on prog, so it also
	 * applies when detaching - confirm that is intended.
	 */
	if ((nic->rx_queues + nic->tx_queues) > nic->max_queues) {
		netdev_warn(dev,
			    "Failed to attach BPF prog, RXQs + TXQs > Max %d\n",
			    nic->max_queues);
		return -ENOMEM;
	}

	if (if_up)
		nicvf_stop(nic->netdev);

	old_prog = xchg(&nic->xdp_prog, prog);
	/* Detach old prog, if any */
	if (old_prog)
		bpf_prog_put(old_prog);

	if (nic->xdp_prog) {
		/* Attach BPF program */
		/* Take rx_queues - 1 additional references on the program */
		bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1);
		bpf_attached = true;
	}

	/* Calculate Tx queues needed for XDP and network stack */
	nicvf_set_xdp_queues(nic, bpf_attached);

	if (if_up) {
		/* Reinitialize interface, clean slate */
		/* The result of nicvf_open() is not checked here */
		nicvf_open(nic->netdev);
		netif_trans_update(nic->netdev);
	}

	/* ret is never modified after initialization, so this is 0 */
	return ret;
}
   1889
   1890static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
   1891{
   1892	struct nicvf *nic = netdev_priv(netdev);
   1893
   1894	/* To avoid checks while retrieving buffer address from CQE_RX,
   1895	 * do not support XDP for T88 pass1.x silicons which are anyway
   1896	 * not in use widely.
   1897	 */
   1898	if (pass1_silicon(nic->pdev))
   1899		return -EOPNOTSUPP;
   1900
   1901	switch (xdp->command) {
   1902	case XDP_SETUP_PROG:
   1903		return nicvf_xdp_setup(nic, xdp->prog);
   1904	default:
   1905		return -EINVAL;
   1906	}
   1907}
   1908
   1909static int nicvf_config_hwtstamp(struct net_device *netdev, struct ifreq *ifr)
   1910{
   1911	struct hwtstamp_config config;
   1912	struct nicvf *nic = netdev_priv(netdev);
   1913
   1914	if (!nic->ptp_clock)
   1915		return -ENODEV;
   1916
   1917	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
   1918		return -EFAULT;
   1919
   1920	switch (config.tx_type) {
   1921	case HWTSTAMP_TX_OFF:
   1922	case HWTSTAMP_TX_ON:
   1923		break;
   1924	default:
   1925		return -ERANGE;
   1926	}
   1927
   1928	switch (config.rx_filter) {
   1929	case HWTSTAMP_FILTER_NONE:
   1930		nic->hw_rx_tstamp = false;
   1931		break;
   1932	case HWTSTAMP_FILTER_ALL:
   1933	case HWTSTAMP_FILTER_SOME:
   1934	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
   1935	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
   1936	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
   1937	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
   1938	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
   1939	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
   1940	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
   1941	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
   1942	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
   1943	case HWTSTAMP_FILTER_PTP_V2_EVENT:
   1944	case HWTSTAMP_FILTER_PTP_V2_SYNC:
   1945	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
   1946		nic->hw_rx_tstamp = true;
   1947		config.rx_filter = HWTSTAMP_FILTER_ALL;
   1948		break;
   1949	default:
   1950		return -ERANGE;
   1951	}
   1952
   1953	if (netif_running(netdev))
   1954		nicvf_config_hw_rx_tstamp(nic, nic->hw_rx_tstamp);
   1955
   1956	if (copy_to_user(ifr->ifr_data, &config, sizeof(config)))
   1957		return -EFAULT;
   1958
   1959	return 0;
   1960}
   1961
   1962static int nicvf_ioctl(struct net_device *netdev, struct ifreq *req, int cmd)
   1963{
   1964	switch (cmd) {
   1965	case SIOCSHWTSTAMP:
   1966		return nicvf_config_hwtstamp(netdev, req);
   1967	default:
   1968		return -EOPNOTSUPP;
   1969	}
   1970}
   1971
   1972static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs,
   1973				     struct nicvf *nic)
   1974{
   1975	union nic_mbx mbx = {};
   1976	int idx;
   1977
   1978	/* From the inside of VM code flow we have only 128 bits memory
   1979	 * available to send message to host's PF, so send all mc addrs
   1980	 * one by one, starting from flush command in case if kernel
   1981	 * requests to configure specific MAC filtering
   1982	 */
   1983
   1984	/* flush DMAC filters and reset RX mode */
   1985	mbx.xcast.msg = NIC_MBOX_MSG_RESET_XCAST;
   1986	if (nicvf_send_msg_to_pf(nic, &mbx) < 0)
   1987		goto free_mc;
   1988
   1989	if (mode & BGX_XCAST_MCAST_FILTER) {
   1990		/* once enabling filtering, we need to signal to PF to add
   1991		 * its' own LMAC to the filter to accept packets for it.
   1992		 */
   1993		mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
   1994		mbx.xcast.mac = 0;
   1995		if (nicvf_send_msg_to_pf(nic, &mbx) < 0)
   1996			goto free_mc;
   1997	}
   1998
   1999	/* check if we have any specific MACs to be added to PF DMAC filter */
   2000	if (mc_addrs) {
   2001		/* now go through kernel list of MACs and add them one by one */
   2002		for (idx = 0; idx < mc_addrs->count; idx++) {
   2003			mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
   2004			mbx.xcast.mac = mc_addrs->mc[idx];
   2005			if (nicvf_send_msg_to_pf(nic, &mbx) < 0)
   2006				goto free_mc;
   2007		}
   2008	}
   2009
   2010	/* and finally set rx mode for PF accordingly */
   2011	mbx.xcast.msg = NIC_MBOX_MSG_SET_XCAST;
   2012	mbx.xcast.mode = mode;
   2013
   2014	nicvf_send_msg_to_pf(nic, &mbx);
   2015free_mc:
   2016	kfree(mc_addrs);
   2017}
   2018
   2019static void nicvf_set_rx_mode_task(struct work_struct *work_arg)
   2020{
   2021	struct nicvf_work *vf_work = container_of(work_arg, struct nicvf_work,
   2022						  work);
   2023	struct nicvf *nic = container_of(vf_work, struct nicvf, rx_mode_work);
   2024	u8 mode;
   2025	struct xcast_addr_list *mc;
   2026
   2027	/* Save message data locally to prevent them from
   2028	 * being overwritten by next ndo_set_rx_mode call().
   2029	 */
   2030	spin_lock_bh(&nic->rx_mode_wq_lock);
   2031	mode = vf_work->mode;
   2032	mc = vf_work->mc;
   2033	vf_work->mc = NULL;
   2034	spin_unlock_bh(&nic->rx_mode_wq_lock);
   2035
   2036	__nicvf_set_rx_mode_task(mode, mc, nic);
   2037}
   2038
   2039static void nicvf_set_rx_mode(struct net_device *netdev)
   2040{
   2041	struct nicvf *nic = netdev_priv(netdev);
   2042	struct netdev_hw_addr *ha;
   2043	struct xcast_addr_list *mc_list = NULL;
   2044	u8 mode = 0;
   2045
   2046	if (netdev->flags & IFF_PROMISC) {
   2047		mode = BGX_XCAST_BCAST_ACCEPT | BGX_XCAST_MCAST_ACCEPT;
   2048	} else {
   2049		if (netdev->flags & IFF_BROADCAST)
   2050			mode |= BGX_XCAST_BCAST_ACCEPT;
   2051
   2052		if (netdev->flags & IFF_ALLMULTI) {
   2053			mode |= BGX_XCAST_MCAST_ACCEPT;
   2054		} else if (netdev->flags & IFF_MULTICAST) {
   2055			mode |= BGX_XCAST_MCAST_FILTER;
   2056			/* here we need to copy mc addrs */
   2057			if (netdev_mc_count(netdev)) {
   2058				mc_list = kmalloc(struct_size(mc_list, mc,
   2059							      netdev_mc_count(netdev)),
   2060						  GFP_ATOMIC);
   2061				if (unlikely(!mc_list))
   2062					return;
   2063				mc_list->count = 0;
   2064				netdev_hw_addr_list_for_each(ha, &netdev->mc) {
   2065					mc_list->mc[mc_list->count] =
   2066						ether_addr_to_u64(ha->addr);
   2067					mc_list->count++;
   2068				}
   2069			}
   2070		}
   2071	}
   2072	spin_lock(&nic->rx_mode_wq_lock);
   2073	kfree(nic->rx_mode_work.mc);
   2074	nic->rx_mode_work.mc = mc_list;
   2075	nic->rx_mode_work.mode = mode;
   2076	queue_work(nic->nicvf_rx_mode_wq, &nic->rx_mode_work.work);
   2077	spin_unlock(&nic->rx_mode_wq_lock);
   2078}
   2079
   2080static const struct net_device_ops nicvf_netdev_ops = {
   2081	.ndo_open		= nicvf_open,
   2082	.ndo_stop		= nicvf_stop,
   2083	.ndo_start_xmit		= nicvf_xmit,
   2084	.ndo_change_mtu		= nicvf_change_mtu,
   2085	.ndo_set_mac_address	= nicvf_set_mac_address,
   2086	.ndo_get_stats64	= nicvf_get_stats64,
   2087	.ndo_tx_timeout         = nicvf_tx_timeout,
   2088	.ndo_fix_features       = nicvf_fix_features,
   2089	.ndo_set_features       = nicvf_set_features,
   2090	.ndo_bpf		= nicvf_xdp,
   2091	.ndo_eth_ioctl           = nicvf_ioctl,
   2092	.ndo_set_rx_mode        = nicvf_set_rx_mode,
   2093};
   2094
   2095static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
   2096{
   2097	struct device *dev = &pdev->dev;
   2098	struct net_device *netdev;
   2099	struct nicvf *nic;
   2100	int    err, qcount;
   2101	u16    sdevid;
   2102	struct cavium_ptp *ptp_clock;
   2103
   2104	ptp_clock = cavium_ptp_get();
   2105	if (IS_ERR(ptp_clock)) {
   2106		if (PTR_ERR(ptp_clock) == -ENODEV)
   2107			/* In virtualized environment we proceed without ptp */
   2108			ptp_clock = NULL;
   2109		else
   2110			return PTR_ERR(ptp_clock);
   2111	}
   2112
   2113	err = pci_enable_device(pdev);
   2114	if (err)
   2115		return dev_err_probe(dev, err, "Failed to enable PCI device\n");
   2116
   2117	err = pci_request_regions(pdev, DRV_NAME);
   2118	if (err) {
   2119		dev_err(dev, "PCI request regions failed 0x%x\n", err);
   2120		goto err_disable_device;
   2121	}
   2122
   2123	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
   2124	if (err) {
   2125		dev_err(dev, "Unable to get usable DMA configuration\n");
   2126		goto err_release_regions;
   2127	}
   2128
   2129	qcount = netif_get_num_default_rss_queues();
   2130
   2131	/* Restrict multiqset support only for host bound VFs */
   2132	if (pdev->is_virtfn) {
   2133		/* Set max number of queues per VF */
   2134		qcount = min_t(int, num_online_cpus(),
   2135			       (MAX_SQS_PER_VF + 1) * MAX_CMP_QUEUES_PER_QS);
   2136	}
   2137
   2138	netdev = alloc_etherdev_mqs(sizeof(struct nicvf), qcount, qcount);
   2139	if (!netdev) {
   2140		err = -ENOMEM;
   2141		goto err_release_regions;
   2142	}
   2143
   2144	pci_set_drvdata(pdev, netdev);
   2145
   2146	SET_NETDEV_DEV(netdev, &pdev->dev);
   2147
   2148	nic = netdev_priv(netdev);
   2149	nic->netdev = netdev;
   2150	nic->pdev = pdev;
   2151	nic->pnicvf = nic;
   2152	nic->max_queues = qcount;
   2153	/* If no of CPUs are too low, there won't be any queues left
   2154	 * for XDP_TX, hence double it.
   2155	 */
   2156	if (!nic->t88)
   2157		nic->max_queues *= 2;
   2158	nic->ptp_clock = ptp_clock;
   2159
   2160	/* Initialize mutex that serializes usage of VF's mailbox */
   2161	mutex_init(&nic->rx_mode_mtx);
   2162
   2163	/* MAP VF's configuration registers */
   2164	nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
   2165	if (!nic->reg_base) {
   2166		dev_err(dev, "Cannot map config register space, aborting\n");
   2167		err = -ENOMEM;
   2168		goto err_free_netdev;
   2169	}
   2170
   2171	nic->drv_stats = netdev_alloc_pcpu_stats(struct nicvf_drv_stats);
   2172	if (!nic->drv_stats) {
   2173		err = -ENOMEM;
   2174		goto err_free_netdev;
   2175	}
   2176
   2177	err = nicvf_set_qset_resources(nic);
   2178	if (err)
   2179		goto err_free_netdev;
   2180
   2181	/* Check if PF is alive and get MAC address for this VF */
   2182	err = nicvf_register_misc_interrupt(nic);
   2183	if (err)
   2184		goto err_free_netdev;
   2185
   2186	nicvf_send_vf_struct(nic);
   2187
   2188	if (!pass1_silicon(nic->pdev))
   2189		nic->hw_tso = true;
   2190
   2191	/* Get iommu domain for iova to physical addr conversion */
   2192	nic->iommu_domain = iommu_get_domain_for_dev(dev);
   2193
   2194	pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid);
   2195	if (sdevid == 0xA134)
   2196		nic->t88 = true;
   2197
   2198	/* Check if this VF is in QS only mode */
   2199	if (nic->sqs_mode)
   2200		return 0;
   2201
   2202	err = nicvf_set_real_num_queues(netdev, nic->tx_queues, nic->rx_queues);
   2203	if (err)
   2204		goto err_unregister_interrupts;
   2205
   2206	netdev->hw_features = (NETIF_F_RXCSUM | NETIF_F_SG |
   2207			       NETIF_F_TSO | NETIF_F_GRO | NETIF_F_TSO6 |
   2208			       NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
   2209			       NETIF_F_HW_VLAN_CTAG_RX);
   2210
   2211	netdev->hw_features |= NETIF_F_RXHASH;
   2212
   2213	netdev->features |= netdev->hw_features;
   2214	netdev->hw_features |= NETIF_F_LOOPBACK;
   2215
   2216	netdev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM |
   2217				NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6;
   2218
   2219	netdev->netdev_ops = &nicvf_netdev_ops;
   2220	netdev->watchdog_timeo = NICVF_TX_TIMEOUT;
   2221
   2222	/* MTU range: 64 - 9200 */
   2223	netdev->min_mtu = NIC_HW_MIN_FRS;
   2224	netdev->max_mtu = NIC_HW_MAX_FRS;
   2225
   2226	INIT_WORK(&nic->reset_task, nicvf_reset_task);
   2227
   2228	nic->nicvf_rx_mode_wq = alloc_ordered_workqueue("nicvf_rx_mode_wq_VF%d",
   2229							WQ_MEM_RECLAIM,
   2230							nic->vf_id);
   2231	if (!nic->nicvf_rx_mode_wq) {
   2232		err = -ENOMEM;
   2233		dev_err(dev, "Failed to allocate work queue\n");
   2234		goto err_unregister_interrupts;
   2235	}
   2236
   2237	INIT_WORK(&nic->rx_mode_work.work, nicvf_set_rx_mode_task);
   2238	spin_lock_init(&nic->rx_mode_wq_lock);
   2239
   2240	err = register_netdev(netdev);
   2241	if (err) {
   2242		dev_err(dev, "Failed to register netdevice\n");
   2243		goto err_unregister_interrupts;
   2244	}
   2245
   2246	nic->msg_enable = debug;
   2247
   2248	nicvf_set_ethtool_ops(netdev);
   2249
   2250	return 0;
   2251
   2252err_unregister_interrupts:
   2253	nicvf_unregister_interrupts(nic);
   2254err_free_netdev:
   2255	pci_set_drvdata(pdev, NULL);
   2256	if (nic->drv_stats)
   2257		free_percpu(nic->drv_stats);
   2258	free_netdev(netdev);
   2259err_release_regions:
   2260	pci_release_regions(pdev);
   2261err_disable_device:
   2262	pci_disable_device(pdev);
   2263	return err;
   2264}
   2265
   2266static void nicvf_remove(struct pci_dev *pdev)
   2267{
   2268	struct net_device *netdev = pci_get_drvdata(pdev);
   2269	struct nicvf *nic;
   2270	struct net_device *pnetdev;
   2271
   2272	if (!netdev)
   2273		return;
   2274
   2275	nic = netdev_priv(netdev);
   2276	pnetdev = nic->pnicvf->netdev;
   2277
   2278	/* Check if this Qset is assigned to different VF.
   2279	 * If yes, clean primary and all secondary Qsets.
   2280	 */
   2281	if (pnetdev && (pnetdev->reg_state == NETREG_REGISTERED))
   2282		unregister_netdev(pnetdev);
   2283	if (nic->nicvf_rx_mode_wq) {
   2284		destroy_workqueue(nic->nicvf_rx_mode_wq);
   2285		nic->nicvf_rx_mode_wq = NULL;
   2286	}
   2287	nicvf_unregister_interrupts(nic);
   2288	pci_set_drvdata(pdev, NULL);
   2289	if (nic->drv_stats)
   2290		free_percpu(nic->drv_stats);
   2291	cavium_ptp_put(nic->ptp_clock);
   2292	free_netdev(netdev);
   2293	pci_release_regions(pdev);
   2294	pci_disable_device(pdev);
   2295}
   2296
   2297static void nicvf_shutdown(struct pci_dev *pdev)
   2298{
   2299	nicvf_remove(pdev);
   2300}
   2301
   2302static struct pci_driver nicvf_driver = {
   2303	.name = DRV_NAME,
   2304	.id_table = nicvf_id_table,
   2305	.probe = nicvf_probe,
   2306	.remove = nicvf_remove,
   2307	.shutdown = nicvf_shutdown,
   2308};
   2309
   2310static int __init nicvf_init_module(void)
   2311{
   2312	pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION);
   2313	return pci_register_driver(&nicvf_driver);
   2314}
   2315
   2316static void __exit nicvf_cleanup_module(void)
   2317{
   2318	pci_unregister_driver(&nicvf_driver);
   2319}
   2320
   2321module_init(nicvf_init_module);
   2322module_exit(nicvf_cleanup_module);